diff --git a/core/src/main/java/com/alibaba/druid/sql/dialect/hive/parser/HiveLexer.java b/core/src/main/java/com/alibaba/druid/sql/dialect/hive/parser/HiveLexer.java index e912b09ee6..bc15112ca2 100644 --- a/core/src/main/java/com/alibaba/druid/sql/dialect/hive/parser/HiveLexer.java +++ b/core/src/main/java/com/alibaba/druid/sql/dialect/hive/parser/HiveLexer.java @@ -57,6 +57,7 @@ public class HiveLexer extends Lexer { PrimaryBangBangSupport ) ); + static { Map map = new HashMap<>(); @@ -180,43 +181,13 @@ protected void scanString() { arraycopy(mark + 1, buf, 0, bufPos); hasSpecial = true; } - switch (ch) { - case '0': - putChar('\0'); - break; - case '\'': - putChar('\''); - break; - case '"': - putChar('"'); - break; - case 'b': - putChar('\b'); - break; - case 'n': - putChar('\n'); - break; - case 'r': - putChar('\r'); - break; - case 't': - putChar('\t'); - break; - case '\\': - putChar('\\'); - break; - case 'Z': - putChar((char) 0x1A); // ctrl + Z - break; - case '%': - putChar('%'); - break; - case '_': - putChar('_'); - break; + // only unicode escapes are handled here; all other escapes remain the same case 'u': - if ((features & SQLParserFeature.SupportUnicodeCodePoint.mask) != 0) { + if ((features & SQLParserFeature.KeepUnicodeEscape.mask) != 0) { + putChar('\\'); + putChar('u'); + } else if ((features & SQLParserFeature.SupportUnicodeCodePoint.mask) != 0) { int codePointSize = 0; for (int i = 0; i < 4; i++, codePointSize++) { char c = charAt(pos + 1 + i); @@ -234,6 +205,7 @@ protected void scanString() { } break; default: + putChar('\\'); putChar(ch); break; } diff --git a/core/src/main/java/com/alibaba/druid/sql/dialect/hive/visitor/HiveOutputVisitor.java b/core/src/main/java/com/alibaba/druid/sql/dialect/hive/visitor/HiveOutputVisitor.java index 1be2b7431e..8897e42a91 100644 --- a/core/src/main/java/com/alibaba/druid/sql/dialect/hive/visitor/HiveOutputVisitor.java +++ b/core/src/main/java/com/alibaba/druid/sql/dialect/hive/visitor/HiveOutputVisitor.java @@ -463,7 +463,7 @@ 
public boolean visit(SQLCharExpr x, boolean parameterized) { char ch = text.charAt(i); switch (ch) { case '\\': - buf.append("\\\\"); + buf.append("\\"); break; case '\'': buf.append("\\'"); @@ -521,6 +521,7 @@ protected void printTableOptionsPrefix(SQLCreateTableStatement x) { public boolean visit(HiveCreateTableStatement x) { return visit((SQLCreateTableStatement) x); } + @Override public boolean visit(SQLCreateTableStatement x) { printCreateTable(x, true, true); diff --git a/core/src/main/java/com/alibaba/druid/sql/parser/SQLParserFeature.java b/core/src/main/java/com/alibaba/druid/sql/parser/SQLParserFeature.java index d6ef3a5c10..cb1824cca9 100644 --- a/core/src/main/java/com/alibaba/druid/sql/parser/SQLParserFeature.java +++ b/core/src/main/java/com/alibaba/druid/sql/parser/SQLParserFeature.java @@ -54,7 +54,8 @@ public enum SQLParserFeature { Presto, MySQLSupportStandardComment, - Template; + Template, + KeepUnicodeEscape; SQLParserFeature() { mask = (1 << ordinal()); diff --git a/core/src/test/java/com/alibaba/druid/bvt/sql/hive/HiveRegContainUnicodeTest.java b/core/src/test/java/com/alibaba/druid/bvt/sql/hive/HiveRegContainUnicodeTest.java new file mode 100644 index 0000000000..9bf2c25164 --- /dev/null +++ b/core/src/test/java/com/alibaba/druid/bvt/sql/hive/HiveRegContainUnicodeTest.java @@ -0,0 +1,70 @@ +package com.alibaba.druid.bvt.sql.hive; + +import com.alibaba.druid.DbType; +import com.alibaba.druid.sql.SQLUtils; +import com.alibaba.druid.sql.ast.SQLStatement; +import com.alibaba.druid.sql.parser.SQLParserFeature; +import com.alibaba.druid.sql.parser.SQLParserUtils; +import com.alibaba.druid.sql.parser.SQLStatementParser; +import com.alibaba.druid.sql.visitor.SchemaStatVisitor; +import com.alibaba.druid.util.JdbcConstants; +import junit.framework.TestCase; + +import java.util.List; + +public class HiveRegContainUnicodeTest extends TestCase { + public void test_select() throws Exception { + String sql = "SELECT page_views.* " + + "FROM page_views " + 
+ "WHERE page_views.name REGEXP '[\\u4e00-\\u9fa5]{2,}' and page_views.date >= '2008-03-01'"; + { + SQLStatementParser parser = SQLParserUtils.createSQLStatementParser(sql, DbType.hive); + List<SQLStatement> statementList = parser.parseStatementList(); + String sqlString = SQLUtils.toSQLString(statementList, DbType.hive); + assertEquals("SELECT page_views.*\n" + + "FROM page_views\n" + + "WHERE page_views.name REGEXP '[一-龥]{2,}'\n" + + "\tAND page_views.date >= '2008-03-01'", sqlString); + } + + { + SQLStatementParser parser = SQLParserUtils.createSQLStatementParser(sql, DbType.hive); + parser.config(SQLParserFeature.KeepUnicodeEscape, true); + List<SQLStatement> statementList = parser.parseStatementList(); + String sqlString = SQLUtils.toSQLString(statementList, DbType.hive); + + assertEquals("SELECT page_views.*\n" + + "FROM page_views\n" + + "WHERE page_views.name REGEXP '[\\u4e00-\\u9fa5]{2,}'\n" + + "\tAND page_views.date >= '2008-03-01'", sqlString); + } + + { + String formatted = SQLUtils.format(sql, DbType.hive, null, null, new SQLParserFeature[]{SQLParserFeature.KeepUnicodeEscape}); + + assertEquals("SELECT page_views.*\n" + + "FROM page_views\n" + + "WHERE page_views.name REGEXP '[\\u4e00-\\u9fa5]{2,}'\n" + + "\tAND page_views.date >= '2008-03-01'", formatted); + } + + { + String origin = "select 'asd' regexp '[\\u4e00-\\u9fa5]{2,}[\\\\s|\\.]'"; + SQLStatementParser parser = SQLParserUtils.createSQLStatementParser(origin, DbType.hive); + parser.config(SQLParserFeature.KeepUnicodeEscape, true); + List<SQLStatement> statementList = parser.parseStatementList(); + String format = SQLUtils.toSQLString(statementList, DbType.hive); + assertEquals("SELECT 'asd' REGEXP '[\\u4e00-\\u9fa5]{2,}[\\\\s|\\.]'", format); + } + + { + String origin = "select 'asd' regexp '[\\u4e00-\\u9fa5]{2,}[\\\\s|\\.]'"; + SQLStatementParser parser = SQLParserUtils.createSQLStatementParser(origin, DbType.hive); + List<SQLStatement> statementList = parser.parseStatementList(); + String format = SQLUtils.toSQLString(statementList, DbType.hive); + 
assertEquals("SELECT 'asd' REGEXP '[一-龥]{2,}[\\\\s|\\.]'", format); + } + + + } +}