Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public class HiveLexer extends Lexer {
PrimaryBangBangSupport
)
);

static {
Map<String, Token> map = new HashMap<>();

Expand Down Expand Up @@ -180,43 +181,13 @@ protected void scanString() {
arraycopy(mark + 1, buf, 0, bufPos);
hasSpecial = true;
}

switch (ch) {
case '0':
putChar('\0');
break;
case '\'':
putChar('\'');
break;
case '"':
putChar('"');
break;
case 'b':
putChar('\b');
break;
case 'n':
putChar('\n');
break;
case 'r':
putChar('\r');
break;
case 't':
putChar('\t');
break;
case '\\':
putChar('\\');
break;
case 'Z':
putChar((char) 0x1A); // ctrl + Z
break;
case '%':
putChar('%');
break;
case '_':
putChar('_');
break;
// only unicode escapes are handled here; every other escape sequence remains unchanged
case 'u':
if ((features & SQLParserFeature.SupportUnicodeCodePoint.mask) != 0) {
if ((features & SQLParserFeature.KeepUnicodeEscape.mask) != 0) {
putChar('\\');
putChar('u');
} else if ((features & SQLParserFeature.SupportUnicodeCodePoint.mask) != 0) {
int codePointSize = 0;
for (int i = 0; i < 4; i++, codePointSize++) {
char c = charAt(pos + 1 + i);
Expand All @@ -234,6 +205,7 @@ protected void scanString() {
}
break;
default:
putChar('\\');
putChar(ch);
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ public boolean visit(SQLCharExpr x, boolean parameterized) {
char ch = text.charAt(i);
switch (ch) {
case '\\':
buf.append("\\\\");
buf.append("\\");
break;
case '\'':
buf.append("\\'");
Expand Down Expand Up @@ -521,6 +521,7 @@ protected void printTableOptionsPrefix(SQLCreateTableStatement x) {
public boolean visit(HiveCreateTableStatement x) {
return visit((SQLCreateTableStatement) x);
}

@Override
public boolean visit(SQLCreateTableStatement x) {
printCreateTable(x, true, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ public enum SQLParserFeature {
Presto,
MySQLSupportStandardComment,

Template;
Template,
KeepUnicodeEscape;

SQLParserFeature() {
mask = (1 << ordinal());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.alibaba.druid.bvt.sql.hive;

import com.alibaba.druid.DbType;
import com.alibaba.druid.sql.SQLUtils;
import com.alibaba.druid.sql.ast.SQLStatement;
import com.alibaba.druid.sql.parser.SQLParserFeature;
import com.alibaba.druid.sql.parser.SQLParserUtils;
import com.alibaba.druid.sql.parser.SQLStatementParser;
import com.alibaba.druid.sql.visitor.SchemaStatVisitor;
import com.alibaba.druid.util.JdbcConstants;
import junit.framework.TestCase;

import java.util.List;

/**
 * Round-trips Hive SQL whose string literals contain unicode escape sequences
 * and checks the printed result both with and without
 * {@link SQLParserFeature#KeepUnicodeEscape} enabled.
 */
public class HiveRegContainUnicodeTest extends TestCase {
    /**
     * Runs five round-trip checks in order: two statements, each parsed with the
     * feature switched off and on, plus one pass through {@code SQLUtils.format}.
     */
    public void test_select() throws Exception {
        final String pageViewsSql = "SELECT page_views.* "
                + "FROM page_views "
                + "WHERE page_views.name REGEXP '[\\u4e00-\\u9fa5]{2,}' and page_views.date >= '2008-03-01'";

        // Expected output when the escapes are decoded into the characters they denote.
        final String pageViewsDecoded = "SELECT page_views.*\n"
                + "FROM page_views\n"
                + "WHERE page_views.name REGEXP '[一-龥]{2,}'\n"
                + "\tAND page_views.date >= '2008-03-01'";

        // Expected output when the escapes are kept in their escaped form.
        final String pageViewsEscaped = "SELECT page_views.*\n"
                + "FROM page_views\n"
                + "WHERE page_views.name REGEXP '[\\u4e00-\\u9fa5]{2,}'\n"
                + "\tAND page_views.date >= '2008-03-01'";

        // 1. No feature configured on the parser.
        assertEquals(pageViewsDecoded, parseAndPrint(pageViewsSql, false));

        // 2. KeepUnicodeEscape configured directly on the parser.
        assertEquals(pageViewsEscaped, parseAndPrint(pageViewsSql, true));

        // 3. KeepUnicodeEscape handed to the SQLUtils.format entry point instead.
        String formatted = SQLUtils.format(
                pageViewsSql, DbType.hive, null, null,
                new SQLParserFeature[]{SQLParserFeature.KeepUnicodeEscape});
        assertEquals(pageViewsEscaped, formatted);

        // The second statement mixes unicode escapes with other backslash sequences.
        final String regexpSql = "select 'asd' regexp '[\\u4e00-\\u9fa5]{2,}[\\\\s|\\.]'";

        // 4. Feature on.
        assertEquals(
                "SELECT 'asd' REGEXP '[\\u4e00-\\u9fa5]{2,}[\\\\s|\\.]'",
                parseAndPrint(regexpSql, true));

        // 5. Feature off.
        assertEquals(
                "SELECT 'asd' REGEXP '[一-龥]{2,}[\\\\s|\\.]'",
                parseAndPrint(regexpSql, false));
    }

    /**
     * Parses {@code sql} with a Hive statement parser and prints the resulting
     * statement list back to a Hive SQL string.
     *
     * @param sql               the SQL text to parse
     * @param keepUnicodeEscape when {@code true}, {@link SQLParserFeature#KeepUnicodeEscape}
     *                          is enabled on the parser before parsing; when {@code false}
     *                          the parser is used as created
     * @return the SQL string produced from the parsed statements
     */
    private static String parseAndPrint(String sql, boolean keepUnicodeEscape) {
        SQLStatementParser hiveParser = SQLParserUtils.createSQLStatementParser(sql, DbType.hive);
        if (keepUnicodeEscape) {
            hiveParser.config(SQLParserFeature.KeepUnicodeEscape, true);
        }
        List<SQLStatement> statements = hiveParser.parseStatementList();
        return SQLUtils.toSQLString(statements, DbType.hive);
    }
}