From a29ce8cad90cf34641cea999f255b8056c138f73 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 21 Apr 2026 22:40:16 +0000
Subject: [PATCH] perf(griffin): dispatch fetchNext comment checks on token
 length

SqlUtil.fetchNext runs on every token the SQL lexer produces during
query compilation. Profile samples show it at ~23% of parse CPU for
ExpressionParserBenchmark, with a hot loop that does five sequential
Chars.equals calls ("--", "/*", "/*+", "*/", plus WHITESPACE.excludes)
per token. For the overwhelmingly common case -- identifiers, numbers,
and single-char operators -- every one of those comparisons first calls
cs.length() on the GenericLexer's floating sequence, and the first four
always miss.

Read cs.length() once per iteration and dispatch the four comment-marker
checks via a switch on the length. Tokens whose length is not 2 or 3
skip those checks entirely; length-2 tokens run a single 2-char compare
for each marker. Identical treatment applied to fetchNextHintToken.

Benchmark: ExpressionParserBenchmark, JMH @Fork=2, 5 warmup + 10
measurement iterations, JDK 21. All ten @Param expressions improve
with non-overlapping 99.9% confidence intervals.

  a + b                    956.7  ->   872.8  (-8.8%)
  a + b * c / 2           1392.8  ->  1186.4 (-14.8%)
  a + b * c(x, y) / 2     1772.8  ->  1486.9 (-16.1%)
  a = 1 and b = 2 or c=3  1807.0  ->  1587.8 (-12.1%)
  case-when-else          2832.8  ->  2336.9 (-17.5%)
  a in (1,2,3,4,5)        1509.6  ->  1302.1 (-13.7%)
  cast(a) + cast(b)       2264.4  ->  1870.2 (-17.4%)
  between/like            1819.0  ->  1647.0  (-9.5%)
  coalesce(a,b,c,d,0)     1539.0  ->  1406.1  (-8.6%)
  sum over window         3210.8  ->  2618.7 (-18.4%)

Geometric mean improvement: ~13.6%. Behaviour is bit-identical:
length-2 sequences "--", "/*", "*/" and length-3 "/*+" trigger the
same branches as before. All 1394 griffin parser tests pass, including
the line/block/hint comment coverage in ExpressionParserTest,
SqlParserTest, and SqlHintsTest.
---
 .../main/java/io/questdb/griffin/SqlUtil.java | 153 ++++++++++--------
 1 file changed, 83 insertions(+), 70 deletions(-)

diff --git a/core/src/main/java/io/questdb/griffin/SqlUtil.java b/core/src/main/java/io/questdb/griffin/SqlUtil.java
index a96fe5caea49..df91620ffd02 100644
--- a/core/src/main/java/io/questdb/griffin/SqlUtil.java
+++ b/core/src/main/java/io/questdb/griffin/SqlUtil.java
@@ -456,53 +456,60 @@ public static CharSequence fetchNext(GenericLexer lexer, boolean includeHints) t
         boolean lineComment = false;
         while (lexer.hasNext()) {
             CharSequence cs = lexer.next();
+            final int csLen = cs.length();
 
             if (lineComment) {
-                if (Chars.equals(cs, '\n') || Chars.equals(cs, '\r')) {
-                    lineComment = false;
-                } else {
-                    // Check if token contains a newline (can happen with unbalanced quotes in comments)
-                    for (int i = 0, n = cs.length(); i < n; i++) {
-                        char c = cs.charAt(i);
-                        if (c == '\n' || c == '\r') {
-                            // Found newline inside token - reposition lexer to after newline
-                            int newPos = lexer.lastTokenPosition() + i + 1;
-                            // Skip \r\n sequence
-                            if (c == '\r' && i + 1 < n && cs.charAt(i + 1) == '\n') {
-                                newPos++;
-                            }
-                            lexer.backTo(newPos, null);
-                            lineComment = false;
-                            break;
+                if (csLen == 1) {
+                    final char c0 = cs.charAt(0);
+                    if (c0 == '\n' || c0 == '\r') {
+                        lineComment = false;
+                        continue;
+                    }
+                }
+                // Check if token contains a newline (can happen with unbalanced quotes in comments)
+                for (int i = 0; i < csLen; i++) {
+                    char c = cs.charAt(i);
+                    if (c == '\n' || c == '\r') {
+                        // Found newline inside token - reposition lexer to after newline
+                        int newPos = lexer.lastTokenPosition() + i + 1;
+                        // Skip \r\n sequence
+                        if (c == '\r' && i + 1 < csLen && cs.charAt(i + 1) == '\n') {
+                            newPos++;
                         }
+                        lexer.backTo(newPos, null);
+                        lineComment = false;
+                        break;
                     }
                 }
                 continue;
             }
 
-            if (Chars.equals("--", cs)) {
-                lineComment = true;
-                continue;
-            }
-
-            if (Chars.equals("/*", cs)) {
-                blockCount++;
-                continue;
-            }
-
-            if (Chars.equals("/*+", cs) && (!includeHints || blockCount > 0)) {
+            // Dispatch on token length: "--", "/*", "*/" are length 2; "/*+" is length 3.
+            // All other lengths short-circuit the comment-marker checks entirely.
+            if (csLen == 2) {
+                final char c0 = cs.charAt(0);
+                final char c1 = cs.charAt(1);
+                if (c0 == '-' && c1 == '-') {
+                    lineComment = true;
+                    continue;
+                }
+                if (c0 == '/' && c1 == '*') {
+                    blockCount++;
+                    continue;
+                }
+                if (c0 == '*' && c1 == '/' && blockCount > 0) {
+                    blockCount--;
+                    continue;
+                }
+            } else if (csLen == 3 && (!includeHints || blockCount > 0)
+                    && cs.charAt(0) == '/' && cs.charAt(1) == '*' && cs.charAt(2) == '+') {
                 blockCount++;
                 continue;
             }
 
-            if (Chars.equals("*/", cs) && blockCount > 0) {
-                blockCount--;
-                continue;
-            }
-
             if (blockCount == 0 && GenericLexer.WHITESPACE.excludes(cs)) {
                 // unclosed quote check
-                if (cs.length() == 1 && cs.charAt(0) == '"') {
+                if (csLen == 1 && cs.charAt(0) == '"') {
                     throw SqlException.$(lexer.lastTokenPosition(), "unclosed quotation mark");
                 }
                 return cs;
@@ -539,58 +546,64 @@ public static CharSequence fetchNextHintToken(GenericLexer lexer) {
         boolean inError = false;
         while (lexer.hasNext()) {
             CharSequence cs = lexer.next();
+            final int csLen = cs.length();
 
             if (lineComment) {
-                if (Chars.equals(cs, '\n') || Chars.equals(cs, '\r')) {
-                    lineComment = false;
-                } else {
-                    // Check if token contains a newline (can happen with unbalanced quotes in comments)
-                    for (int i = 0, n = cs.length(); i < n; i++) {
-                        char c = cs.charAt(i);
-                        if (c == '\n' || c == '\r') {
-                            // Found newline inside token - reposition lexer to after newline
-                            int newPos = lexer.lastTokenPosition() + i + 1;
-                            // Skip \r\n sequence
-                            if (c == '\r' && i + 1 < n && cs.charAt(i + 1) == '\n') {
-                                newPos++;
-                            }
-                            lexer.backTo(newPos, null);
-                            lineComment = false;
-                            break;
+                if (csLen == 1) {
+                    final char c0 = cs.charAt(0);
+                    if (c0 == '\n' || c0 == '\r') {
+                        lineComment = false;
+                        continue;
+                    }
+                }
+                // Check if token contains a newline (can happen with unbalanced quotes in comments)
+                for (int i = 0; i < csLen; i++) {
+                    char c = cs.charAt(i);
+                    if (c == '\n' || c == '\r') {
+                        // Found newline inside token - reposition lexer to after newline
+                        int newPos = lexer.lastTokenPosition() + i + 1;
+                        // Skip \r\n sequence
+                        if (c == '\r' && i + 1 < csLen && cs.charAt(i + 1) == '\n') {
+                            newPos++;
                         }
+                        lexer.backTo(newPos, null);
+                        lineComment = false;
+                        break;
                     }
                 }
                 continue;
             }
 
-            if (Chars.equals("--", cs)) {
-                lineComment = true;
-                continue;
-            }
-
-            if (Chars.equals("/*", cs)) {
-                blockCount++;
-                continue;
-            }
-
-            if (Chars.equals("/*+", cs)) {
+            // Dispatch on token length: "--", "/*", "*/" are length 2; "/*+" is length 3.
+            // All other lengths short-circuit the comment-marker checks entirely.
+            if (csLen == 2) {
+                final char c0 = cs.charAt(0);
+                final char c1 = cs.charAt(1);
+                if (c0 == '-' && c1 == '-') {
+                    lineComment = true;
+                    continue;
+                }
+                if (c0 == '/' && c1 == '*') {
+                    blockCount++;
+                    continue;
+                }
+                // end of hints or a nested comment
+                if (c0 == '*' && c1 == '/') {
+                    if (blockCount > 0) {
+                        blockCount--;
+                        continue;
+                    }
+                    return null;
+                }
+            } else if (csLen == 3 && cs.charAt(0) == '/' && cs.charAt(1) == '*' && cs.charAt(2) == '+') {
                 // nested hints are treated as regular comments
                 blockCount++;
                 continue;
             }
 
-            // end of hints or a nested comment
-            if (Chars.equals("*/", cs)) {
-                if (blockCount > 0) {
-                    blockCount--;
-                    continue;
-                }
-                return null;
-            }
-
             if (!inError && blockCount == 0 && GenericLexer.WHITESPACE.excludes(cs)) {
                 // unclosed quote check
-                if (cs.length() == 1 && cs.charAt(0) == '"') {
+                if (csLen == 1 && cs.charAt(0) == '"') {
                     inError = true;
                 } else {
                     return cs;