datastax · pkolaczk · Mar 9, 2026
diff --git a/src/java/org/apache/cassandra/index/sai/disk/v6/TermsDistribution.java b/src/java/org/apache/cassandra/index/sai/disk/v6/TermsDistribution.java
@@ -279,6 +279,10 @@ public static BigDecimal toBigDecimal(ByteComparable value,
         // For non numbers we just reinterpret the bytecomparable representation as decimal of fixed width.
         // Therefore, we don't need to decode anything.
         byte[] fixedLengthBytes = Arrays.copyOf(ByteSourceInverse.readBytes(value.asComparableBytes(byteComparableVersion)), 20);
+        // Flip the first bit to get a correct order for negative values,
+        // because the first bit is interpreted by BigInteger as a sign bit, but bytecomparable interpret all byts as unsigned.
+        // By flipping it, we correctly get values starting with 0 bit smaller than the ones starting with 1.
+        fixedLengthBytes[0] ^= (byte) 0x80;
         return new BigDecimal(new BigInteger(fixedLengthBytes));
     }
 

diff --git a/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java b/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java
@@ -439,73 +439,41 @@ private int estimateNumRowsMatchingExact(Expression expression)
 
     private long estimateNumRowsMatchingRange(Expression expression)
     {
-        final Trie<PrimaryKeys> subtrie = getSubtrie(expression);
-
-        // We could compute the number of matching rows by iterating the subtrie
-        // and summing the sizes of PrimaryKeys collections. But this could be very costly
-        // if the subtrie is large. Instead, we iterate a limited number of entries, and then we
-        // check how far we got by inspecting the term and comparing it to the start term and the end term.
-        // For now, we assume that term values are distributed uniformly.
-
-        var iterator = subtrie.entryIterator();
-        if (!iterator.hasNext())
+        if (minTerm == null || maxTerm == null)
             return 0;
 
         AbstractType<?> termType = indexContext.getValidator();
-        ByteBuffer endTerm = expression.upper != null && TypeUtil.compare(expression.upper.value.encoded, maxTerm, termType, version) < 0
-                             ? expression.upper.value.encoded
-                             : maxTerm;
-
-        long pointCount = 0;
-        long keyCount = 0;
-
-        ByteComparable startTerm = null;
-        ByteComparable currentTerm = null;
-
-        while (iterator.hasNext() && pointCount < 64)
-        {
-            var entry = iterator.next();
-            pointCount += 1;
-            keyCount += entry.getValue().size();
-            currentTerm = entry.getKey();
-            if (startTerm == null)
-                startTerm = currentTerm;
-        }
-        assert currentTerm != null;
-
-        // We iterated all points matched by the query, so keyCount contains the exact value of keys.
-        // This is a happy path, because the returned value will be accurate.
-        if (!iterator.hasNext())
-            return keyCount;
-
-        // There are some points remaining; let's estimate their count by extrapolation.
-        // Express the distance we iterated as a double value and the whole subtrie range also as a double.
-        // Then the ratio of those two values would give us a hint on how many total points there
-        // are in the subtrie. This should be fairly accurate assuming values are distributed uniformly.
-        BigDecimal startValue = toBigDecimal(startTerm);
-        BigDecimal endValue = toBigDecimal(endTerm);
-        BigDecimal currentValue = toBigDecimal(currentTerm);
-        double totalDistance = endValue.subtract(startValue).doubleValue() + Double.MIN_NORMAL;
-        double iteratedDistance = currentValue.subtract(startValue).doubleValue() + Double.MIN_NORMAL;
-        assert totalDistance > 0.0;
-        assert iteratedDistance > 0.0;
-
-        double extrapolatedPointCount = Math.min((pointCount - 1) * (totalDistance / iteratedDistance), this.data.valuesCount());
-        double keysPerPoint = (double) keyCount / pointCount;
-        return (long) (extrapolatedPointCount * keysPerPoint);
-    }
-
-    /**
-     * Converts the term to a BigDecimal in a way that it keeps the sort order
-     * (so terms comparing larger yield larger numbers).
-     * Works on raw representation (as passed to the index).
-     *
-     * @see #toBigDecimal(ByteComparable)
-     */
-    private BigDecimal toBigDecimal(ByteBuffer endTerm)
-    {
-        ByteComparable bc = version.onDiskFormat().encodeForTrie(endTerm, indexContext.getValidator());
-        return toBigDecimal(bc);
+        ByteComparable minTermComparable = version.onDiskFormat().encodeForTrie(minTerm, termType);
+        ByteComparable maxTermComparable = version.onDiskFormat().encodeForTrie(maxTerm, termType);
+        BigDecimal indexLowerBound = toBigDecimal(minTermComparable);
+        BigDecimal indexUpperBound = toBigDecimal(maxTermComparable);
+
+        BigDecimal queryLowerBound = expression.lower != null
+                                     ? toBigDecimal(expression.getEncodedLowerBoundByteComparable(version))
+                                     : indexLowerBound;
+        BigDecimal queryUpperBound = expression.upper != null
+                                     ? toBigDecimal(expression.getEncodedUpperBoundByteComparable(version))
+                                     : indexUpperBound;
+
+        if (queryLowerBound.compareTo(indexUpperBound) > 0 || queryUpperBound.compareTo(indexLowerBound) < 0)
+            return 0;
+        if (queryLowerBound.compareTo(indexUpperBound) == 0 && expression.lower != null && !expression.lower.inclusive)
+            return 0;
+        if (queryUpperBound.compareTo(indexLowerBound) == 0 && expression.upper != null && !expression.upper.inclusive)
+            return 0;
+        if (queryLowerBound.compareTo(indexLowerBound) <= 0 && queryUpperBound.compareTo(indexUpperBound) >= 0)
+            return indexedRows;
+
+        queryUpperBound = queryUpperBound.min(indexUpperBound).max(indexLowerBound);
+        queryLowerBound = queryLowerBound.max(indexLowerBound).min(indexUpperBound);
+        assert queryLowerBound.compareTo(queryUpperBound) <= 0
+            : "query lower bound (" + queryLowerBound + ") should be less than or equal to query upper bound (" + queryUpperBound + ')';
+
+        double indexRangeSize = indexUpperBound.subtract(indexLowerBound).doubleValue() + Double.MIN_NORMAL;
+        double queryRangeSize = queryUpperBound.subtract(queryLowerBound).doubleValue() + Double.MIN_NORMAL;
+        double selectivity = queryRangeSize / indexRangeSize;
+        assert selectivity >= 0.0 && selectivity <= 1.0 : "selectivity (" + selectivity + ") should be between 0.0 and 1.0";
+        return Math.round(selectivity * indexedRows);
     }
 
     /**

diff --git a/test/unit/org/apache/cassandra/index/sai/cql/NonNumericTermsDistributionTest.java b/test/unit/org/apache/cassandra/index/sai/cql/NonNumericTermsDistributionTest.java
@@ -93,10 +93,10 @@ public void testUtf8IndexEstimates()
         var sai = getIndex();
 
         assertInMemoryEstimateCount(sai, Operator.EQ, "ab", 1);
-        assertInMemoryEstimateCount(sai, Operator.LT, "ab", 2);
-        assertInMemoryEstimateCount(sai, Operator.GT, "ab", 3);
-        assertInMemoryEstimateCount(sai, Operator.LTE, "ab", 3);
-        assertInMemoryEstimateCount(sai, Operator.GTE, "ab", 4);
+        assertInMemoryEstimateCount(sai, Operator.LT, "ab", 2, 1);
+        assertInMemoryEstimateCount(sai, Operator.GT, "ab", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.LTE, "ab", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.GTE, "ab", 4, 1);
         assertInMemoryEstimateCount(sai, Operator.EQ, "•", 1);
         assertInMemoryEstimateCount(sai, Operator.EQ, "x", 0);
 
@@ -183,10 +183,10 @@ public void testAsciiIndexEstimates()
         var sai = getIndex();
 
         assertInMemoryEstimateCount(sai, Operator.EQ, "ab", 1);
-        assertInMemoryEstimateCount(sai, Operator.LT, "ab", 2);
-        assertInMemoryEstimateCount(sai, Operator.GT, "ab", 3);
-        assertInMemoryEstimateCount(sai, Operator.LTE, "ab", 3);
-        assertInMemoryEstimateCount(sai, Operator.GTE, "ab", 4);
+        assertInMemoryEstimateCount(sai, Operator.LT, "ab", 2, 1);
+        assertInMemoryEstimateCount(sai, Operator.GT, "ab", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.LTE, "ab", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.GTE, "ab", 4, 1);
         assertInMemoryEstimateCount(sai, Operator.EQ, "x", 0);
 
         flush();
@@ -327,10 +327,10 @@ public void testTimestampIndexEstimates()
         var sai = getIndex();
 
         assertInMemoryEstimateCount(sai, Operator.EQ, "2024-01-01 12:00:00.000", 1);
-        assertInMemoryEstimateCount(sai, Operator.LT, "2024-01-01 12:00:00.000", 2);
-        assertInMemoryEstimateCount(sai, Operator.GT, "2024-01-01 12:00:00.000", 3);
-        assertInMemoryEstimateCount(sai, Operator.LTE, "2024-01-01 12:00:00.000", 3);
-        assertInMemoryEstimateCount(sai, Operator.GTE, "2024-01-01 12:00:00.000", 4);
+        assertInMemoryEstimateCount(sai, Operator.LT, "2024-01-01 12:00:00.000", 2, 1);
+        assertInMemoryEstimateCount(sai, Operator.GT, "2024-01-01 12:00:00.000", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.LTE, "2024-01-01 12:00:00.000", 3, 1);
+        assertInMemoryEstimateCount(sai, Operator.GTE, "2024-01-01 12:00:00.000", 4, 1);
         assertInMemoryEstimateCount(sai, Operator.EQ, "2550-01-01 12:00:00", 1);
         assertInMemoryEstimateCount(sai, Operator.EQ, "2550-01-01 12:00:01", 0);
         assertInMemoryEstimateCount(sai, Operator.EQ, "1810-12-31 16:00:00", 1);

diff --git a/test/unit/org/apache/cassandra/index/sai/cql/NumericTermsDistributionTest.java b/test/unit/org/apache/cassandra/index/sai/cql/NumericTermsDistributionTest.java
@@ -191,7 +191,7 @@ public void testNumericIndexEstimatesOnManyRows() throws Throwable
         // of this test relied on the fact overwriting a primary key in the memtables didn't remove the old value
         // from the index. I've updated the test to account for more realistic uncertainty and also added a compact
         // then estimate step since that exercises a different build path in the index.
-        var uncertainty = (testType == CQL3Type.Native.TINYINT) ? 1 : 52;
+        var uncertainty = (testType == CQL3Type.Native.TINYINT) ? 2 : 52;
         assertInMemoryEstimateCount(sai, Operator.LT, MID_POINT, COUNT / 2, 0, uncertainty);
         assertInMemoryEstimateCount(sai, Operator.GT, MID_POINT, COUNT / 2, 0, uncertainty);
         assertInMemoryEstimateCount(sai, Operator.LTE, MID_POINT, COUNT / 2, 1, uncertainty);

diff --git a/test/unit/org/apache/cassandra/index/sai/disk/v6/TermsDistributionTest.java b/test/unit/org/apache/cassandra/index/sai/disk/v6/TermsDistributionTest.java
@@ -23,6 +23,9 @@
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.Random;
 
 import org.junit.Test;
 
@@ -31,12 +34,15 @@
 import org.apache.cassandra.db.marshal.DoubleType;
 import org.apache.cassandra.db.marshal.Int32Type;
 import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.TimestampType;
+import org.apache.cassandra.db.marshal.UTF8Type;
 import org.apache.cassandra.index.sai.SAIUtil;
 import org.apache.cassandra.index.sai.disk.ModernResettableByteBuffersIndexOutput;
 import org.apache.cassandra.index.sai.disk.oldlucene.ByteArrayIndexInput;
 import org.apache.cassandra.index.sai.utils.TypeUtil;
 import org.apache.cassandra.utils.bytecomparable.ByteComparable;
 
+import static org.apache.cassandra.db.marshal.ValueGenerator.randomString;
 import static org.junit.Assert.*;
 
 public class TermsDistributionTest
@@ -270,6 +276,60 @@ public void testSerde() throws IOException
         }
     }
 
+    @Test
+    public void testTimestampToBigDecimalPreservesOrder()
+    {
+        var tpe = TimestampType.instance;
+        var format = SAIUtil.currentVersion().onDiskFormat();
+        var timestamps = new long[] { Long.MIN_VALUE, -1000000000L, -1L, 0L, 1L, 1000000000L, Long.MAX_VALUE };
+
+        ByteComparable[] encoded = new ByteComparable[timestamps.length];
+        BigDecimal[] decimals = new BigDecimal[timestamps.length];
+
+        for (int i = 0; i < timestamps.length; i++)
+        {
+            encoded[i] = format.encodeForTrie(tpe.decompose(new Date(timestamps[i])), tpe);
+            decimals[i] = TermsDistribution.toBigDecimal(encoded[i], tpe, SAIUtil.currentVersion(), VERSION);
+        }
+
+        // BigDecimal representaitons should sort the same way as the original timestamps:
+        for (int i = 0; i < decimals.length - 1; i++)
+        {
+            assertTrue(ByteComparable.compare(encoded[i], encoded[i + 1], TypeUtil.BYTE_COMPARABLE_VERSION) < 0);
+            assertTrue(decimals[i].compareTo(decimals[i + 1]) < 0);
+        }
+    }
+
+    @Test
+    public void testStringToBigDecimalPreservesOrder()
+    {
+        var tpe = UTF8Type.instance;
+        var format = SAIUtil.currentVersion().onDiskFormat();
+        String[] strings = new String[100];
+        Random random = new Random(1);
+        for (int i = 0; i < strings.length; i++)
+        {
+            strings[i] = randomString(random);
+        }
+        Arrays.sort(strings);
+
+        ByteComparable[] encoded = new ByteComparable[strings.length];
+        BigDecimal[] decimals = new BigDecimal[strings.length];
+
+        for (int i = 0; i < strings.length; i++)
+        {
+            encoded[i] = format.encodeForTrie(tpe.decompose(strings[i]), tpe);
+            decimals[i] = TermsDistribution.toBigDecimal(encoded[i], tpe, SAIUtil.currentVersion(), VERSION);
+        }
+
+        // BigDecimal representations should sort the same way as the original strings:
+        for (int i = 0; i < decimals.length - 1; i++)
+        {
+            assertTrue(ByteComparable.compare(encoded[i], encoded[i + 1], TypeUtil.BYTE_COMPARABLE_VERSION) < 0);
+            assertTrue(decimals[i].compareTo(decimals[i + 1]) < 0);
+        }
+    }
+
     private ByteComparable encode(int value)
     {
         return v -> Int32Type.instance.asComparableBytes(Int32Type.instance.decompose(value), v);
@@ -291,5 +351,4 @@ private ByteComparable encodeAsBigInt(long value)
         ByteBuffer raw = IntegerType.instance.decompose(BigInteger.valueOf(value));
         return v -> TypeUtil.asComparableBytes(TypeUtil.encode(raw, IntegerType.instance), IntegerType.instance, v);
     }
-
 }