diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 86752f27a0f6..a0c30dbd010b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -787,11 +787,31 @@ public boolean next(List super ExtendedCell> outResult, ScannerContext scanner case SEEK_NEXT_USING_HINT: ExtendedCell nextKV = matcher.getNextKeyHint(cell); if (nextKV != null) { - int difference = comparator.compare(nextKV, cell); + // HBASE-28902: InnerStoreCellComparator compares families by length only, + // so cross-CF hints produce wrong ordering. + // + // * Use compareFamilies (a final method that does byte comparison) to detect them. + // + // * When the hint targets a different CF, this store has no data for it, + // so skip the rest of this row. We can't use seekAsDirection since KeyValueHeap + // also uses InnerStoreCellComparator. + // + // * Empty-family hints (e.g. MultiRowRangeFilter) fall through to the normal path + // where row comparison dominates. + int familyCmp = + nextKV.getFamilyLength() > 0 && comparator.compareRows(nextKV, cell) == 0 + ? comparator.compareFamilies(nextKV, cell) + : 0; + int difference = familyCmp != 0 ? familyCmp : comparator.compare(nextKV, cell); if ( ((!scan.isReversed() && difference > 0) || (scan.isReversed() && difference < 0)) ) { - seekAsDirection(nextKV); + if (familyCmp != 0) { + matcher.clearCurrentRow(); + seekOrSkipToNextRow(cell); + } else { + seekAsDirection(nextKV); + } NextState stateAfterSeekByHint = needToReturn(); if (stateAfterSeekByHint != null) { return scannerContext.setScannerState(stateAfterSeekByHint).hasMoreValues(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCrossCFSeekHint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCrossCFSeekHint.java new file mode 100644 index 000000000000..279b2330e3f3 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCrossCFSeekHint.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.filter.FilterBase; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Test that SEEK_NEXT_USING_HINT with a hint pointing to a different column family does not degrade + * into cell-by-cell traversal. + *
+ * HBASE-28902: StoreScanner uses InnerStoreCellComparator which compares column families by length
+ * only (an optimization valid within a single store). When the filter hint points to a different
+ * CF, this comparison can produce the wrong result, causing the seek to be skipped and falling back
+ * to heap.next() for every cell.
+ */
+@Tag(RegionServerTests.TAG)
+@Tag(MediumTests.TAG)
+public class TestCrossCFSeekHint {
+
+ private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+
+ private static final byte[] ROW1 = Bytes.toBytes("row1");
+ private static final byte[] ROW2 = Bytes.toBytes("row2");
+ private static final byte[] QUAL = Bytes.toBytes("q");
+ private static final int NUM_CELLS = 100;
+
+ // Two CFs with different lengths: "aa" sorts first, "b" sorts second.
+ private static final byte[] CF_FIRST = Bytes.toBytes("aa");
+ private static final byte[] CF_SECOND = Bytes.toBytes("b");
+
+ // Two CFs with same length: "aa" sorts first, "bb" sorts second.
+ private static final byte[] CF_SECOND_SAME_LEN = Bytes.toBytes("bb");
+
+ static Stream
+ * This simulates a filter that wants some cells from CF1, then skips ahead to CF2. If the fix
+ * permanently closes CF1's store scanner, CF1 cells would be missing on subsequent rows.
+ */
+ public static class CrossCFHintFilter extends FilterBase {
+ private final byte[] targetFamily;
+ private final byte[] targetQualifier;
+ private final byte[] hintThreshold;
+ private long filterCellCount;
+
+ /**
+ * @param targetFamily the CF to hint/seek to
+ * @param targetQualifier qualifier for the hint cell
+ * @param hintThreshold when a data CF cell's qualifier >= this, return SEEK_NEXT_USING_HINT
+ * instead of INCLUDE. Cells before this threshold are INCLUDEd.
+ */
+ public CrossCFHintFilter(byte[] targetFamily, byte[] targetQualifier, byte[] hintThreshold) {
+ this.targetFamily = targetFamily;
+ this.targetQualifier = targetQualifier;
+ this.hintThreshold = hintThreshold;
+ }
+
+ @Override
+ public ReturnCode filterCell(Cell c) throws IOException {
+ filterCellCount++;
+ if (CellUtil.matchingFamily(c, targetFamily)) {
+ return ReturnCode.INCLUDE;
+ }
+ // Data CF cell: include if qualifier < threshold, otherwise hint to target CF
+ if (
+ Bytes.compareTo(c.getQualifierArray(), c.getQualifierOffset(), c.getQualifierLength(),
+ hintThreshold, 0, hintThreshold.length) < 0
+ ) {
+ return ReturnCode.INCLUDE;
+ }
+ return ReturnCode.SEEK_NEXT_USING_HINT;
+ }
+
+ @Override
+ public Cell getNextCellHint(Cell currentCell) {
+ return new KeyValue(CellUtil.cloneRow(currentCell), targetFamily, targetQualifier,
+ Long.MAX_VALUE, KeyValue.Type.Maximum);
+ }
+
+ public long getFilterCellCount() {
+ return filterCellCount;
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource("params")
+ public void testCrossCFSeekHint(byte[] dataCF, byte[] targetCF, boolean flush, boolean reversed)
+ throws IOException {
+ String suffix = Bytes.toString(dataCF) + "_" + Bytes.toString(targetCF)
+ + (flush ? "_flush" : "_mem") + (reversed ? "_rev" : "");
+ TableName tableName = TableName.valueOf("TestCrossCFSeekHint_" + suffix);
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName)
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of(dataCF))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of(targetCF)).build();
+ RegionInfo info = RegionInfoBuilder.newBuilder(tableName).build();
+ HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(),
+ TEST_UTIL.getConfiguration(), td);
+
+ try {
+ // Write two rows. Each row has NUM_CELLS qualifiers in dataCF (q00000..q00099)
+ // and one cell in targetCF.
+ for (byte[] row : new byte[][] { ROW1, ROW2 }) {
+ for (int i = 0; i < NUM_CELLS; i++) {
+ Put p = new Put(row);
+ p.addColumn(dataCF, Bytes.toBytes(String.format("q%05d", i)), Bytes.toBytes("v"));
+ region.put(p);
+ }
+ Put p = new Put(row);
+ p.addColumn(targetCF, QUAL, Bytes.toBytes("target"));
+ region.put(p);
+ }
+ if (flush) {
+ region.flush(true);
+ }
+
+ // The filter INCLUDEs dataCF cells with qualifier < "q00002" (i.e. q00000, q00001),
+ // then returns SEEK_NEXT_USING_HINT at q00002 with a hint to targetCF.
+ // This means each row should return: dataCF:q00000, dataCF:q00001, targetCF:q.
+ byte[] hintThreshold = Bytes.toBytes("q00002");
+ int includedPerRow = 2; // q00000, q00001
+ CrossCFHintFilter filter = new CrossCFHintFilter(targetCF, QUAL, hintThreshold);
+ Scan scan = new Scan();
+ scan.setFilter(filter);
+ scan.setReversed(reversed);
+
+ List