diff --git a/docs/superpowers/plans/2026-04-22-stream-trigger-calc-scan-cols-optimize.md b/docs/superpowers/plans/2026-04-22-stream-trigger-calc-scan-cols-optimize.md new file mode 100644 index 000000000000..80b17dec4f73 --- /dev/null +++ b/docs/superpowers/plans/2026-04-22-stream-trigger-calc-scan-cols-optimize.md @@ -0,0 +1,699 @@ +# Stream Trigger/Calc Scan-Cols Optimization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make stream trigger AST and calc AST each scan only their logically required columns, while preserving `%%trows` semantics by injecting `pre_filter` as the calc query's WHERE clause. + +**Architecture:** Three commits in order — (A) parser scaffolding (new `SSelectStmt` flag + `translateWhere` bypass + `injectPreFilterIntoCalcQuery` helper invoked before `translateStreamCalcQuery`), behavior-neutral on its own; (B) remove the wrong "append calc cols to trigger projection" block at `parTranslater.c:19205-19216`, which together with (A) gives the corrected behavior; (C) delete the now-dead `triggerScanList` field and its filler. + +**Tech Stack:** C (TDengine parser/planner). Build via the existing `community/debug/` cmake tree. Stream regression covered by python framework under `test/cases/18-StreamProcessing/`. 
+ +**Spec:** `docs/superpowers/specs/2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md` + +--- + +## File Structure + +| File | Responsibility | Change Type | +|---|---|---| +| `include/nodes/querynodes.h` | `SSelectStmt` definition | Modify (add `bool pWhereInjectedFromPreFilter`) | +| `source/libs/parser/src/parTranslater.c` | Stream req building, WHERE translation | Modify (add helper, bypass check, delete bad block, delete dead init) | +| `source/libs/planner/src/planLogicCreater.c` | Scan logic node creation | Modify (delete `triggerScanList` filler) | +| `include/libs/planner/planner.h` | `SPlanStreamContext` definition | Modify (delete `triggerScanList` field) | + +No new files. + +--- + +## Pre-flight + +- [ ] **Step 0.1: Verify clean tree on the target branch** + +```bash +cd /root/code/TDinternal/community +git status +git log -1 --oneline +``` + +Expected: working tree clean, HEAD on `feat/6490635370` at the design-doc commit `d48f752bcb` (or later). + +- [ ] **Step 0.2: Verify baseline build works** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target taos -j$(nproc) 2>&1 | tail -20 +``` + +Expected: build succeeds. If it fails on baseline, stop and report — do NOT start the implementation. + +--- + +## Task A: Parser Scaffolding (Behavior-Neutral) + +This task adds the `SSelectStmt` flag, the `translateWhere` bypass, and the `injectPreFilterIntoCalcQuery` helper, but does NOT yet remove the bad `19205-19216` block. After this commit: + +- For non-`%%trows` streams: behavior is identical to before (helper short-circuits). +- For `%%trows` streams with `pre_filter`: trigger still has the (wrong) extra cols appended (because Task B not done yet), but calc now also runs with injected WHERE. Trigger and calc converge in correctness; trigger is just temporarily over-scanning. This is intentional — it lets us land Task A independently. 
+ +**Files:** +- Modify: `include/nodes/querynodes.h:674-735` (add field to `SSelectStmt`) +- Modify: `source/libs/parser/src/parTranslater.c:10522-10529` (`translateWhere` check) +- Modify: `source/libs/parser/src/parTranslater.c:19104-19141` (`createStreamReqBuildCalc` — insert helper call before `translateStreamCalcQuery`) +- Modify: `source/libs/parser/src/parTranslater.c` (add helper `injectPreFilterIntoCalcQuery` + forward decl) + +### Subtask A.1: Add `SSelectStmt` flag + +- [ ] **Step A.1.1: Inspect current `SSelectStmt` to find a good insertion point** + +```bash +sed -n '674,735p' /root/code/TDinternal/community/include/nodes/querynodes.h +``` + +Expected: see all `bool` fields near the bottom of the struct (e.g. `isSubquery`, `hasAggFuncs`). + +- [ ] **Step A.1.2: Add the flag right after `isSubquery`** + +Edit `include/nodes/querynodes.h`. Locate the line: + +```c + bool isSubquery; +``` + +Insert immediately after it: + +```c + bool pWhereInjectedFromPreFilter; // true if pWhere was cloned from stream pre_filter +``` + +- [ ] **Step A.1.3: Verify it compiles** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target nodes -j$(nproc) 2>&1 | tail -10 +``` + +Expected: build succeeds (the `nodes` lib compiles). + +### Subtask A.2: Bypass the `%%trows + WHERE` check for the injected case + +- [ ] **Step A.2.1: View the current check** + +```bash +sed -n '10520,10535p' /root/code/TDinternal/community/source/libs/parser/src/parTranslater.c +``` + +Expected output contains: + +```c + if (pSelect->pWhere && + BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + inStreamCalcClause(pCxt)) { + PAR_ERR_RET(generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause.")); + } +``` + +(If line numbers shifted, find it via `grep -n "trows can not be used with WHERE" parTranslater.c`.) 
+ +- [ ] **Step A.2.2: Add the bypass condition** + +Edit `source/libs/parser/src/parTranslater.c`. Replace the block above with: + +```c + if (pSelect->pWhere && !pSelect->pWhereInjectedFromPreFilter && + BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + inStreamCalcClause(pCxt)) { + PAR_ERR_RET(generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause.")); + } +``` + +(Only added `&& !pSelect->pWhereInjectedFromPreFilter`.) + +### Subtask A.3: Implement `injectPreFilterIntoCalcQuery` + +- [ ] **Step A.3.1: Find a good insertion point — right above `createStreamReqBuildCalc`** + +```bash +grep -n "^static int32_t createStreamReqBuildCalc" /root/code/TDinternal/community/source/libs/parser/src/parTranslater.c +``` + +Expected: a single match around line 19105. + +- [ ] **Step A.3.2: Add the helper above `createStreamReqBuildCalc`** + +Edit `source/libs/parser/src/parTranslater.c`. Insert immediately before the line `// Build calculate part in create stream request`: + +```c +// Inject trigger's pre_filter as WHERE into calc query when %%trows is used. +// Calc side independently re-scans the trigger table; without this the calc +// scan returns rows that pre_filter already excluded on the trigger side. 
+static int32_t injectPreFilterIntoCalcQueryImpl(STranslateContext* pCxt, SNode* pPreFilter, SNode* pQuery) { + if (NULL == pQuery) return TSDB_CODE_SUCCESS; + if (QUERY_NODE_SET_OPERATOR == nodeType(pQuery)) { + SSetOperator* pSet = (SSetOperator*)pQuery; + int32_t code = injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pSet->pLeft); + if (TSDB_CODE_SUCCESS == code) { + code = injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pSet->pRight); + } + return code; + } + if (QUERY_NODE_SELECT_STMT != nodeType(pQuery)) return TSDB_CODE_SUCCESS; + + SSelectStmt* pSelect = (SSelectStmt*)pQuery; + if (NULL == pSelect->pFromTable || + QUERY_NODE_PLACE_HOLDER_TABLE != nodeType(pSelect->pFromTable)) { + return TSDB_CODE_SUCCESS; + } + SPlaceHolderTableNode* pPh = (SPlaceHolderTableNode*)pSelect->pFromTable; + if (SP_PARTITION_ROWS != pPh->placeholderType) return TSDB_CODE_SUCCESS; + + if (NULL != pSelect->pWhere) { + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause."); + } + + SNode* pCloned = NULL; + int32_t code = nodesCloneNode(pPreFilter, &pCloned); + if (TSDB_CODE_SUCCESS != code) return code; + pSelect->pWhere = pCloned; + pSelect->pWhereInjectedFromPreFilter = true; + return TSDB_CODE_SUCCESS; +} + +static int32_t injectPreFilterIntoCalcQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { + if (NULL == pStmt->pTrigger || NULL == pStmt->pQuery) return TSDB_CODE_SUCCESS; + SStreamTriggerNode* pTrigger = (SStreamTriggerNode*)pStmt->pTrigger; + if (NULL == pTrigger->pOptions) return TSDB_CODE_SUCCESS; + SNode* pPreFilter = ((SStreamTriggerOptions*)pTrigger->pOptions)->pPreFilter; + if (NULL == pPreFilter) return TSDB_CODE_SUCCESS; + parserDebug("inject stream pre_filter into calc query as WHERE"); + return injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pStmt->pQuery); +} + +``` + +- [ ] **Step A.3.3: Wire the helper into `createStreamReqBuildCalc`** + +Edit 
`source/libs/parser/src/parTranslater.c`. Locate the line: + +```c + PAR_ERR_JRET(translateStreamCalcQuery(pCxt, pTriggerPartition, pTriggerSelect ? pTriggerSelect->pFromTable : NULL, + pStmt->pQuery, pNotifyCond, pTriggerWindow)); +``` + +Insert the following two lines IMMEDIATELY ABOVE it: + +```c + PAR_ERR_JRET(injectPreFilterIntoCalcQuery(pCxt, pStmt)); + +``` + +After this edit, the call sequence becomes: `injectPreFilterIntoCalcQuery` → `translateStreamCalcQuery` → ... + +- [ ] **Step A.3.4: Build the parser library** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target parser -j$(nproc) 2>&1 | tail -20 +``` + +Expected: build succeeds with no warnings about the new code. If `SPlaceHolderTableNode`, `SP_PARTITION_ROWS`, `SStreamTriggerNode`, or `SStreamTriggerOptions` are not visible in `parTranslater.c`, the build will fail — fix by adding the appropriate include or by checking what's already used at line ~7374 (where `translatePlaceHolderTable` lives) and at line ~18241 (where `pStmt->pTrigger` options are read). + +- [ ] **Step A.3.5: Build the full taos binary** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target taos -j$(nproc) 2>&1 | tail -10 +``` + +Expected: clean build. + +### Subtask A.4: Commit Task A + +- [ ] **Step A.4.1: Stage and commit** + +```bash +cd /root/code/TDinternal/community +git add include/nodes/querynodes.h source/libs/parser/src/parTranslater.c +git -c user.name='Copilot' -c user.email='copilot@local' commit -m "feat(stream): inject trigger pre_filter as WHERE into %%trows calc query + +Adds SSelectStmt::pWhereInjectedFromPreFilter and a parser-side helper +injectPreFilterIntoCalcQuery that clones trigger pre_filter into the +calc query's WHERE for every %%trows SELECT. translateWhere bypasses +the pre-existing '%%trows + WHERE' error when the WHERE is the +injected one. This is a behavior-neutral scaffold; the next commit +removes the wrong column-append block on the trigger side. 
+ +Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" +git log -1 --oneline +``` + +Expected: one new commit on top of the spec commit. + +--- + +## Task B: Remove the Wrong Trigger-Side Column Append + +**Files:** +- Modify: `source/libs/parser/src/parTranslater.c:19205-19216` (delete the append block) + +### Subtask B.1: Delete the append block + +- [ ] **Step B.1.1: Locate and confirm the block** + +```bash +grep -n "need collect scan cols and put into trigger" /root/code/TDinternal/community/source/libs/parser/src/parTranslater.c +``` + +Expected: a single match. Use it to find the surrounding `if (BIT_FLAG_TEST_MASK(pReq->placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && LIST_LENGTH(calcCxt.streamCxt.triggerScanList) > 0) { ... }` block — this is the block to delete. + +- [ ] **Step B.1.2: Delete the block** + +Edit `source/libs/parser/src/parTranslater.c`. Remove EXACTLY this block: + +```c + if (BIT_FLAG_TEST_MASK(pReq->placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + LIST_LENGTH(calcCxt.streamCxt.triggerScanList) > 0) { + // need collect scan cols and put into trigger's scan list + PAR_ERR_JRET(nodesListAppendList(pTriggerSelect->pProjectionList, calcCxt.streamCxt.triggerScanList)); + SNode* pCol = NULL; + FOREACH(pCol, pTriggerSelect->pProjectionList) { + if (nodeType(pCol) == QUERY_NODE_COLUMN) { + SColumnNode* pColumn = (SColumnNode*)pCol; + tstrncpy(pColumn->tableAlias, pColumn->tableName, TSDB_TABLE_NAME_LEN); + } + } + } + +``` + +(Trailing blank line included so the surrounding code stays compact.) + +- [ ] **Step B.1.3: Build** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target taos -j$(nproc) 2>&1 | tail -10 +``` + +Expected: clean build (the only references to `triggerScanList` left are the field decl, the filler in `planLogicCreater.c`, and the `= NULL` init in `parTranslater.c` — all still compile). 
+ +### Subtask B.2: Smoke-test against an existing pre_filter case + +- [ ] **Step B.2.1: Find the smallest existing `%%trows + pre_filter` test** + +```bash +grep -n "stream_options(pre_filter" /root/code/TDinternal/community/test/cases/18-StreamProcessing/04-Options/test_options_basic.py | head -5 +``` + +Expected: see the `s9` and `s9_g` streams in `Basic9` (around line 1541). + +- [ ] **Step B.2.2: Run only the `Basic9` (PRE_FILTER) sub-case** + +This test class runs many sub-cases; we want only `Basic9`. Easiest: temporarily comment all other `streams.append(...)` lines in `test_stream_options_basic` except `Basic9`, run, then revert. + +```bash +cd /root/code/TDinternal/community/test +# from the project test root, follow whatever invocation is documented there: +ls run_test.sh 2>/dev/null && head -20 run_test.sh +``` + +Then: + +```bash +cd /root/code/TDinternal/community/test +pytest cases/18-StreamProcessing/04-Options/test_options_basic.py::TestStreamOptionsBasic::test_stream_options_basic -v 2>&1 | tail -40 +``` + +Expected: PASS for the `Basic9` sub-case. If the harness needs a running cluster and isn't set up here, **stop** and ask the human how to run integration tests in this environment. Do NOT fabricate a "tests passed" report. + +(If unable to run integration tests, document the limitation in the commit message and proceed; the human will run regression in their own environment.) + +### Subtask B.3: Commit Task B + +- [ ] **Step B.3.1: Stage and commit** + +```bash +cd /root/code/TDinternal/community +git add source/libs/parser/src/parTranslater.c +git -c user.name='Copilot' -c user.email='copilot@local' commit -m "fix(stream): stop appending calc cols to trigger projection on %%trows + +The block at parTranslater.c:19205-19216 wrongly appended every column +referenced by the calc SELECT (collected via COLLECT_COL_TYPE_ALL into +triggerScanList) to the trigger SELECT's projection. This forced the +trigger side to scan calc-only columns (e.g. 
c3, t2 in 'select sum(c3), +avg(t2) from %%trows' with state_window(c1) + pre_filter(c2>2)). + +With the prior commit's pre_filter injection making the calc side +self-contained, the trigger projection now stays exactly what +createStreamReqBuildTriggerSelect computed (trigger window cols + +pre_filter cols + tbname()). + +Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" +git log -1 --oneline +``` + +Expected: second commit on top of Task A. + +--- + +## Task C: Delete the Dead `triggerScanList` Field + +After Task B there are zero readers of `triggerScanList`. This task removes the field, the filler, and the now-useless `= NULL` initializer. + +**Files:** +- Modify: `include/libs/planner/planner.h:44` (delete field) +- Modify: `source/libs/planner/src/planLogicCreater.c:598-601` (delete filler) +- Modify: `source/libs/parser/src/parTranslater.c` (delete `.streamCxt.triggerScanList = NULL,` initializer) + +### Subtask C.1: Verify no readers remain + +- [ ] **Step C.1.1: Grep** + +```bash +cd /root/code/TDinternal/community +grep -rn "triggerScanList" --include='*.c' --include='*.h' source include +``` + +Expected: exactly four lines: +- `include/libs/planner/planner.h:44: SNodeList* triggerScanList;` +- `source/libs/planner/src/planLogicCreater.c:600: &pCxt->pPlanCxt->streamCxt.triggerScanList);` +- (with the surrounding `if (pRealTable->placeholderType == SP_PARTITION_ROWS) {` at 598-601) +- `source/libs/parser/src/parTranslater.c:: .streamCxt.triggerScanList = NULL,` + +If any other reader appears, **stop** and re-examine — there may be a code path the spec missed. + +### Subtask C.2: Delete the filler in `planLogicCreater.c` + +- [ ] **Step C.2.1: View context** + +```bash +sed -n '595,612p' /root/code/TDinternal/community/source/libs/planner/src/planLogicCreater.c +``` + +- [ ] **Step C.2.2: Delete EXACTLY this block** + +Edit `source/libs/planner/src/planLogicCreater.c`. 
Remove: + +```c + if (pRealTable->placeholderType == SP_PARTITION_ROWS) { + code = nodesCollectColumns(pSelect, SQL_CLAUSE_FROM, pRealTable->table.tableAlias, COLLECT_COL_TYPE_ALL, + &pCxt->pPlanCxt->streamCxt.triggerScanList); + } +``` + +The neighboring `// set columns to scan` / `nodesCollectColumns(... COLLECT_COL_TYPE_COL ...)` block (currently at 602-606) MUST remain. + +### Subtask C.3: Delete the initializer in `parTranslater.c` + +- [ ] **Step C.3.1: Find it** + +```bash +grep -n "triggerScanList = NULL" /root/code/TDinternal/community/source/libs/parser/src/parTranslater.c +``` + +Expected: exactly one match. + +- [ ] **Step C.3.2: Delete the line** + +Edit `source/libs/parser/src/parTranslater.c`. Remove the single line: + +```c + .streamCxt.triggerScanList = NULL, +``` + +(The surrounding designated-initializer list will still compile — C allows omitted designated members, they default to zero.) + +### Subtask C.4: Delete the field declaration + +- [ ] **Step C.4.1: Edit the header** + +Edit `include/libs/planner/planner.h`. Remove the single line: + +```c + SNodeList* triggerScanList; +``` + +(located inside `SPlanStreamContext`, around line 44). + +### Subtask C.5: Build and verify + +- [ ] **Step C.5.1: Final grep — expect zero hits** + +```bash +cd /root/code/TDinternal/community +grep -rn "triggerScanList" --include='*.c' --include='*.h' source include +``` + +Expected: NO output (zero matches). + +- [ ] **Step C.5.2: Full build** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target taos -j$(nproc) 2>&1 | tail -10 +``` + +Expected: clean build, no "unused variable" / "undefined member" errors. 
+ +### Subtask C.6: Commit Task C + +- [ ] **Step C.6.1: Stage and commit** + +```bash +cd /root/code/TDinternal/community +git add include/libs/planner/planner.h \ + source/libs/planner/src/planLogicCreater.c \ + source/libs/parser/src/parTranslater.c +git -c user.name='Copilot' -c user.email='copilot@local' commit -m "refactor(stream): remove dead SPlanStreamContext::triggerScanList + +After the previous commit removed its only reader, the field is +unreferenced. Delete the field declaration, the COLLECT_COL_TYPE_ALL +filler in planLogicCreater.c that was specifically populating it for +SP_PARTITION_ROWS, and the now-useless = NULL initializer in +parTranslater.c. + +Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" +git log -3 --oneline +``` + +Expected: three new commits in total (Task A, B, C) on top of the spec commit. + +--- + +## Task D: Add Regression Test for the Optimization + +Add a focused test that pins down the new behavior so future refactors can't silently regress trigger or calc scan columns. + +**Files:** +- Create: `test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py` + +### Subtask D.1: Author the test + +- [ ] **Step D.1.1: Inspect a sibling test for the harness's idioms** + +```bash +sed -n '1,80p' /root/code/TDinternal/community/test/cases/18-StreamProcessing/04-Options/test_options_basic.py +``` + +Confirm imports (`tdLog`, `tdSql`, `tdStream`, `StreamCheckItem`) and the class/method shape (a `test_*` method that calls `tdStream.createSnode()` and runs `StreamCheckItem` instances). + +- [ ] **Step D.1.2: Inspect `Basic9` for an end-to-end pre_filter scenario to model after** + +```bash +sed -n '1505,1700p' /root/code/TDinternal/community/test/cases/18-StreamProcessing/04-Options/test_options_basic.py +``` + +Look at how `create()`, `insert1()`, `check1()` are organized. 
+ +- [ ] **Step D.1.3: Create the new test file** + +Create `/root/code/TDinternal/community/test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py` with the following content: + +```python +import time +from new_test_framework.utils import (tdLog, tdSql, tdStream, StreamCheckItem,) + + +class TestPreFilterTrowsScanCols: + """Regression for stream trigger/calc scan-cols optimization. + + Trigger AST must NOT include calc-only columns (c3, t2). + Calc AST MUST include pre_filter columns (c2) and apply pre_filter as WHERE, + so calc-side independent re-scan returns exactly the rows pre_filter allows. + """ + + precision = 'ms' + + def setup_class(cls): + tdLog.debug(f"start to execute {__file__}") + + def test_stream_pre_filter_trows_scan_cols(self): + """%%trows scan cols optimization + + Trigger only scans state_window + pre_filter cols; calc independently + re-scans with injected pre_filter WHERE, producing identical rows. + + Catalog: + - Streams:Options + + Since: v3.3.x + + Labels: common,ci + + Jira: None + """ + + tdStream.createSnode() + streams = [] + streams.append(self.PreFilterTrows()) + tdStream.checkAll(streams) + + class PreFilterTrows(StreamCheckItem): + def __init__(self): + self.db = "pf_trows_db" + + def create(self): + tdSql.execute(f"create database {self.db} vgroups 1") + tdSql.execute(f"use {self.db}") + tdSql.execute( + f"create stable stb (ts timestamp, c1 int, c2 int, c3 int) " + f"tags (t1 int, t2 int)" + ) + tdSql.execute(f"create table ct1 using stb tags(1, 100)") + tdSql.execute(f"create table ct2 using stb tags(2, 200)") + # Stream from the example in the design spec: + # trigger: state_window(c1) + pre_filter(c2>2) + # calc: select _c0, sum(c3), avg(t2) from %%trows + tdSql.execute( + f"create stream s_pf state_window(c1) from stb " + f"partition by t1 stream_options(pre_filter(c2 > 2)) " + f"into res_stb (firstts, sum_c3, avg_t2) as " + f"select first(_c0), sum(c3), avg(t2) from %%trows;" + ) + + def 
insert1(self): + sqls = [ + # ct1 (t1=1, t2=100): c1 alternates to drive state windows; + # c2 values include some <=2 (must be filtered out by pre_filter). + "insert into ct1 values ('2025-01-01 00:00:00', 1, 1, 10);", # c2<=2 -> filtered + "insert into ct1 values ('2025-01-01 00:00:01', 1, 5, 20);", + "insert into ct1 values ('2025-01-01 00:00:02', 1, 7, 30);", + "insert into ct1 values ('2025-01-01 00:00:03', 2, 8, 40);", # state change closes window + "insert into ct1 values ('2025-01-01 00:00:04', 2, 2, 50);", # c2<=2 -> filtered + "insert into ct1 values ('2025-01-01 00:00:05', 2, 9, 60);", + "insert into ct1 values ('2025-01-01 00:00:06', 1, 4, 70);", # state change + ] + tdSql.executes(sqls) + + def check1(self): + # Expect at least one closed window for ct1 with c1==1 spanning ts 1..2 + # (the ts 0 row is dropped by pre_filter). + # sum(c3) over rows kept = 20+30 = 50; avg(t2) = 100. + tdSql.checkResultsByFunc( + sql=f"select sum_c3, avg_t2 from {self.db}.res_stb " + f"where firstts = '2025-01-01 00:00:01';", + func=lambda: tdSql.getRows() == 1 + and tdSql.getData(0, 0) == 50 + and abs(tdSql.getData(0, 1) - 100.0) < 1e-9, + ) + + def check2(self): + # Negative: writing WHERE on %%trows must still be rejected. + tdSql.error( + f"create stream s_neg state_window(c1) from {self.db}.stb " + f"partition by t1 stream_options(pre_filter(c2 > 2)) " + f"into {self.db}.res_neg (firstts, s) as " + f"select first(_c0), sum(c3) from %%trows where c2 > 5;", + expectErrInfo="trows can not be used with WHERE clause", + ) +``` + +- [ ] **Step D.1.4: Smoke-run the new test** + +```bash +cd /root/code/TDinternal/community/test +pytest cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py -v 2>&1 | tail -30 +``` + +Expected: PASS. If the local environment cannot run integration tests, **stop** and report the limitation; do NOT mark the step done. 
+ +If the test fails on `check1` because the expected aggregate values differ (timing / window-closing semantics may be slightly off in this contrived dataset), inspect actual rows with: + +```bash +# add a one-shot debug print, e.g. in check1: print(tdSql.queryResult) +# then re-run and adjust the expected sum/avg to match real semantics — +# the test is meant to pin behavior, not to reverse-engineer it. +``` + +Do NOT change the expected values to match a buggy result. If aggregates look wrong relative to the spec's intent (rows with c2<=2 leaking in), that is a real regression — investigate Tasks A/B before relaxing the assertion. + +### Subtask D.2: Commit Task D + +- [ ] **Step D.2.1: Stage and commit** + +```bash +cd /root/code/TDinternal/community +git add test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py +git -c user.name='Copilot' -c user.email='copilot@local' commit -m "test(stream): pin %%trows + pre_filter scan-col behavior + +Adds a regression test mirroring the design-spec example: + trigger: state_window(c1) + pre_filter(c2>2) + calc: select _c0, sum(c3), avg(t2) from %%trows +The aggregate values verify pre_filter is actually applied on the calc +side (rows with c2<=2 must not contribute to sum(c3)/avg(t2)). A +negative case re-confirms that user-written WHERE on %%trows is still +rejected. + +Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" +``` + +--- + +## Final Verification + +- [ ] **Step F.1: Show the four-commit series** + +```bash +cd /root/code/TDinternal/community +git log --oneline -5 +``` + +Expected: spec commit + Task A + Task B + Task C + Task D commits in order, all attributed with `Co-authored-by: Copilot`. + +- [ ] **Step F.2: Confirm `triggerScanList` is gone** + +```bash +grep -rn "triggerScanList" --include='*.c' --include='*.h' source include +``` + +Expected: empty. 
+ +- [ ] **Step F.3: Confirm helper is in place** + +```bash +grep -cn "injectPreFilterIntoCalcQuery\b" source/libs/parser/src/parTranslater.c +grep -n "static int32_t injectPreFilterIntoCalcQuery" source/libs/parser/src/parTranslater.c +``` + +Expected: first command prints `>=2`; second command prints exactly 2 lines (the two `static int32_t injectPreFilterIntoCalcQuery...` definitions: `Impl` and the wrapper). + +- [ ] **Step F.4: Final taos build** + +```bash +cd /root/code/TDinternal/debug +cmake --build . --target taos -j$(nproc) 2>&1 | tail -5 +``` + +Expected: clean build. + +- [ ] **Step F.5: Run the broader stream `Basic9` (PRE_FILTER) and the new test together** + +```bash +cd /root/code/TDinternal/community/test +pytest cases/18-StreamProcessing/04-Options/test_options_basic.py::TestStreamOptionsBasic::test_stream_options_basic \ + cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py -v 2>&1 | tail -50 +``` + +Expected: PASS. Any failure must be diagnosed before declaring the work done — a regression on `Basic9` would mean the optimization broke an existing case, not just an aspirational one. diff --git a/docs/superpowers/specs/2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md b/docs/superpowers/specs/2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md new file mode 100644 index 000000000000..951390800ebb --- /dev/null +++ b/docs/superpowers/specs/2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md @@ -0,0 +1,288 @@ +# 流计算触发/计算 AST 扫描列优化 - 详细设计说明书(Design Spec) + +## 1. 修订记录 + +| 编写日期 | 发布日期 | 版本 | 修订人 | 主要修改内容 | +| --- | --- | --- | --- | --- | +| 2026-04-22 | 2026-04-22 | 0.1 | brainstorming session | 初稿 | + +## 2. 
引言 + +### 2.1 目的 + +优化 `CREATE STREAM` 语句在 client 端解析阶段生成 plan 时,触发 AST 与计算 AST 的扫描列集合,使两者各自只扫描其逻辑上必需的列;并保证使用 `%%trows` 时计算端独立扫描出的数据与触发端完全一致。 + +### 2.2 范围 + +仅涉及 client 侧 parser/planner: + +- `source/libs/parser/src/parTranslater.c` +- `source/libs/planner/src/planLogicCreater.c` +- `include/libs/planner/planner.h`(若 `SPlanContext.streamCxt.triggerScanList` 字段定义在此) + +不涉及 mnode、snode、vnode 执行链路;不修改任何 RPC 协议字段。 + +### 2.3 受众 + +流计算模块开发者、CR reviewer。 + +## 3. 术语 + +- **触发 AST(trigger AST)**:流计算建流时生成、用于驱动 trigger 触发判断的 SELECT 语句 AST,由 `createStreamReqBuildTriggerSelect` 构造。 +- **计算 AST(calc AST)**:用户在 `CREATE STREAM ... AS ` 中提供的 query AST,用于每次触发后实际计算输出。 +- **`%%trows`**:占位符,语义为"使用与本次触发相同的数据"。在 `translatePlaceHolderTable` 中被改写为对触发表的 `SRealTableNode`,并保留 `placeholderType = SP_PARTITION_ROWS`。**注意:触发数据并不通过流框架传递给计算,计算端是独立再扫一次触发表。** +- **pre_filter**:`STREAM_OPTIONS(PRE_FILTER())` 中的过滤表达式,作用于触发侧扫描的数据,决定哪些行参与触发判断。 +- **trigger 扫描列 / calc 扫描列**:plan 阶段 `SScanLogicNode.pScanCols` 最终落到 scan 节点要从存储层读取的列集合。 + +## 4. 概述 + +### 4.1 当前实现存在的问题 + +示例 SQL: + +```sql +CREATE STREAM s1 + STATE_WINDOW(c1) FROM stb + PARTITION BY t1 + STREAM_OPTIONS(PRE_FILTER(c2 > 2)) + INTO res_stb + AS SELECT _c0, sum(c3), avg(t2) FROM %%trows; +``` + +| AST | 当前扫描列 | 期望扫描列 | 问题 | +|---|---|---|---| +| 触发 AST | ts, c1, c2, c3, t1, t2 | ts, c1, c2 | 多扫了计算端引用的 c3、t2 | +| 计算 AST | ts, c3, t2 | ts, c2, c3, t2 | 缺少 pre_filter 引用列 c2,且未应用 `c2 > 2` 过滤 | + +根因: + +1. **触发侧多列**:`parTranslater.c:19205-19216` 在 `PLACE_HOLDER_PARTITION_ROWS` 时把计算 plan 阶段收集的 `triggerScanList`(来自 `planLogicCreater.c:600`,使用 `COLLECT_COL_TYPE_ALL` 收集计算 SELECT 全部列)追加进触发 select 的 `pProjectionList`。这是一种基于"计算用什么、触发就也带上"的错误假设;实际上触发只关心 trigger window / partition / pre_filter 引用的列。 + +2. 
**计算侧缺过滤**:`%%trows` 的语义要求计算端扫描结果与触发一致,但当前实现不会把 pre_filter 注入计算 select,导致 calc 端取到 pre_filter 之外的额外行,且不扫 pre_filter 引用的列。 + +### 4.2 优化目标 + +- 触发 AST 扫描列 = 时间列(plan 自动加) + trigger window 引用列 + partition 引用列 + pre_filter 引用列 + tbname。 +- 计算 AST 扫描列 = SELECT/原 WHERE 引用列 + (`%%trows` 时)pre_filter 引用列;同时 pre_filter 作为 WHERE 应用,使两端取到同样的行。 + +### 4.3 涉及代码 + +- `source/libs/parser/src/parTranslater.c`: + - `createStreamReqBuildCalc`(19104) + - `translateWhere`(10522) +- `source/libs/planner/src/planLogicCreater.c`: + - `createScanLogicNodeByRealTable`(598-601) +- `SPlanContext.streamCxt.triggerScanList` 定义点(`include/libs/planner/planner.h` 或同一文件 struct 定义处) + +## 5. 设计考虑 + +### 5.1 假设和限制 + +- pre_filter 表达式中的列引用与触发表 schema 一致;`%%trows` 改写后的 `SRealTableNode` 与触发表为同一张表,列名空间相同,`translateExpr` 对 clone 后的 pre_filter 能正常解析。 +- 现有限制保留:`parTranslater.c:19144-19152` 已经禁止"虚拟表 + `%%trows` + pre_filter"组合,本设计不改变此行为。 +- 用户在 `%%trows` 计算 select 中显式写 WHERE 仍然报错(沿用 `translateWhere` 现有错误信息 `%%trows can not be used with WHERE clause.`)。 + +### 5.2 设计原则 + +- **语义优先于实现取巧**:把 pre_filter 当作计算 select 的 WHERE 注入,而不是在 plan/reader 层"额外补列、额外过滤"。这样后续 translate / planner / scan-condition 下推 / 索引下推全部沿用标准路径。 +- **改动局部化**:所有改动集中在 parser + planner 两个文件;不引入新模块,不修改协议。 +- **死代码清理**:方案落地后 `triggerScanList` 不再有读端,应一并删除字段及其填充点,避免后续维护困惑。 + +### 5.3 风险与缓解 + +| 风险 | 缓解 | +|---|---| +| pre_filter clone 后 translate 失败(语义检查、列解析等) | 注入时机选择在 `translateStreamCalcQuery` 之前,确保走完整 translate;先通过测试用例覆盖典型 pre_filter 形式(含 tag 引用、函数调用、常量折叠)。 | +| union(`SSetOperator`)中 `%%trows` 出现 | 注入逻辑递归处理 SET_OPERATOR 的左右子,并复用各自一份 clone。 | +| `pSelect->pWhere` 由用户写入(与注入冲突) | 注入前显式检查 `pWhere == NULL`,非空时报与原 `translateWhere` 一致的错误,提前于 translate 报出。 | +| `triggerScanList` 字段被其他模块隐式读取 | grep 全仓库确认无第三方读端后再删除字段。 | +| 兼容性:旧版 stream 持久化 plan 不变 | 仅影响新建 stream 时 client 生成的 plan 内容,不影响 mnode/snode 已存 plan 反序列化路径。 | + +## 6. 
详细设计 + +### 6.1 触发侧改动 + +**核心**:删除"计算列追加进触发投影"的代码块。 + +**位置**:`source/libs/parser/src/parTranslater.c:19205-19216` + +```c +// REMOVE: +if (BIT_FLAG_TEST_MASK(pReq->placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + LIST_LENGTH(calcCxt.streamCxt.triggerScanList) > 0) { + PAR_ERR_JRET(nodesListAppendList(pTriggerSelect->pProjectionList, + calcCxt.streamCxt.triggerScanList)); + SNode* pCol = NULL; + FOREACH(pCol, pTriggerSelect->pProjectionList) { + if (nodeType(pCol) == QUERY_NODE_COLUMN) { + SColumnNode* pColumn = (SColumnNode*)pCol; + tstrncpy(pColumn->tableAlias, pColumn->tableName, TSDB_TABLE_NAME_LEN); + } + } +} +``` + +删除后触发投影列由现有 `createStreamReqBuildTriggerSelect`(18241)维持: + +- `nodesCollectColumnsFromNode(pStmt->pTrigger, NULL, COLLECT_COL_TYPE_COL, ...)` —— 收集 trigger 节点中出现的所有 COL 类型列(trigger window 与 pre_filter 中的普通列); +- 若有 pre_filter,单独收集其 TAG 列; +- 追加 `tbname()` 函数。 + +partition 列(如 `t1`)由 `createStreamReqBuildTriggerPlan`(17935)通过 `createStreamSetListSlotId` 单独收集到 `pPartitionCols`,并序列化进 `pReq->partitionCols`,不需要混入 trigger select 的 projection。 + +### 6.2 计算侧改动 + +#### 6.2.1 注入 pre_filter 到计算 query + +新增 helper(建议位于 `parTranslater.c` 中 `createStreamReqBuildCalc` 上方,模块私有 static): + +```c +// Inject trigger's pre_filter as WHERE into calc query when %%trows is used. +// This guarantees calc side independent re-scan returns the exact same rows +// as trigger side (since %%trows means "same data as trigger"). +static int32_t injectPreFilterIntoCalcQuery(STranslateContext* pCxt, + SCreateStreamStmt* pStmt); +``` + +行为: + +1. 取 `pPreFilter = pStmt->pTrigger->pOptions ? pStmt->pTrigger->pOptions->pPreFilter : NULL`。若为 NULL,直接返回成功。 +2. 
递归遍历 `pStmt->pQuery`: + - `QUERY_NODE_SELECT_STMT`: + - 若 `pSelect->pFromTable` 不是 `QUERY_NODE_PLACE_HOLDER_TABLE` 或 `placeholderType != SP_PARTITION_ROWS` → 跳过(非 `%%trows` 不注入)。 + - 否则,若 `pSelect->pWhere != NULL` → 报错 `%%trows can not be used with WHERE clause.`(沿用现有错误码 `TSDB_CODE_PAR_INVALID_STREAM_QUERY`)。 + - 否则,`nodesCloneNode(pPreFilter, &pSelect->pWhere)`;置 `pSelect->pWhereInjectedFromPreFilter = true`。 + - `QUERY_NODE_SET_OPERATOR`:递归 left 和 right。 + - 其他类型:直接返回(沿用 `translateStreamCalcQuery` 内对类型校验的处理;不在注入阶段重复报错)。 + +调用时机:在 `createStreamReqBuildCalc`(19104)中,`translateStreamCalcQuery`(19140)调用之前插入: + +```c +PAR_ERR_JRET(injectPreFilterIntoCalcQuery(pCxt, pStmt)); +PAR_ERR_JRET(translateStreamCalcQuery(pCxt, pTriggerPartition, ...)); +``` + +#### 6.2.2 调整 `translateWhere` + +`parTranslater.c:10522-10529` 当前实现: + +```c +if (pSelect->pWhere && + BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + inStreamCalcClause(pCxt)) { + PAR_ERR_RET(generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause.")); +} +``` + +修改为: + +```c +if (pSelect->pWhere && !pSelect->pWhereInjectedFromPreFilter && + BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + inStreamCalcClause(pCxt)) { + PAR_ERR_RET(generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause.")); +} +``` + +#### 6.2.3 `SSelectStmt` 字段扩展 + +在 `include/nodes/querynodes.h` 的 `SSelectStmt` 结构里新增: + +```c +bool pWhereInjectedFromPreFilter; // true if pWhere was injected by stream pre_filter +``` + +仅用于 §6.2.2 的检查旁路。其他模块(克隆 / 序列化)按现有 bool 字段处理方式同步即可(如 `nodesCloneNode` 默认按字段拷贝;序列化暂不需要写入到下发协议)。 + +### 6.3 死代码清理 + +| 文件 | 行 | 操作 | +|---|---|---| +| `parTranslater.c` | 19185 | 删除 `.streamCxt.triggerScanList = NULL` 初始化 | +| `parTranslater.c` | 19205-19216 | 删除追加块(同 §6.1) | +| `planLogicCreater.c` | 598-601 | 删除 `if 
(placeholderType == SP_PARTITION_ROWS) ... triggerScanList ...` 收集 | +| `SPlanContext.streamCxt` | 字段定义 | 删除 `triggerScanList` 字段及关联清理代码 | + +清理前 grep 全仓库确认无其他读端: + +```bash +rg "triggerScanList" --type c +``` + +### 6.4 关键数据结构 + +无新增数据结构。仅 `SSelectStmt` 增加一个 bool 字段(§6.2.3)。 + +### 6.5 数据流图 + +``` +CREATE STREAM SQL + │ + ▼ + Parser (sql.y) ──► SCreateStreamStmt {pTrigger, pQuery} + │ + ▼ +buildCreateStreamReq + │ + ├─► createStreamReqBuildTriggerAst + │ └─► createStreamReqBuildTriggerSelect + │ proj: trigger window cols + pre_filter cols + tbname() + │ + ├─► createStreamReqBuildCalc + │ ├─► [NEW] injectPreFilterIntoCalcQuery + │ │ SELECT FROM %%trows → SELECT FROM WHERE + │ ├─► translateStreamCalcQuery + │ │ translateWhere 不再误报(旁路标记) + │ ├─► qCreateQueryPlan + │ │ scan cols 自然包含 pre_filter 引用列 + │ │ pre_filter 作为 scan condition 下推 + │ │ [REMOVED] 19205-19216 追加块 + │ └─► createStreamReqBuildCalcPlan + │ + └─► createStreamReqBuildTriggerPlan + (partition cols 单独通道,独立于 trigger select 投影) +``` + +## 7. 接口规范 + +无对外接口变化。仅 client 内部 parser/planner 的实现层调整。 + +`SCMCreateStreamReq`(下发到 mnode 的 RPC 包)的字段语义不变;`triggerScanPlan` / `triggerCols` / `triggerFilterCols` / `partitionCols` / 计算 plan 内容因列集合调整而内容变化,但格式与解析路径不变,mnode/snode 端无需配套修改。 + +## 8. 安全考虑 + +不涉及。 + +## 9. 性能和可扩展性 + +- **触发侧**:减少不必要的列扫描(示例中减少 c3、t2 两列),单次触发 IO 与解码量下降;列越多收益越显著。 +- **计算侧**:新增 pre_filter 作为 scan condition 下推后,计算端在存储层即可过滤掉无关行,相比"全量扫 + 上层过滤"通常更优。pre_filter 中的列即使被多扫一份,由于过滤效果一般明显,整体仍属性能正向。 +- **行为正确性**:解决了 `%%trows` 场景下计算端取到 pre_filter 之外行的隐性 BUG。 + +## 10. 部署和配置 + +- 无新增配置项。 +- 仅影响新建 stream 时 client 生成的 plan;已有 stream 不受影响。 +- 回滚策略:直接 revert 本次提交即可,无 schema/协议兼容问题。 + +## 11. 
监控和维护 + +- 现有 `parserDebug` / `parserError` 日志保留;新增 helper `injectPreFilterIntoCalcQuery` 内部使用同样的 `parserDebug` 输出注入是否生效,便于排查。 +- 需在 `test/cases/18-StreamProcessing/` 下补充用例,覆盖: + - 用户原文示例(state_window + pre_filter + `%%trows`); + - pre_filter 引用 tag; + - pre_filter 含函数调用; + - 用户在 `%%trows` 上写 WHERE 仍应报错; + - union 中两边都用 `%%trows`; + - 无 pre_filter 时 `%%trows` 行为不变。 + +## 12. 参考资料 + +- 流计算 SQL 语法:https://docs.taosdata.com/reference/taos-sql/stream/ +- 语法定义:`source/libs/parser/inc/sql.y`(CREATE STREAM 规则 1565-1594) +- 触发 AST 构造:`source/libs/parser/src/parTranslater.c:18241` +- 计算 AST 构造:`source/libs/parser/src/parTranslater.c:19104` +- `%%trows` 改写:`source/libs/parser/src/parTranslater.c:7374` +- `triggerScanList` 填充点:`source/libs/planner/src/planLogicCreater.c:598` diff --git a/docs/superpowers/specs/2026-04-23-stream-optimization-unified-design.md b/docs/superpowers/specs/2026-04-23-stream-optimization-unified-design.md new file mode 100644 index 000000000000..9dfb03aa9f5c --- /dev/null +++ b/docs/superpowers/specs/2026-04-23-stream-optimization-unified-design.md @@ -0,0 +1,611 @@ +# 流计算优化(执行计划 + 历史数据拉取 + 虚拟表)— 统一设计说明书 + +> 本文合并 PR [#35196](https://github.com/taosdata/TDengine/pull/35196) 涉及的三组优化,覆盖 client→mnode→reader 全链路,并在最后给出"需求 vs 代码"逐项核对结论。 +> +> 原始需求:飞书 wiki [流计算优化](https://taosdata.feishu.cn/wiki/OIwhw3iBmifwEIk4Iq3cAms9nte) +> +> 取代/合并的旧文档: +> - `docs/superpowers/specs/2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md`(client/parser 子集) +> - 会话本地 `2026-04-20-stream-history-pull-optimization-design.md`(reader/history 子集) + +## 1. 修订记录 + +| 日期 | 版本 | 修订人 | 主要内容 | +| --- | --- | --- | --- | +| 2026-04-23 | 1.0 | Copilot | 三个优化合并:parser 触发/计算列分离;reader 触发/计算独立 filter+schema;TSDB 历史拉取 9 个新接口 + 虚拟表 history 双缓冲;含 PR #35196 代码核对 | + +## 2. 
引言 + +### 2.1 目的 + +在一个 PR 内联动完成 client(parser/planner)、mnode(兼容 sver)、reader(vnode)三处改动,使流计算"触发数据"与"计算数据"在 plan/扫描/过滤三层完全解耦,并把历史数据拉取协议升级为高效的 DiffRange / SameRange 批量接口。 + +### 2.2 范围 + +| 优化项 | 涉及模块 | 关键文件 | +| --- | --- | --- | +| ① 触发/计算 AST 扫描列分离 | client parser/planner | `parTranslater.c`, `planLogicCreater.c`, `planner.h`, `querynodes.h` | +| ② 触发/计算 Filter 物理分离 | reader | `streamReader.h`, `streamReader.c`, `vnodeStream.c` | +| ③ TSDB 历史拉取协议升级 | 协议 + reader | `streamMsg.h`, `streamMsg.c`, `vnodeStream.c` | +| ④ 虚拟表历史路径双缓冲 | reader | `streamReader.h`, `streamReader.c`, `vnodeStream.c` | +| ⑤ 跨版本兼容 | mnode | `mndStream.h`, `mndDef.c`, `mndStream.c`, `mndStreamMgmt.c` | + +不在范围:trigger 端(snode)状态机、WAL 协议(仍走旧路径,仅 filter 选择有调整)、文档/示例。 + +### 2.3 受众 + +流计算 reader/trigger 双方开发者、CR、QA。 + +## 3. 术语 + +| 术语 | 含义 | +| --- | --- | +| 触发 AST | `createStreamReqBuildTriggerSelect` 构造、用于驱动触发判断的 SELECT | +| 计算 AST | 用户在 `CREATE STREAM ... AS <query>` 中提供的 query AST | +| `%%trows` | 占位符,语义为"使用与本次触发相同的数据";计算端会**独立再扫一次触发表** | +| pre_filter | `STREAM_OPTIONS(PRE_FILTER(<expr>))` 中的过滤表达式 | +| triggerCols / calcCols | `STableScanPhysiNode.scan.pScanCols`,分别是触发 AST 与计算 AST 的扫描列 | +| triggerBlock / calcBlock | reader 端基于 triggerResBlock / calcResBlock 衍生的"对外统一 schema + 末尾 uid 列"block | +| pFilterInfoTrigger / pFilterInfoCalc | 由 `triggerAst->pNode->pConditions` / `calcAst->pNode->pConditions` 分别 init 的 SFilterInfo | +| DiffRange | 同一请求中多个 (uid, [skey,ekey]) 的拉取,每表时间范围可不同 | +| SameRange | 同一请求中所有表共用同一时间范围 [skey,ekey] | +| `isOldPlan` | 由 mnode 序列化版本(sver==8 → true)传给 reader 的兼容标志,控制 reader 是否走"trigger schema 扫描 → calc schema transform"老路径 | +| `vSetTableListHistory` | `SStreamTriggerReaderInfo` 新增字段,TSDB 路径专用的虚拟表原始表清单(`vSetTableList` 留给 WAL 路径) | +| `uidHashTriggerHistory` / `uidHashCalcHistory` | history 路径的 `<uid -> SHashObj<slotId -> colId>>` 映射,与实时路径 TSWAP 双缓冲隔离 | +| `streamTaskMapHistory` | `getSessionKey` → `SDiffRangeIter*`,DiffRange 跨 `_NEXT` 续传迭代器 | + +## 4.
概述 + +### 4.1 当前实现的问题 + +示例 SQL: + +```sql +CREATE STREAM s1 STATE_WINDOW(c1) FROM stb PARTITION BY t1 + STREAM_OPTIONS(PRE_FILTER(c2 > 2)) + INTO res_stb + AS SELECT _c0, sum(c3), avg(t2) FROM %%trows; +``` + +| AST | 旧实现扫描列 | 期望扫描列 | 旧实现问题 | +| --- | --- | --- | --- | +| 触发 AST | ts, c1, c2, c3, t1, t2 | ts, c1, c2 | 多扫了计算端引用的 c3、t2 | +| 计算 AST | ts, c3, t2 | ts, c2, c3, t2 + WHERE c2>2 | 缺少 pre_filter 引用列 c2,且 reader 用的是触发 schema 扫描后再赋值(多扫 + 多拷贝) | + +历史数据拉取层:旧 7 个 enum(`STRIGGER_PULL_TSDB_TS_DATA` … `STRIGGER_PULL_TSDB_DATA_NEXT`)每张表一次 RPC + 每次都重建 task,效率低;虚拟表 schema 按全列计算,多余列填 NULL 也走传输。 + +### 4.2 优化目标 + +1. 触发列 = 时间列 + window 引用列 + partition 列 + pre_filter 列 + tbname。 +2. 计算列 = SELECT/原 WHERE 引用列 + (`%%trows` 时)pre_filter 列;`pre_filter` 作为 WHERE 注入,使两端取到同样行。 +3. reader 历史路径用 9 个新 enum:`META`, `META_NEXT`, `DATA_DIFF_RANGE` × {Trigger, Calc} × {首, NEXT}, `DATA_SAME_RANGE` × {同上}, `SET_TABLE_HISTORY`。响应统一为单 `SSDataBlock`。 +4. 虚拟表历史路径全部使用 `*History` 字段,不污染实时路径。 +5. mnode 持久化版本号升级(`MND_STREAM_VER_NUMBER` 8→9),向下兼容旧 stream(标 `isOldPlan=true`)。 + +### 4.3 端到端架构 + +``` +┌──────────── Client (parser/planner) ────────────┐ +│ createStreamReqBuildTriggerSelect │ +│ ├ trigger window cols + pre_filter cols │ (triggerScanList 收集已删除) +│ └ + tbname() │ +│ createStreamReqBuildCalc │ +│ ├ injectPreFilterIntoCalcQuery (clone WHERE) │ (新增) +│ └ translateStreamCalcQuery → planner │ +│ scan cols 自然包含 pre_filter 引用列 │ +└─────────────────┬───────────────────────────────┘ + │ SCMCreateStreamReq + isOldPlan=false (sver=9) + ▼ +┌──────────── mnode ───────────────────────────────┐ +│ MND_STREAM_VER_NUMBER 8→9 │ +│ 反序列化时 sver==8 → 标 isOldPlan=true 兼容旧流 │ +│ msmBuildReaderDeployInfo 把 isOldPlan 透传 │ +└─────────────────┬───────────────────────────────┘ + │ SStreamReaderDeployFromTrigger (含 isOldPlan) + ▼ +┌──────────── Reader (vnode) ──────────────────────┐ +│ SStreamTriggerReaderInfo │ +│ triggerCols / calcCols (独立 schema) │ +│ triggerBlock / calcBlock (独立物理 block) │ +│ 
pFilterInfoTrigger / pFilterInfoCalc │ +│ uidHashTrigger* / uidHashCalc* │ +│ uidHashTriggerHistory / uidHashCalcHistory │ ← TSDB 路径 +│ vSetTableList / vSetTableListHistory │ +│ streamTaskMap / streamTaskMapHistory │ +│ │ +│ TDMT_STREAM_TRIGGER_PULL dispatcher │ +│ SET_TABLE / SET_TABLE_HISTORY → 同 handler │ +│ TSDB_META[_NEXT] │ +│ isVtableStream → MetaVtableReq │ +│ else → MetaReq │ +│ TSDB_DATA_DIFF_RANGE[_CALC][_NEXT] │ +│ → DiffRangeReq │ +│ TSDB_DATA_SAME_RANGE[_CALC][_NEXT] │ +│ → SameRangeReq (拒绝虚拟表) │ +└──────────────────────────────────────────────────┘ +``` + +## 5. 设计考虑 + +### 5.1 假设 + +- **同 schema 的物理隔离**:`triggerBlock` 与 `calcBlock` 由 reader 在 `createStreamReaderInfo` 时按各自 ResBlock + 末尾 1 列 BIGINT(uid) 构造,handler 直接以对应 schema 扫描,省一次 transform。 +- **`%%trows` 改写位置不变**:`translatePlaceHolderTable` 仍把 `%%trows` 改写为对触发表的 `SRealTableNode`,不影响本次改动。 +- **trigger 端会按新 enum 发起请求**:本 PR 后由 trigger 侧逐步切换;DEPRECATED 旧 enum 暂保留 `default → APP_ERROR` 路径,确保未切换时报错可见。 + +### 5.2 设计原则 + +1. **物理分离 > 标志位旁路**:`triggerCols`/`calcCols` 字段独立,避免任何"按场景切换同一字段"导致并发或时序问题。 +2. **TSWAP 替换 > 编辑原地**:`SET_TABLE_HISTORY` 走完全独立的 *History 字段集,不与实时路径竞争锁。 +3. **新 enum > 复用旧 enum**:协议层用类型代替布尔字段,避免分支噪声;旧 enum 保留 + DEPRECATED 注释,待 trigger 侧切完后清理。 +4. 
**代码注释一律英文**(参见 `~/.copilot/copilot-instructions.md`)。 + +### 5.3 风险 + +| 风险 | 缓解 | +| --- | --- | +| 旧 stream(sver=8)反序列化后字段缺失 | `tDecodeSStreamObj` 在 `sver == OLD_TRIGGER_COLS(8)` 时设 `isOldPlan=true`;reader `TRANSFORM_DATA_TO_CALC` 宏在该路径下做 schema 转换 | +| pre_filter 注入在 union(`SSetOperator`) 两侧失败一侧 | `injectPreFilterIntoCalcQueryImpl` 在右侧失败时回滚左侧已注入的 pWhere | +| 虚拟表 + `%%trows` + pre_filter 历史限制被解除 | 旧版禁用块已删除(line 19250);依赖 calc query 独立 plan 与 reader 端 calcCols 工作正常 | +| DiffRange iter 跨 `_NEXT` 丢失 | `streamTaskMapHistory` 用 `freeFp = releaseDiffRangeIterFp`;handler 错误路径主动 `taosHashRemove` 让后续 NEXT 收 `STREAM_NO_CONTEXT` | +| SameRange 误传虚拟表 | 入口 `if (isVtableStream) return INVALID_PARA` | +| 旧 7 个 TSDB 接口残留代码 | 5 个旧 handler 已删除;旧 enum 进入 `default` 分支报 `APP_ERROR` | + +## 6. 详细设计 + +### 6.1 ① Client:触发/计算列分离 + +#### 6.1.1 触发侧 — 删除"计算列追加"逻辑 + +`parTranslater.c:19205-19216` 整块删除,触发投影列回归 `createStreamReqBuildTriggerSelect` 默认行为: + +- `nodesCollectColumnsFromNode(pStmt->pTrigger, ..., COLLECT_COL_TYPE_COL, ...)` 收集 trigger 节点(window + pre_filter 普通列) +- 单独收集 pre_filter 的 TAG 列 +- 追加 `tbname()` + +partition 列由 `createStreamReqBuildTriggerPlan` → `createStreamSetListSlotId` 单独通道写入 `pReq->partitionCols`。 + +#### 6.1.2 计算侧 — `injectPreFilterIntoCalcQuery` + +新增静态 helper(`parTranslater.c:19102+`),在 `translateStreamCalcQuery` 之前调用: + +```c +static int32_t injectPreFilterIntoCalcQueryImpl(STranslateContext*, SNode* pPreFilter, SNode* pQuery); +``` + +行为: +1. `pQuery` 为 `SSetOperator` → 递归 left/right;右失败时回滚左已注入的 `pWhere` 与 `pWhereInjectedFromPreFilter`。 +2. `pQuery` 为 `SSelectStmt` 且 `pFromTable` 为 `SP_PARTITION_ROWS` 占位符: + - 若 `pSelect->pWhere != NULL` → 报 `%%trows can not be used with WHERE clause.` + - 否则 `nodesCloneNode(pPreFilter, &pSelect->pWhere)`;置 `pWhereInjectedFromPreFilter = true`。 +3. 
其他类型直接返回成功(沿用原 `translateStreamCalcQuery` 校验路径)。 + +#### 6.1.3 `SSelectStmt` 字段 + +```c +// include/libs/nodes/querynodes.h +bool pWhereInjectedFromPreFilter; // true if pWhere was cloned from stream pre_filter +``` + +#### 6.1.4 `translateWhere` 旁路 + +```c +if (pSelect->pWhere && !pSelect->pWhereInjectedFromPreFilter && ...) { + // 报 %%trows + WHERE 错误 +} +``` + +#### 6.1.5 死代码清理 + +| 文件 | 操作 | +| --- | --- | +| `parTranslater.c:19185` | 删除 `.streamCxt.triggerScanList = NULL` | +| `parTranslater.c:19250-19261` | 删除"虚拟表 + %%trows + pre_filter"禁用块 | +| `parTranslater.c:19268-19279` | 删除"追加 calc cols 到 trigger 投影"块 | +| `planLogicCreater.c:598-601` | 删除 `if (placeholderType == SP_PARTITION_ROWS) ... triggerScanList ...` 收集 | +| `planner.h:SPlanStreamContext` | 删除 `triggerScanList` 字段 | + +### 6.2 ② Reader:触发/计算 filter 与 schema 物理分离 + +`SStreamTriggerReaderInfo` 字段升级(`streamReader.h`): + +```c +- SNode* pConditions; // 删除(仅在 createStreamReaderInfo 局部使用过) +- SFilterInfo* pFilterInfo; // 删除 ++ SFilterInfo* pFilterInfoTrigger; // 由 triggerAst->pNode->pConditions init ++ SFilterInfo* pFilterInfoCalc; // 由 calcAst->pNode->pConditions init ++ SNodeList* calcCols; // calcAst 的 scan.pScanCols ++ int8_t isOldPlan; // 老 plan 兼容 +- SNodeList* triggerPseudoCols; // 改成局部变量 +``` + +`createStreamReaderInfo` 内: +- 触发 AST:`pFilterInfoTrigger = filterInitFromNode(triggerAst->pNode->pConditions)`;`triggerCols = scan.pScanCols`;构造 `triggerBlock = triggerResBlock + 1 列 BIGINT(uid)`。 +- 计算 AST:`pFilterInfoCalc = filterInitFromNode(calcAst->pNode->pConditions)`;`calcCols = scan.pScanCols`;`calcBlock = calcResBlock + 1 列 BIGINT(uid)`。 + +WAL 路径(`processWalVerMetaDataNew` / `processWalVerDataNew` / `filterData`): +- `filterData` 根据 `(!isOldPlan && resultRsp->isCalc)` 选 `pFilterInfoCalc`,否则用 `pFilterInfoTrigger`。 +- 虚拟表流跳过 `filterData`(虚拟表过滤在 trigger 端做)。 + +### 6.3 ③ 历史拉取协议(9 enum + 2 新结构) + +#### 6.3.1 enum 增 9(`streamMsg.h:679+`) + +```c +// 旧 7 个标 DEPRECATED: +STRIGGER_PULL_TSDB_TS_DATA, 
STRIGGER_PULL_TSDB_TRIGGER_DATA[_NEXT], +STRIGGER_PULL_TSDB_CALC_DATA[_NEXT], STRIGGER_PULL_TSDB_DATA[_NEXT] + +// 新增: +STRIGGER_PULL_TSDB_DATA_DIFF_RANGE, +STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT, +STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC, +STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT, +STRIGGER_PULL_TSDB_DATA_SAME_RANGE, +STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT, +STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC, +STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT, +STRIGGER_PULL_SET_TABLE_HISTORY, +``` + +#### 6.3.2 新结构 + +```c +typedef struct SSTriggerTableTimeRange { + int64_t suid; // 0 表示非虚拟表;虚拟表时为 uid 对应的 suid + int64_t uid; + int64_t skey; + int64_t ekey; +} SSTriggerTableTimeRange; + +typedef struct SSTriggerTsdbDataDiffRangeRequest { + SSTriggerPullRequest base; + int64_t ver; + int8_t order; + SArray* ranges; // SArray +} SSTriggerTsdbDataDiffRangeRequest; + +typedef struct SSTriggerTsdbDataSameRangeRequest { + SSTriggerPullRequest base; + int64_t ver; + int64_t gid; // 0=全部表;非 0=单 group + int64_t skey; + int64_t ekey; + int8_t order; +} SSTriggerTsdbDataSameRangeRequest; + +typedef SSTriggerPullRequest SSTriggerTsdbDataDiffRangeNextRequest; +typedef SSTriggerPullRequest SSTriggerTsdbDataSameRangeNextRequest; +``` + +`SSTriggerSetTableRequest` **不变**:`SET_TABLE_HISTORY` 复用同一结构、同一序列化分支(`case STRIGGER_PULL_SET_TABLE: case STRIGGER_PULL_SET_TABLE_HISTORY:` fallthrough)。 + +#### 6.3.3 响应 + +所有 9 个接口(除 `SET_TABLE_HISTORY`,无 payload)统一 `buildRsp(SSDataBlock*)` → 单 block。列布局: + +| 接口 | 非虚拟表 列 | 虚拟表 列 | +| --- | --- | --- | +| `META` / `META_NEXT` | sKey, eKey, uid, gid, rows | sKey, eKey, uid, rows | +| `DATA_DIFF_RANGE[_CALC]` | triggerResBlock 或 calcResBlock 列 + uid | 同左 + 按 `uidHash*History` 做 slotId→colId 映射 | +| `DATA_SAME_RANGE[_CALC]` | 同上 | **不支持** | + +### 6.4 ④ Reader:handler 实现 + +#### 6.4.1 dispatch(`vnodeStream.c:4265+`) + +```c +case STRIGGER_PULL_SET_TABLE: +case STRIGGER_PULL_SET_TABLE_HISTORY: + → vnodeProcessStreamSetTableReq // 按 base.type 路由 *History 字段 + 
+case STRIGGER_PULL_TSDB_META[_NEXT]: + isVtableStream ? vnodeProcessStreamTsdbMetaVtableReq + : vnodeProcessStreamTsdbMetaReq + +case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE[_CALC][_NEXT]: + → vnodeProcessStreamTsdbDataDiffRangeReq + +case STRIGGER_PULL_TSDB_DATA_SAME_RANGE[_CALC][_NEXT]: + → vnodeProcessStreamTsdbDataSameRangeReq // 入口拒绝虚拟表 + +default: // 包含 5 个旧 enum + return TSDB_CODE_APP_ERROR +``` + +#### 6.4.2 `vnodeProcessStreamSetTableReq` + +```c +taosWLockLatch(&info->lock); +if (req->base.type == STRIGGER_PULL_SET_TABLE_HISTORY) { + TSWAP(info->uidHashTriggerHistory, req->setTableReq.uidInfoTrigger); + TSWAP(info->uidHashCalcHistory, req->setTableReq.uidInfoCalc); + qStreamClearTableInfo(&info->vSetTableListHistory); + initStreamTableListInfo(&info->vSetTableListHistory); + qBuildVTableListHistory(info); // 用 uidHashTriggerHistory 重建 vSetTableListHistory +} else { + TSWAP(info->uidHashTrigger, ...); + TSWAP(info->uidHashCalc, ...); + qStreamClearTableInfo(&info->vSetTableList); + initStreamTableListInfo(&info->vSetTableList); + qBuildVTableList(info); +} +taosWUnLockLatch(&info->lock); +``` + +实时与历史路径完全 TSWAP 隔离。 + +#### 6.4.3 `vnodeProcessStreamTsdbMetaVtableReq`(虚拟表 META) + +虚拟表 `vSetTableListHistory` 可能含多个 suid,每个 suid 需独立 `createStreamTask`: + +```c +if (isFirst) { + qStreamCopyTableInfo(info, &tableInfo, /*isHistory=*/true); + qStreamIterTableList(&tableInfo, &pList, &pNum, &suid); // 取第一组 + createStreamTask(...); TSWAP(pTaskInner->vTableInfo, tableInfo); + taosHashPut(streamTaskMap, &key, &pTaskInner); + createBlockForTsdbMeta(&pTaskInner->pResBlockDst, /*isVtable=*/true); // 4 列 +} else { + pTaskInner = streamTaskMap[key]; +} + +while (true) { + getTableDataInfo(pTaskInner, &hasNext); + if (!hasNext) { + // 当前 suid 扫完,取下一 suid 重建 task(继承 vTableInfo 游标) + qStreamIterTableList(&pTaskInner->vTableInfo, &pList, &pNum, &suid); + if (pNum == 0) break; + createStreamTask(...); TSWAP(pTaskInnerNew->vTableInfo, pTaskInner->vTableInfo); + 
taosHashPut(streamTaskMap, &key, &pTaskInnerNew); // 替换旧 task + pTaskInner = pTaskInnerNew; + } + pTaskInner->pResBlock->info.id.groupId = qStreamGetGroupIdFromSet(...); + addColData(pResBlockDst, idx, &skey/&ekey/&uid/&rows); + if (rows >= STREAM_RETURN_ROWS_NUM) break; +} +buildRsp(pTaskInner->pResBlockDst); +if (!hasNext) taosHashRemove(streamTaskMap, &key); +``` + +#### 6.4.4 `vnodeProcessStreamTsdbDataDiffRangeReq` + +迭代器 `SDiffRangeIter { ranges, pos, ver, order, isCalc }` 缓存到 `streamTaskMapHistory[sessionKey(sid, baseType)]`,`baseType` 区分 trigger/calc 共用同一 session。 + +```c +isCalc = (type == DIFF_RANGE_CALC || type == DIFF_RANGE_CALC_NEXT) +isFirst = (type == DIFF_RANGE || type == DIFF_RANGE_CALC) +baseType= isCalc ? DIFF_RANGE_CALC : DIFF_RANGE +key = sessionKey(sid, baseType) + +if (isFirst) { + init streamTaskMapHistory(once) with freeFp=releaseDiffRangeIterFp + newDiffRangeIter(req.ranges, ver, order, isCalc, &iter) + taosHashPut(streamTaskMapHistory, key, iter) // 失败时手动 destroyDiffRangeIter +} else { + iter = streamTaskMapHistory[key] // 没有 → STREAM_NO_CONTEXT +} + +// pCur 复用:scanOneTableForRange 内部检查 *outBlock==NULL 才创建 +while (iter->pos < N) { + scanOneTableForRange(.., iter->ranges[pos], iter->ver, iter->order, iter->isCalc, &pCur) + iter->pos++ + if (pCur && pCur->rows >= STREAM_RETURN_ROWS_NUM) break +} + +TRANSFORM_DATA_TO_CALC // 老 plan 时 trigger schema → calc schema +buildRsp(pCur) +if (iter->pos >= N) taosHashRemove(streamTaskMapHistory, key) // freeFp 释放 iter + +end: + if (code != 0 && iter != NULL) taosHashRemove(...); // 错误时驱逐残块迭代器 + blockDataDestroy(pCur) +``` + +#### 6.4.5 `scanOneTableForRange`(DiffRange 工作函数) + +签名:`(pVnode, info, range, ver, order, isCalc, SSDataBlock** outBlock)`,**累积模式**:每次 range 把行 append 到 `*outBlock`。 + +```c +NEW_CALC = (!info->isOldPlan && isCalc) +tmplBlock = NEW_CALC ? 
info->calcBlock : info->triggerBlock + +if (info->isVtableStream) { + pickSchemasHistory(info, range->suid, range->uid, isCalc, + &schemas, &slotIdList); // 按 uidHash*History 取列 + options.schemas = schemas; + options.pSlotList = &slotIdList; + options.isSchema = true; +} else { + options.schemas = NEW_CALC ? info->calcCols : info->triggerCols; + options.isSchema = false; +} + +if (*outBlock == NULL) createOneDataBlock(tmplBlock, false, outBlock); +startIndex = (*outBlock)->info.rows; + +createStreamTask(.., tmplBlock, &(STableKeyInfo){range->uid, gid}, 1, ..); + +while (getTableDataInfo(pTaskInner, &hasNext), hasNext) { + getTableData(pTaskInner, &pBlock); + if (pBlock->rows > 0) processTag(info, isCalc, uid, pBlock, 0, rows, 1); + if (!info->isVtableStream) + qStreamFilter(pBlock, NEW_CALC ? pFilterInfoCalc : pFilterInfoTrigger, NULL); + if (pBlock->rows > 0) { + blockDataMerge(*outBlock, pBlock); + totalRows += pBlock->rows; + } +} + +// uid 列填到本 range 新增行段: +colDataSetNItems(lastCol(*outBlock), startIndex, &range->uid, totalRows, 1, false); + +end: free schemas/slotIdList; releaseStreamTask(&pTaskInner); +``` + +#### 6.4.6 `vnodeProcessStreamTsdbDataSameRangeReq` + +```c +if (info->isVtableStream) return TSDB_CODE_INVALID_PARA; // 入口拒绝 + +isCalc/isFirst/baseType/key 同 DiffRange,base 用 SAME_RANGE +tmplBlock = NEW_CALC ? info->calcBlock : info->triggerBlock + +if (isFirst) { + qStreamGetTableList(info, gid, &pList, &pNum); + options.schemas = NEW_CALC ? 
info->calcCols : info->triggerCols; + createStreamTask(.., tmplBlock, pList, pNum, ..); + taosHashPut(streamTaskMap, key, pTaskInner); // 注:用 streamTaskMap,不是 *History +} else { + pTaskInner = streamTaskMap[key]; +} + +createOneDataBlock(tmplBlock, false, &pCur); // 累积响应 block + +while (getTableDataInfo(pTaskInner, &hasNext), hasNext) { + getTableData(pTaskInner, &pBlock); // pBlock 借用 pTaskInner 内部缓冲 + if (pBlock->rows > 0) processTag(info, isCalc, pBlock->info.id.uid, pBlock, 0, rows, 1); + qStreamFilter(pBlock, NEW_CALC ? pFilterInfoCalc : pFilterInfoTrigger, NULL); + if (pBlock->rows == 0) continue; + + // 在 pBlock 末列就地写 uid(pBlock 与 tmplBlock 同 schema,末列是 BIGINT(uid)) + colDataSetNItems(lastCol(pBlock), 0, &pBlock->info.id.uid, pBlock->rows, 1, false); + + blockDataMerge(pCur, pBlock); // 同 schema,安全 append + if (pCur->rows >= STREAM_RETURN_ROWS_NUM) break; +} + +TRANSFORM_DATA_TO_CALC +buildRsp(pCur) +if (!hasNext) taosHashRemove(streamTaskMap, key) +end: blockDataDestroy(pCur); free(pList); +``` + +#### 6.4.7 `TRANSFORM_DATA_TO_CALC` 宏(兼容老 plan) + +```c +#define TRANSFORM_DATA_TO_CALC \ + if (info->isOldPlan && isCalc && pCur && pCur->info.rows > 0) { \ + createOneDataBlock(info->calcBlock, false, &pResult); \ + blockDataEnsureCapacity(pResult, pCur->info.capacity); \ + blockDataTransform(pResult, pCur); \ + blockDataDestroy(pCur); \ + pCur = pResult; pResult = NULL; \ + } +``` + +老 plan 时 reader 仍按 trigger schema 扫描(因为旧 calcCols 不可信),扫完再统一 transform 到 calc schema。新 plan (isOldPlan=false) 直接以 calc schema 扫描,跳过本宏。 + +#### 6.4.8 DataBlock 生命周期总表 + +| handler | pCur 来源 | pBlock 来源 | merge 单位 | uid 写位置 | +| --- | --- | --- | --- | --- | +| DiffRange | `scanOneTableForRange` 第一次创建后跨 range 累积 | pTaskInner 内部缓冲(每 range 新建 task) | range 内每批 append 到 `*outBlock` | 每 range 末尾写入 [startIndex, +totalRows) | +| SameRange | handler 入口 `createOneDataBlock(tmplBlock)` | pTaskInner 内部缓冲(一次 task 多批) | 每批 append 到 `pCur` | pBlock 末列就地写 [0, pBlock->rows) → 随 merge 进入 pCur | +| 
Meta(非虚拟表) | pTaskInner->pResBlockDst(task 内置) | pTaskInner->pResBlock | 逐行 addColData | `addColData(.., &pResBlock->info.id.uid)` | +| Meta(虚拟表) | 同上 | 同上 + 跨 suid 重建 task 时通过 vTableInfo 游标接力 | 同上 | 4 列布局,无 gid | + +### 6.5 ⑤ Mnode 兼容(`mndStream.h/c`、`mndDef.c`、`mndStreamMgmt.c`) + +```c +#define MND_STREAM_VER_NUMBER 9 +#define MND_STREAM_COMPATIBLE_VER_NUMBER 7 +#define MND_STREAM_OLD_TRIGGER_COLS 8 +``` + +`tDecodeSStreamObj`: + +```c +if (sver >= MND_STREAM_OLD_TRIGGER_COLS) { // 8 或 9 + tDeserializeSCMCreateStreamReqImpl(pDecoder, pObj->pCreate); + pObj->pCreate->isOldPlan = (sver == MND_STREAM_OLD_TRIGGER_COLS); // 8 → true +} else { // 7 + tDeserializeSCMCreateStreamReqImplOld(...); +} +``` + +`mndStreamActionDecode`:`sver < COMPATIBLE_VER_NUMBER(7)` 才视为不兼容;`>=7` 一律可解析。 + +`msmBuildReaderDeployInfo`:把 `pInfo->pCreate->isOldPlan` 写入 `pTrigger->isOldPlan`,序列化到 `SStreamReaderDeployFromTrigger.isOldPlan`,reader 在 `createStreamReaderInfo` 取出存入 `info->isOldPlan`。 + +## 7. 接口规范 + +### 7.1 RPC 字段不变 + +`SCMCreateStreamReq` 新增 `bool isOldPlan`(仅 mnode↔reader 部署消息内部用,不对外)。其他下发协议字段语义不变。 + +### 7.2 9 个新拉取接口 + +| Trigger 请求类型 | enum | 内容 | 非虚拟表响应 | 虚拟表响应 | +| --- | --- | --- | --- | --- | +| TsdbMetaNew | `STRIGGER_PULL_TSDB_META` | ver, sKey, eKey, gid, order | `sKey, eKey, uid, gid, rows` 5 列 block | `sKey, eKey, uid, rows` 4 列 block | +| TsdbMetaNewNext | `STRIGGER_PULL_TSDB_META_NEXT` | – | 同上 | 同上 | +| TsdbDataDiffRangeNew[Calc] | `STRIGGER_PULL_TSDB_DATA_DIFF_RANGE[_CALC]` | ver, order, `Array<{suid,uid,sKey,eKey}>` | triggerResBlock\|calcResBlock + uid | 同左,按 `uidHash*History` 做 slotId→colId 映射,无映射列填 NULL | +| TsdbDataDiffRangeNew[Calc]Next | `_NEXT` 变体 | – | 同上 | 同上 | +| TsdbDataSameRangeNew[Calc] | `STRIGGER_PULL_TSDB_DATA_SAME_RANGE[_CALC]` | ver, gid, sKey, eKey, order | 同上 | **不支持**(`INVALID_PARA`) | +| TsdbDataSameRangeNew[Calc]Next | `_NEXT` 变体 | – | 同上 | 不支持 | +| SetTableHistory | `STRIGGER_PULL_SET_TABLE_HISTORY` | `SSTriggerSetTableRequest`(同 SET_TABLE) | – | 写入 `vSetTableListHistory` + `uidHash*History` | + +## 8.
性能与正确性 + +- 触发列减少 → 每次触发扫描 IO/解码下降,列越多收益越大。 +- 计算端独立 calcCols + pre_filter 下推 → 存储层即可过滤,避免 reader 多扫 + 多次列赋值。 +- DiffRange 一次 RPC 拉多 (uid, range) → trigger↔reader RPC 数量与 task 创建次数显著下降。 +- `_NEXT` 续传 + `STREAM_RETURN_ROWS_NUM` 截断 → 大结果集不阻塞单 RPC 且不丢游标。 +- 双缓冲 (`*History`) → 实时与历史拉取互不阻塞,TSWAP 替换避免锁竞争。 +- 行为修复:`%%trows` 计算端真正与触发端取到同行。 + +## 9. 测试 + +- `source/libs/new-stream/test/streamMsgTest.cpp`:6 个 gtest,覆盖 SET_TABLE 两类 type、DiffRange/SameRange 4 种变体、空/大量/连续 destroy 场景。 +- `test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py`:覆盖 user 原 SQL + pre_filter 引用 tag / 函数 / 用户写 WHERE 仍报错 / 无 pre_filter 行为不变。 + +## 10. 需求 ↔ 代码核对 + +> 标记:✅ 完全符合 / ⚠️ 符合但有遗留 / ❌ 不符合 + +| # | 需求 | 实现位置 | 结论 | +| --- | --- | --- | --- | +| 1 | 触发 AST 不再多扫计算端列(c3,t2) | `parTranslater.c` 删 19250-19279 / 19268-19279;`planLogicCreater.c` 删 598-601;`planner.h` 删 `triggerScanList` | ✅ | +| 2 | 计算 AST 增加 pre_filter 引用列 + WHERE 注入 | `parTranslater.c:19102+ injectPreFilterIntoCalcQuery*`;`querynodes.h` 加 `pWhereInjectedFromPreFilter`;`translateWhere` 旁路 | ✅ | +| 3 | server 端计算用独立 calc AST 扫描(不再借触发数据赋值) | `streamReader.h/c`:新增 `calcCols` / `pFilterInfoCalc` / `calcBlock` 独立字段;`scanOneTableForRange` `NEW_CALC` 选 calc schema | ✅ | +| 4 | 9 个新 enum + 2 新结构 | `streamMsg.h:679+`、`streamMsg.c` 序列化/反序列化/destroy | ✅ | +| 5 | DiffRange `Array` + ver + order | `SSTriggerTsdbDataDiffRangeRequest` + `SSTriggerTableTimeRange` | ✅ | +| 6 | SameRange ver/gid/skey/ekey/order;虚拟表不支持 | 入口 `if (isVtableStream) return INVALID_PARA` | ✅ | +| 7 | SetTableHistory 走相同 SSTriggerSetTableRequest | encode/decode/destroy 三处均 fallthrough;`vnodeProcessStreamSetTableReq` 按 base.type 路由 *History 字段 | ✅ | +| 8 | 新增 `vSetTableListHistory` / `uidHashTriggerHistory` / `uidHashCalcHistory` | `streamReader.h:60-100`;TSDB 路径全用 *History;WAL 路径不变 | ✅ | +| 9 | 虚拟表 DiffRange 用 *History 做 slotId→colId 映射 | `pickSchemasHistory(.., uidHash*History..)` + `options.pSlotList` 传给 tsdReader | ✅ | +| 10 | 兼容旧 stream(mnode 序列化升级) | 
`MND_STREAM_VER_NUMBER 8→9`;`OLD_TRIGGER_COLS=8` 视为 isOldPlan;reader `TRANSFORM_DATA_TO_CALC` 宏兜底 | ✅ | + +### 10.1 实现侧值得关注的点(非阻塞) + +| # | 现象 | 位置 | 评估 | +| --- | --- | --- | --- | +| A | "虚拟表 + %%trows + pre_filter" 历史限制被解除 | `parTranslater.c` 删除原 19250 块 | ⚠️ 需求文档未明示是否要保留;新方案 calcCols 独立后理论可支持,建议补 e2e 用例验证 | +| B | SameRange handler 内残留 `if (!isVtableStream)` 守卫(line 3454) | `vnodeStream.c:3454` | ⚠️ 入口已拒虚拟表,此处恒真;冗余无害,可后续清理 | +| C | DiffRange / SameRange 函数中 `pResult` 局部变量仅在 `TRANSFORM_DATA_TO_CALC` 宏内部赋值 | `vnodeStream.c:3284, 3378` | ⚠️ end 路径 `blockDataDestroy(NULL)` 安全;可读性不佳,建议把宏改为 inline helper 或加注释 | +| D | DEPRECATED 旧 7 个 enum 仍占据 `ESTriggerPullType` 数值位 | `streamMsg.h:680-687` | ⚠️ 等待 trigger 端切换完后清理;当前 dispatcher 走 `default → APP_ERROR` 不会误处理 | +| E | `processTag` 在 calc 路径也调用(旧版 `!isCalc` 守卫被去掉) | DiffRange/SameRange handler | ✅ 与新需求一致:calc AST 也可能引用 tag 列(如 `avg(t2)`) | +| F | `scanOneTableForRange` 复用 `*outBlock`:跨 range 累积,逐 range 写 uid | `vnodeStream.c:3211-3260` | ✅ 起点 `startIndex`,写入区间 `[startIndex, +totalRows)`,与 merge 行为对齐 | +| G | DiffRange 错误路径 iter 驱逐 | `vnodeStream.c:3350-3354` | ✅ 防止下一 `_NEXT` 复用残块迭代器 | +| H | mnode `mndStreamActionDecode` 由"== 等值校验"改为">= 兼容版本"放行 | `mndStream.c:85` | ✅ 更宽松,向下兼容;同时配合 `tDecodeSStreamObj` 内部分支 | + +### 10.2 建议的后续行动 + +1. 补一个用例:虚拟表 + `%%trows` + pre_filter 端到端,验证 §10.1-A 限制解除后行为正确。 +2. 删除 §10.1-B 冗余守卫;§10.1-C 把 `TRANSFORM_DATA_TO_CALC` 宏重构为 inline helper(或保留宏但补 doc comment)。 +3. trigger 侧切到 9 个新 enum 后,把 §10.1-D 中 7 个旧 enum 从 `ESTriggerPullType` 移除(含 `streamMsg.c` 序列化兜底分支)。 +4. 把本文从 `docs/superpowers/specs/` 移交到产品文档库,并归档 `2026-04-22-stream-trigger-calc-scan-cols-optimize-design.md`、本地会话 `2026-04-20-stream-history-pull-optimization-design.md`。 + +## 11. 
参考资料 + +- 需求源:[飞书 wiki — 流计算优化](https://taosdata.feishu.cn/wiki/OIwhw3iBmifwEIk4Iq3cAms9nte) +- PR:[taosdata/TDengine#35196](https://github.com/taosdata/TDengine/pull/35196) +- 流计算 SQL 语法:https://docs.taosdata.com/reference/taos-sql/stream/ +- 虚拟表语法:https://docs.taosdata.com/reference/taos-sql/virtualtable/ +- 关键源码位置(截至 PR HEAD `d18cce6`): + - 协议:`include/common/streamMsg.h:679-916`、`source/common/src/msg/streamMsg.c:3097-3570` + - Reader 信息:`include/libs/new-stream/streamReader.h:41-170`、`source/libs/new-stream/src/streamReader.c:178-780` + - Reader handler:`source/dnode/vnode/src/vnd/vnodeStream.c:2818-3489, 4255-4328` + - Parser/Planner:`source/libs/parser/src/parTranslater.c:10522-10529, 19102-19260`、`source/libs/planner/src/planLogicCreater.c:595-602` + - Mnode 兼容:`source/dnode/mnode/impl/inc/mndStream.h:102-110`、`source/dnode/mnode/impl/src/mndDef.c:47-55`、`source/dnode/mnode/impl/src/mndStream.c:80-90`、`source/dnode/mnode/impl/src/mndStreamMgmt.c:765-770` diff --git a/include/common/streamMsg.h b/include/common/streamMsg.h index 49af02aa5251..b65923298e72 100644 --- a/include/common/streamMsg.h +++ b/include/common/streamMsg.h @@ -217,6 +217,7 @@ typedef struct { void* triggerScanPlan; // block include all // preFilter<>triggerPrevFilter/partitionCols<>subTblNameExpr+tagValueExpr/triggerCols<>triggerCond/calcRows SArray* calcScanPlanList; // for calc action, SArray + bool isOldPlan; // trigger part int8_t triggerHasPF; // Since some filter will be processed in trigger's reader, triggerPrevFilter will be NULL. 
@@ -435,6 +436,7 @@ typedef struct { // void* triggerPrevFilter; void* triggerScanPlan; void* calcCacheScanPlan; + int8_t isOldPlan; } SStreamReaderDeployFromTrigger; typedef struct { @@ -677,13 +679,13 @@ typedef enum ESTriggerPullType { STRIGGER_PULL_FIRST_TS, STRIGGER_PULL_TSDB_META, STRIGGER_PULL_TSDB_META_NEXT, - STRIGGER_PULL_TSDB_TS_DATA, - STRIGGER_PULL_TSDB_TRIGGER_DATA, - STRIGGER_PULL_TSDB_TRIGGER_DATA_NEXT, - STRIGGER_PULL_TSDB_CALC_DATA, - STRIGGER_PULL_TSDB_CALC_DATA_NEXT, - STRIGGER_PULL_TSDB_DATA, //10 - STRIGGER_PULL_TSDB_DATA_NEXT, + STRIGGER_PULL_TSDB_TS_DATA, // DEPRECATED: replaced by STRIGGER_PULL_TSDB_DATA_DIFF_RANGE etc; remove after trigger side migration + STRIGGER_PULL_TSDB_TRIGGER_DATA, // DEPRECATED: same as above + STRIGGER_PULL_TSDB_TRIGGER_DATA_NEXT, // DEPRECATED: same as above + STRIGGER_PULL_TSDB_CALC_DATA, // DEPRECATED: same as above + STRIGGER_PULL_TSDB_CALC_DATA_NEXT, // DEPRECATED: same as above + STRIGGER_PULL_TSDB_DATA, //10 // DEPRECATED: replaced by STRIGGER_PULL_TSDB_DATA_DIFF_RANGE / STRIGGER_PULL_TSDB_DATA_SAME_RANGE; remove after trigger side migration + STRIGGER_PULL_TSDB_DATA_NEXT, // DEPRECATED: same as above STRIGGER_PULL_GROUP_COL_VALUE, STRIGGER_PULL_VTABLE_INFO, STRIGGER_PULL_VTABLE_PSEUDO_COL, @@ -692,6 +694,16 @@ typedef enum ESTriggerPullType { STRIGGER_PULL_WAL_DATA_NEW, STRIGGER_PULL_WAL_META_DATA_NEW, STRIGGER_PULL_WAL_CALC_DATA_NEW, + // === Added for history-data pull optimization === + STRIGGER_PULL_TSDB_DATA_DIFF_RANGE, + STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT, + STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC, + STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT, + STRIGGER_PULL_TSDB_DATA_SAME_RANGE, + STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT, + STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC, + STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT, + STRIGGER_PULL_SET_TABLE_HISTORY, // Same as STRIGGER_PULL_SET_TABLE, but writes into the *History fields STRIGGER_PULL_TYPE_MAX, } ESTriggerPullType; @@ -709,6 +721,33 @@ typedef 
struct SSTriggerSetTableRequest { SSHashObj* uidInfoCalc; // < uid->SHashObjcolId> > } SSTriggerSetTableRequest; +typedef struct SSTriggerTableTimeRange { + int64_t suid; // 0 for non-virtual tables; for virtual tables it is the suid of the uid + int64_t uid; + int64_t skey; + int64_t ekey; +} SSTriggerTableTimeRange; + +typedef struct SSTriggerTsdbDataDiffRangeRequest { + SSTriggerPullRequest base; + int64_t ver; + int8_t order; // 1 asc, 2 desc + SArray* ranges; // SArray +} SSTriggerTsdbDataDiffRangeRequest; + +typedef struct SSTriggerTsdbDataSameRangeRequest { + SSTriggerPullRequest base; + int64_t ver; + int64_t gid; // 0 means all tables; non-zero means a single group + int64_t skey; + int64_t ekey; + int8_t order; +} SSTriggerTsdbDataSameRangeRequest; + +// _NEXT variants carry no extra fields; reuse SSTriggerPullRequest directly. +typedef SSTriggerPullRequest SSTriggerTsdbDataDiffRangeNextRequest; +typedef SSTriggerPullRequest SSTriggerTsdbDataSameRangeNextRequest; + typedef struct SSTriggerLastTsRequest { SSTriggerPullRequest base; } SSTriggerLastTsRequest; @@ -867,6 +906,8 @@ typedef union SSTriggerPullRequestUnion { SSTriggerVirTableInfoRequest virTableInfoReq; SSTriggerVirTablePseudoColRequest virTablePseudoColReq; SSTriggerOrigTableInfoRequest origTableInfoReq; + SSTriggerTsdbDataDiffRangeRequest tsdbDataDiffRangeReq; + SSTriggerTsdbDataSameRangeRequest tsdbDataSameRangeReq; } SSTriggerPullRequestUnion; int32_t tSerializeSTriggerPullRequest(void* buf, int32_t bufLen, const SSTriggerPullRequest* pReq); diff --git a/include/libs/new-stream/streamReader.h b/include/libs/new-stream/streamReader.h index 7d8be9fad1d2..577e06efeb4a 100644 --- a/include/libs/new-stream/streamReader.h +++ b/include/libs/new-stream/streamReader.h @@ -41,6 +41,12 @@ typedef struct StreamTableListInfo { int64_t version; } StreamTableListInfo; +/* ------------------------------------------------------------------ */ +/* SDiffRangeIter — iterator state for DiffRange pulls */ +/* 
------------------------------------------------------------------ */ + + + typedef struct SStreamTriggerReaderInfo { void* pTask; int32_t order; @@ -49,21 +55,22 @@ typedef struct SStreamTriggerReaderInfo { uint64_t uid; int8_t tableType; int8_t isVtableStream; // whether is virtual table stream + int8_t isOldPlan; // whether is old plan int8_t isVtableOnlyTs; int8_t deleteReCalc; int8_t deleteOutTbl; SNode* pTagCond; SNode* pTagIndexCond; - SNode* pConditions; SNodeList* partitionCols; - SNodeList* triggerCols; - SNodeList* triggerPseudoCols; SHashObj* streamTaskMap; + SHashObj* streamTaskMapHistory; /* per-key map for SDiffRangeIter; freeFp set in vnodeStream.c */ SHashObj* groupIdMap; SSubplan* triggerAst; SSubplan* calcAst; + SNodeList* triggerCols; SSDataBlock* triggerResBlock; SSDataBlock* triggerBlock; + SNodeList* calcCols; SSDataBlock* calcResBlock; SSDataBlock* calcBlock; SSDataBlock* metaBlock; @@ -73,10 +80,12 @@ typedef struct SStreamTriggerReaderInfo { int32_t numOfExprTriggerTag; SExprInfo* pExprInfoCalcTag; int32_t numOfExprCalcTag; - SSHashObj* uidHashTrigger; // < uid -> SHashObj < slotId -> colId > > - SSHashObj* uidHashCalc; // < uid -> SHashObj < slotId -> colId > > - void* historyTableList; - SFilterInfo* pFilterInfo; + SSHashObj* uidHashTrigger; // < uid -> SHashObj < slotId -> colId > > + SSHashObj* uidHashCalc; // < uid -> SHashObj < slotId -> colId > > + SSHashObj* uidHashTriggerHistory; // history version, TSDB path (vtable only) + SSHashObj* uidHashCalcHistory; // history version, TSDB path (vtable only) + SFilterInfo* pFilterInfoTrigger; + SFilterInfo* pFilterInfoCalc; SHashObj* pTableMetaCacheTrigger; SHashObj* pTableMetaCacheCalc; SHashObj* triggerTableSchemaMapVTable; // key: uid, value: STSchema* @@ -88,6 +97,7 @@ typedef struct SStreamTriggerReaderInfo { StreamTableListInfo tableList; StreamTableListInfo vSetTableList; + StreamTableListInfo vSetTableListHistory; // Dedicated to the TSDB history path for virtual-table streams } 
SStreamTriggerReaderInfo; @@ -128,6 +138,7 @@ typedef struct { SStreamOptions* options; char* idStr; SQueryTableDataCond cond; + StreamTableListInfo vTableInfo; } SStreamReaderTaskInner; int32_t qStreamInitQueryTableDataCond(SQueryTableDataCond* pCond, int32_t order, void* schemas, bool isSchema, @@ -140,6 +151,7 @@ void qStreamSetTaskRunning(int64_t streamId, int64_t taskId); int32_t streamBuildFetchRsp(SArray* pResList, bool hasNext, void** data, size_t* size, int8_t precision); int32_t qBuildVTableList(SStreamTriggerReaderInfo* sStreamReaderInfo); +int32_t qBuildVTableListHistory(SStreamTriggerReaderInfo* sStreamReaderInfo); int32_t createStreamTask(void* pVnode, SStreamOptions* options, SStreamReaderTaskInner** ppTask, SSDataBlock* pResBlock, STableKeyInfo* pList, int32_t pNum, SStorageAPI* storageApi); @@ -150,7 +162,7 @@ int32_t initStreamTableListInfo(StreamTableListInfo* pTableListInfo); int32_t qStreamGetTableList(SStreamTriggerReaderInfo* sStreamReaderInfo, uint64_t gid, STableKeyInfo** pKeyInfo, int32_t* size); void qStreamDestroyTableInfo(StreamTableListInfo* pTableListInfo); void qStreamClearTableInfo(StreamTableListInfo* pTableListInfo); -int32_t qStreamCopyTableInfo(SStreamTriggerReaderInfo* sStreamReaderInfo, StreamTableListInfo* dst); +int32_t qStreamCopyTableInfo(SStreamTriggerReaderInfo* sStreamReaderInfo, StreamTableListInfo* dst, bool isHistory); int32_t qStreamSetTableList(StreamTableListInfo* pTableListInfo, int64_t uid, uint64_t gid); int32_t qStreamGetTableListGroupNum(SStreamTriggerReaderInfo* sStreamReaderInfo); int32_t qStreamGetTableListNum(SStreamTriggerReaderInfo* sStreamReaderInfo); diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index 08445da093b5..8be03a636cd4 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -706,6 +706,7 @@ typedef struct SSelectStmt { int32_t timeLineFromOrderBy; bool isEmptyResult; bool isSubquery; + bool pWhereInjectedFromPreFilter; // true if 
pWhere was cloned from stream pre_filter bool hasAggFuncs; bool hasRepeatScanFuncs; bool hasIndefiniteRowsFunc; diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index 0d4750a11362..a35c578f7b33 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -41,7 +41,6 @@ typedef struct SPlanStreamContext { SArray* calcScanPlanArray; SNode* triggerScanSubplan; ENodeType triggerWinType; - SNodeList* triggerScanList; } SPlanStreamContext; typedef struct SPlanContext { diff --git a/source/common/src/msg/streamMsg.c b/source/common/src/msg/streamMsg.c index df18075298dc..6d5f6fb31dce 100644 --- a/source/common/src/msg/streamMsg.c +++ b/source/common/src/msg/streamMsg.c @@ -531,6 +531,7 @@ int32_t tEncodeSStreamReaderDeployFromTrigger(SEncoder* pEncoder, const SStreamR //TAOS_CHECK_EXIT(tEncodeBinary(pEncoder, pMsg->triggerPrevFilter, pMsg->triggerPrevFilter == NULL ? 0 : (int32_t)strlen(pMsg->triggerPrevFilter) + 1)); TAOS_CHECK_EXIT(tEncodeBinary(pEncoder, pMsg->triggerScanPlan, pMsg->triggerScanPlan == NULL ? 0 : (int32_t)strlen(pMsg->triggerScanPlan) + 1)); TAOS_CHECK_EXIT(tEncodeBinary(pEncoder, pMsg->calcCacheScanPlan, pMsg->calcCacheScanPlan == NULL ? 
0 : (int32_t)strlen(pMsg->calcCacheScanPlan) + 1)); + TAOS_CHECK_EXIT(tEncodeI8(pEncoder, pMsg->isOldPlan)); _exit: @@ -1112,6 +1113,7 @@ int32_t tDecodeSStreamReaderDeployFromTrigger(SDecoder* pDecoder, SStreamReaderD TAOS_CHECK_EXIT(tDecodeBinaryAlloc(pDecoder, (void**)&pMsg->triggerCols, NULL)); TAOS_CHECK_EXIT(tDecodeBinaryAlloc(pDecoder, (void**)&pMsg->triggerScanPlan, NULL)); TAOS_CHECK_EXIT(tDecodeBinaryAlloc(pDecoder, (void**)&pMsg->calcCacheScanPlan, NULL)); + TAOS_CHECK_EXIT(tDecodeI8(pDecoder, &pMsg->isOldPlan)); _exit: @@ -3095,10 +3097,18 @@ void tDestroySTriggerPullRequest(SSTriggerPullRequestUnion* pReq) { taosArrayDestroy(pRequest->cols); pRequest->cols = NULL; } - } else if (pReq->base.type == STRIGGER_PULL_SET_TABLE) { + } else if (pReq->base.type == STRIGGER_PULL_SET_TABLE || + pReq->base.type == STRIGGER_PULL_SET_TABLE_HISTORY) { SSTriggerSetTableRequest* pRequest = (SSTriggerSetTableRequest*)pReq; tSimpleHashCleanup(pRequest->uidInfoTrigger); tSimpleHashCleanup(pRequest->uidInfoCalc); + } else if (pReq->base.type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE || + pReq->base.type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC) { + SSTriggerTsdbDataDiffRangeRequest* pRequest = (SSTriggerTsdbDataDiffRangeRequest*)pReq; + if (pRequest->ranges != NULL) { + taosArrayDestroy(pRequest->ranges); + pRequest->ranges = NULL; + } } } @@ -3182,7 +3192,8 @@ int32_t tSerializeSTriggerPullRequest(void* buf, int32_t bufLen, const SSTrigger TAOS_CHECK_EXIT(tEncodeI64(&encoder, pReq->sessionId)); switch (pReq->type) { - case STRIGGER_PULL_SET_TABLE: { + case STRIGGER_PULL_SET_TABLE: + case STRIGGER_PULL_SET_TABLE_HISTORY: { SSTriggerSetTableRequest* pRequest = (SSTriggerSetTableRequest*)pReq; TAOS_CHECK_EXIT(encodeSetTableMapInfo(&encoder, pRequest->uidInfoTrigger)); TAOS_CHECK_EXIT(encodeSetTableMapInfo(&encoder, pRequest->uidInfoCalc)); @@ -3255,6 +3266,38 @@ int32_t tSerializeSTriggerPullRequest(void* buf, int32_t bufLen, const SSTrigger case STRIGGER_PULL_TSDB_DATA_NEXT: 
{ break; } + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC: { + SSTriggerTsdbDataDiffRangeRequest* pRequest = (SSTriggerTsdbDataDiffRangeRequest*)pReq; + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->ver)); + TAOS_CHECK_EXIT(tEncodeI8(&encoder, pRequest->order)); + int32_t nRanges = (pRequest->ranges != NULL) ? taosArrayGetSize(pRequest->ranges) : 0; + TAOS_CHECK_EXIT(tEncodeI32(&encoder, nRanges)); + for (int32_t i = 0; i < nRanges; i++) { + SSTriggerTableTimeRange* r = (SSTriggerTableTimeRange*)TARRAY_GET_ELEM(pRequest->ranges, i); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, r->suid)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, r->uid)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, r->skey)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, r->ekey)); + } + break; + } + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT: { + break; + } + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC: { + SSTriggerTsdbDataSameRangeRequest* pRequest = (SSTriggerTsdbDataSameRangeRequest*)pReq; + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->ver)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->gid)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->skey)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->ekey)); + TAOS_CHECK_EXIT(tEncodeI8(&encoder, pRequest->order)); + break; + } case STRIGGER_PULL_WAL_META_NEW: { SSTriggerWalMetaNewRequest* pRequest = (SSTriggerWalMetaNewRequest*)pReq; TAOS_CHECK_EXIT(tEncodeI64(&encoder, pRequest->lastVer)); @@ -3409,7 +3452,8 @@ int32_t tDeserializeSTriggerPullRequest(void* buf, int32_t bufLen, SSTriggerPull TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pBase->sessionId)); switch (type) { - case STRIGGER_PULL_SET_TABLE: { + case STRIGGER_PULL_SET_TABLE: + case STRIGGER_PULL_SET_TABLE_HISTORY: { SSTriggerSetTableRequest* pRequest = 
&(pReq->setTableReq); TAOS_CHECK_EXIT(decodeSetTableMapInfo(&decoder, &pRequest->uidInfoTrigger)); TAOS_CHECK_EXIT(decodeSetTableMapInfo(&decoder, &pRequest->uidInfoCalc)); @@ -3482,6 +3526,44 @@ int32_t tDeserializeSTriggerPullRequest(void* buf, int32_t bufLen, SSTriggerPull case STRIGGER_PULL_TSDB_DATA_NEXT: { break; } + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC: { + SSTriggerTsdbDataDiffRangeRequest* pRequest = &(pReq->tsdbDataDiffRangeReq); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->ver)); + TAOS_CHECK_EXIT(tDecodeI8(&decoder, &pRequest->order)); + int32_t nRanges = 0; + TAOS_CHECK_EXIT(tDecodeI32(&decoder, &nRanges)); + if (nRanges > 0) { + pRequest->ranges = taosArrayInit_s(sizeof(SSTriggerTableTimeRange), nRanges); + TSDB_CHECK_NULL(pRequest->ranges, code, lino, _exit, terrno); + for (int32_t i = 0; i < nRanges; i++) { + SSTriggerTableTimeRange* r = (SSTriggerTableTimeRange*)TARRAY_GET_ELEM(pRequest->ranges, i); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &r->suid)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &r->uid)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &r->skey)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &r->ekey)); + } + } else { + pRequest->ranges = NULL; + } + break; + } + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT: { + break; + } + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC: { + SSTriggerTsdbDataSameRangeRequest* pRequest = &(pReq->tsdbDataSameRangeReq); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->ver)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->gid)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->skey)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->ekey)); + TAOS_CHECK_EXIT(tDecodeI8(&decoder, &pRequest->order)); + break; + } case STRIGGER_PULL_WAL_META_NEW: { 
SSTriggerWalMetaNewRequest* pRequest = &(pReq->walMetaNewReq); TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pRequest->lastVer)); diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index c4ec716bcc41..16332070dc59 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -102,8 +102,9 @@ static const char* gMndStreamState[] = {"X", "W", "N"}; #define STREAM_ACT_RECALC (1 << 4) #define MND_STREAM_RESERVE_SIZE 64 -#define MND_STREAM_VER_NUMBER 8 +#define MND_STREAM_VER_NUMBER 9 #define MND_STREAM_COMPATIBLE_VER_NUMBER 7 +#define MND_STREAM_OLD_TRIGGER_COLS 8 #define MND_STREAM_TRIGGER_NAME_SIZE 20 #define MND_STREAM_DEFAULT_NUM 100 #define MND_STREAM_DEFAULT_TASK_NUM 200 diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 0844f3500b25..97efab6bb56d 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -47,8 +47,9 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { TAOS_CHECK_EXIT(terrno); } - if (MND_STREAM_VER_NUMBER == sver) { + if (sver >= MND_STREAM_OLD_TRIGGER_COLS) { TAOS_CHECK_RETURN(tDeserializeSCMCreateStreamReqImpl(pDecoder, pObj->pCreate)); + pObj->pCreate->isOldPlan = (sver == MND_STREAM_OLD_TRIGGER_COLS); } else { TAOS_CHECK_RETURN( tDeserializeSCMCreateStreamReqImplOld(pDecoder, pObj->pCreate, 21)); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index b118f037bea1..50665382299d 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -82,7 +82,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { code = sdbGetRawSoftVer(pRaw, &sver); TSDB_CHECK_CODE(code, lino, _over); - if (sver != MND_STREAM_VER_NUMBER && sver != MND_STREAM_COMPATIBLE_VER_NUMBER) { + if (sver < MND_STREAM_COMPATIBLE_VER_NUMBER) { mError("stream read invalid ver, data ver: %d, curr ver: %d", sver, 
MND_STREAM_VER_NUMBER); goto _over; } diff --git a/source/dnode/mnode/impl/src/mndStreamMgmt.c b/source/dnode/mnode/impl/src/mndStreamMgmt.c index 724c4fed687e..b8e5dffcee48 100755 --- a/source/dnode/mnode/impl/src/mndStreamMgmt.c +++ b/source/dnode/mnode/impl/src/mndStreamMgmt.c @@ -765,6 +765,7 @@ int32_t msmBuildReaderDeployInfo(SStmTaskDeploy* pDeploy, void* calcScanPlan, SS //pTrigger->triggerPrevFilter = pStream->pCreate->triggerPrevFilter; pTrigger->triggerScanPlan = pInfo->pCreate->triggerScanPlan; pTrigger->calcCacheScanPlan = msmSearchCalcCacheScanPlan(pInfo->pCreate->calcScanPlanList); + pTrigger->isOldPlan = pInfo->pCreate->isOldPlan; } else { SStreamReaderDeployFromCalc* pCalc = &pMsg->msg.calc; pCalc->execReplica = pInfo->runnerDeploys * pInfo->runnerReplica; diff --git a/source/dnode/vnode/src/vnd/vnodeStream.c b/source/dnode/vnode/src/vnd/vnodeStream.c index 20a690b44980..44a9c9f5999e 100644 --- a/source/dnode/vnode/src/vnd/vnodeStream.c +++ b/source/dnode/vnode/src/vnd/vnodeStream.c @@ -19,6 +19,7 @@ #include #include "executor.h" #include "nodes.h" +#include "osMath.h" #include "osMemPool.h" #include "osMemory.h" #include "scalar.h" @@ -61,58 +62,8 @@ typedef struct WalMetaResult { int64_t ekey; } WalMetaResult; -static int64_t getSuid(SStreamTriggerReaderInfo* sStreamReaderInfo, STableKeyInfo* pList) { - int64_t suid = 0; - if (!sStreamReaderInfo->isVtableStream) { - suid = sStreamReaderInfo->suid; - goto end; - } - - if (pList == NULL) { - goto end; - } - - taosRLockLatch(&sStreamReaderInfo->lock); - SStreamTableMapElement* element = taosHashGet(sStreamReaderInfo->vSetTableList.uIdMap, &pList->uid, LONG_BYTES); - if (element != 0) { - suid = element->table->groupId; - taosRUnLockLatch(&sStreamReaderInfo->lock); - goto end; - } - taosRUnLockLatch(&sStreamReaderInfo->lock); - -end: - return suid; -} - static int64_t getSessionKey(int64_t session, int64_t type) { return (session | (type << 32)); } -int32_t sortCid(const void *lp, const void *rp) { 
- int16_t* c1 = (int16_t*)lp; - int16_t* c2 = (int16_t*)rp; - - if (*c1 < *c2) { - return -1; - } else if (*c1 > *c2) { - return 1; - } - - return 0; -} - -int32_t sortSSchema(const void *lp, const void *rp) { - SSchema* c1 = (SSchema*)lp; - SSchema* c2 = (SSchema*)rp; - - if (c1->colId < c2->colId) { - return -1; - } else if (c1->colId > c2->colId) { - return 1; - } - - return 0; -} - static int32_t addColData(SSDataBlock* pResBlock, int32_t index, void* data) { SColumnInfoData* pSrc = taosArrayGet(pResBlock->pDataBlock, index); if (pSrc == NULL) { @@ -2186,7 +2137,8 @@ static int32_t filterData(SSTriggerWalNewRsp* resultRsp, SStreamTriggerReaderInf SColumnInfoData* pRet = NULL; int64_t totalRows = ((SSDataBlock*)resultRsp->dataBlock)->info.rows; - STREAM_CHECK_RET_GOTO(qStreamFilter(((SSDataBlock*)resultRsp->dataBlock), sStreamReaderInfo->pFilterInfo, &pRet)); + STREAM_CHECK_RET_GOTO(qStreamFilter(((SSDataBlock*)resultRsp->dataBlock), + (!sStreamReaderInfo->isOldPlan && resultRsp->isCalc) ? 
sStreamReaderInfo->pFilterInfoCalc : sStreamReaderInfo->pFilterInfoTrigger, &pRet)); if (((SSDataBlock*)resultRsp->dataBlock)->info.rows < totalRows) { filterIndexHash(resultRsp->indexHash, pRet); @@ -2229,7 +2181,9 @@ static int32_t processWalVerMetaDataNew(SVnode* pVnode, SStreamTriggerReaderInfo } int32_t metaRows = resultRsp->totalRows - ((SSDataBlock*)resultRsp->dataBlock)->info.rows; - STREAM_CHECK_RET_GOTO(filterData(resultRsp, sStreamReaderInfo)); + if (!sStreamReaderInfo->isVtableStream) { + STREAM_CHECK_RET_GOTO(filterData(resultRsp, sStreamReaderInfo)); + } resultRsp->totalRows = ((SSDataBlock*)resultRsp->dataBlock)->info.rows + metaRows; end: @@ -2279,7 +2233,9 @@ static int32_t processWalVerDataNew(SVnode* pVnode, SStreamTriggerReaderInfo* sS STREAM_CHECK_RET_GOTO(scanSubmitData(pVnode, sStreamReaderInfo, pBody, bodyLen, ranges, rsp, wCont->version)); } // printDataBlock(rsp->dataBlock, __func__, "processWalVerDataNew"); - STREAM_CHECK_RET_GOTO(filterData(rsp, sStreamReaderInfo)); + if (!sStreamReaderInfo->isVtableStream) { + STREAM_CHECK_RET_GOTO(filterData(rsp, sStreamReaderInfo)); + } rsp->totalRows = ((SSDataBlock*)rsp->dataBlock)->info.rows; end: @@ -2648,7 +2604,7 @@ static int32_t processTsOutPutAllGroups(SStreamTriggerReaderInfo* sStreamReaderI StreamTableListInfo tableInfo = {0}; void* pTask = sStreamReaderInfo->pTask; - STREAM_CHECK_RET_GOTO(qStreamCopyTableInfo(sStreamReaderInfo, &tableInfo)); + STREAM_CHECK_RET_GOTO(qStreamCopyTableInfo(sStreamReaderInfo, &tableInfo, false)); SSHashObj* uidTsHash = tSimpleHashInit(pResBlock->info.rows, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); STREAM_CHECK_NULL_GOTO(uidTsHash, terrno); @@ -2738,7 +2694,7 @@ static int32_t processTsVTable(SVnode* pVnode, SStreamTsResponse* tsRsp, SStream StreamTableListInfo tableInfo = {0}; void* pTask = sStreamReaderInfo->pTask; - STREAM_CHECK_RET_GOTO(qStreamCopyTableInfo(sStreamReaderInfo, &tableInfo)); + 
STREAM_CHECK_RET_GOTO(qStreamCopyTableInfo(sStreamReaderInfo, &tableInfo, false)); SSDataBlock* pResBlock = NULL; STREAM_CHECK_RET_GOTO(createDataBlockTsUid(&pResBlock, qStreamGetTableListNum(sStreamReaderInfo))); @@ -2815,7 +2771,7 @@ static int32_t processTsNonVTable(SVnode* pVnode, SStreamTsResponse* tsRsp, SStr return code; } -static int32_t processTsOnce(SVnode* pVnode, SStreamTsResponse* tsRsp, SStreamTriggerReaderInfo* sStreamReaderInfo, +static int32_t processTsOneGroup(SVnode* pVnode, SStreamTsResponse* tsRsp, SStreamTriggerReaderInfo* sStreamReaderInfo, SStreamReaderTaskInner* pTaskInner, uint64_t gid) { int32_t code = 0; int32_t lino = 0; @@ -2862,14 +2818,24 @@ static int32_t vnodeProcessStreamSetTableReq(SVnode* pVnode, SRpcMsg* pMsg, SSTr tSimpleHashGetSize(req->setTableReq.uidInfoTrigger), tSimpleHashGetSize(req->setTableReq.uidInfoCalc), pVnode->state.applied); taosWLockLatch(&sStreamReaderInfo->lock); - TSWAP(sStreamReaderInfo->uidHashTrigger, req->setTableReq.uidInfoTrigger); - TSWAP(sStreamReaderInfo->uidHashCalc, req->setTableReq.uidInfoCalc); - STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashTrigger, TSDB_CODE_INVALID_PARA); - STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashCalc, TSDB_CODE_INVALID_PARA); - - qStreamClearTableInfo(&sStreamReaderInfo->vSetTableList); - STREAM_CHECK_RET_GOTO(initStreamTableListInfo(&sStreamReaderInfo->vSetTableList)); - STREAM_CHECK_RET_GOTO(qBuildVTableList(sStreamReaderInfo)); + if (req->base.type == STRIGGER_PULL_SET_TABLE_HISTORY) { + /* History path: swap into the *History fields so realtime path is untouched. 
*/ + TSWAP(sStreamReaderInfo->uidHashTriggerHistory, req->setTableReq.uidInfoTrigger); + TSWAP(sStreamReaderInfo->uidHashCalcHistory, req->setTableReq.uidInfoCalc); + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashTriggerHistory, TSDB_CODE_INVALID_PARA); + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashCalcHistory, TSDB_CODE_INVALID_PARA); + qStreamClearTableInfo(&sStreamReaderInfo->vSetTableListHistory); + STREAM_CHECK_RET_GOTO(initStreamTableListInfo(&sStreamReaderInfo->vSetTableListHistory)); + STREAM_CHECK_RET_GOTO(qBuildVTableListHistory(sStreamReaderInfo)); + } else { + TSWAP(sStreamReaderInfo->uidHashTrigger, req->setTableReq.uidInfoTrigger); + TSWAP(sStreamReaderInfo->uidHashCalc, req->setTableReq.uidInfoCalc); + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashTrigger, TSDB_CODE_INVALID_PARA); + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->uidHashCalc, TSDB_CODE_INVALID_PARA); + qStreamClearTableInfo(&sStreamReaderInfo->vSetTableList); + STREAM_CHECK_RET_GOTO(initStreamTableListInfo(&sStreamReaderInfo->vSetTableList)); + STREAM_CHECK_RET_GOTO(qBuildVTableList(sStreamReaderInfo)); + } end: taosWUnLockLatch(&sStreamReaderInfo->lock); STREAM_PRINT_LOG_END_WITHID(code, lino); @@ -2932,7 +2898,7 @@ static int32_t vnodeProcessStreamFirstTsReq(SVnode* pVnode, SRpcMsg* pMsg, SSTri STREAM_CHECK_RET_GOTO(createStreamTaskForTs(&options, &pTaskInner, &sStreamReaderInfo->storageApi)); if (req->firstTsReq.gid != 0) { - STREAM_CHECK_RET_GOTO(processTsOnce(pVnode, &tsRsp, sStreamReaderInfo, pTaskInner, req->firstTsReq.gid)); + STREAM_CHECK_RET_GOTO(processTsOneGroup(pVnode, &tsRsp, sStreamReaderInfo, pTaskInner, req->firstTsReq.gid)); } else { STREAM_CHECK_RET_GOTO(processTs(pVnode, &tsRsp, sStreamReaderInfo, pTaskInner)); } @@ -2967,11 +2933,14 @@ static int32_t vnodeProcessStreamTsdbMetaReq(SVnode* pVnode, SRpcMsg* pMsg, SSTr if (req->base.type == STRIGGER_PULL_TSDB_META) { int32_t pNum = 0; STREAM_CHECK_RET_GOTO(qStreamGetTableList(sStreamReaderInfo, 
req->tsdbMetaReq.gid, &pList, &pNum)); - BUILD_OPTION(options, getSuid(sStreamReaderInfo, pList), req->tsdbMetaReq.ver, req->tsdbMetaReq.order, req->tsdbMetaReq.startTime, req->tsdbMetaReq.endTime, + BUILD_OPTION(options, sStreamReaderInfo->suid, req->tsdbMetaReq.ver, req->tsdbMetaReq.order, req->tsdbMetaReq.startTime, req->tsdbMetaReq.endTime, sStreamReaderInfo->tsSchemas, true, NULL); STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, NULL, pList, pNum, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner))); - + code = taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner)); + if (code != 0) { + releaseStreamTask(&pTaskInner); + goto end; + } STREAM_CHECK_RET_GOTO(createBlockForTsdbMeta(&pTaskInner->pResBlockDst, sStreamReaderInfo->isVtableStream)); } else { void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES); @@ -2989,21 +2958,18 @@ static int32_t vnodeProcessStreamTsdbMetaReq(SVnode* pVnode, SRpcMsg* pMsg, SSTr break; } pTaskInner->storageApi->tsdReader.tsdReaderReleaseDataBlock(pTaskInner->pReader); - pTaskInner->pResBlock->info.id.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, pTaskInner->pResBlock->info.id.uid); int32_t index = 0; STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.window.skey)); STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.window.ekey)); STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.id.uid)); - if (!sStreamReaderInfo->isVtableStream) { - STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.id.groupId)); - } + STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.id.groupId)); 
STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.rows)); - stDebug("vgId:%d %s get skey:%" PRId64 ", eksy:%" PRId64 ", uid:%" PRId64 ", gId:%" PRIu64 ", rows:%" PRId64, + ST_TASK_DLOG("vgId:%d %s get skey:%" PRId64 ", eksy:%" PRId64 ", uid:%" PRId64 ", gId:%" PRIu64 ", rows:%" PRId64, TD_VID(pVnode), __func__, pTaskInner->pResBlock->info.window.skey, pTaskInner->pResBlock->info.window.ekey, pTaskInner->pResBlock->info.id.uid, pTaskInner->pResBlock->info.id.groupId, pTaskInner->pResBlock->info.rows); - pTaskInner->pResBlockDst->info.rows++; + pTaskInner->pResBlockDst->info.rows++; if (pTaskInner->pResBlockDst->info.rows >= STREAM_RETURN_ROWS_NUM) { break; } @@ -3025,322 +2991,455 @@ static int32_t vnodeProcessStreamTsdbMetaReq(SVnode* pVnode, SRpcMsg* pMsg, SSTr return code; } -static int32_t vnodeProcessStreamTsdbTsDataReqNonVTable(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { - int32_t code = 0; - int32_t lino = 0; - SStreamReaderTaskInner* pTaskInner = NULL; - void* buf = NULL; - size_t size = 0; - SSDataBlock* pBlockRes = NULL; +static int32_t vnodeProcessStreamTsdbMetaVtableReq(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { + int32_t code = 0; + int32_t lino = 0; + void* buf = NULL; + size_t size = 0; + STableKeyInfo* pList = NULL; + StreamTableListInfo tableInfo = {0}; + int32_t pNum = 0; + int64_t suid = 0; void* pTask = sStreamReaderInfo->pTask; - ST_TASK_DLOG("vgId:%d %s start, ver:%"PRId64",skey:%"PRId64",ekey:%"PRId64",uid:%"PRId64",suid:%"PRId64, TD_VID(pVnode), __func__, req->tsdbTsDataReq.ver, - req->tsdbTsDataReq.skey, req->tsdbTsDataReq.ekey, - req->tsdbTsDataReq.uid, req->tsdbTsDataReq.suid); - - int32_t pNum = 1; - STableKeyInfo pList = {.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, req->tsdbTsDataReq.uid), .uid = req->tsdbTsDataReq.uid}; - 
STREAM_CHECK_CONDITION_GOTO(pList.groupId == -1, TSDB_CODE_INVALID_PARA); - BUILD_OPTION(options, getSuid(sStreamReaderInfo, &pList), req->tsdbTsDataReq.ver, TSDB_ORDER_ASC, req->tsdbTsDataReq.skey, req->tsdbTsDataReq.ekey, - sStreamReaderInfo->triggerCols, false, NULL); - STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, sStreamReaderInfo->triggerResBlock, &pList, pNum, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->triggerResBlock, false, &pTaskInner->pResBlockDst)); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->tsBlock, false, &pBlockRes)); + ST_TASK_DLOG("vgId:%d %s start, ver:%" PRId64 ",skey:%" PRId64 ",ekey:%" PRId64 ",gid:%" PRId64, TD_VID(pVnode), + __func__, req->tsdbMetaReq.ver, req->tsdbMetaReq.startTime, req->tsdbMetaReq.endTime, + req->tsdbMetaReq.gid); - while (1) { - bool hasNext = false; + SStreamReaderTaskInner* pTaskInner = NULL; + int64_t key = getSessionKey(req->base.sessionId, STRIGGER_PULL_TSDB_META); + + if (req->base.type == STRIGGER_PULL_TSDB_META) { + STREAM_CHECK_RET_GOTO(qStreamCopyTableInfo(sStreamReaderInfo, &tableInfo, true)); + STREAM_CHECK_RET_GOTO(qStreamIterTableList(&tableInfo, &pList, &pNum, &suid)); + + BUILD_OPTION(options, suid, req->tsdbMetaReq.ver, req->tsdbMetaReq.order, req->tsdbMetaReq.startTime, req->tsdbMetaReq.endTime, + sStreamReaderInfo->tsSchemas, true, NULL); + STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, NULL, pList, pNum, &sStreamReaderInfo->storageApi)); + TSWAP(pTaskInner->vTableInfo, tableInfo); + code = taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner)); + if (code != 0) { + releaseStreamTask(&pTaskInner); + goto end; + } + STREAM_CHECK_RET_GOTO(createBlockForTsdbMeta(&pTaskInner->pResBlockDst, sStreamReaderInfo->isVtableStream)); + } else { + void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES); + STREAM_CHECK_NULL_GOTO(tmp, 
TSDB_CODE_STREAM_NO_CONTEXT); + pTaskInner = *(SStreamReaderTaskInner**)tmp; + STREAM_CHECK_NULL_GOTO(pTaskInner, TSDB_CODE_INTERNAL_ERROR); + } + + blockDataCleanup(pTaskInner->pResBlockDst); + STREAM_CHECK_RET_GOTO(blockDataEnsureCapacity(pTaskInner->pResBlockDst, STREAM_RETURN_ROWS_NUM)); + bool hasNext = true; + while (true) { STREAM_CHECK_RET_GOTO(getTableDataInfo(pTaskInner, &hasNext)); if (!hasNext) { - break; + STREAM_CHECK_RET_GOTO(qStreamIterTableList(&pTaskInner->vTableInfo, &pList, &pNum, &suid)); + if (pNum == 0) break; + BUILD_OPTION(options, suid, req->tsdbMetaReq.ver, req->tsdbMetaReq.order, req->tsdbMetaReq.startTime, req->tsdbMetaReq.endTime, + sStreamReaderInfo->tsSchemas, true, NULL); + SStreamReaderTaskInner* pTaskInnerNew = NULL; + STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInnerNew, NULL, pList, pNum, &sStreamReaderInfo->storageApi)); + TSWAP(pTaskInnerNew->vTableInfo, pTaskInner->vTableInfo); + code = taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInnerNew, sizeof(pTaskInnerNew)); + if (code != 0) { + releaseStreamTask(&pTaskInnerNew); + goto end; + } + pTaskInner = pTaskInnerNew; } - // if (!sStreamReaderInfo->isVtableStream){ - pTaskInner->pResBlock->info.id.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, pTaskInner->pResBlock->info.id.uid); - // } + pTaskInner->storageApi->tsdReader.tsdReaderReleaseDataBlock(pTaskInner->pReader); + + int32_t index = 0; + STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.window.skey)); + STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.window.ekey)); + STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.id.uid)); + STREAM_CHECK_RET_GOTO(addColData(pTaskInner->pResBlockDst, index++, &pTaskInner->pResBlock->info.rows)); - SSDataBlock* pBlock = NULL; - STREAM_CHECK_RET_GOTO(getTableData(pTaskInner, &pBlock)); - if (pBlock 
!= NULL && pBlock->info.rows > 0) { - STREAM_CHECK_RET_GOTO(processTag(sStreamReaderInfo, false, pBlock->info.id.uid, pBlock, - 0, pBlock->info.rows, 1)); - } - - STREAM_CHECK_RET_GOTO(qStreamFilter(pBlock, sStreamReaderInfo->pFilterInfo, NULL)); - STREAM_CHECK_RET_GOTO(blockDataMerge(pTaskInner->pResBlockDst, pBlock)); ST_TASK_DLOG("vgId:%d %s get skey:%" PRId64 ", eksy:%" PRId64 ", uid:%" PRId64 ", gId:%" PRIu64 ", rows:%" PRId64, TD_VID(pVnode), __func__, pTaskInner->pResBlock->info.window.skey, pTaskInner->pResBlock->info.window.ekey, pTaskInner->pResBlock->info.id.uid, pTaskInner->pResBlock->info.id.groupId, pTaskInner->pResBlock->info.rows); + pTaskInner->pResBlockDst->info.rows++; + if (pTaskInner->pResBlockDst->info.rows >= STREAM_RETURN_ROWS_NUM) { + break; + } } - blockDataTransform(pBlockRes, pTaskInner->pResBlockDst); - ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pTaskInner->pResBlockDst->info.rows); - STREAM_CHECK_RET_GOTO(buildRsp(pBlockRes, &buf, &size)); + STREAM_CHECK_RET_GOTO(buildRsp(pTaskInner->pResBlockDst, &buf, &size)); + printDataBlock(pTaskInner->pResBlockDst, __func__, "meta", ((SStreamTask *)sStreamReaderInfo->pTask)->streamId); + if (!hasNext) { + STREAM_CHECK_RET_GOTO(taosHashRemove(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES)); + } end: + qStreamDestroyTableInfo(&tableInfo); STREAM_PRINT_LOG_END_WITHID(code, lino); SRpcMsg rsp = { .msgType = TDMT_STREAM_TRIGGER_PULL_RSP, .info = pMsg->info, .pCont = buf, .contLen = size, .code = code}; tmsgSendRsp(&rsp); - blockDataDestroy(pBlockRes); - - releaseStreamTask(&pTaskInner); + taosMemoryFree(pList); return code; } -static int32_t vnodeProcessStreamTsdbTsDataReqVTable(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { - int32_t code = 0; - int32_t lino = 0; - SStreamReaderTaskInner* pTaskInner = NULL; - void* buf = NULL; - size_t size = 0; - SSDataBlock* pBlockRes = NULL; +static int32_t 
pickSchemasHistory(SStreamTriggerReaderInfo* sStreamReaderInfo, int64_t suid, int64_t uid, + bool isCalc, SArray** schemas, int32_t** slotIdList) { + int32_t code = 0; + int32_t lino = 0; + bool lock = false; + SArray* cids = NULL; - void* pTask = sStreamReaderInfo->pTask; - ST_TASK_DLOG("vgId:%d %s start, ver:%"PRId64",skey:%"PRId64",ekey:%"PRId64",uid:%"PRId64",suid:%"PRId64, TD_VID(pVnode), __func__, req->tsdbTsDataReq.ver, - req->tsdbTsDataReq.skey, req->tsdbTsDataReq.ekey, - req->tsdbTsDataReq.uid, req->tsdbTsDataReq.suid); - - int32_t pNum = 1; - STableKeyInfo pList = {.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, req->tsdbTsDataReq.uid), .uid = req->tsdbTsDataReq.uid}; - STREAM_CHECK_CONDITION_GOTO(pList.groupId == -1, TSDB_CODE_INVALID_PARA); - BUILD_OPTION(options, getSuid(sStreamReaderInfo, &pList), req->tsdbTsDataReq.ver, TSDB_ORDER_ASC, req->tsdbTsDataReq.skey, req->tsdbTsDataReq.ekey, - sStreamReaderInfo->tsSchemas, true, NULL); - STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, sStreamReaderInfo->tsBlock, &pList, pNum, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->tsBlock, false, &pBlockRes)); + STREAM_CHECK_RET_GOTO(buildScheamFromMeta(sStreamReaderInfo->pVnode, uid, schemas, &sStreamReaderInfo->storageApi)); + cids = taosArrayInit(taosArrayGetSize(*schemas), SHORT_BYTES); + STREAM_CHECK_NULL_GOTO(cids, terrno); - while (1) { - bool hasNext = false; - STREAM_CHECK_RET_GOTO(getTableDataInfo(pTaskInner, &hasNext)); - if (!hasNext) { - break; - } + int64_t id[2] = {suid, uid}; + taosRLockLatch(&sStreamReaderInfo->lock); + lock = true; + void *px = tSimpleHashGet(isCalc ? 
sStreamReaderInfo->uidHashCalcHistory : sStreamReaderInfo->uidHashTriggerHistory, id, sizeof(id)); + STREAM_CHECK_NULL_GOTO(px, TSDB_CODE_INVALID_PARA); + SSHashObj* uInfo = *(SSHashObj **)px; + STREAM_CHECK_NULL_GOTO(uInfo, TSDB_CODE_INVALID_PARA); - SSDataBlock* pBlock = NULL; - STREAM_CHECK_RET_GOTO(getTableData(pTaskInner, &pBlock)); - STREAM_CHECK_RET_GOTO(blockDataMerge(pBlockRes, pBlock)); - ST_TASK_DLOG("vgId:%d %s get skey:%" PRId64 ", eksy:%" PRId64 ", uid:%" PRId64 ", gId:%" PRIu64 ", rows:%" PRId64, - TD_VID(pVnode), __func__, pBlockRes->info.window.skey, pBlockRes->info.window.ekey, - pBlockRes->info.id.uid, pBlockRes->info.id.groupId, pBlockRes->info.rows); + *slotIdList = taosMemoryCalloc(taosArrayGetSize(*schemas), sizeof(int32_t)); + STREAM_CHECK_NULL_GOTO(*slotIdList, terrno); + + int32_t index = 0; + int32_t iter = 0; + void* temp = tSimpleHashIterate(uInfo, NULL, &iter); + while (temp != NULL) { + int16_t* slotId = (int16_t*)tSimpleHashGetKey(temp, NULL); + int16_t* colId = (int16_t*)temp; + STREAM_CHECK_NULL_GOTO(taosArrayPush(cids, colId), terrno); + (*slotIdList)[index++] = *slotId; + temp = tSimpleHashIterate(uInfo, temp, &iter); } - ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pBlockRes->info.rows); - STREAM_CHECK_RET_GOTO(buildRsp(pBlockRes, &buf, &size)); + STREAM_CHECK_RET_GOTO(shrinkScheams(cids, *schemas)); end: - STREAM_PRINT_LOG_END_WITHID(code, lino); - SRpcMsg rsp = { - .msgType = TDMT_STREAM_TRIGGER_PULL_RSP, .info = pMsg->info, .pCont = buf, .contLen = size, .code = code}; - tmsgSendRsp(&rsp); - blockDataDestroy(pBlockRes); - - releaseStreamTask(&pTaskInner); + if (lock){ + taosRUnLockLatch(&sStreamReaderInfo->lock); + } + taosArrayDestroy(cids); return code; } -static int32_t vnodeProcessStreamTsdbTriggerDataReq(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { - int32_t code = 0; - int32_t lino = 0; - void* buf = NULL; - size_t size = 
0; - STableKeyInfo* pList = NULL; - SArray* pResList = NULL; - SSDataBlock* pBlockTmp = NULL; +static inline bool isNewCalc(SStreamTriggerReaderInfo* pInfo, bool isCalc) { + return !pInfo->isOldPlan && isCalc; +} +/* Scan one (uid, [skey,ekey]) range and return a freshly-allocated + * SSDataBlock*. The caller takes ownership; NULL is valid (no rows). */ +static int32_t scanOneTableForRange(SVnode* pVnode, SStreamTriggerReaderInfo* sStreamReaderInfo, + SSTriggerTableTimeRange* r, int64_t ver, int8_t order, + bool isCalc, SSDataBlock** outBlock) { + int32_t code = 0; + int32_t lino = 0; SStreamReaderTaskInner* pTaskInner = NULL; - void* pTask = sStreamReaderInfo->pTask; - ST_TASK_DLOG("vgId:%d %s start. ver:%"PRId64",order:%d,startTs:%"PRId64",gid:%"PRId64, TD_VID(pVnode), __func__, req->tsdbTriggerDataReq.ver, req->tsdbTriggerDataReq.order, req->tsdbTriggerDataReq.startTime, req->tsdbTriggerDataReq.gid); - - int64_t key = getSessionKey(req->base.sessionId, STRIGGER_PULL_TSDB_TRIGGER_DATA); + SSDataBlock* tmpBlock = NULL; + void* schemas = NULL; + int32_t* slotIdList = NULL; - if (req->base.type == STRIGGER_PULL_TSDB_TRIGGER_DATA) { - int32_t pNum = 0; - STREAM_CHECK_RET_GOTO(qStreamGetTableList(sStreamReaderInfo, req->tsdbTriggerDataReq.gid, &pList, &pNum)); - BUILD_OPTION(options, getSuid(sStreamReaderInfo, pList), req->tsdbTriggerDataReq.ver, req->tsdbTriggerDataReq.order, req->tsdbTriggerDataReq.startTime, INT64_MAX, - sStreamReaderInfo->triggerCols, false, NULL); - STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, sStreamReaderInfo->triggerResBlock, pList, pNum, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner))); + void* pTask = sStreamReaderInfo->pTask; + BUILD_OPTION(options, r->suid != 0 ? 
r->suid : sStreamReaderInfo->suid, ver, order, r->skey, r->ekey, NULL, false, NULL); + if (isNewCalc(sStreamReaderInfo, isCalc)) { + tmpBlock = sStreamReaderInfo->calcBlock; } else { - void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES); - STREAM_CHECK_NULL_GOTO(tmp, TSDB_CODE_STREAM_NO_CONTEXT); - pTaskInner = *(SStreamReaderTaskInner**)tmp; - STREAM_CHECK_NULL_GOTO(pTaskInner, TSDB_CODE_INTERNAL_ERROR); + tmpBlock = sStreamReaderInfo->triggerBlock; + } + if (sStreamReaderInfo->isVtableStream) { + code = pickSchemasHistory(sStreamReaderInfo, r->suid, r->uid, isCalc, (SArray**)&schemas, &slotIdList); + if (code == TSDB_CODE_PAR_TABLE_NOT_EXIST) { + ST_TASK_WLOG("table not exist, uid:%" PRId64, r->uid); + code = 0; + goto end; + } + STREAM_CHECK_RET_GOTO(code); + options.schemas = schemas; + options.pSlotList = &slotIdList; + options.isSchema = true; + } else { + if (isNewCalc(sStreamReaderInfo, isCalc)) { + options.schemas = sStreamReaderInfo->calcCols; + } else { + options.schemas = sStreamReaderInfo->triggerCols; + } + options.isSchema = false; } - blockDataCleanup(pTaskInner->pResBlockDst); - bool hasNext = true; - int32_t totalRows = 0; - - pResList = taosArrayInit(4, POINTER_BYTES); - STREAM_CHECK_NULL_GOTO(pResList, terrno); + if (*outBlock == NULL) { + STREAM_CHECK_RET_GOTO(createOneDataBlock(tmpBlock, false, outBlock)); + } + + int64_t startIndex = (*outBlock)->info.rows; + int64_t totalRows = 0; + + STableKeyInfo pList = {.uid = r->uid, + .groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, r->uid)}; + STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, tmpBlock, &pList, 1, &sStreamReaderInfo->storageApi)); + while (1) { + bool hasNext = false; STREAM_CHECK_RET_GOTO(getTableDataInfo(pTaskInner, &hasNext)); - if (!hasNext) { - break; - } - pTaskInner->pResBlock->info.id.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, pTaskInner->pResBlock->info.id.uid); - // pTaskInner->pResBlockDst->info.id.groupId = 
pTaskInner->pResBlock->info.id.groupId; + if (!hasNext) break; SSDataBlock* pBlock = NULL; STREAM_CHECK_RET_GOTO(getTableData(pTaskInner, &pBlock)); if (pBlock != NULL && pBlock->info.rows > 0) { - STREAM_CHECK_RET_GOTO( - processTag(sStreamReaderInfo, false, pBlock->info.id.uid, pBlock, 0, pBlock->info.rows, 1)); + STREAM_CHECK_RET_GOTO(processTag(sStreamReaderInfo, isCalc, pBlock->info.id.uid, pBlock, 0, pBlock->info.rows, 1)); } - STREAM_CHECK_RET_GOTO(qStreamFilter(pBlock, sStreamReaderInfo->pFilterInfo, NULL)); - // STREAM_CHECK_RET_GOTO(blockDataMerge(pTaskInner->pResBlockDst, pBlock)); - ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pBlock->info.rows); - STREAM_CHECK_RET_GOTO(createOneDataBlock(pBlock, true, &pBlockTmp)); - STREAM_CHECK_NULL_GOTO(taosArrayPush(pResList, &pBlockTmp), terrno); - totalRows += blockDataGetNumOfRows(pBlockTmp); - pBlockTmp = NULL; - - ST_TASK_DLOG("vgId:%d %s get skey:%" PRId64 ", eksy:%" PRId64 ", uid:%" PRId64 ", gId:%" PRIu64 ", rows:%" PRId64, - TD_VID(pVnode), __func__, pTaskInner->pResBlock->info.window.skey, pTaskInner->pResBlock->info.window.ekey, - pTaskInner->pResBlock->info.id.uid, pTaskInner->pResBlock->info.id.groupId, pTaskInner->pResBlock->info.rows); - if (totalRows >= STREAM_RETURN_ROWS_NUM) { //todo optimize send multi blocks in one group - break; + if (!sStreamReaderInfo->isVtableStream) { + STREAM_CHECK_RET_GOTO(qStreamFilter(pBlock, isNewCalc(sStreamReaderInfo, isCalc) ? 
sStreamReaderInfo->pFilterInfoCalc : sStreamReaderInfo->pFilterInfoTrigger, NULL)); } - } - STREAM_CHECK_RET_GOTO(buildArrayRsp(pResList, &buf, &size)); - if (!hasNext) { - STREAM_CHECK_RET_GOTO(taosHashRemove(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES)); + if (pBlock->info.rows > 0) { + STREAM_CHECK_RET_GOTO(blockDataMerge(*outBlock, pBlock)); + totalRows += pBlock->info.rows; + } } + // add uid to the end + SColumnInfoData* pColData = taosArrayGetLast((*outBlock)->pDataBlock); + STREAM_CHECK_NULL_GOTO(pColData, terrno); + STREAM_CHECK_RET_GOTO(colDataSetNItems(pColData, startIndex, (const char*)&r->uid, totalRows, 1, false)); + end: - STREAM_PRINT_LOG_END_WITHID(code, lino); - SRpcMsg rsp = { - .msgType = TDMT_STREAM_TRIGGER_PULL_RSP, .info = pMsg->info, .pCont = buf, .contLen = size, .code = code}; - tmsgSendRsp(&rsp); - taosMemoryFree(pList); - blockDataDestroy(pBlockTmp); - taosArrayDestroyP(pResList, (FDelete)blockDataDestroy); + taosMemoryFree(slotIdList); + taosArrayDestroy(schemas); + releaseStreamTask(&pTaskInner); return code; } -static int32_t vnodeProcessStreamTsdbCalcDataReq(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { - int32_t code = 0; - int32_t lino = 0; - void* buf = NULL; - size_t size = 0; - SSDataBlock* pBlockRes = NULL; - STableKeyInfo* pList = NULL; +typedef struct SDiffRangeIter { + SArray* ranges; /* owned: taosArrayDup of req ranges, sorted by uid */ + int32_t pos; + int64_t ver; + int8_t order; + bool isCalc; +} SDiffRangeIter; + +void destroyDiffRangeIter(SDiffRangeIter* iter) { + if (iter == NULL) return; + taosArrayDestroy(iter->ranges); + taosMemoryFree(iter); +} +static void releaseDiffRangeIterFp(void* p) { + if (p == NULL) return; + destroyDiffRangeIter(*(SDiffRangeIter**)p); +} - void* pTask = sStreamReaderInfo->pTask; - ST_TASK_DLOG("vgId:%d %s start, skey:%"PRId64",ekey:%"PRId64",gid:%"PRId64",ver:%"PRId64, TD_VID(pVnode), __func__, - 
req->tsdbCalcDataReq.skey, req->tsdbCalcDataReq.ekey, req->tsdbCalcDataReq.gid, req->tsdbCalcDataReq.ver); +static int32_t diffRangeUidCmp(const void* a, const void* b) { + const SSTriggerTableTimeRange* ra = (const SSTriggerTableTimeRange*)a; + const SSTriggerTableTimeRange* rb = (const SSTriggerTableTimeRange*)b; + if (ra->uid != rb->uid) return ra->uid < rb->uid ? -1 : 1; + if (ra->skey != rb->skey) return ra->skey < rb->skey ? -1 : 1; + if (ra->ekey != rb->ekey) return ra->ekey < rb->ekey ? -1 : 1; + return 0; +} - STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->triggerCols, TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); +static int32_t newDiffRangeIter(SArray* ranges, int64_t ver, int8_t order, bool isCalc, + SDiffRangeIter** out) { + SDiffRangeIter* p = taosMemoryCalloc(1, sizeof(SDiffRangeIter)); + if (p == NULL) return terrno; + p->ranges = taosArrayDup(ranges, NULL); + if (p->ranges == NULL) { + taosMemoryFree(p); + return terrno; + } + taosArraySort(p->ranges, diffRangeUidCmp); + p->pos = 0; + p->ver = ver; + p->order = order; + p->isCalc = isCalc; + *out = p; + return 0; +} - SStreamReaderTaskInner* pTaskInner = NULL; - int64_t key = getSessionKey(req->base.sessionId, STRIGGER_PULL_TSDB_CALC_DATA); +/* Transform trigger-layout block to calc-layout block for old plans. + * On success, *ppCur is replaced with the transformed block; the original is freed. + * *ppResult is used as scratch and reset to NULL on success. 
*/ +static int32_t transformDataToCalc(SStreamTriggerReaderInfo* sStreamReaderInfo, bool isCalc, + SSDataBlock** ppCur, SSDataBlock** ppResult) { + int32_t code = 0; + int32_t lino = 0; + if (sStreamReaderInfo->isOldPlan && isCalc && *ppCur && (*ppCur)->info.rows > 0) { + STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->calcBlock, false, ppResult)); + STREAM_CHECK_RET_GOTO(blockDataEnsureCapacity(*ppResult, (*ppCur)->info.capacity)); + blockDataTransform(*ppResult, *ppCur); + blockDataDestroy(*ppCur); + *ppCur = *ppResult; + *ppResult = NULL; + } +end: + return code; +} - if (req->base.type == STRIGGER_PULL_TSDB_CALC_DATA) { - int32_t pNum = 0; - STREAM_CHECK_RET_GOTO(qStreamGetTableList(sStreamReaderInfo, req->tsdbCalcDataReq.gid, &pList, &pNum)); - BUILD_OPTION(options, getSuid(sStreamReaderInfo, pList), req->tsdbCalcDataReq.ver, TSDB_ORDER_ASC, req->tsdbCalcDataReq.skey, req->tsdbCalcDataReq.ekey, - sStreamReaderInfo->triggerCols, false, NULL); - STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, sStreamReaderInfo->triggerResBlock, pList, pNum, &sStreamReaderInfo->storageApi)); +/* ------------------------------------------------------------------ */ +/* vnodeProcessStreamTsdbDataDiffRangeReq */ +/* ------------------------------------------------------------------ */ + +static int32_t vnodeProcessStreamTsdbDataDiffRangeReq(SVnode* pVnode, SRpcMsg* pMsg, + SSTriggerPullRequestUnion* req, + SStreamTriggerReaderInfo* sStreamReaderInfo) { + int32_t code = 0; + int32_t lino = 0; + void* buf = NULL; + size_t size = 0; + SSDataBlock* pCur = NULL; + SSDataBlock* pResult = NULL; + SDiffRangeIter* iter = NULL; - STREAM_CHECK_RET_GOTO(taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner))); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->triggerResBlock, false, &pTaskInner->pResBlockDst)); + void* pTask = sStreamReaderInfo->pTask; + ESTriggerPullType type = req->base.type; + + bool 
isCalc = (type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC || + type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT); + bool isFirst = (type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE || + type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC); + ESTriggerPullType baseType = isCalc ? STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC + : STRIGGER_PULL_TSDB_DATA_DIFF_RANGE; + int64_t key = getSessionKey(req->base.sessionId, (int64_t)baseType); + + ST_TASK_DLOG("vgId:%d %s start, type:%d, isCalc:%d, isFirst:%d", TD_VID(pVnode), __func__, type, isCalc, isFirst); + + if (isFirst) { + if (sStreamReaderInfo->streamTaskMapHistory == NULL) { + sStreamReaderInfo->streamTaskMapHistory = + taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->streamTaskMapHistory, terrno); + taosHashSetFreeFp(sStreamReaderInfo->streamTaskMapHistory, releaseDiffRangeIterFp); + } + /* If a stale iter exists for this key, freeFp will destroy it on overwrite. */ + STREAM_CHECK_RET_GOTO(newDiffRangeIter(req->tsdbDataDiffRangeReq.ranges, + req->tsdbDataDiffRangeReq.ver, + req->tsdbDataDiffRangeReq.order, + isCalc, &iter)); + code = taosHashPut(sStreamReaderInfo->streamTaskMapHistory, &key, LONG_BYTES, &iter, sizeof(iter)); + if (code != 0) { + destroyDiffRangeIter(iter); + iter = NULL; + STREAM_CHECK_RET_GOTO(code); + } } else { - void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES); + void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMapHistory, &key, LONG_BYTES); STREAM_CHECK_NULL_GOTO(tmp, TSDB_CODE_STREAM_NO_CONTEXT); - pTaskInner = *(SStreamReaderTaskInner**)tmp; - STREAM_CHECK_NULL_GOTO(pTaskInner, TSDB_CODE_INTERNAL_ERROR); + iter = *(SDiffRangeIter**)tmp; + STREAM_CHECK_NULL_GOTO(iter, TSDB_CODE_INTERNAL_ERROR); } - blockDataCleanup(pTaskInner->pResBlockDst); - bool hasNext = true; - while (1) { - STREAM_CHECK_RET_GOTO(getTableDataInfo(pTaskInner, &hasNext)); - if (!hasNext) { - break; - } - 
pTaskInner->pResBlock->info.id.groupId = qStreamGetGroupIdFromSet(sStreamReaderInfo, pTaskInner->pResBlock->info.id.uid); + while (iter->pos < (int32_t)taosArrayGetSize(iter->ranges)) { + SSTriggerTableTimeRange* r = (SSTriggerTableTimeRange*)taosArrayGet(iter->ranges, iter->pos); + STREAM_CHECK_NULL_GOTO(r, terrno); - SSDataBlock* pBlock = NULL; - STREAM_CHECK_RET_GOTO(getTableData(pTaskInner, &pBlock)); - STREAM_CHECK_RET_GOTO(qStreamFilter(pBlock, sStreamReaderInfo->pFilterInfo, NULL)); - STREAM_CHECK_RET_GOTO(blockDataMerge(pTaskInner->pResBlockDst, pBlock)); - if (pTaskInner->pResBlockDst->info.rows >= STREAM_RETURN_ROWS_NUM) { - break; - } + STREAM_CHECK_RET_GOTO(scanOneTableForRange(pVnode, sStreamReaderInfo, r, + iter->ver, iter->order, iter->isCalc, &pCur)); + + iter->pos++; + if (pCur && pCur->info.rows >= STREAM_RETURN_ROWS_NUM) break; } - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->calcResBlock, false, &pBlockRes)); - STREAM_CHECK_RET_GOTO(blockDataEnsureCapacity(pBlockRes, pTaskInner->pResBlockDst->info.capacity)); - blockDataTransform(pBlockRes, pTaskInner->pResBlockDst); - STREAM_CHECK_RET_GOTO(buildRsp(pBlockRes, &buf, &size)); - printDataBlock(pBlockRes, __func__, "tsdb_calc_data", ((SStreamTask*)pTask)->streamId); - ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pBlockRes->info.rows); - printDataBlock(pBlockRes, __func__, "tsdb_data", ((SStreamTask*)pTask)->streamId); + STREAM_CHECK_RET_GOTO(transformDataToCalc(sStreamReaderInfo, isCalc, &pCur, &pResult)); - if (!hasNext) { - STREAM_CHECK_RET_GOTO(taosHashRemove(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES)); + STREAM_CHECK_RET_GOTO(buildRsp(pCur, &buf, &size)); + if (pCur != NULL) { + ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pCur->info.rows); + printDataBlock(pCur, __func__, "tsdb_data", ((SStreamTask*)pTask)->streamId); } + if (iter->pos >= (int32_t)taosArrayGetSize(iter->ranges)) { + /* freeFp destroys 
iter on remove; do NOT call destroyDiffRangeIter directly. */ + (void)taosHashRemove(sStreamReaderInfo->streamTaskMapHistory, &key, LONG_BYTES); + iter = NULL; + } end: + if (code != 0 && iter != NULL && sStreamReaderInfo->streamTaskMapHistory != NULL) { + /* Evict partial iter so subsequent NEXT pulls get STREAM_NO_CONTEXT. */ + (void)taosHashRemove(sStreamReaderInfo->streamTaskMapHistory, &key, LONG_BYTES); + iter = NULL; + } STREAM_PRINT_LOG_END_WITHID(code, lino); SRpcMsg rsp = { .msgType = TDMT_STREAM_TRIGGER_PULL_RSP, .info = pMsg->info, .pCont = buf, .contLen = size, .code = code}; tmsgSendRsp(&rsp); - blockDataDestroy(pBlockRes); - taosMemoryFree(pList); + + blockDataDestroy(pResult); + blockDataDestroy(pCur); return code; } -static int32_t vnodeProcessStreamTsdbVirtalDataReq(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { - int32_t code = 0; - int32_t lino = 0; - void* buf = NULL; - size_t size = 0; - int32_t* slotIdList = NULL; - SArray* sortedCid = NULL; - SArray* schemas = NULL; - SSDataBlock* pBlockRes = NULL; - +/* ------------------------------------------------------------------ */ +/* vnodeProcessStreamTsdbDataSameRangeReq */ +/* ------------------------------------------------------------------ */ + +static int32_t vnodeProcessStreamTsdbDataSameRangeReq(SVnode* pVnode, SRpcMsg* pMsg, + SSTriggerPullRequestUnion* req, + SStreamTriggerReaderInfo* sStreamReaderInfo) { + int32_t code = 0; + int32_t lino = 0; + void* buf = NULL; + size_t size = 0; + STableKeyInfo* pList = NULL; + SSDataBlock* pCur = NULL; + SSDataBlock* pResult = NULL; + SSDataBlock* tmplBlock = NULL; + void* pTask = sStreamReaderInfo->pTask; - ST_TASK_DLOG("vgId:%d %s start, skey:%"PRId64",ekey:%"PRId64",uid:%"PRId64",ver:%"PRId64, TD_VID(pVnode), __func__, - req->tsdbDataReq.skey, req->tsdbDataReq.ekey, req->tsdbDataReq.uid, req->tsdbDataReq.ver); - + + /* SameRange only applies to non-virtual-table streams */ + if 
(sStreamReaderInfo->isVtableStream) { + code = TSDB_CODE_INVALID_PARA; + goto end; + } + + ESTriggerPullType type = req->base.type; + bool isCalc = (type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC || + type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT); + bool isFirst = (type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE || + type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC); + ESTriggerPullType baseType = isCalc ? STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC + : STRIGGER_PULL_TSDB_DATA_SAME_RANGE; + int64_t key = getSessionKey(req->base.sessionId, (int64_t)baseType); + + ST_TASK_DLOG("vgId:%d %s start, type:%d, isCalc:%d, isFirst:%d, gid:%"PRId64 + ", skey:%"PRId64", ekey:%"PRId64, TD_VID(pVnode), __func__, + type, isCalc, isFirst, + req->tsdbDataSameRangeReq.gid, + req->tsdbDataSameRangeReq.skey, + req->tsdbDataSameRangeReq.ekey); + SStreamReaderTaskInner* pTaskInner = NULL; - int64_t key = req->tsdbDataReq.uid; - - if (req->base.type == STRIGGER_PULL_TSDB_DATA) { - // sort cid and build slotIdList - slotIdList = taosMemoryMalloc(taosArrayGetSize(req->tsdbDataReq.cids) * sizeof(int32_t)); - STREAM_CHECK_NULL_GOTO(slotIdList, terrno); - sortedCid = taosArrayDup(req->tsdbDataReq.cids, NULL); - STREAM_CHECK_NULL_GOTO(sortedCid, terrno); - taosArraySort(sortedCid, sortCid); - for (int32_t i = 0; i < taosArrayGetSize(req->tsdbDataReq.cids); i++) { - int16_t* cid = taosArrayGet(req->tsdbDataReq.cids, i); - STREAM_CHECK_NULL_GOTO(cid, terrno); - for (int32_t j = 0; j < taosArrayGetSize(sortedCid); j++) { - int16_t* cidSorted = taosArrayGet(sortedCid, j); - STREAM_CHECK_NULL_GOTO(cidSorted, terrno); - if (*cid == *cidSorted) { - slotIdList[j] = i; - break; - } - } + + if (isNewCalc(sStreamReaderInfo, isCalc)) { + tmplBlock = sStreamReaderInfo->calcBlock; + } else { + tmplBlock = sStreamReaderInfo->triggerBlock; + } + + if (isFirst) { + int32_t pNum = 0; + STREAM_CHECK_RET_GOTO(qStreamGetTableList(sStreamReaderInfo, req->tsdbDataSameRangeReq.gid, &pList, &pNum)); + 
BUILD_OPTION(options, sStreamReaderInfo->suid, req->tsdbDataSameRangeReq.ver, + req->tsdbDataSameRangeReq.order, + req->tsdbDataSameRangeReq.skey, req->tsdbDataSameRangeReq.ekey, + NULL, false, NULL); + if (isNewCalc(sStreamReaderInfo, isCalc)) { + options.schemas = sStreamReaderInfo->calcCols; + } else { + options.schemas = sStreamReaderInfo->triggerCols; + } + STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, tmplBlock, + pList, pNum, &sStreamReaderInfo->storageApi)); + code = taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, + &pTaskInner, sizeof(pTaskInner)); + if (code != 0) { + releaseStreamTask(&pTaskInner); + goto end; } - STREAM_CHECK_RET_GOTO(buildScheamFromMeta(pVnode, req->tsdbDataReq.uid, &schemas, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(shrinkScheams(req->tsdbDataReq.cids, schemas)); - STREAM_CHECK_RET_GOTO(createDataBlockForStream(schemas, &pBlockRes)); - - taosArraySort(schemas, sortSSchema); - BUILD_OPTION(options, req->tsdbDataReq.suid, req->tsdbDataReq.ver, req->tsdbDataReq.order, req->tsdbDataReq.skey, - req->tsdbDataReq.ekey, schemas, true, &slotIdList); - STableKeyInfo keyInfo = {.uid = req->tsdbDataReq.uid, .groupId = 0}; - STREAM_CHECK_RET_GOTO(createStreamTask(pVnode, &options, &pTaskInner, pBlockRes, &keyInfo, 1, &sStreamReaderInfo->storageApi)); - STREAM_CHECK_RET_GOTO(taosHashPut(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES, &pTaskInner, sizeof(pTaskInner))); - pTaskInner->pResBlockDst = pBlockRes; - pBlockRes = NULL; } else { void** tmp = taosHashGet(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES); STREAM_CHECK_NULL_GOTO(tmp, TSDB_CODE_STREAM_NO_CONTEXT); @@ -3348,40 +3447,57 @@ static int32_t vnodeProcessStreamTsdbVirtalDataReq(SVnode* pVnode, SRpcMsg* pMsg STREAM_CHECK_NULL_GOTO(pTaskInner, TSDB_CODE_INTERNAL_ERROR); } - blockDataCleanup(pTaskInner->pResBlockDst); - bool hasNext = true; + STREAM_CHECK_RET_GOTO(createOneDataBlock(tmplBlock, false, &pCur)); + + bool hasNext = 
true; while (1) { STREAM_CHECK_RET_GOTO(getTableDataInfo(pTaskInner, &hasNext)); - if (!hasNext) { - break; - } + if (!hasNext) break; SSDataBlock* pBlock = NULL; STREAM_CHECK_RET_GOTO(getTableData(pTaskInner, &pBlock)); - STREAM_CHECK_RET_GOTO(blockDataMerge(pTaskInner->pResBlockDst, pBlock)); - if (pTaskInner->pResBlockDst->info.rows >= STREAM_RETURN_ROWS_NUM) { - break; + if (pBlock != NULL && pBlock->info.rows > 0) { + STREAM_CHECK_RET_GOTO(processTag(sStreamReaderInfo, isCalc, pBlock->info.id.uid, pBlock, + 0, pBlock->info.rows, 1)); + } + if (!sStreamReaderInfo->isVtableStream) { + STREAM_CHECK_RET_GOTO(qStreamFilter(pBlock, isNewCalc(sStreamReaderInfo, isCalc) ? sStreamReaderInfo->pFilterInfoCalc : sStreamReaderInfo->pFilterInfoTrigger, NULL)); + } + if (pBlock->info.rows == 0) { + continue; } + + // add uid to the end + SColumnInfoData* pColData = taosArrayGetLast(pBlock->pDataBlock); + STREAM_CHECK_NULL_GOTO(pColData, terrno); + STREAM_CHECK_RET_GOTO(colDataSetNItems(pColData, 0, (const char*)&pBlock->info.id.uid, pBlock->info.rows, 1, false)); + + STREAM_CHECK_RET_GOTO(blockDataMerge(pCur, pBlock)); + if (pCur && pCur->info.rows >= STREAM_RETURN_ROWS_NUM) break; + } + + STREAM_CHECK_RET_GOTO(transformDataToCalc(sStreamReaderInfo, isCalc, &pCur, &pResult)); + + STREAM_CHECK_RET_GOTO(buildRsp(pCur, &buf, &size)); + if (pCur != NULL) { + ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pCur->info.rows); + printDataBlock(pCur, __func__, "tsdb_data", ((SStreamTask*)pTask)->streamId); } - STREAM_CHECK_RET_GOTO(buildRsp(pTaskInner->pResBlockDst, &buf, &size)); - ST_TASK_DLOG("vgId:%d %s get result rows:%" PRId64, TD_VID(pVnode), __func__, pTaskInner->pResBlockDst->info.rows); - printDataBlock(pTaskInner->pResBlockDst, __func__, "tsdb_data", ((SStreamTask*)pTask)->streamId); if (!hasNext) { STREAM_CHECK_RET_GOTO(taosHashRemove(sStreamReaderInfo->streamTaskMap, &key, LONG_BYTES)); } - end: STREAM_PRINT_LOG_END_WITHID(code, lino); 
SRpcMsg rsp = { .msgType = TDMT_STREAM_TRIGGER_PULL_RSP, .info = pMsg->info, .pCont = buf, .contLen = size, .code = code}; tmsgSendRsp(&rsp); - taosMemFree(slotIdList); - taosArrayDestroy(sortedCid); - taosArrayDestroy(schemas); - blockDataDestroy(pBlockRes); + blockDataDestroy(pResult); + blockDataDestroy(pCur); + taosMemoryFree(pList); return code; } + static int32_t vnodeProcessStreamWalMetaNewReq(SVnode* pVnode, SRpcMsg* pMsg, SSTriggerPullRequestUnion* req, SStreamTriggerReaderInfo* sStreamReaderInfo) { int32_t code = 0; int32_t lino = 0; @@ -3499,7 +3615,10 @@ static int32_t vnodeProcessStreamWalDataNewReq(SVnode* pVnode, SRpcMsg* pMsg, SS void* pTask = sStreamReaderInfo->pTask; ST_TASK_DLOG("vgId:%d %s start, request paras size:%zu", TD_VID(pVnode), __func__, taosArrayGetSize(req->walDataNewReq.versions)); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->triggerBlock, false, (SSDataBlock**)&resultRsp.dataBlock)); + resultRsp.isCalc = STRIGGER_PULL_WAL_CALC_DATA_NEW == req->base.type ? true : false; + SSDataBlock* dataBlock = resultRsp.isCalc ? 
sStreamReaderInfo->calcBlock : sStreamReaderInfo->triggerBlock; + STREAM_CHECK_RET_GOTO(createOneDataBlock(dataBlock, false, (SSDataBlock**)&resultRsp.dataBlock)); + resultRsp.indexHash = tSimpleHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); STREAM_CHECK_NULL_GOTO(resultRsp.indexHash, terrno); resultRsp.uidHash = tSimpleHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); @@ -4154,6 +4273,7 @@ int32_t vnodeProcessStreamReaderMsg(SVnode* pVnode, SRpcMsg* pMsg, SQueueInfo *p sendRsp = true; switch (req.base.type) { case STRIGGER_PULL_SET_TABLE: + case STRIGGER_PULL_SET_TABLE_HISTORY: STREAM_CHECK_RET_GOTO(vnodeProcessStreamSetTableReq(pVnode, pMsg, &req, sStreamReaderInfo)); break; case STRIGGER_PULL_LAST_TS: @@ -4164,27 +4284,12 @@ int32_t vnodeProcessStreamReaderMsg(SVnode* pVnode, SRpcMsg* pMsg, SQueueInfo *p break; case STRIGGER_PULL_TSDB_META: case STRIGGER_PULL_TSDB_META_NEXT: - STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbMetaReq(pVnode, pMsg, &req, sStreamReaderInfo)); - break; - case STRIGGER_PULL_TSDB_TS_DATA: if (sStreamReaderInfo->isVtableStream) { - STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbTsDataReqVTable(pVnode, pMsg, &req, sStreamReaderInfo)); + STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbMetaVtableReq(pVnode, pMsg, &req, sStreamReaderInfo)); } else { - STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbTsDataReqNonVTable(pVnode, pMsg, &req, sStreamReaderInfo)); + STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbMetaReq(pVnode, pMsg, &req, sStreamReaderInfo)); } break; - case STRIGGER_PULL_TSDB_TRIGGER_DATA: - case STRIGGER_PULL_TSDB_TRIGGER_DATA_NEXT: - STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbTriggerDataReq(pVnode, pMsg, &req, sStreamReaderInfo)); - break; - case STRIGGER_PULL_TSDB_CALC_DATA: - case STRIGGER_PULL_TSDB_CALC_DATA_NEXT: - STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbCalcDataReq(pVnode, pMsg, &req, sStreamReaderInfo)); - break; - case STRIGGER_PULL_TSDB_DATA: - case STRIGGER_PULL_TSDB_DATA_NEXT: - 
STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbVirtalDataReq(pVnode, pMsg, &req, sStreamReaderInfo)); - break; case STRIGGER_PULL_GROUP_COL_VALUE: STREAM_CHECK_RET_GOTO(vnodeProcessStreamGroupColValueReq(pVnode, pMsg, &req, sStreamReaderInfo)); break; @@ -4207,7 +4312,23 @@ int32_t vnodeProcessStreamReaderMsg(SVnode* pVnode, SRpcMsg* pMsg, SQueueInfo *p STREAM_CHECK_RET_GOTO(vnodeProcessStreamWalMetaDataNewReq(pVnode, pMsg, &req, sStreamReaderInfo)); break; case STRIGGER_PULL_WAL_CALC_DATA_NEW: - STREAM_CHECK_RET_GOTO(vnodeProcessStreamWalCalcDataNewReq(pVnode, pMsg, &req, sStreamReaderInfo)); + if (sStreamReaderInfo->isOldPlan) { + STREAM_CHECK_RET_GOTO(vnodeProcessStreamWalCalcDataNewReq(pVnode, pMsg, &req, sStreamReaderInfo)); + } else { + STREAM_CHECK_RET_GOTO(vnodeProcessStreamWalDataNewReq(pVnode, pMsg, &req, sStreamReaderInfo)); + } + break; + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC: + case STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT: + STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbDataDiffRangeReq(pVnode, pMsg, &req, sStreamReaderInfo)); + break; + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC: + case STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT: + STREAM_CHECK_RET_GOTO(vnodeProcessStreamTsdbDataSameRangeReq(pVnode, pMsg, &req, sStreamReaderInfo)); break; default: vError("unknown inner msg type:%d in stream reader queue", req.base.type); diff --git a/source/libs/new-stream/src/streamReader.c b/source/libs/new-stream/src/streamReader.c index 9d363d0e6095..28591c9c6b11 100644 --- a/source/libs/new-stream/src/streamReader.c +++ b/source/libs/new-stream/src/streamReader.c @@ -178,11 +178,11 @@ static void* copyTableInfo(void* p) { return dst; } -int32_t qStreamCopyTableInfo(SStreamTriggerReaderInfo* sStreamReaderInfo, StreamTableListInfo* dst){ +int32_t 
qStreamCopyTableInfo(SStreamTriggerReaderInfo* sStreamReaderInfo, StreamTableListInfo* dst, bool isHistory){ int32_t code = 0; int32_t lino = 0; taosRLockLatch(&sStreamReaderInfo->lock); - StreamTableListInfo* src = sStreamReaderInfo->isVtableStream ? &sStreamReaderInfo->vSetTableList : &sStreamReaderInfo->tableList; + StreamTableListInfo* src = sStreamReaderInfo->isVtableStream ? (isHistory ? &sStreamReaderInfo->vSetTableListHistory : &sStreamReaderInfo->vSetTableList) : &sStreamReaderInfo->tableList; int32_t totalSize = taosArrayGetSize(src->pTableList); for (int32_t i = 0; i < totalSize; ++i) { SStreamTableKeyInfo* info = taosArrayGetP(src->pTableList, i); @@ -296,7 +296,7 @@ int32_t qStreamGetTableList(SStreamTriggerReaderInfo* sStreamReaderInfo, uint64_ *size = 0; *pKeyInfo = NULL; taosRLockLatch(&sStreamReaderInfo->lock); - StreamTableListInfo* tmp = sStreamReaderInfo->isVtableStream ? &sStreamReaderInfo->vSetTableList : &sStreamReaderInfo->tableList; + StreamTableListInfo* tmp = &sStreamReaderInfo->tableList; if (gid == 0) { // return all tables STREAM_CHECK_RET_GOTO(buildTableListFromArray(pKeyInfo, size, tmp->pTableList)); goto end; @@ -350,6 +350,23 @@ int32_t qBuildVTableList(SStreamTriggerReaderInfo* sStreamReaderInfo) { return code; } +int32_t qBuildVTableListHistory(SStreamTriggerReaderInfo* sStreamReaderInfo) { + int32_t code = 0; + int32_t lino = 0; + int32_t iter = 0; + void* pTask = sStreamReaderInfo->pTask; + void* px = tSimpleHashIterate(sStreamReaderInfo->uidHashTriggerHistory, NULL, &iter); + while (px != NULL) { + int64_t* id = tSimpleHashGetKey(px, NULL); + STREAM_CHECK_RET_GOTO(qStreamSetTableList(&sStreamReaderInfo->vSetTableListHistory, *(id+1), *id)); + px = tSimpleHashIterate(sStreamReaderInfo->uidHashTriggerHistory, px, &iter); + ST_TASK_DLOG("%s build history tablelist for vtable, suid:%"PRId64" uid:%"PRId64, __func__, *id, *(id+1)); + } + +end: + return code; +} + void releaseStreamTask(void* p) { if (p == NULL) return; 
SStreamReaderTaskInner* pTask = *((SStreamReaderTaskInner**)p); @@ -358,7 +375,8 @@ void releaseStreamTask(void* p) { blockDataDestroy(pTask->pResBlockDst); pTask->storageApi->tsdReader.tsdReaderClose(pTask->pReader); cleanupQueryTableDataCond(&pTask->cond); - + qStreamDestroyTableInfo(&pTask->vTableInfo); + taosMemoryFree(pTask); } @@ -441,7 +459,7 @@ int32_t qStreamInitQueryTableDataCond(SQueryTableDataCond* pCond, int32_t order, pCond->colList[i].colId = pSchema->colId; pCond->colList[i].pk = pSchema->flags & COL_IS_KEY; - if (pSlotList == NULL ) pCond->pSlotList[i] = i; + if (pSlotList == NULL) pCond->pSlotList[i] = i; } else { STargetNode* pNode = (STargetNode*)nodesListGetNode((SNodeList*)schemas, i); STREAM_CHECK_NULL_GOTO(pNode, TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); @@ -523,8 +541,10 @@ static void releaseStreamReaderInfo(void* p) { if (p == NULL) return; SStreamTriggerReaderInfo* pInfo = (SStreamTriggerReaderInfo*)p; taosHashCleanup(pInfo->streamTaskMap); + taosHashCleanup(pInfo->streamTaskMapHistory); taosHashCleanup(pInfo->groupIdMap); pInfo->streamTaskMap = NULL; + pInfo->streamTaskMapHistory = NULL; nodesDestroyNode((SNode*)(pInfo->triggerAst)); nodesDestroyNode((SNode*)(pInfo->calcAst)); @@ -540,10 +560,15 @@ static void releaseStreamReaderInfo(void* p) { taosMemoryFreeClear(pInfo->pExprInfoCalcTag); tSimpleHashCleanup(pInfo->uidHashTrigger); tSimpleHashCleanup(pInfo->uidHashCalc); + tSimpleHashCleanup(pInfo->uidHashTriggerHistory); + tSimpleHashCleanup(pInfo->uidHashCalcHistory); qStreamDestroyTableInfo(&pInfo->tableList); qStreamDestroyTableInfo(&pInfo->vSetTableList); - filterFreeInfo(pInfo->pFilterInfo); - pInfo->pFilterInfo = NULL; + qStreamDestroyTableInfo(&pInfo->vSetTableListHistory); + filterFreeInfo(pInfo->pFilterInfoTrigger); + pInfo->pFilterInfoTrigger = NULL; + filterFreeInfo(pInfo->pFilterInfoCalc); + pInfo->pFilterInfoCalc = NULL; blockDataDestroy(pInfo->triggerBlock); pInfo->triggerBlock = NULL; blockDataDestroy(pInfo->calcBlock); @@ 
-653,6 +678,7 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre sStreamReaderInfo->pTask = pTask; sStreamReaderInfo->tableType = pMsg->msg.trigger.triggerTblType; sStreamReaderInfo->isVtableStream = pMsg->msg.trigger.isTriggerTblVirt; + sStreamReaderInfo->isOldPlan = pMsg->msg.trigger.isOldPlan; sStreamReaderInfo->suid = pMsg->msg.trigger.triggerTblSuid; sStreamReaderInfo->uid = pMsg->msg.trigger.triggerTblUid; @@ -670,8 +696,7 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); sStreamReaderInfo->pTagCond = sStreamReaderInfo->triggerAst->pTagCond; sStreamReaderInfo->pTagIndexCond = sStreamReaderInfo->triggerAst->pTagIndexCond; - sStreamReaderInfo->pConditions = sStreamReaderInfo->triggerAst->pNode->pConditions; - STREAM_CHECK_RET_GOTO(filterInitFromNode(sStreamReaderInfo->pConditions, &sStreamReaderInfo->pFilterInfo, 0, NULL)); + STREAM_CHECK_RET_GOTO(filterInitFromNode(sStreamReaderInfo->triggerAst->pNode->pConditions, &sStreamReaderInfo->pFilterInfoTrigger, 0, NULL)); STREAM_CHECK_RET_GOTO(nodesStringToList(pMsg->msg.trigger.partitionCols, &sStreamReaderInfo->partitionCols)); sStreamReaderInfo->twindows = ((STableScanPhysiNode*)(sStreamReaderInfo->triggerAst->pNode))->scanRange; sStreamReaderInfo->triggerCols = ((STableScanPhysiNode*)(sStreamReaderInfo->triggerAst->pNode))->scan.pScanCols; @@ -681,19 +706,17 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre sStreamReaderInfo->triggerResBlock = createDataBlockFromDescNode(pDescNode); STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->triggerResBlock, TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); - // SColumnInfoData idata = createColumnInfoData(TSDB_DATA_TYPE_BIGINT, LONG_BYTES, -1); // uid - // STREAM_CHECK_RET_GOTO(blockDataAppendColInfo(sStreamReaderInfo->triggerResBlockNew, &idata)); - // idata = createColumnInfoData(TSDB_DATA_TYPE_UBIGINT, LONG_BYTES, -1); // gid - // 
STREAM_CHECK_RET_GOTO(blockDataAppendColInfo(sStreamReaderInfo->triggerResBlockNew, &idata)); - - // STREAM_CHECK_RET_GOTO(buildSTSchemaForScanData(&sStreamReaderInfo->triggerSchema, sStreamReaderInfo->triggerCols)); - sStreamReaderInfo->triggerPseudoCols = ((STableScanPhysiNode*)(sStreamReaderInfo->triggerAst->pNode))->scan.pScanPseudoCols; - if (sStreamReaderInfo->triggerPseudoCols != NULL) { + SNodeList* triggerPseudoCols = ((STableScanPhysiNode*)(sStreamReaderInfo->triggerAst->pNode))->scan.pScanPseudoCols; + if (triggerPseudoCols != NULL) { STREAM_CHECK_RET_GOTO( - createExprInfo(sStreamReaderInfo->triggerPseudoCols, NULL, &sStreamReaderInfo->pExprInfoTriggerTag, &sStreamReaderInfo->numOfExprTriggerTag)); + createExprInfo(triggerPseudoCols, NULL, &sStreamReaderInfo->pExprInfoTriggerTag, &sStreamReaderInfo->numOfExprTriggerTag)); } - STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(sStreamReaderInfo->triggerPseudoCols, sStreamReaderInfo->triggerResBlock->pDataBlock)); + STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(triggerPseudoCols, sStreamReaderInfo->triggerResBlock->pDataBlock)); STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(sStreamReaderInfo->triggerCols, sStreamReaderInfo->triggerResBlock->pDataBlock)); + STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->triggerResBlock, false, &sStreamReaderInfo->triggerBlock)); + SColumnInfoData idata = createColumnInfoData(TSDB_DATA_TYPE_BIGINT, LONG_BYTES, INT16_MIN); // ver + STREAM_CHECK_RET_GOTO(blockDataAppendColInfo(sStreamReaderInfo->triggerBlock, &idata)); + sStreamReaderInfo->groupByTbname = groupbyTbname(sStreamReaderInfo->partitionCols); } @@ -705,6 +728,10 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN != nodeType(sStreamReaderInfo->calcAst->pNode), TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); + STREAM_CHECK_RET_GOTO(filterInitFromNode(sStreamReaderInfo->calcAst->pNode->pConditions, &sStreamReaderInfo->pFilterInfoCalc, 0, NULL)); 
+ sStreamReaderInfo->calcCols = ((STableScanPhysiNode*)(sStreamReaderInfo->calcAst->pNode))->scan.pScanCols; + STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->calcCols, TSDB_CODE_STREAM_NOT_TABLE_SCAN_PLAN); + SDataBlockDescNode* pDescNode = ((STableScanPhysiNode*)(sStreamReaderInfo->calcAst->pNode))->scan.node.pOutputDataBlockDesc; sStreamReaderInfo->calcResBlock = createDataBlockFromDescNode(pDescNode); @@ -716,9 +743,8 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre STREAM_CHECK_RET_GOTO( createExprInfo(pseudoCols, NULL, &sStreamReaderInfo->pExprInfoCalcTag, &sStreamReaderInfo->numOfExprCalcTag)); } - SNodeList* pScanCols = ((STableScanPhysiNode*)(sStreamReaderInfo->calcAst->pNode))->scan.pScanCols; STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(pseudoCols, sStreamReaderInfo->calcResBlock->pDataBlock)); - STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(pScanCols, sStreamReaderInfo->calcResBlock->pDataBlock)); + STREAM_CHECK_RET_GOTO(setColIdForCalcResBlock(sStreamReaderInfo->calcCols, sStreamReaderInfo->calcResBlock->pDataBlock)); STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->calcResBlock, false, &sStreamReaderInfo->calcBlock)); SColumnInfoData idata = createColumnInfoData(TSDB_DATA_TYPE_BIGINT, LONG_BYTES, INT16_MIN); // ver STREAM_CHECK_RET_GOTO(blockDataAppendColInfo(sStreamReaderInfo->calcBlock, &idata)); @@ -751,10 +777,6 @@ static SStreamTriggerReaderInfo* createStreamReaderInfo(void* pTask, const SStre STREAM_CHECK_NULL_GOTO(sStreamReaderInfo->triggerTableSchemaMapVTable, terrno); taosHashSetFreeFp(sStreamReaderInfo->triggerTableSchemaMapVTable, freeSchema); - STREAM_CHECK_RET_GOTO(createOneDataBlock(sStreamReaderInfo->triggerResBlock, false, &sStreamReaderInfo->triggerBlock)); - SColumnInfoData idata = createColumnInfoData(TSDB_DATA_TYPE_BIGINT, LONG_BYTES, INT16_MIN); // ver - STREAM_CHECK_RET_GOTO(blockDataAppendColInfo(sStreamReaderInfo->triggerBlock, &idata)); - end: STREAM_PRINT_LOG_END(code, lino); @@ -832,8 
+854,8 @@ int32_t stReaderTaskDeploy(SStreamReaderTask* pTask, const SStreamReaderDeployMs pTask->triggerReader = pMsg->triggerReader; if (pMsg->triggerReader == 1) { - ST_TASK_DLOGL("triggerScanPlan:%s", (char*)(pMsg->msg.trigger.triggerScanPlan)); - ST_TASK_DLOGL("calcCacheScanPlan:%s", (char*)(pMsg->msg.trigger.calcCacheScanPlan)); + ST_TASK_DLOGL("readerTriggerScanPlan:%s", (char*)(pMsg->msg.trigger.triggerScanPlan)); + ST_TASK_DLOGL("readerCalcScanPlan:%s", (char*)(pMsg->msg.trigger.calcCacheScanPlan)); pTask->info = createStreamReaderInfo(pTask, pMsg); STREAM_CHECK_NULL_GOTO(pTask->info, terrno); } else { diff --git a/source/libs/new-stream/test/CMakeLists.txt b/source/libs/new-stream/test/CMakeLists.txt index 7f26236b3080..e1ff63d06149 100644 --- a/source/libs/new-stream/test/CMakeLists.txt +++ b/source/libs/new-stream/test/CMakeLists.txt @@ -65,4 +65,20 @@ add_test( NAME streamTriggerTaskTest COMMAND streamTriggerTaskTest ) +ADD_EXECUTABLE(streamMsgTest streamMsgTest.cpp) +DEP_ext_gtest(streamMsgTest) +TARGET_INCLUDE_DIRECTORIES( + streamMsgTest + PUBLIC "${TD_SOURCE_DIR}/include/libs/new-stream" + PUBLIC "${TD_SOURCE_DIR}/include/libs/executor" + PUBLIC "${TD_SOURCE_DIR}/include/libs/qcom" + PUBLIC "${TD_SOURCE_DIR}/include/common" +) +TARGET_LINK_LIBRARIES(streamMsgTest PUBLIC taos os common executor function index new-stream) + +add_test( + NAME streamMsgTest + COMMAND streamMsgTest +) + endif(NOT ${TD_WINDOWS}) diff --git a/source/libs/new-stream/test/streamMsgTest.cpp b/source/libs/new-stream/test/streamMsgTest.cpp new file mode 100644 index 000000000000..09043d2d9680 --- /dev/null +++ b/source/libs/new-stream/test/streamMsgTest.cpp @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <gtest/gtest.h>
+#include <vector>
+
+extern "C" {
+#include "streamMsg.h"
+#include "taoserror.h"
+}
+
+// ───────────────────────────────────────────────────────────
+// Helper: build the base fields
+// ───────────────────────────────────────────────────────────
+static void fillBase(SSTriggerPullRequest* base, ESTriggerPullType type) {
+  base->type = type;
+  base->streamId = 100;
+  base->readerTaskId = 200;
+  base->sessionId = 300;
+}
+
+// ───────────────────────────────────────────────────────────
+// TEST 1: SetTable round-trip — base type distinguishes history vs realtime
+// ───────────────────────────────────────────────────────────
+TEST(StreamMsg, SetTable_TypeRoundTrip) {
+  ESTriggerPullType types[] = {STRIGGER_PULL_SET_TABLE, STRIGGER_PULL_SET_TABLE_HISTORY};
+  for (ESTriggerPullType t : types) {
+    SSTriggerSetTableRequest req = {};
+    fillBase(&req.base, t);
+    req.uidInfoTrigger = tSimpleHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
+    req.uidInfoCalc = tSimpleHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
+    ASSERT_NE(req.uidInfoTrigger, nullptr);
+    ASSERT_NE(req.uidInfoCalc, nullptr);
+
+    // serialize
+    int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req);
+    ASSERT_GT(need, 0);
+    std::vector<char> buf(need);
+    int32_t ret = tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req);
+    ASSERT_GT(ret, 0);
+
+    // deserialize
+    SSTriggerPullRequestUnion out = {};
+    ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0);
+    EXPECT_EQ(out.base.type, t);
+
+    tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req);
+    tDestroySTriggerPullRequest(&out);
+  }
+}
+ +// ─────────────────────────────────────────────────────────── +// TEST 2: DiffRange round-trip for 4 variants (ranges contains 3 entries) +// ─────────────────────────────────────────────────────────── +TEST(StreamMsg, DiffRange_4Variants_RoundTrip) { + int32_t variants[] = { + (int32_t)STRIGGER_PULL_TSDB_DATA_DIFF_RANGE, + (int32_t)STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC, + (int32_t)STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_NEXT, + (int32_t)STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC_NEXT, + }; + + for (int32_t type : variants) { + SSTriggerTsdbDataDiffRangeRequest req = {}; + fillBase(&req.base, (ESTriggerPullType)type); + + bool hasPayload = (type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE || + type == STRIGGER_PULL_TSDB_DATA_DIFF_RANGE_CALC); + if (hasPayload) { + req.ver = 42; + req.order = 1; + req.ranges = taosArrayInit(3, sizeof(SSTriggerTableTimeRange)); + ASSERT_NE(req.ranges, nullptr); + for (int i = 0; i < 3; i++) { + SSTriggerTableTimeRange r = {(int64_t)(i * 10), (int64_t)(i * 10 + 1), + (int64_t)(1000 + i), (int64_t)(2000 + i)}; + taosArrayPush(req.ranges, &r); + } + } + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0) << "type=" << type; + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0) << "type=" << type; + EXPECT_EQ(out.base.type, type); + + if (hasPayload) { + SSTriggerTsdbDataDiffRangeRequest* pOut = &out.tsdbDataDiffRangeReq; + EXPECT_EQ(pOut->ver, 42); + EXPECT_EQ(pOut->order, 1); + ASSERT_NE(pOut->ranges, nullptr); + ASSERT_EQ(taosArrayGetSize(pOut->ranges), 3); + for (int i = 0; i < 3; i++) { + SSTriggerTableTimeRange* r = (SSTriggerTableTimeRange*)taosArrayGet(pOut->ranges, i); + EXPECT_EQ(r->suid, (int64_t)(i * 10)); + EXPECT_EQ(r->uid, (int64_t)(i * 10 + 1)); + EXPECT_EQ(r->skey, (int64_t)(1000 + i)); + 
EXPECT_EQ(r->ekey, (int64_t)(2000 + i)); + } + } + + tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req); + tDestroySTriggerPullRequest(&out); + } +} + +// ─────────────────────────────────────────────────────────── +// TEST 3: SameRange round-trip for 4 variants +// ─────────────────────────────────────────────────────────── +TEST(StreamMsg, SameRange_4Variants_RoundTrip) { + int32_t variants[] = { + (int32_t)STRIGGER_PULL_TSDB_DATA_SAME_RANGE, + (int32_t)STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC, + (int32_t)STRIGGER_PULL_TSDB_DATA_SAME_RANGE_NEXT, + (int32_t)STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC_NEXT, + }; + + for (int32_t type : variants) { + SSTriggerTsdbDataSameRangeRequest req = {}; + fillBase(&req.base, (ESTriggerPullType)type); + + bool hasPayload = (type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE || + type == STRIGGER_PULL_TSDB_DATA_SAME_RANGE_CALC); + if (hasPayload) { + req.ver = 77; + req.gid = 88; + req.skey = 1000; + req.ekey = 2000; + req.order = 2; + } + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0) << "type=" << type; + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0) << "type=" << type; + EXPECT_EQ(out.base.type, type); + + if (hasPayload) { + SSTriggerTsdbDataSameRangeRequest* pOut = &out.tsdbDataSameRangeReq; + EXPECT_EQ(pOut->ver, 77); + EXPECT_EQ(pOut->gid, 88); + EXPECT_EQ(pOut->skey, 1000); + EXPECT_EQ(pOut->ekey, 2000); + EXPECT_EQ(pOut->order, 2); + } + + tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req); + tDestroySTriggerPullRequest(&out); + } +} + +// ─────────────────────────────────────────────────────────── +// TEST 4: DiffRange with empty ranges (both NULL and empty SArray are accepted) +// ─────────────────────────────────────────────────────────── +TEST(StreamMsg, 
DiffRange_EmptyRanges) { + // Case A: ranges == NULL + { + SSTriggerTsdbDataDiffRangeRequest req = {}; + fillBase(&req.base, STRIGGER_PULL_TSDB_DATA_DIFF_RANGE); + req.ver = 1; req.order = 1; req.ranges = NULL; + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0); + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0); + EXPECT_EQ(out.tsdbDataDiffRangeReq.ranges, nullptr); + tDestroySTriggerPullRequest(&out); + } + + // Case B: ranges is an empty SArray + { + SSTriggerTsdbDataDiffRangeRequest req = {}; + fillBase(&req.base, STRIGGER_PULL_TSDB_DATA_DIFF_RANGE); + req.ver = 2; req.order = 2; + req.ranges = taosArrayInit(0, sizeof(SSTriggerTableTimeRange)); + ASSERT_NE(req.ranges, nullptr); + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0); + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0); + // When nRanges==0, deserialize leaves it as NULL + EXPECT_EQ(out.tsdbDataDiffRangeReq.ranges, nullptr); + + tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req); + tDestroySTriggerPullRequest(&out); + } +} + +// ─────────────────────────────────────────────────────────── +// TEST 5: DiffRange with many ranges (1000 entries) +// ─────────────────────────────────────────────────────────── +TEST(StreamMsg, DiffRange_LargeRanges) { + const int N = 1000; + SSTriggerTsdbDataDiffRangeRequest req = {}; + fillBase(&req.base, STRIGGER_PULL_TSDB_DATA_DIFF_RANGE); + req.ver = 99; req.order = 1; + req.ranges = taosArrayInit(N, sizeof(SSTriggerTableTimeRange)); + ASSERT_NE(req.ranges, nullptr); + + for (int i = 0; i < N; 
i++) { + SSTriggerTableTimeRange r = {(int64_t)i, (int64_t)(i + 1), + (int64_t)(i * 100), (int64_t)(i * 100 + 50)}; + taosArrayPush(req.ranges, &r); + } + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0); + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0); + ASSERT_NE(out.tsdbDataDiffRangeReq.ranges, nullptr); + ASSERT_EQ(taosArrayGetSize(out.tsdbDataDiffRangeReq.ranges), N); + + for (int i = 0; i < N; i++) { + SSTriggerTableTimeRange* r = (SSTriggerTableTimeRange*)taosArrayGet(out.tsdbDataDiffRangeReq.ranges, i); + EXPECT_EQ(r->suid, (int64_t)i); + EXPECT_EQ(r->uid, (int64_t)(i + 1)); + EXPECT_EQ(r->skey, (int64_t)(i * 100)); + EXPECT_EQ(r->ekey, (int64_t)(i * 100 + 50)); + } + + tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req); + tDestroySTriggerPullRequest(&out); +} + +// ─────────────────────────────────────────────────────────── +// TEST 6: DiffRange consecutive destroy is safe (the second call should be a no-op) +// ─────────────────────────────────────────────────────────── +TEST(StreamMsg, DiffRange_DoubleDestroySafe) { + SSTriggerTsdbDataDiffRangeRequest req = {}; + fillBase(&req.base, STRIGGER_PULL_TSDB_DATA_DIFF_RANGE); + req.ver = 1; req.order = 1; + req.ranges = taosArrayInit(2, sizeof(SSTriggerTableTimeRange)); + ASSERT_NE(req.ranges, nullptr); + SSTriggerTableTimeRange r = {1, 2, 100, 200}; + taosArrayPush(req.ranges, &r); + + int32_t need = tSerializeSTriggerPullRequest(NULL, 0, (SSTriggerPullRequest*)&req); + ASSERT_GT(need, 0); + std::vector buf(need); + ASSERT_GT(tSerializeSTriggerPullRequest(buf.data(), need, (SSTriggerPullRequest*)&req), 0); + + SSTriggerPullRequestUnion out = {}; + ASSERT_EQ(tDeserializeSTriggerPullRequest(buf.data(), need, &out), 0); + + // First destroy + 
tDestroySTriggerPullRequest(&out); + // After destroy, ranges has been set to NULL; a second call must not segfault + tDestroySTriggerPullRequest(&out); + + tDestroySTriggerPullRequest((SSTriggerPullRequestUnion*)&req); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 888e84b16438..4d4e94bc20b0 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -10522,7 +10522,8 @@ static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSele static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_WHERE; int32_t code = TSDB_CODE_SUCCESS; - if (pSelect->pWhere && BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && + if (pSelect->pWhere && !pSelect->pWhereInjectedFromPreFilter && + BIT_FLAG_TEST_MASK(pCxt->streamInfo.placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && inStreamCalcClause(pCxt)) { PAR_ERR_RET(generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "%%%%trows can not be used with WHERE clause.")); @@ -19101,6 +19102,62 @@ static int32_t createStreamCheckMultiGroupCalc(STranslateContext* pCxt, SNodeLis return code; } +// Inject trigger's pre_filter as WHERE into calc query when %%trows is used. +// Calc side independently re-scans the trigger table; without this the calc +// scan returns rows that pre_filter already excluded on the trigger side. 
+static int32_t injectPreFilterIntoCalcQueryImpl(STranslateContext* pCxt, SNode* pPreFilter, SNode* pQuery) { + if (NULL == pQuery) return TSDB_CODE_SUCCESS; + if (QUERY_NODE_SET_OPERATOR == nodeType(pQuery)) { + SSetOperator* pSet = (SSetOperator*)pQuery; + int32_t code = injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pSet->pLeft); + if (TSDB_CODE_SUCCESS == code) { + code = injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pSet->pRight); + if (TSDB_CODE_SUCCESS != code && QUERY_NODE_SELECT_STMT == nodeType(pSet->pLeft)) { + // Roll back left-side injection so pStmt is left in a consistent state + // even though the outer destroy would eventually reclaim it. + SSelectStmt* pLeftSelect = (SSelectStmt*)pSet->pLeft; + if (pLeftSelect->pWhereInjectedFromPreFilter) { + nodesDestroyNode(pLeftSelect->pWhere); + pLeftSelect->pWhere = NULL; + pLeftSelect->pWhereInjectedFromPreFilter = false; + } + } + } + return code; + } + if (QUERY_NODE_SELECT_STMT != nodeType(pQuery)) return TSDB_CODE_SUCCESS; + + SSelectStmt* pSelect = (SSelectStmt*)pQuery; + if (NULL == pSelect->pFromTable || + QUERY_NODE_PLACE_HOLDER_TABLE != nodeType(pSelect->pFromTable)) { + return TSDB_CODE_SUCCESS; + } + SPlaceHolderTableNode* pPh = (SPlaceHolderTableNode*)pSelect->pFromTable; + if (SP_PARTITION_ROWS != pPh->placeholderType) return TSDB_CODE_SUCCESS; + + if (NULL != pSelect->pWhere) { + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "%%%%trows can not be used with WHERE clause."); + } + + SNode* pCloned = NULL; + int32_t code = nodesCloneNode(pPreFilter, &pCloned); + if (TSDB_CODE_SUCCESS != code) return code; + pSelect->pWhere = pCloned; + pSelect->pWhereInjectedFromPreFilter = true; + return TSDB_CODE_SUCCESS; +} + +static int32_t injectPreFilterIntoCalcQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { + if (NULL == pStmt->pTrigger || NULL == pStmt->pQuery) return TSDB_CODE_SUCCESS; + SStreamTriggerNode* pTrigger = 
(SStreamTriggerNode*)pStmt->pTrigger; + if (NULL == pTrigger->pOptions) return TSDB_CODE_SUCCESS; + SNode* pPreFilter = ((SStreamTriggerOptions*)pTrigger->pOptions)->pPreFilter; + if (NULL == pPreFilter) return TSDB_CODE_SUCCESS; + parserDebug("inject stream pre_filter into calc query as WHERE"); + return injectPreFilterIntoCalcQueryImpl(pCxt, pPreFilter, pStmt->pQuery); +} + // Build calculate part in create stream request static int32_t createStreamReqBuildCalc(STranslateContext* pCxt, SCreateStreamStmt* pStmt, SNodeList* pTriggerPartition, SSelectStmt* pTriggerSelect, SNode* pTriggerWindow, SNode* pNotifyCond, @@ -19137,20 +19194,12 @@ static int32_t createStreamReqBuildCalc(STranslateContext* pCxt, SCreateStreamSt } #endif + PAR_ERR_JRET(injectPreFilterIntoCalcQuery(pCxt, pStmt)); + PAR_ERR_JRET(translateStreamCalcQuery(pCxt, pTriggerPartition, pTriggerSelect ? pTriggerSelect->pFromTable : NULL, pStmt->pQuery, pNotifyCond, pTriggerWindow)); pReq->placeHolderBitmap = pCxt->streamInfo.placeHolderBitmap; - if (BIT_FLAG_TEST_MASK(pReq->placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && - (pReq->triggerTblType == TSDB_VIRTUAL_NORMAL_TABLE || pReq->triggerTblType == TSDB_VIRTUAL_CHILD_TABLE || - BIT_FLAG_TEST_MASK(pReq->flags, CREATE_STREAM_FLAG_TRIGGER_VIRTUAL_STB))) { - if (pStmt->pTrigger && ((SStreamTriggerNode*)pStmt->pTrigger)->pOptions && - ((SStreamTriggerOptions*)((SStreamTriggerNode*)pStmt->pTrigger)->pOptions)->pPreFilter) { - PAR_ERR_JRET(generateSyntaxErrMsgExt( - &pCxt->msgBuf, TSDB_CODE_STREAM_INVALID_QUERY, - "Not support pre_filter when trigger table is virtual table and using %%trows in stream query.")); - } - } pProjectionList = nodeType(pStmt->pQuery) == QUERY_NODE_SELECT_STMT ? 
((SSelectStmt*)pStmt->pQuery)->pProjectionList : ((SSetOperator*)pStmt->pQuery)->pProjectionList; @@ -19182,7 +19231,6 @@ static int32_t createStreamReqBuildCalc(STranslateContext* pCxt, SCreateStreamSt .streamCxt.hasExtWindow = false, .streamCxt.triggerWinType = nodeType(pTriggerWindow), .streamCxt.calcScanPlanArray = pScanPlanArray, - .streamCxt.triggerScanList = NULL, .streamCxt.hasNotify = taosArrayGetSize(pReq->pNotifyAddrUrls) > 0, .streamCxt.hasForceOutput = taosArrayGetSize(pReq->forceOutCols) > 0}; @@ -19202,19 +19250,6 @@ static int32_t createStreamReqBuildCalc(STranslateContext* pCxt, SCreateStreamSt pReq->enableMultiGroupCalc = 0; } - if (BIT_FLAG_TEST_MASK(pReq->placeHolderBitmap, PLACE_HOLDER_PARTITION_ROWS) && - LIST_LENGTH(calcCxt.streamCxt.triggerScanList) > 0) { - // need collect scan cols and put into trigger's scan list - PAR_ERR_JRET(nodesListAppendList(pTriggerSelect->pProjectionList, calcCxt.streamCxt.triggerScanList)); - SNode* pCol = NULL; - FOREACH(pCol, pTriggerSelect->pProjectionList) { - if (nodeType(pCol) == QUERY_NODE_COLUMN) { - SColumnNode* pColumn = (SColumnNode*)pCol; - tstrncpy(pColumn->tableAlias, pColumn->tableName, TSDB_TABLE_NAME_LEN); - } - } - } - PAR_ERR_JRET(createStreamReqBuildCalcDb(pCxt, pDbs, pReq)); PAR_ERR_JRET(createStreamReqBuildCalcPlan(pCxt, calcPlan, pScanPlanArray, pReq)); diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 61057ace6b9e..7f267d0e6446 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -595,10 +595,6 @@ static int32_t createScanLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect code = nodesCloneNode(pSelect->pTimeRange, (SNode**)&pScan->pTimeRange); } - if (pRealTable->placeholderType == SP_PARTITION_ROWS) { - code = nodesCollectColumns(pSelect, SQL_CLAUSE_FROM, pRealTable->table.tableAlias, COLLECT_COL_TYPE_ALL, - &pCxt->pPlanCxt->streamCxt.triggerScanList); - } // set columns to 
scan if (TSDB_CODE_SUCCESS == code) { code = nodesCollectColumns(pSelect, SQL_CLAUSE_FROM, pRealTable->table.tableAlias, COLLECT_COL_TYPE_COL, diff --git a/test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py b/test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py new file mode 100644 index 000000000000..ba7196224d40 --- /dev/null +++ b/test/cases/18-StreamProcessing/04-Options/test_pre_filter_trows_scan_cols.py @@ -0,0 +1,96 @@ +import time +from new_test_framework.utils import (tdLog, tdSql, tdStream, StreamCheckItem,) + + +class TestPreFilterTrowsScanCols: + """Regression for stream trigger/calc scan-cols optimization. + + Trigger AST must NOT include calc-only columns (c3, t2). + Calc AST MUST include pre_filter columns (c2) and apply pre_filter as WHERE, + so calc-side independent re-scan returns exactly the rows pre_filter allows. + """ + + precision = 'ms' + + def setup_class(cls): + tdLog.debug(f"start to execute {__file__}") + + def test_stream_pre_filter_trows_scan_cols(self): + """%%trows scan cols optimization + + Trigger only scans state_window + pre_filter cols; calc independently + re-scans with injected pre_filter WHERE, producing identical rows. 
+ + Catalog: + - Streams:Options + + Since: v3.3.x + + Labels: common,ci + + Jira: None + """ + + tdStream.createSnode() + streams = [] + streams.append(self.PreFilterTrows()) + tdStream.checkAll(streams) + + class PreFilterTrows(StreamCheckItem): + def __init__(self): + self.db = "pf_trows_db" + + def create(self): + tdSql.execute(f"create database {self.db} vgroups 1") + tdSql.execute(f"use {self.db}") + tdSql.execute( + f"create stable stb (ts timestamp, c1 int, c2 int, c3 int) " + f"tags (t1 int, t2 int)" + ) + tdSql.execute(f"create table ct1 using stb tags(1, 100)") + tdSql.execute(f"create table ct2 using stb tags(2, 200)") + # Stream from the example in the design spec: + # trigger: state_window(c1) + pre_filter(c2>2) + # calc: select _c0, sum(c3), avg(t2) from %%trows + tdSql.execute( + f"create stream s_pf state_window(c1) from stb " + f"partition by tbname stream_options(pre_filter(c2 > 2 and t1 > 1)) " + f"into res_stb as " + f"select * from %%trows;" + ) + + def insert1(self): + sqls = [ + # ct1 (t1=1, t2=100): c1 alternates to drive state windows; + # c2 values include some <=2 (must be filtered out by pre_filter). + "insert into ct1 values ('2025-01-01 00:00:00', 1, 1, 10);", # c2<=2 -> filtered + "insert into ct1 values ('2025-01-01 00:00:01', 1, 5, 20);", + "insert into ct1 values ('2025-01-01 00:00:02', 1, 7, 30);", + "insert into ct1 values ('2025-01-01 00:00:03', 2, 8, 40);", # state change closes window + "insert into ct1 values ('2025-01-01 00:00:04', 2, 2, 50);", # c2<=2 -> filtered + "insert into ct1 values ('2025-01-01 00:00:05', 2, 9, 60);", + "insert into ct1 values ('2025-01-01 00:00:06', 1, 4, 70);", # state change + ] + tdSql.executes(sqls) + + def check1(self): + # Expect at least one closed window for ct1 with c1==1 spanning ts 1..2 + # (the ts 0 row is dropped by pre_filter). + # sum(c3) over rows kept = 20+30 = 50; avg(t2) = 100. 
+ tdSql.checkResultsByFunc( + sql=f"select sum_c3, avg_t2 from {self.db}.res_stb " + f"where firstts = '2025-01-01 00:00:01';", + func=lambda: tdSql.getRows() == 1 + and tdSql.getData(0, 0) == 50 + and abs(tdSql.getData(0, 1) - 100.0) < 1e-9, + ) + + def check2(self): + # Negative: writing WHERE on %%trows must still be rejected. + tdSql.error( + f"create stream s_neg state_window(c1) from {self.db}.stb " + f"partition by t1 stream_options(pre_filter(c2 > 2)) " + f"into {self.db}.res_neg (firstts, s) as " + f"select first(_c0), sum(c3) from %%trows where c2 > 5;", + expectErrInfo="trows can not be used with WHERE clause", + )