diff --git a/e2e/commands.js b/e2e/commands.js index 4fd12bcec..a37190c3b 100644 --- a/e2e/commands.js +++ b/e2e/commands.js @@ -33,6 +33,9 @@ const materializedViewSchemas = { btc_trades_mv: "CREATE MATERIALIZED VIEW IF NOT EXISTS btc_trades_mv WITH BASE btc_trades as (" + "SELECT timestamp, avg(amount) avg FROM btc_trades SAMPLE BY 1m) PARTITION BY week;", + btc_trades_mv_on_mv: + "CREATE MATERIALIZED VIEW IF NOT EXISTS btc_trades_mv_on_mv as (" + + "SELECT timestamp, avg(avg) avg FROM btc_trades_mv SAMPLE BY 1h) PARTITION BY week;", } const viewSchemas = { diff --git a/e2e/questdb b/e2e/questdb index de44a59bf..e3a7679cd 160000 --- a/e2e/questdb +++ b/e2e/questdb @@ -1 +1 @@ -Subproject commit de44a59bf1b65157577146aa53ec5bbda83bcf7d +Subproject commit e3a7679cdefd1976d26c9e5e07f8eb984792c581 diff --git a/e2e/tests/console/schema.spec.js b/e2e/tests/console/schema.spec.js index 6fe09939b..6a20db8c0 100644 --- a/e2e/tests/console/schema.spec.js +++ b/e2e/tests/console/schema.spec.js @@ -692,3 +692,98 @@ describe("materialized views", () => { }) }) }) + +describe("create materialized view from context menu", () => { + const sourceTable = "btc_trades" + const nonWalTable = "btc_trades_no_wal" + const nonPartitionedTable = "my_publics" + // btc_trades is PARTITION BY DAY → derived SAMPLE BY 1h → view name btc_trades_1h. + const generatedMatView = "btc_trades_1h" + + before(() => { + cy.loadConsoleWithAuth() + cy.createTable(sourceTable) + cy.createTable(nonWalTable) + cy.createTable(nonPartitionedTable) + cy.refreshSchema() + }) + + after(() => { + cy.loadConsoleWithAuth() + cy.dropMaterializedView(generatedMatView) + cy.dropTableIfExists(sourceTable) + cy.dropTableIfExists(nonWalTable) + cy.dropTableIfExists(nonPartitionedTable) + }) + + it("disables the menu item for non-WAL and non-partitioned tables, and generates a runnable matview DDL from a valid source", () => { + cy.getByDataHook("schema-table-title").contains(nonWalTable).rightclick() + cy.getByDataHook("table-context-menu-create-matview").should( + "have.attr", + "data-disabled", + ) + cy.realPress("Escape") + + cy.getByDataHook("schema-table-title") + .contains(nonPartitionedTable) + .rightclick() + cy.getByDataHook("table-context-menu-create-matview").should( + "have.attr", + "data-disabled", + ) + cy.realPress("Escape") + + cy.clearEditor() + cy.getByDataHook("schema-table-title").contains(sourceTable).rightclick() + cy.getByDataHook("table-context-menu-create-matview") + .filter(":visible") + .click() + + cy.runLine().clearEditor() + + cy.refreshSchema() + cy.expandMatViews() + cy.getByDataHook("schema-matview-title").should("contain", generatedMatView) + }) +}) + +describe("create materialized view from matview context menu", () => { + const sourceTable = "btc_trades" + const sourceMatView = "btc_trades_mv" + // btc_trades_mv is SAMPLE BY 1m → next rung 5m; name has no period token, + // so the generator appends `_5m`. + const generatedMatView = "btc_trades_mv_5m" + + before(() => { + cy.loadConsoleWithAuth() + cy.createTable(sourceTable) + cy.createMaterializedView(sourceMatView) + cy.refreshSchema() + }) + + after(() => { + cy.loadConsoleWithAuth() + cy.dropMaterializedView(generatedMatView) + cy.dropMaterializedView(sourceMatView) + cy.dropTableIfExists(sourceTable) + }) + + it("generates a runnable chained matview DDL from a matview source", () => { + cy.expandMatViews() + + cy.clearEditor() + cy.getByDataHook("schema-matview-title") + .contains(sourceMatView) + .rightclick() + cy.getByDataHook("table-context-menu-create-matview") + .filter(":visible") + .should("not.have.attr", "data-disabled") + .click() + + cy.runLine().clearEditor() + + cy.refreshSchema() + cy.expandMatViews() + cy.getByDataHook("schema-matview-title").should("contain", generatedMatView) + }) +}) diff --git a/e2e/tests/console/tableDetails.spec.js b/e2e/tests/console/tableDetails.spec.js index 7aea42b16..41c422531 100644 --- a/e2e/tests/console/tableDetails.spec.js +++ b/e2e/tests/console/tableDetails.spec.js @@ -12,6 +12,7 @@ const { const TEST_TABLE = "btc_trades" const TEST_TABLE_NO_WAL = "btc_trades_no_wal" const TEST_MATVIEW = "btc_trades_mv" +const TEST_MATVIEW_ON_MV = "btc_trades_mv_on_mv" const TEST_VIEW = "btc_trades_view" function interceptTablesQuery(modifications) { @@ -637,6 +638,53 @@ describe("TableDetailsDrawer", () => { }) }) + describe("materialized view based on another materialized view", () => { + before(() => { + cy.loadConsoleWithAuth() + cy.createTable(TEST_TABLE) + cy.createMaterializedView(TEST_MATVIEW) + cy.createMaterializedView(TEST_MATVIEW_ON_MV) + }) + + beforeEach(() => { + cy.loadConsoleWithAuth() + cy.refreshSchema() + cy.expandMatViews() + }) + + it("should open as matview and navigate to a matview base table preserving the matview kind", () => { + cy.openDetailsDrawer(TEST_MATVIEW_ON_MV, "matview") + + cy.getByDataHook("table-details-type-badge").should( + "contain", + "Materialized View", + ) + + cy.getByDataHook("table-details-tab-details").click() + + cy.getByDataHook("table-details-base-table-section").should("be.visible") + cy.getByDataHook("table-details-base-table-link").should( + "contain", + TEST_MATVIEW, + ) + + cy.getByDataHook("table-details-base-table-link").click() + + cy.getByDataHook("table-details-name").should("have.value", TEST_MATVIEW) + cy.getByDataHook("table-details-type-badge").should( + "contain", + "Materialized View", + ) + }) + + after(() => { + cy.loadConsoleWithAuth() + cy.dropMaterializedView(TEST_MATVIEW_ON_MV) + cy.dropMaterializedView(TEST_MATVIEW) + cy.dropTable(TEST_TABLE) + }) + }) + describe("materialized view invalid state (R2)", () => { before(() => { cy.loadConsoleWithAuth() diff --git a/src/modules/ConsoleEventTracker/events.ts b/src/modules/ConsoleEventTracker/events.ts index b97e2049a..9076cd81a 100644 --- a/src/modules/ConsoleEventTracker/events.ts +++ b/src/modules/ConsoleEventTracker/events.ts @@ -68,6 +68,7 @@ export enum ConsoleEvent { SCHEMA_RESUME_WAL_SUBMIT = "schema.resume_wal_submit", SCHEMA_CONTEXT_COPY_DDL = "schema.context_copy_ddl", SCHEMA_CONTEXT_EXPLAIN = "schema.context_explain", + SCHEMA_CONTEXT_CREATE_MATVIEW = "schema.context_create_matview", SCHEMA_COPY_MULTIPLE = "schema.copy_multiple", TABLE_DETAILS_TAB_SWITCH = "table_details.tab_switch", diff --git a/src/modules/OAuth2/views/login.tsx b/src/modules/OAuth2/views/login.tsx index 842cad2b4..b5c99fc5b 100644 --- a/src/modules/OAuth2/views/login.tsx +++ b/src/modules/OAuth2/views/login.tsx @@ -13,7 +13,6 @@ import { RawDqlResult } from "utils/questdb/types" import { LoadingSpinner } from "../../../components/LoadingSpinner" import { Box } from "../../../components/Box" - const LoginContainer = styled.div` width: 100%; height: 100%; diff --git a/src/scenes/Schema/TableDetailsDrawer/index.tsx b/src/scenes/Schema/TableDetailsDrawer/index.tsx index 3ee48ccfd..09ab3f3b4 100644 --- a/src/scenes/Schema/TableDetailsDrawer/index.tsx +++ b/src/scenes/Schema/TableDetailsDrawer/index.tsx @@ -259,17 +259,21 @@ export const TableDetailsDrawer = () => { const handleNavigateToBaseTable = useCallback(() => { if (!matViewData?.base_table_name || !baseTableExists) return + const baseTable = tables.find( + (t) => t.table_name === matViewData.base_table_name, + ) + const kind = baseTable ? getTableKind(baseTable) : "table" dispatch( actions.console.pushSidebarHistory({ type: "tableDetails", payload: { tableName: matViewData.base_table_name, - isMatView: false, - isView: false, + isMatView: kind === "matview", + isView: kind === "view", }, }), ) - }, [dispatch, matViewData?.base_table_name, baseTableExists]) + }, [dispatch, matViewData?.base_table_name, baseTableExists, tables]) const { handleExplainSchema, handleAskAIForHealthIssue } = useAIQuickActions() diff --git a/src/scenes/Schema/VirtualTables/index.tsx b/src/scenes/Schema/VirtualTables/index.tsx index 42bdd3b8b..74b90eadc 100644 --- a/src/scenes/Schema/VirtualTables/index.tsx +++ b/src/scenes/Schema/VirtualTables/index.tsx @@ -10,6 +10,7 @@ import React, { import { Virtuoso, VirtuosoHandle, ListRange } from "react-virtuoso" import styled from "styled-components" import { Loader3, FileCopy, Restart } from "@styled-icons/remix-line" +import { MaterializedViewIcon } from "../table-icon" import { InfoIcon } from "@phosphor-icons/react" import { spinAnimation, toast } from "../../../components" import { trackEvent } from "../../../modules/ConsoleEventTracker" @@ -48,6 +49,8 @@ import { MenuItem, } from "../../../components/ContextMenu" import { copyToClipboard } from "../../../utils/copyToClipboard" +import { useEditor } from "../../../providers/EditorProvider" +import { generateMatViewDDL } from "../../../utils/generateMatViewDDL" import { SuspensionDialog } from "../SuspensionDialog" import { useAIStatus, @@ -199,6 +202,7 @@ const VirtualTables: FC = ({ } = useAIStatus() const { handleExplainSchema } = useAIQuickActions() + const { appendQuery } = useEditor() const [schemaTree, setSchemaTree] = useState({}) const [openedContextMenu, setOpenedContextMenu] = useState( @@ -684,6 +688,55 @@ const VirtualTables: FC = ({ > Copy schema + {(item.kind === "table" || item.kind === "matview") && ( + + { + void trackEvent( + ConsoleEvent.SCHEMA_CONTEXT_CREATE_MATVIEW, + { kind: item.kind }, + ) + const sourceDDL = await getTableSchema( + item.name, + item.kind as "table" | "matview", + ) + if (!sourceDDL) return + try { + const existingNames = tables.map((t) => t.table_name) + const ddl = generateMatViewDDL( + sourceDDL, + existingNames, + ) + appendQuery(ddl, { appendAt: "end" }) + } catch (e) { + console.error(e) + toast.error( + e instanceof Error + ? `Failed to generate materialized view DDL: ${e.message}` + : "Failed to generate materialized view DDL", + ) + } + }} + icon={} + disabled={ + item.kind === "table" && + (!item.table?.designatedTimestamp || + !item.table?.walEnabled) + } + > + Create materialized view + + + )} {isConfigured && ( { + it("handles a simple trades table (DAY partition, DOUBLE price/amount)", () => { + const ddl = `CREATE TABLE 'trades' ( + symbol SYMBOL, + side SYMBOL, + price DOUBLE, + amount DOUBLE, + timestamp TIMESTAMP + ) timestamp(timestamp) PARTITION BY DAY;` + + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/CREATE MATERIALIZED VIEW/i) + expect(result).toMatch(/\btrades_1h\b/) + expect(result).toMatch(/REFRESH IMMEDIATE/i) + expect(result).not.toMatch(/REFRESH EVERY/i) + expect(result).toMatch(/SAMPLE BY 1h/i) + expect(result).toMatch(/PARTITION BY MONTH/i) + expect(result).toMatch(/last\(\s*price\s*\)\s+AS\s+last_price/i) + expect(result).toMatch(/sum\(\s*amount\s*\)\s+AS\s+sum_amount/i) + }) + + it("handles HOUR partition → 5m sample and array types excluded", () => { + const ddl = `CREATE TABLE 'market_data' ( + timestamp TIMESTAMP, + symbol SYMBOL, + bids DOUBLE[][], + asks DOUBLE[][] + ) timestamp(timestamp) PARTITION BY HOUR TTL 3 DAYS;` + + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/SAMPLE BY 5m/i) + expect(result).toMatch(/PARTITION BY MONTH/i) + // Source TTL 3 DAYS → next TTL-ladder rung is 7 DAYS. + expect(result).toMatch(/TTL\s+7\s+DAYS/i) + expect(result).not.toMatch(/bids/i) + expect(result).not.toMatch(/asks/i) + }) + + it("handles TIMESTAMP_NS designated timestamp + UUID + DEDUP source", () => { + const ddl = `CREATE TABLE 'fx_trades' ( + timestamp TIMESTAMP_NS, + symbol SYMBOL, + ecn SYMBOL, + trade_id UUID, + side SYMBOL, + passive BOOLEAN, + price DOUBLE, + quantity DOUBLE, + counterparty SYMBOL, + order_id UUID + ) timestamp(timestamp) PARTITION BY HOUR TTL 1 MONTH + DEDUP UPSERT KEYS(timestamp, trade_id);` + + const result = generateMatViewDDL(ddl) + // Designated TIMESTAMP_NS passes through (no last() wrap). + expect(result).toMatch(/\btimestamp\b/i) + expect(result).not.toMatch(/last\(\s*timestamp\s*\)/i) + expect(result).toMatch(/SAMPLE BY 5m/i) + // UUIDs go through last() like any other value type. + expect(result).toMatch(/last\(\s*trade_id\s*\)\s+AS\s+last_trade_id/i) + expect(result).toMatch(/last\(\s*order_id\s*\)\s+AS\s+last_order_id/i) + expect(result).toMatch(/last\(\s*price\s*\)/i) + expect(result).toMatch(/sum\(\s*quantity\s*\)/i) + // Source TTL 1 MONTH → next TTL-ladder rung is 1 YEAR. + expect(result).toMatch(/TTL\s+1\s+YEARS/i) + }) + + it("handles array-only table gracefully (only timestamp + array)", () => { + const ddl = `CREATE TABLE 'myarray' ( + timestamp TIMESTAMP, + myarr DOUBLE[][] + ) timestamp(timestamp) PARTITION BY DAY;` + + // Only the designated timestamp survives — empty SELECT body is degenerate + // but the function must still not throw. + expect(() => generateMatViewDDL(ddl)).not.toThrow() + }) + + it("handles LONG256/SHORT/VARCHAR/BOOLEAN/DEDUP telemetry table", () => { + const ddl = `CREATE TABLE 'telemetry_users2' ( + timestamp TIMESTAMP, + id LONG256, + event SHORT, + origin SHORT, + ip VARCHAR, + type SYMBOL, + country SYMBOL, + city SYMBOL, + organization SYMBOL, + domain SYMBOL, + cloud_provider BOOLEAN, + version SYMBOL, + os SYMBOL, + package SYMBOL + ) timestamp(timestamp) PARTITION BY MONTH + DEDUP UPSERT KEYS(timestamp, id, event, origin, ip, type, country, city, organization, domain, cloud_provider, version, os, package);` + + const result = generateMatViewDDL(ddl) + // LONG256 supports last() in QuestDB. + expect(result).toMatch(/last\(\s*id\s*\)\s+AS\s+last_id/i) + // MONTH source → 7d sample → YEAR partition per docs' default inference. + expect(result).toMatch(/SAMPLE BY 7d/i) + expect(result).toMatch(/PARTITION BY YEAR/i) + expect(result).toMatch(/\btype\b/) + expect(result).toMatch(/\bcountry\b/) + expect(result).toMatch(/last\(\s*cloud_provider\s*\)/i) + expect(result).toMatch(/last\(\s*ip\s*\)/i) + expect(result).toMatch(/last\(\s*event\s*\)/i) + }) + + it("handles health_test_lag (ts designated, DAY, DOUBLE/SYMBOL)", () => { + const ddl = `CREATE TABLE 'health_test_lag' ( + ts TIMESTAMP, + value DOUBLE, + sensor SYMBOL + ) timestamp(ts) PARTITION BY DAY;` + + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/SAMPLE BY 1h/i) + expect(result).toMatch(/last\(\s*value\s*\)/i) + expect(result).toMatch(/\bsensor\b/) + // Designated TIMESTAMP passes through (no last() wrap). + expect(result).toMatch(/\bts\b/) + expect(result).not.toMatch(/last\(\s*ts\s*\)/i) + }) + + it("handles core_price with price/volume naming patterns + HOUR partition", () => { + const ddl = `CREATE TABLE 'core_price' ( + timestamp TIMESTAMP, + symbol SYMBOL, + ecn SYMBOL, + bid_price DOUBLE, + bid_volume LONG, + ask_price DOUBLE, + ask_volume LONG, + reason SYMBOL, + indicator1 DOUBLE, + indicator2 DOUBLE + ) timestamp(timestamp) PARTITION BY HOUR;` + + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/SAMPLE BY 5m/i) + expect(result).toMatch(/sum\(\s*bid_volume\s*\)/i) + expect(result).toMatch(/sum\(\s*ask_volume\s*\)/i) + expect(result).toMatch(/last\(\s*bid_price\s*\)/i) + expect(result).toMatch(/last\(\s*ask_price\s*\)/i) + expect(result).toMatch(/last\(\s*indicator1\s*\)/i) + }) + + it("handles console_events with LONG256/UUID/VARCHAR + secondary TIMESTAMP", () => { + const ddl = `CREATE TABLE 'console_events' ( + version SYMBOL, + console_version SYMBOL, + client_os SYMBOL, + browser SYMBOL, + event_name SYMBOL, + id LONG256, + client_id UUID, + browser_version VARCHAR, + payload VARCHAR, + created TIMESTAMP, + timestamp TIMESTAMP + ) timestamp(timestamp) PARTITION BY DAY;` + + const result = generateMatViewDDL(ddl) + // LONG256 and UUID both go through last(). + expect(result).toMatch(/last\(\s*id\s*\)\s+AS\s+last_id/i) + expect(result).toMatch(/last\(\s*client_id\s*\)\s+AS\s+last_client_id/i) + expect(result).toMatch(/last\(\s*payload\s*\)/i) + expect(result).toMatch(/last\(\s*browser_version\s*\)/i) + expect(result).toMatch(/last\(\s*created\s*\)/i) + expect(result).toMatch(/\bevent_name\b/) + }) + + it("handles GEOHASH columns via last()", () => { + const ddl = `CREATE TABLE 'sometable2' ( + timestamp TIMESTAMP, + emre INT, + berk BOOLEAN, + kaya GEOHASH(5c) + ) timestamp(timestamp) PARTITION BY DAY TTL 5 DAYS;` + + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/last\(\s*kaya\s*\)\s+AS\s+last_kaya/i) + expect(result).toMatch(/last\(\s*emre\s*\)/i) + expect(result).toMatch(/last\(\s*berk\s*\)/i) + // Source TTL 5 DAYS → next TTL-ladder rung is 7 DAYS. + expect(result).toMatch(/TTL\s+7\s+DAYS/i) + }) + + it("handles non-designated TIMESTAMP / TIMESTAMP_NS via last()", () => { + const ddl = `CREATE TABLE 'events' ( + ts TIMESTAMP, + created_at TIMESTAMP, + observed_at TIMESTAMP_NS, + payload VARCHAR + ) timestamp(ts) PARTITION BY DAY;` + const result = generateMatViewDDL(ddl) + // Designated timestamp passes through. + expect(result).toMatch(/\bts\b/) + expect(result).not.toMatch(/last\(\s*ts\s*\)/i) + // Non-designated TIMESTAMP and TIMESTAMP_NS → last(). + expect(result).toMatch(/last\(\s*created_at\s*\)\s+AS\s+last_created_at/i) + expect(result).toMatch(/last\(\s*observed_at\s*\)\s+AS\s+last_observed_at/i) + }) + + it("handles mytrades with TIMESTAMP_NS designated + STRING column", () => { + const ddl = `CREATE TABLE 'mytrades' ( + mystring STRING, + myts TIMESTAMP_NS, + mysymbol SYMBOL + ) timestamp(myts) PARTITION BY DAY;` + + const result = generateMatViewDDL(ddl) + // Designated TIMESTAMP_NS passes through (no last() wrap). + expect(result).toMatch(/\bmyts\b/) + expect(result).not.toMatch(/last\(\s*myts\s*\)/i) + expect(result).toMatch(/last\(\s*mystring\s*\)/i) + expect(result).toMatch(/\bmysymbol\b/) + }) + + it("throws on non-CREATE-TABLE / non-CREATE-MATERIALIZED-VIEW input", () => { + expect(() => generateMatViewDDL("SELECT 1;")).toThrow() + }) + + describe("view naming", () => { + const ddl = `CREATE TABLE 'trades' ( + symbol SYMBOL, + price DOUBLE, + timestamp TIMESTAMP + ) timestamp(timestamp) PARTITION BY DAY;` + + it("uses {table}_{interval} when name is free", () => { + expect(generateMatViewDDL(ddl)).toMatch(/\btrades_1h\b/) + }) + + it("appends _2 when {table}_{interval} is taken", () => { + const result = generateMatViewDDL(ddl, ["trades_1h"]) + expect(result).toMatch(/\btrades_1h_2\b/) + expect(result).not.toMatch(/\btrades_1h\b(?!_)/) + }) + + it("appends _3 when both base and _2 are taken", () => { + const result = generateMatViewDDL(ddl, ["trades_1h", "trades_1h_2"]) + expect(result).toMatch(/\btrades_1h_3\b/) + }) + + it("treats existing names case-insensitively", () => { + const result = generateMatViewDDL(ddl, ["TRADES_1H"]) + expect(result).toMatch(/\btrades_1h_2\b/) + }) + + it("ignores existing names that don't collide", () => { + const result = generateMatViewDDL(ddl, [ + "other_view", + "trades_5m", + "trades_1d", + ]) + expect(result).toMatch(/\btrades_1h\b/) + }) + + it("replaces a trailing-period suffix on the source table name", () => { + // `my_table_5m` (DAY → 1h sample) → `my_table_1h`, not `my_table_5m_1h`. + const tableDDL = `CREATE TABLE 'my_table_5m' ( + symbol SYMBOL, + price DOUBLE, + timestamp TIMESTAMP + ) timestamp(timestamp) PARTITION BY DAY;` + const result = generateMatViewDDL(tableDDL) + expect(result).toMatch(/\bmy_table_1h\b/) + expect(result).not.toMatch(/\bmy_table_5m_1h\b/) + }) + }) + + describe("from materialized view source", () => { + const mv5m = `CREATE MATERIALIZED VIEW 'btc_trades_5m' WITH BASE 'btc_trades' AS ( + SELECT + symbol, + last(price) AS last_price, + sum(amount) AS sum_amount, + timestamp + FROM btc_trades + SAMPLE BY 5m + ) PARTITION BY MONTH;` + + it("bumps sample-by, re-roots FROM, rewrites aggregate args to layer-1 aliases", () => { + const result = generateMatViewDDL(mv5m) + expect(result).toMatch(/CREATE MATERIALIZED VIEW/i) + expect(result).toMatch(/\bbtc_trades_30m\b/) + expect(result).toMatch(/SAMPLE BY 30m/i) + expect(result).toMatch(/PARTITION BY MONTH/i) + expect(result).toMatch(/REFRESH IMMEDIATE/i) + expect(result).toMatch(/FROM\s+btc_trades_5m/i) + expect(result).not.toMatch(/FROM\s+btc_trades\b(?!_)/i) + expect(result).toMatch(/WITH BASE\s+btc_trades_5m/i) + expect(result).toMatch(/last\(\s*last_price\s*\)\s+AS\s+last_price/i) + expect(result).toMatch(/sum\(\s*sum_amount\s*\)\s+AS\s+sum_amount/i) + expect(result).toMatch(/\bsymbol\b/) + }) + + it("appends new period when source name has no period token", () => { + const ddl = `CREATE MATERIALIZED VIEW 'btc_trades_mv' WITH BASE 'btc_trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM btc_trades SAMPLE BY 1m + ) PARTITION BY WEEK;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/\bbtc_trades_mv_5m\b/) + expect(result).toMatch(/SAMPLE BY 5m/i) + }) + + it("replaces embedded period token in the middle of the name", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m_raw' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/\btrades_30m_raw\b/) + expect(result).not.toMatch(/\btrades_5m_raw_30m\b/) + }) + + it("steps off-ladder source 4h → 6h", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_4h' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades SAMPLE BY 4h + ) PARTITION BY YEAR;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/SAMPLE BY 6h/i) + expect(result).toMatch(/\btrades_6h\b/) + }) + + it("steps 2m → 5m, 45s → 1m for other off-ladder inputs", () => { + const ddl2m = `CREATE MATERIALIZED VIEW 'x' AS ( + SELECT timestamp FROM base SAMPLE BY 2m + );` + expect(generateMatViewDDL(ddl2m)).toMatch(/SAMPLE BY 5m/i) + + const ddl45s = `CREATE MATERIALIZED VIEW 'x' AS ( + SELECT timestamp FROM base SAMPLE BY 45s + );` + expect(generateMatViewDDL(ddl45s)).toMatch(/SAMPLE BY 1m/i) + }) + + it("caps at 1y when source is 1M", () => { + const ddl = `CREATE MATERIALIZED VIEW 'yearly_1M' AS ( + SELECT timestamp FROM base SAMPLE BY 1M + ) PARTITION BY YEAR;` + const result = generateMatViewDDL(ddl) + // QuestDB rejects `1Y` (uppercase) — pin the casing explicitly. + expect(result).toContain("SAMPLE BY 1y") + expect(result).not.toContain("SAMPLE BY 1Y") + expect(result).toMatch(/\byearly_1y\b/) + expect(result).toMatch(/PARTITION BY YEAR/i) + }) + + it("steps Ny → (N+1)y above the ladder cap", () => { + const ddl1y = `CREATE MATERIALIZED VIEW 'long_1y' AS ( + SELECT timestamp FROM base SAMPLE BY 1y + ) PARTITION BY YEAR;` + expect(generateMatViewDDL(ddl1y)).toMatch(/SAMPLE BY 2y/i) + + const ddl3y = `CREATE MATERIALIZED VIEW 'long_3y' AS ( + SELECT timestamp FROM base SAMPLE BY 3y + ) PARTITION BY YEAR;` + const result3y = generateMatViewDDL(ddl3y) + expect(result3y).toMatch(/SAMPLE BY 4y/i) + expect(result3y).toMatch(/\blong_4y\b/) + }) + + it("steps non-year units past the cap to the smallest Ny greater than the source", () => { + const ddl400d = `CREATE MATERIALIZED VIEW 'big_400d' AS ( + SELECT timestamp FROM base SAMPLE BY 400d + ) PARTITION BY YEAR;` + expect(generateMatViewDDL(ddl400d)).toMatch(/SAMPLE BY 2y/i) + + const ddl2000d = `CREATE MATERIALIZED VIEW 'big_2000d' AS ( + SELECT timestamp FROM base SAMPLE BY 2000d + ) PARTITION BY YEAR;` + expect(generateMatViewDDL(ddl2000d)).toMatch(/SAMPLE BY 6y/i) + }) + + it("strips WHERE — source mat view already applied it at layer 1", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades + WHERE symbol = 'BTC-USD' AND amount > 100 + SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).not.toMatch(/\bWHERE\b/i) + expect(result).not.toMatch(/BTC-USD/) + expect(result).not.toMatch(/amount/) + }) + + it("strips GROUP BY — references base-table columns not in the chain", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades + SAMPLE BY 5m + GROUP BY symbol, side, timestamp + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).not.toMatch(/GROUP\s+BY/i) + expect(result).not.toMatch(/\bside\b/) + }) + + it("strips LATEST ON — references base-table columns not in the chain", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades + SAMPLE BY 5m + LATEST ON ts PARTITION BY symbol + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).not.toMatch(/LATEST\s+ON/i) + }) + + it("preserves SAMPLE BY ALIGN TO option on the clause", () => { + // FILL isn't supported in mat views per docs, so we only test ALIGN TO. + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp + FROM trades + SAMPLE BY 5m ALIGN TO CALENDAR + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/SAMPLE BY 30m/i) + expect(result).toMatch(/ALIGN TO CALENDAR/i) + }) + + it("throws when source matview has no SAMPLE BY", () => { + const ddl = `CREATE MATERIALIZED VIEW 'no_sample' AS ( + SELECT symbol, last(price) AS last_price, timestamp FROM trades + ) PARTITION BY MONTH;` + expect(() => generateMatViewDDL(ddl)).toThrow(/SAMPLE BY/i) + }) + + it("avoids collisions via existingNames", () => { + const result = generateMatViewDDL( + `CREATE MATERIALIZED VIEW 'btc_trades_5m' AS ( + SELECT timestamp FROM btc_trades SAMPLE BY 5m + ) PARTITION BY MONTH;`, + ["btc_trades_30m"], + ) + expect(result).toMatch(/\bbtc_trades_30m_2\b/) + }) + + it("preserves REFRESH EVERY 1m DEFERRED START '…' verbatim", () => { + const ddl = `CREATE MATERIALIZED VIEW 'bbo_1s' WITH BASE 'market_data' REFRESH EVERY 1m DEFERRED START '2025-06-01T00:00:00.000000Z' AS ( + SELECT timestamp, symbol, last(bid) AS bid FROM market_data SAMPLE BY 1s + ) PARTITION BY DAY;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/REFRESH\s+EVERY\s+1m/i) + expect(result).toMatch(/DEFERRED/i) + expect(result).toMatch(/START\s+'2025-06-01T00:00:00\.000000Z'/i) + }) + + it("preserves REFRESH MANUAL", () => { + const ddl = `CREATE MATERIALIZED VIEW 'x' REFRESH MANUAL AS ( + SELECT timestamp FROM base SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/REFRESH\s+MANUAL/i) + }) + + it("defaults to REFRESH IMMEDIATE when source has no refresh clause", () => { + const ddl = `CREATE MATERIALIZED VIEW 'x' AS ( + SELECT timestamp FROM base SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/REFRESH\s+IMMEDIATE/i) + }) + + it("sets WITH BASE to the source mat view, not the source's own base", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT symbol, last(price) AS last_price, timestamp FROM trades SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/WITH BASE\s+trades_5m/i) + expect(result).not.toMatch(/WITH BASE\s+trades\b(?!_)/i) + }) + + it("rewrites aggregates with array-subscript args (Alex's bbo_1s demo)", () => { + const ddl = `CREATE MATERIALIZED VIEW 'bbo_1s' WITH BASE 'market_data' REFRESH IMMEDIATE AS ( + SELECT timestamp, symbol, + last(bids[1][1]) AS bid, + last(asks[1][1]) AS ask + FROM market_data + SAMPLE BY 1s + ) PARTITION BY DAY;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/last\(\s*bid\s*\)\s+AS\s+bid/i) + expect(result).toMatch(/last\(\s*ask\s*\)\s+AS\s+ask/i) + expect(result).not.toMatch(/bids\s*\[/) + expect(result).not.toMatch(/asks\s*\[/) + }) + + it("collapses CAST expressions into bare column references at the chain level", () => { + const ddl = `CREATE MATERIALIZED VIEW 'core_price_1s' WITH BASE 'core_price' AS ( + SELECT timestamp, + cast(CUSIP as Symbol) Cusip, + cast(Currency as symbol) Currency, + last(price) AS price + FROM core_price + SAMPLE BY 1s + ) PARTITION BY DAY;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/\bCusip\b/) + expect(result).toMatch(/\bCurrency\b/) + expect(result).not.toMatch(/cast\s*\(/i) + expect(result).toMatch(/last\(\s*price\s*\)\s+AS\s+price/i) + }) + + it("falls back to last() for implicit-alias non-decomposable avg(amount) avg", () => { + const ddl = `CREATE MATERIALIZED VIEW 'btc_trades_mv' WITH BASE 'btc_trades' AS ( + SELECT timestamp, avg(amount) avg FROM btc_trades SAMPLE BY 1m + ) PARTITION BY WEEK;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/last\(\s*avg\s*\)\s+AS\s+avg/i) + expect(result).not.toMatch(/avg\(\s*amount\s*\)/i) + expect(result).not.toMatch(/avg\(\s*avg\s*\)/i) + }) + + it("preserves trailing args for self-chainable multi-arg aggregates only", () => { + const ddl = `CREATE MATERIALIZED VIEW 'metrics_5m' AS ( + SELECT timestamp, + string_agg(tag, ',') AS tags, + approx_percentile(latency_ms, 0.99, 2) AS p99, + approx_median(latency_ms, 3) AS p50 + FROM metrics SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/string_agg\(\s*tags\s*,\s*','\s*\)\s+AS\s+tags/i) + expect(result).toMatch(/last\(\s*p99\s*\)\s+AS\s+p99/i) + expect(result).toMatch(/last\(\s*p50\s*\)\s+AS\s+p50/i) + expect(result).not.toMatch(/approx_percentile\(/i) + expect(result).not.toMatch(/approx_median\(/i) + }) + + it("rewrites count(x) AS n to sum(n) AS n (sum-of-bucket-counts = total)", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT timestamp, symbol, count(price) AS n FROM trades SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/sum\(\s*n\s*\)\s+AS\s+n/i) + expect(result).not.toMatch(/count\(/i) + }) + + it("rewrites count(*) and count() to sum(alias) too", () => { + const ddl = `CREATE MATERIALIZED VIEW 'trades_5m' WITH BASE 'trades' AS ( + SELECT timestamp, count(*) AS rows_total, count() AS rows_alt + FROM trades SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/sum\(\s*rows_total\s*\)\s+AS\s+rows_total/i) + expect(result).toMatch(/sum\(\s*rows_alt\s*\)\s+AS\s+rows_alt/i) + expect(result).not.toMatch(/count\(/i) + }) + + it("drops count(DISTINCT …) — can't decompose from a scalar", () => { + const ddl = `CREATE MATERIALIZED VIEW 'metrics_5m' WITH BASE 'metrics' AS ( + SELECT timestamp, + count(distinct host) AS hosts, + sum(distinct latency_ms) AS sum_distinct_latency + FROM metrics SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + // count(DISTINCT) column is gone entirely. + expect(result).not.toMatch(/\bhosts\b/) + expect(result).not.toMatch(/count\(/i) + // sum(DISTINCT …) is preserved (sum is in PRESERVED_AGGREGATES). + expect(result).toMatch( + /sum\(\s*DISTINCT\s+sum_distinct_latency\s*\)\s+AS\s+sum_distinct_latency/i, + ) + }) + + it("replaces non-decomposable aggregates with last()", () => { + const ddl = `CREATE MATERIALIZED VIEW 'metrics_5m' WITH BASE 'metrics' AS ( + SELECT timestamp, + avg(latency_ms) AS latency_avg, + stddev(latency_ms) AS latency_sd, + approx_percentile(latency_ms, 0.99) AS latency_p99, + last(host) AS host + FROM metrics + SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/last\(\s*latency_avg\s*\)\s+AS\s+latency_avg/i) + expect(result).toMatch(/last\(\s*latency_sd\s*\)\s+AS\s+latency_sd/i) + expect(result).toMatch(/last\(\s*latency_p99\s*\)\s+AS\s+latency_p99/i) + // last is in PRESERVED_AGGREGATES → host stays as last(host). + expect(result).toMatch(/last\(\s*host\s*\)\s+AS\s+host/i) + }) + + it("drops trailing base-column args when falling back to last()", () => { + const ddl = `CREATE MATERIALIZED VIEW 'obs_5m' WITH BASE 'obs' AS ( + SELECT timestamp, symbol, + weighted_avg(price, weight) AS wavg, + arg_max(price, qty) AS argmax_price, + haversine_dist_deg(lat, lon, timestamp) AS distance + FROM obs SAMPLE BY 5m + ) PARTITION BY MONTH;` + const result = generateMatViewDDL(ddl) + // Trailing base-table columns (weight, qty, lon) are dropped with the original fn. + expect(result).toMatch(/last\(\s*wavg\s*\)\s+AS\s+wavg/i) + expect(result).toMatch(/last\(\s*argmax_price\s*\)\s+AS\s+argmax_price/i) + expect(result).toMatch(/last\(\s*distance\s*\)\s+AS\s+distance/i) + expect(result).not.toMatch(/\bweight\b/) + expect(result).not.toMatch(/\bqty\b/) + expect(result).not.toMatch(/\blon\b/) + }) + }) + + describe("TTL ladder", () => { + // TTL of the new mat view = next ladder rung strictly greater than the source TTL. + const mv = ( + ttl: string, + ) => `CREATE MATERIALIZED VIEW 'src_5m' WITH BASE 'base' AS ( + SELECT timestamp, last(price) AS price FROM base SAMPLE BY 5m + ) PARTITION BY MONTH TTL ${ttl};` + + it("source has no TTL → output has no TTL", () => { + const ddl = `CREATE MATERIALIZED VIEW 'src_5m' WITH BASE 'base' AS ( + SELECT timestamp, last(price) AS price FROM base SAMPLE BY 5m + ) PARTITION BY MONTH;` + expect(generateMatViewDDL(ddl)).not.toMatch(/TTL\s+\d/i) + }) + + it("2 HOURS → 6 HOURS", () => { + expect(generateMatViewDDL(mv("2 HOURS"))).toMatch(/TTL\s+6\s+HOURS/i) + }) + + it("1 DAYS → 7 DAYS", () => { + expect(generateMatViewDDL(mv("1 DAYS"))).toMatch(/TTL\s+7\s+DAYS/i) + }) + + it("1 WEEKS (converted to 7d) → 1 MONTHS", () => { + expect(generateMatViewDDL(mv("1 WEEKS"))).toMatch(/TTL\s+1\s+MONTHS/i) + }) + + it("3 MONTHS → 1 YEARS", () => { + expect(generateMatViewDDL(mv("3 MONTHS"))).toMatch(/TTL\s+1\s+YEARS/i) + }) + + it("1 YEARS → 2 YEARS (step in whole years above the cap)", () => { + expect(generateMatViewDDL(mv("1 YEARS"))).toMatch(/TTL\s+2\s+YEARS/i) + }) + + it("5 YEARS → 6 YEARS", () => { + expect(generateMatViewDDL(mv("5 YEARS"))).toMatch(/TTL\s+6\s+YEARS/i) + }) + + it("accepts singular-unit TTL from QuestDB's SHOW CREATE output", () => { + // Server emits TTL 1 YEAR for value=1; parser only takes plural — the + // generator must normalise before parsing. + const ddl = `CREATE MATERIALIZED VIEW 'src_5m' WITH BASE 'base' AS ( + SELECT timestamp, last(price) AS price FROM base SAMPLE BY 5m + ) PARTITION BY MONTH TTL 1 YEAR;` + expect(() => generateMatViewDDL(ddl)).not.toThrow() + const result = generateMatViewDDL(ddl) + expect(result).toMatch(/TTL\s+2\s+YEARS/i) + }) + + it("applies to table → mat view path too", () => { + const ddl = `CREATE TABLE 'trades' ( + symbol SYMBOL, price DOUBLE, amount DOUBLE, timestamp TIMESTAMP + ) timestamp(timestamp) PARTITION BY DAY TTL 1 YEARS;` + expect(generateMatViewDDL(ddl)).toMatch(/TTL\s+2\s+YEARS/i) + }) + }) +}) diff --git a/src/utils/generateMatViewDDL.ts b/src/utils/generateMatViewDDL.ts new file mode 100644 index 000000000..77607215f --- /dev/null +++ b/src/utils/generateMatViewDDL.ts @@ -0,0 +1,548 @@ +import { + parseOne, + toSql, + type CreateTableStatement, + type CreateMaterializedViewStatement, + type SelectStatement, + type SelectItem, + type ExpressionSelectItem, + type ColumnRef, + type FunctionCall, + type ColumnDefinition, + type SampleByClause, + type MaterializedViewRefresh, +} from "@questdb/sql-parser" +import { formatSql } from "./formatSql" + +// Aggregates kept verbatim in the chain (first arg → layer-1 alias, trailing +// args pass through). Anything outside this set falls back to `last(alias)`. +// `count` is handled separately: count() → sum(alias); count(DISTINCT …) → dropped. +const PRESERVED_AGGREGATES = new Set([ + "min", + "max", + "sum", + "ksum", + "nsum", + "first", + "first_not_null", + "last", + "last_not_null", + "bool_and", + "bool_or", + "bit_and", + "bit_or", + "bit_xor", + "string_agg", +]) + +const VOLUME_PATTERNS = [ + "volume", + "vol", + "count", + "qty", + "quantity", + "amount", + "size", + "total", + "shares", + "lots", + "notional", +] + +const PRICE_PATTERNS = [ + "price", + "bid", + "ask", + "rate", + "yield", + "spread", + "close", + "open", + "high", + "low", + "mid", + "px", + "premium", + "discount", + "fee", + "cost", + "margin", +] + +const NUMERIC_TYPES = new Set([ + "DOUBLE", + "FLOAT", + "INT", + "INTEGER", + "LONG", + "SHORT", + "BYTE", + "DECIMAL", +]) + +// Types where QuestDB has no matching `last()` overload, so we have to skip. +const EXCLUDED_TYPES = new Set(["BINARY", "LONG128", "INTERVAL"]) + +const LAST_TYPES = new Set([ + "STRING", + "VARCHAR", + "CHAR", + "BOOLEAN", + "DATE", + "TIMESTAMP", + "TIMESTAMP_NS", + "LONG256", + "UUID", + "IPV4", +]) + +// GEOHASH columns are typed as `GEOHASH()` so they need a prefix check. +const isLastType = (dataType: string): boolean => + LAST_TYPES.has(dataType) || dataType.startsWith("GEOHASH") + +const SAMPLE_BY_MAP: Record = { + HOUR: "5m", + DAY: "1h", + WEEK: "1d", + MONTH: "7d", + YEAR: "1M", + NONE: "1h", +} + +// Above 1y we step in whole-year increments (1y → 2y → 3y …) via YEAR_RE. +const INTERVAL_LADDER = [ + "1s", + "5s", + "30s", + "1m", + "5m", + "30m", + "1h", + "6h", + "1d", + "7d", + "1M", + "1y", +] as const + +// TTL ladder = INTERVAL_LADDER trimmed to ≥ 1h. +const TTL_LADDER = ["1h", "6h", "1d", "7d", "1M", "1y"] as const + +// Default partitioning per docs/concepts/materialized-views.md: +// SAMPLE BY > 1h → YEAR, > 1m → MONTH. +const PARTITION_BY_FOR_SAMPLE: Record< + string, + CreateMaterializedViewStatement["partitionBy"] +> = { + "1s": "DAY", + "5s": "DAY", + "30s": "DAY", + "1m": "DAY", + "5m": "MONTH", + "30m": "MONTH", + "1h": "MONTH", + "6h": "YEAR", + "1d": "YEAR", + "7d": "YEAR", + "1M": "YEAR", + "1y": "YEAR", +} + +const UNIT_SECONDS: Record = { + s: 1, + m: 60, + h: 60 * 60, + d: 24 * 60 * 60, + // Approximate — used only to order intervals against the ladder, never for time math. + M: 30 * 24 * 60 * 60, + y: 365 * 24 * 60 * 60, +} + +const INTERVAL_RE = /^(\d+)([smhdMy])$/ +const YEAR_RE = /^(\d+)y$/ + +const escapeRegExp = (s: string): string => + s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + +const toSeconds = (interval: string): number | null => { + const m = INTERVAL_RE.exec(interval) + if (!m) return null + return Number(m[1]) * UNIT_SECONDS[m[2]] +} + +const nextOnLadder = ( + current: string, + ladder: readonly string[], + fallback: string, +): string => { + const yMatch = YEAR_RE.exec(current) + if (yMatch) return `${Number(yMatch[1]) + 1}y` + + const srcSec = toSeconds(current) + if (srcSec == null) return fallback + + for (const rung of ladder) { + const rungSec = toSeconds(rung) + if (rungSec != null && rungSec > srcSec) return rung + } + // Past the top of the ladder via a non-year unit (e.g. 400d) → smallest Ny strictly > source. + const years = Math.floor(srcSec / UNIT_SECONDS.y) + 1 + return `${years}y` +} + +const nextInterval = (current: string): string => + nextOnLadder(current, INTERVAL_LADDER, "1h") + +const nextTTL = (current: string): string => + nextOnLadder(current, TTL_LADDER, "1h") + +type TTLUnit = "HOURS" | "DAYS" | "WEEKS" | "MONTHS" | "YEARS" +type TTLAst = { value: number; unit: TTLUnit } + +const TTL_UNIT_TO_LETTER: Partial> = { + HOURS: "h", + DAYS: "d", + MONTHS: "M", + YEARS: "y", +} + +const TTL_LETTER_TO_UNIT: Record = { + h: "HOURS", + d: "DAYS", + M: "MONTHS", + y: "YEARS", +} + +const ttlToLadderString = (ttl: TTLAst): string | null => { + if (ttl.unit === "WEEKS") return `${ttl.value * 7}d` + const letter = TTL_UNIT_TO_LETTER[ttl.unit] + return letter ? `${ttl.value}${letter}` : null +} + +const ladderStringToTTL = (s: string): TTLAst | null => { + const m = INTERVAL_RE.exec(s) + if (!m) return null + const unit = TTL_LETTER_TO_UNIT[m[2]] + if (!unit) return null + return { value: Number(m[1]), unit } +} + +// Returns null when source had no TTL — we never invent one. +const deriveNextTTL = (src: TTLAst | undefined): TTLAst | null => { + if (!src) return null + const srcStr = ttlToLadderString(src) + if (!srcStr) return null + const nextStr = nextTTL(srcStr) + return ladderStringToTTL(nextStr) +} + +const partitionFor = ( + interval: string, +): CreateMaterializedViewStatement["partitionBy"] => { + const mapped = PARTITION_BY_FOR_SAMPLE[interval] + if (mapped) return mapped + if (YEAR_RE.test(interval)) return "YEAR" + return "MONTH" +} + +// Pass `srcInterval = ""` for tables (no SAMPLE BY) — skips the embedded-replace branch. +const deriveNextName = ( + srcName: string, + srcInterval: string, + newInterval: string, +): string => { + // Trailing period suffix, optionally with collision counter: `_5m`, `_5m_2`, `_2y`. + const trailing = /_(\d+(?:s|m|h|d|M|y))(_\d+)?$/ + if (trailing.test(srcName)) { + return srcName.replace(trailing, `_${newInterval}`) + } + if (srcInterval) { + const embedded = new RegExp(`(^|_)${escapeRegExp(srcInterval)}(?=_|$)`) + if (embedded.test(srcName)) { + return srcName.replace( + embedded, + (_m, pre: string) => `${pre}${newInterval}`, + ) + } + } + return `${srcName}_${newInterval}` +} + +const matchesPattern = (name: string, patterns: string[]): boolean => + patterns.some((p) => name.toLowerCase().includes(p)) + +const mkColumnRef = (name: string): ColumnRef => ({ + type: "column", + name: { type: "qualifiedName", parts: [name] }, +}) + +const mkFunctionCall = (fnName: string, colName: string): FunctionCall => ({ + type: "function", + name: fnName, + args: [mkColumnRef(colName)], +}) + +const mkSelectItem = ( + expression: ColumnRef | FunctionCall, + alias?: string, +): ExpressionSelectItem => ({ + type: "selectItem", + expression, + alias, +}) + +const isExcludedType = (dataType: string): boolean => + dataType.endsWith("[]") || EXCLUDED_TYPES.has(dataType) + +const buildSelectItem = ( + col: ColumnDefinition, +): ExpressionSelectItem | null => { + const { name, dataType } = col + if (isExcludedType(dataType)) return null + if (dataType === "SYMBOL") return mkSelectItem(mkColumnRef(name)) + if (NUMERIC_TYPES.has(dataType) && matchesPattern(name, VOLUME_PATTERNS)) { + return mkSelectItem(mkFunctionCall("sum", name), `sum_${name}`) + } + if (NUMERIC_TYPES.has(dataType) && matchesPattern(name, PRICE_PATTERNS)) { + return mkSelectItem(mkFunctionCall("last", name), `last_${name}`) + } + if (NUMERIC_TYPES.has(dataType) || isLastType(dataType)) { + return mkSelectItem(mkFunctionCall("last", name), `last_${name}`) + } + return null +} + +const pickUniqueViewName = ( + base: string, + existingNames: readonly string[], +): string => { + const taken = new Set(existingNames.map((n) => n.toLowerCase())) + if (!taken.has(base.toLowerCase())) return base + for (let i = 2; ; i++) { + const candidate = `${base}_${i}` + if (!taken.has(candidate.toLowerCase())) return candidate + } +} + +const HEADER = + "-- Review SAMPLE BY, PARTITION BY, TTL, refresh clause, and aggregates before running." + +const outputName = (item: ExpressionSelectItem): string | null => { + if (item.alias) return item.alias + const e = item.expression + if (e.type === "column") { + const parts = e.name.parts + return parts[parts.length - 1] + } + if (e.type === "function") return e.name + return null +} + +// Chain SELECT items must reference the source mat view's OUTPUT columns +// (aliases), since the base-table columns no longer exist at this layer. +// Non-preserved fns fall back to last(). count(DISTINCT …) can't decompose +// from a scalar, so we drop it entirely (returns null). +const rewriteSelectItemForChain = ( + item: ExpressionSelectItem, +): ExpressionSelectItem | null => { + const e = item.expression + if (e.type === "column") return item + + const out = outputName(item) + if (!out) return item + + if (e.type === "function") { + const fnLower = e.name.toLowerCase() + if (fnLower === "count") { + if (e.distinct === true) return null + // count() / count(*) / count(col) → sum(alias) — sum-of-per-bucket-counts + // is the correct chained total. + return { + type: "selectItem", + expression: { + type: "function", + name: "sum", + args: [mkColumnRef(out)], + }, + alias: out, + } + } + const isPreserved = PRESERVED_AGGREGATES.has(fnLower) + const newExpr: FunctionCall = isPreserved + ? { + ...e, + args: + e.args.length > 1 + ? [mkColumnRef(out), ...e.args.slice(1)] + : [mkColumnRef(out)], + } + : { type: "function", name: "last", args: [mkColumnRef(out)] } + return { type: "selectItem", expression: newExpr, alias: out } + } + + // Cast / arithmetic / etc. — layer 1 already materialised it; reference the alias. + return { + type: "selectItem", + expression: mkColumnRef(out), + } +} + +const fromTable = ( + stmt: CreateTableStatement, + existingNames: readonly string[], +): string => { + const columns = stmt.columns ?? [] + const tableName = stmt.table.parts[stmt.table.parts.length - 1] + const designatedTimestamp = stmt.timestamp + const partition = stmt.partitionBy ?? "NONE" + const interval = SAMPLE_BY_MAP[partition] ?? "1h" + const viewName = pickUniqueViewName( + deriveNextName(tableName, "", interval), + existingNames, + ) + + const nonTimestampItems: ExpressionSelectItem[] = [] + let timestampItem: ExpressionSelectItem | null = null + + for (const col of columns) { + if (designatedTimestamp && col.name === designatedTimestamp) { + timestampItem = mkSelectItem(mkColumnRef(col.name)) + continue + } + const item = buildSelectItem(col) + if (item) nonTimestampItems.push(item) + } + + const selectItems: ExpressionSelectItem[] = [ + ...nonTimestampItems, + ...(timestampItem ? [timestampItem] : []), + ] + + const sampleBy: SampleByClause = { + type: "sampleBy", + duration: interval, + } + + const selectStmt: SelectStatement = { + type: "select", + columns: selectItems, + from: [ + { + type: "tableRef", + table: { type: "qualifiedName", parts: [tableName] }, + }, + ], + sampleBy, + } + + const refresh: MaterializedViewRefresh = { + type: "materializedViewRefresh", + mode: "immediate", + } + + const matViewStmt: CreateMaterializedViewStatement = { + type: "createMaterializedView", + view: { + type: "qualifiedName", + parts: [viewName], + }, + refresh, + query: selectStmt, + asParens: true, + partitionBy: PARTITION_BY_FOR_SAMPLE[interval], + } + + const nextTtl = deriveNextTTL(stmt.ttl) + if (nextTtl) matViewStmt.ttl = nextTtl + if (stmt.ownedBy) { + matViewStmt.ownedBy = stmt.ownedBy + } + + return `${HEADER}\n${formatSql(toSql(matViewStmt))};` +} + +const fromMatView = ( + src: CreateMaterializedViewStatement, + existingNames: readonly string[], +): string => { + const srcName = src.view.parts[src.view.parts.length - 1] + const srcQuery = src.query + const srcSampleBy = srcQuery.sampleBy + if (!srcSampleBy?.duration) { + throw new Error("Source materialized view has no SAMPLE BY clause") + } + const srcInterval = srcSampleBy.duration + const newInterval = nextInterval(srcInterval) + const newName = pickUniqueViewName( + deriveNextName(srcName, srcInterval, newInterval), + existingNames, + ) + + const newColumns: SelectItem[] = srcQuery.columns.flatMap( + (item): SelectItem[] => { + if (item.type !== "selectItem") return [item] + const rewritten = rewriteSelectItemForChain(item) + return rewritten ? [rewritten] : [] + }, + ) + + const newQuery: SelectStatement = { + ...srcQuery, + columns: newColumns, + from: [ + { + type: "tableRef", + table: { type: "qualifiedName", parts: [srcName] }, + }, + ], + // WHERE / GROUP BY / LATEST ON reference base-table columns that don't + // exist at the chain layer (and the source mat view already applied them + // at layer 1). + where: undefined, + groupBy: undefined, + latestOn: undefined, + sampleBy: { ...srcSampleBy, duration: newInterval }, + } + + const matViewStmt: CreateMaterializedViewStatement = { + type: "createMaterializedView", + view: { type: "qualifiedName", parts: [newName] }, + baseTable: { type: "qualifiedName", parts: [srcName] }, + query: newQuery, + asParens: true, + partitionBy: partitionFor(newInterval), + } + + // Default to IMMEDIATE so the chain DDL has an explicit refresh clause. + matViewStmt.refresh = src.refresh ?? { + type: "materializedViewRefresh", + mode: "immediate", + } + const nextTtl = deriveNextTTL(src.ttl) + if (nextTtl) matViewStmt.ttl = nextTtl + if (src.period) matViewStmt.period = src.period + if (src.ownedBy) matViewStmt.ownedBy = src.ownedBy + + return `${HEADER}\n${formatSql(toSql(matViewStmt))};` +} + +const normalizeTTLUnits = (ddl: string): string => + ddl.replace( + /\bTTL\s+(\d+)\s+(HOUR|DAY|WEEK|MONTH|YEAR)(?!S)\b/gi, + (_m: string, n: string, unit: string) => `TTL ${n} ${unit.toUpperCase()}S`, + ) + +export const generateMatViewDDL = ( + ddl: string, + existingNames: readonly string[] = [], +): string => { + const stmt = parseOne(normalizeTTLUnits(ddl)) as + | CreateTableStatement + | CreateMaterializedViewStatement + if (stmt.type === "createTable") return fromTable(stmt, existingNames) + if (stmt.type === "createMaterializedView") + return fromMatView(stmt, existingNames) + throw new Error( + "Expected a CREATE TABLE or CREATE MATERIALIZED VIEW statement", + ) +}