From 67331232754d45d6de713df3721dae7c10a5611b Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 00:59:39 +0200 Subject: [PATCH 01/10] Add SUBSAMPLE keyword documentation page New SQL reference page for the SUBSAMPLE clause covering LTTB, M4, and MinMax downsampling algorithms with SVG diagrams, runnable examples on fx_trades, gap-preserving LTTB, and configuration reference. --- .../configuration-utils/_cairo.config.json | 4 + documentation/query/sql/subsample.md | 354 ++++++++++++++++++ documentation/sidebars.js | 1 + scripts/gen_subsample_svgs.py | 263 +++++++++++++ src/css/_global.css | 5 + static/images/docs/subsample/lttb-gap.svg | 60 +++ static/images/docs/subsample/lttb.svg | 37 ++ static/images/docs/subsample/m4.svg | 41 ++ static/images/docs/subsample/minmax.svg | 42 +++ static/images/docs/subsample/raw.svg | 50 +++ 10 files changed, 857 insertions(+) create mode 100644 documentation/query/sql/subsample.md create mode 100644 scripts/gen_subsample_svgs.py create mode 100644 static/images/docs/subsample/lttb-gap.svg create mode 100644 static/images/docs/subsample/lttb.svg create mode 100644 static/images/docs/subsample/m4.svg create mode 100644 static/images/docs/subsample/minmax.svg create mode 100644 static/images/docs/subsample/raw.svg diff --git a/documentation/configuration/configuration-utils/_cairo.config.json b/documentation/configuration/configuration-utils/_cairo.config.json index bed859b73..5b95a8393 100644 --- a/documentation/configuration/configuration-utils/_cairo.config.json +++ b/documentation/configuration/configuration-utils/_cairo.config.json @@ -319,6 +319,10 @@ "default": "0", "description": "SampleBy default alignment behaviour. true corresponds to ALIGN TO CALENDAR, false corresponds to ALIGN TO FIRST OBSERVATION." }, + "cairo.sql.subsample.max.rows": { + "default": "100000000", + "description": "Maximum number of input rows SUBSAMPLE will buffer. Exceeding this limit returns an error. Must be between 1 and 2,147,483,647." 
+ }, "cairo.date.locale": { "default": "en", "description": "The locale to handle date types." diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md new file mode 100644 index 000000000..706fcfafb --- /dev/null +++ b/documentation/query/sql/subsample.md @@ -0,0 +1,354 @@ +--- +title: SUBSAMPLE keyword +sidebar_label: SUBSAMPLE +description: SUBSAMPLE SQL keyword reference for time-series downsampling using LTTB, M4, and MinMax algorithms. +--- + +`SUBSAMPLE` reduces the number of rows in a query result while preserving the +visual shape of the data. It selects the most representative points from a +time-ordered dataset, making it ideal for rendering charts at screen resolution +without transferring millions of rows to the client. + +Unlike [SAMPLE BY](/docs/query/sql/sample-by/), which computes new aggregate +values at synthetic bucket boundaries, `SUBSAMPLE` selects actual rows from +the input. Every output row exists in the source table with its original +timestamp and values. This means output timestamps match real rows (useful for +joins), and users can drill down to the exact source record behind any point +on a chart. + +Requires a [designated timestamp](/docs/concepts/designated-timestamp/) column. + +## Syntax + +```questdb-sql +SELECT columns +FROM table +[WHERE conditions] +[SAMPLE BY ...] +SUBSAMPLE { lttb | m4 | minmax }(valueColumn, targetPoints [, gapThreshold]) +[ORDER BY ...] +[LIMIT ...] +``` + +Where: + +- **`valueColumn`** - the numeric column used to decide which points are + visually significant. All other columns pass through for selected rows. +- **`targetPoints`** - target number of output rows. Supports integer + literals, [DECLARE](/docs/query/sql/declare/) variables, and bind + variables (`$1`). Must be at least 2. Maximum is 2,147,483,647. +- **`gapThreshold`** - (LTTB only) optional interval that enables + gap-preserving mode. See [gap-preserving LTTB](#gap-preserving-lttb). 
+ +### Execution order + +`SUBSAMPLE` runs after `SAMPLE BY`, `GROUP BY`, and window functions, but +before `ORDER BY` and `LIMIT`. All value computations are complete before +downsampling decides which rows to keep. `SUBSAMPLE` only selects rows - it +never modifies computed values. + +All three algorithms execute serially. `SUBSAMPLE` buffers its entire input, +runs the selected algorithm, then emits the chosen rows. It does not block +upstream parallel execution - for example, a parallel `SAMPLE BY` completes +before `SUBSAMPLE` buffers its output. + +### Supported value types + +The value column must be a numeric type: `DOUBLE`, `FLOAT`, `INT`, `LONG`, +`SHORT`, or `BYTE`. `NULL` values in the value column are skipped during +downsampling. + +## Algorithms + +Three algorithms are available. Each one selects real rows from the input - +no values are ever interpolated or computed. The diagrams below all use the +same 24-point series as input (think 24 hourly bars over one day): + +![Raw time series](/images/docs/subsample/raw.svg) + +### lttb - Largest Triangle Three Buckets + +Divides the data into equal-sized row-count buckets and selects the point in +each bucket that forms the largest triangle with its neighbors. The first and +last points are always kept. Output is exactly N points (or all rows, if the input has fewer than N). + +Best for line charts where preserving the visual shape (spikes, valleys, +trend changes) matters most. + +![LTTB downsampling](/images/docs/subsample/lttb.svg) + +How it works: + +1. First and last points are always selected. +2. Remaining data is divided into N-2 equal-sized buckets by row count. +3. For each bucket, the point creating the largest triangle area with the + previously selected point and the average of the next bucket is chosen. +4. Output preserves the original timestamp order. 
+ +```questdb-sql title="Aggregate to hourly bars, then pick the 8 most representative" demo +SELECT timestamp, avg(price) avg_price +FROM fx_trades +WHERE symbol = 'EURUSD' + AND timestamp IN '$today' +SAMPLE BY 1h +SUBSAMPLE lttb(avg_price, 8) +``` + +### m4 - Min/Max/First/Last per time interval + +Divides the time range into equal time intervals and selects up to 4 points +per interval: the first, last, minimum, and maximum values. Empty intervals +produce no output, naturally preserving data gaps. + +Best for monitoring dashboards where you must not miss spikes or drops. The +min/max envelope is pixel-accurate to the full dataset. + +![M4 downsampling](/images/docs/subsample/m4.svg) + +How it works: + +1. The total time range is divided into N/4 equal time intervals. +2. For each interval, up to 4 points are selected: first, last, min, max. +3. When multiple roles resolve to the same physical row (e.g., the minimum + value is also the first row), duplicates are removed. A bucket emits + between 1 and 4 rows depending on the data. +4. Empty intervals produce no output. + +Output is up to N points (N/4 buckets, up to 4 points each). In the diagram +above, target 8 creates 2 time buckets. The last row of bucket 1 is also its +minimum, so that bucket emits 3 distinct rows instead of 4; bucket 2 emits +all 4, giving 7 total. + +```questdb-sql title="Hourly bars reduced to 8 with M4 - spike and trough guaranteed" demo +SELECT timestamp, avg(price) avg_price +FROM fx_trades +WHERE symbol = 'EURUSD' + AND timestamp IN '$today' +SAMPLE BY 1h +SUBSAMPLE m4(avg_price, 8) +``` + +:::tip + +When sizing `targetPoints` for a pixel-wide chart, remember that N/4 gives +the number of time buckets. A 1920-pixel-wide chart needs +`SUBSAMPLE m4(col, 1920)` to get 480 time buckets with up to 4 points each. + +::: + +### minmax - Min/Max per time interval + +Divides the time range into equal time intervals and selects up to 2 points +per interval: the minimum and maximum values. 
Lighter than M4 (no first/last +tracking), producing roughly half the output. Empty intervals produce no +output. + +Best for simple envelope visualization where you only need the value range +per bucket, not entry/exit points. + +![MinMax downsampling](/images/docs/subsample/minmax.svg) + +How it works: + +1. The total time range is divided into N/2 equal time intervals. +2. For each interval, up to 2 points are selected: min, max. +3. Duplicate points are removed (if min and max are the same row). +4. Empty intervals produce no output. + +Output is up to N points (N/2 buckets, up to 2 points each). + +```questdb-sql title="Hourly bars reduced to 8 with MinMax - min/max per bucket" demo +SELECT timestamp, avg(price) avg_price +FROM fx_trades +WHERE symbol = 'EURUSD' + AND timestamp IN '$today' +SAMPLE BY 1h +SUBSAMPLE minmax(avg_price, 8) +``` + +### Gap-preserving LTTB + +Standard LTTB divides data by row count, so it connects across time gaps. An +optional third parameter enables gap detection: + +```questdb-sql +SUBSAMPLE lttb(price, 12, '1h') +``` + +When specified, LTTB splits data into contiguous segments where consecutive +timestamps are within the gap threshold. Each segment is downsampled +independently with its proportional share of the target points. Gaps between +segments are preserved in the output. + +![LTTB gap handling comparison](/images/docs/subsample/lttb-gap.svg) + +Without gap detection, LTTB draws a straight line across the gap. With gap +detection enabled, each segment is downsampled independently and the gap is +visible in the output. + +Supported interval units: `s` (seconds), `m` (minutes), `h` (hours), +`d` (days). + +Examples: `'30s'`, `'5m'`, `'1h'`, `'7d'` + +```questdb-sql title="Preserve gaps larger than 1 hour in the output" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 12, '1h') +``` + +:::note + +Gap-preserving LTTB uses a soft target. 
Each segment receives at least its +first and last points. When many segments are detected, the total output may +exceed `targetPoints`. This is by design so that the same query does not fail +for one time range and succeed for another. Non-gap LTTB, M4, and MinMax +treat `targetPoints` as a hard maximum. + +::: + +### Algorithm comparison + +| Property | lttb | m4 | minmax | +|----------|------|----|--------| +| Bucket type | Equal row count | Equal time intervals | Equal time intervals | +| Points per bucket | Exactly 1 | Up to 4 (first, last, min, max) | Up to 2 (min, max) | +| Output count | Exactly N (non-gap mode) | Up to N | Up to N | +| Gap handling | Connects across gaps (use 3rd parameter to preserve) | Naturally preserves gaps | Naturally preserves gaps | +| Best use case | Line charts, shape preservation | Monitoring, spike detection | Lightweight envelope | + +## Examples + +### Chart-ready downsampling + +```questdb-sql title="LTTB: 500 representative points for a line chart" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 500) +``` + +```questdb-sql title="M4: pixel-accurate envelope for a 1920px-wide chart" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE m4(price, 1920) +``` + +```questdb-sql title="MinMax: lightweight envelope at half the output of M4" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE minmax(price, 500) +``` + +### Composing with SAMPLE BY + +```questdb-sql title="Aggregate to 1-minute bars, then downsample" demo +SELECT timestamp, avg(price) avg_price +FROM fx_trades +WHERE symbol = 'EURUSD' +SAMPLE BY 1m +SUBSAMPLE lttb(avg_price, 500) +``` + +`SAMPLE BY` computes aggregate values at bucket boundaries. `SUBSAMPLE` then +selects the most representative rows from that output. The two operations +complement each other: aggregate first, then reduce for display. 
+ +### Multiple columns pass through + +```questdb-sql title="LTTB selects rows by price; all columns emit" demo +SELECT timestamp, symbol, side, price, quantity +FROM fx_trades +WHERE symbol = 'GBPUSD' +SUBSAMPLE lttb(price, 500) +``` + +### After window functions + +```questdb-sql title="Window functions see all rows before SUBSAMPLE selects" demo +SELECT timestamp, price, + avg(price) OVER (ROWS 10 PRECEDING) ma +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 500) +``` + +Window functions compute on the full dataset. `SUBSAMPLE` then selects from +the result, so the moving average values are accurate. + +### With DECLARE variable + +```questdb-sql title="Parameterized target point count" +DECLARE @points := 500 +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, @points) +``` + +### With bind variable + +```questdb-sql title="Grafana integration - screen width as bind variable" +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, $1) +``` + +### With ORDER BY and LIMIT + +```questdb-sql title="Downsample, then sort by price" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 100) +ORDER BY price DESC +LIMIT 10 +``` + +### Inside subqueries + +```questdb-sql title="SUBSAMPLE works inside parenthesized subqueries" demo +SELECT count() FROM ( + SELECT timestamp, price + FROM fx_trades + WHERE symbol = 'EURUSD' + SUBSAMPLE lttb(price, 500) +) +``` + +## Behavior notes + +- If the input has fewer rows than the target, all rows are returned unchanged. +- Output rows are always in timestamp-ascending order. +- All columns from the `SELECT` clause pass through for selected rows. +- `SUBSAMPLE` works with `WHERE`, `SAMPLE BY`, `GROUP BY`, CTEs, subqueries, + `ORDER BY`, and `LIMIT`. +- `SUBSAMPLE` inside a parenthesized subquery applies inside that subquery, + not the outer query. 
+ +## Configuration + +| Property | Default | Description | +|----------|---------|-------------| +| `cairo.sql.subsample.max.rows` | 100,000,000 | Maximum input rows SUBSAMPLE will buffer. Exceeding this limit returns an error. | + +`SUBSAMPLE` buffers its entire input before running the algorithm. For direct +table scans, memory usage is 24 bytes per row. For queries involving +`SAMPLE BY`, `GROUP BY`, or subqueries, memory also scales with the projected +row width. At the default limit, the base buffer is approximately 2.4 GB. + +## See also + +- [SAMPLE BY](/docs/query/sql/sample-by/) - time-based aggregation + (computes new values at bucket boundaries, while `SUBSAMPLE` selects + existing rows) +- [Designated timestamp](/docs/concepts/designated-timestamp/) - required + for `SUBSAMPLE` to operate +- [Steinarsson, S. (2013). "Downsampling Time Series for Visual Representation"](https://github.com/sveinn-steinarsson/flot-downsample) - + the original LTTB algorithm and thesis reference +- [Jugel, U. et al. (2014). "M4: A Visualization-Oriented Time Series Data Aggregation"](https://www.vldb.org/pvldb/vol7/p797-jugel.pdf) - + the M4 paper diff --git a/documentation/sidebars.js b/documentation/sidebars.js index bf61c7620..ed60e5dc8 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -427,6 +427,7 @@ module.exports = { "query/sql/order-by", "query/sql/pivot", "query/sql/sample-by", + "query/sql/subsample", "query/sql/unnest", "query/sql/where", "query/sql/window-join", diff --git a/scripts/gen_subsample_svgs.py b/scripts/gen_subsample_svgs.py new file mode 100644 index 000000000..0df09e9dc --- /dev/null +++ b/scripts/gen_subsample_svgs.py @@ -0,0 +1,263 @@ +"""Generate SVG diagrams for the SUBSAMPLE documentation page. + +Uses @media (prefers-color-scheme) for light/dark theme support since SVGs +loaded via tags don't inherit CSS from the parent document. +ViewBox width is ~600 to match typical content width so 1 unit ~ 1px. 
+""" + +import os + +OUT_DIR = os.path.join(os.path.dirname(__file__), "..", "static", "images", "docs", "subsample") + +# QuestDB palette +PINK = "#e289a4" # algorithm lines +CYAN = "#0cc0df" # titles, M4/MinMax min/max role dots +GRAY = "#888" # default dots (real rows from raw data) + +# Segment A: 24 points, i=0..23 (represents 24 hourly bars) +SEG_A = [ + 0.50, 0.55, 0.60, 0.65, 0.70, 0.95, 0.85, 0.70, 0.60, 0.55, + 0.50, 0.45, 0.40, 0.35, 0.28, 0.20, 0.25, 0.30, 0.35, 0.40, + 0.45, 0.50, 0.48, 0.46, +] + +SEG_B_START = 48 +SEG_B = [ + 0.45, 0.50, 0.55, 0.58, 0.60, 0.65, 0.70, 0.75, 0.70, 0.55, + 0.40, 0.25, 0.15, 0.25, 0.40, 0.55, 0.60, 0.62, 0.60, 0.58, 0.55, +] + +# Single panel layout (viewBox units - keep at 600 for good proportions) +W = 600 +H = 300 +XL, XR = 10, 590 # plot x range - use full width +YT, YB = 60, 240 # plot y range (180px tall) +LY = 275 # legend baseline + +# Gap SVG layout (two panels) +GH = 530 +G1T, G1B = 60, 210 # panel 1 plot area +G2T, G2B = 300, 450 # panel 2 plot area +GLY = 510 + +# Intrinsic pixel width - set larger than container so max-width:100% fills it +PX_W = 1400 + +# Sizes (viewBox units - rendered ~1.3x on screen) +TITLE_SZ = 12 +LEG_SZ = 11 +REF_SW = 1.0 +ALGO_SW = 2.0 +DOT_R = 4.5 +LEG_DOT = 3.5 +BK_SW = 0.8 + +STYLE = f"""""" + + +def xp(i, imin, imax): + if imax == imin: + return (XL + XR) / 2 + return XL + (i - imin) / (imax - imin) * (XR - XL) + + +def yp(v, yt, yb): + return yt + (1 - v) * (yb - yt) + + +def pl(ii, vv, imin, imax, yt, yb): + return " ".join(f"{xp(i,imin,imax):.1f},{yp(v,yt,yb):.1f}" for i, v in zip(ii, vv)) + + +def cd(ii, vv, imin, imax, yt, yb, fill): + return "\n".join( + f'' + for i, v in zip(ii, vv)) + + +def cdm(pcs, imin, imax, yt, yb): + return "\n".join( + f'' + for i, v, c in pcs) + + +def rpl(ii, vv, imin, imax, yt, yb): + return f'' + + +def bkl(bounds, imin, imax, yt, yb): + return "\n".join( + f'' + for b in bounds) + + +def hdr(w, h, title, desc): + px_h = int(h * PX_W / w) + 
return (f'\n' + f'{title}\n{desc}\n{STYLE}') + + +def gen_raw(): + """Raw data panel - 24 hourly bars.""" + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + h = 260 + yt, yb = 55, 200 + ly = 240 + raw_color = "#888" + raw_dots = "\n".join( + f'' + for i, v in zip(ri, SEG_A) + ) + + return f"""{hdr(W, h, "Raw time series", "24 hourly data points with a spike and a trough.")} +Raw time series: 24 hourly bars + +{raw_dots} + +Hourly bars (24) +""" + + +def gen_lttb(): + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + # LTTB target 8: first + last always kept, 6 interior buckets + si = [0, 4, 5, 8, 15, 19, 22, 23] + sv = [SEG_A[i] for i in si] + return f"""{hdr(W, H, "LTTB downsampling", "LTTB selects 8 points from 24.")} +LTTB: 24 hourly bars reduced to 8 +{rpl(ri, SEG_A, im, ix, YT, YB)} + +{cd(si,sv,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points (8 of 24) +""" + + +def gen_m4(): + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + # M4 target 8 -> 2 time buckets (0..11, 12..23) + # Bucket 1: first=0(.50), last=11(.45), min=0(.50)->dup, max=5(.95) -> 3 pts + # Bucket 2: first=12(.40), last=23(.46), min=15(.20), max=21(.50) -> 4 pts + m4 = [ + (0,.50,GRAY),(5,.95,CYAN),(11,.45,GRAY), + (12,.40,GRAY),(15,.20,CYAN),(21,.50,CYAN),(23,.46,GRAY), + ] + mi = [p[0] for p in m4] + mv = [p[1] for p in m4] + return f"""{hdr(W, H, "M4 downsampling", "M4 selects 7 points from 24.")} +M4: target 8, emitted 7 (2 time buckets) +{bkl([12], im, ix, YT, YB)} +{rpl(ri, SEG_A, im, ix, YT, YB)} + +{cdm(m4, im, ix, YT, YB)} + +Raw data + +First / Last + +Min / Max + +Bucket boundary +""" + + +def gen_minmax(): + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + # MinMax target 8 -> 4 time buckets of 6 (0..5, 6..11, 12..17, 18..23) + # Bucket 1: min=0(.50), max=5(.95) + # Bucket 2: min=11(.45), max=6(.85) + # Bucket 3: min=15(.20), max=12(.40) + # Bucket 4: min=23(.46), max=21(.50) + mi = [0, 5, 6, 11, 12, 15, 21, 23] + mv = [.50, .95, .85, .45, .40, 
.20, .50, .46] + return f"""{hdr(W, H, "MinMax downsampling", "MinMax selects 8 points from 24.")} +MinMax: target 8, emitted 8 (4 time buckets) +{bkl([6, 12, 18], im, ix, YT, YB)} +{rpl(ri, SEG_A, im, ix, YT, YB)} + +{cd(mi,mv,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points (8 of 24) + +Bucket boundary +""" + + +def gen_lttb_gap(): + N_A = len(SEG_A) + N_B = len(SEG_B) + im, ix = 0, SEG_B_START + N_B - 1 # 0..68 + rai = list(range(N_A)) + rbi = list(range(SEG_B_START, SEG_B_START + N_B)) + # LTTB no gap, target 12 on 45 total points + li = [0,4,5,12,15,23,51,55,59,60,65,68] + lv = [.50,.70,.95,.40,.20,.46,.25,.55,.60,.62,.58,.55] + # LTTB with gap detection, 6 per segment + g1i = [0,4,5,15,19,23] + g1v = [.50,.70,.95,.20,.40,.46] + g2i = [48,53,55,60,65,68] + g2v = [.45,.65,.75,.15,.62,.55] + + def refs(yt, yb): + return f"{rpl(rai, SEG_A, im, ix, yt, yb)}\n{rpl(rbi, SEG_B, im, ix, yt, yb)}" + + total = N_A + N_B + return f"""{hdr(W, GH, "LTTB gap handling", "Comparing LTTB with and without gap detection.")} +LTTB without gap detection: line connects across the gap +{refs(G1T, G1B)} + +{cd(li,lv,im,ix,G1T,G1B,GRAY)} + +LTTB with gap detection: each segment downsampled +{refs(G2T, G2B)} + + +{cd(g1i,g1v,im,ix,G2T,G2B,GRAY)} +{cd(g2i,g2v,im,ix,G2T,G2B,GRAY)} + +Raw data ({total} points with gap) + +Selected points (12) +""" + + +if __name__ == "__main__": + os.makedirs(OUT_DIR, exist_ok=True) + for name, fn in [("raw.svg", gen_raw), ("lttb.svg", gen_lttb), ("m4.svg", gen_m4), + ("minmax.svg", gen_minmax), ("lttb-gap.svg", gen_lttb_gap)]: + path = os.path.join(OUT_DIR, name) + with open(path, "w") as f: + f.write(fn()) + print(f"Wrote {path}") diff --git a/src/css/_global.css b/src/css/_global.css index 0f5ce0d7c..172bebcc9 100644 --- a/src/css/_global.css +++ b/src/css/_global.css @@ -485,3 +485,8 @@ html[data-theme="dark"] .DocSearch { font-family: SegoeUI, -apple-system, BlinkMacSystemFont, Ubuntu, sans-serif; font-size: var(--font-size-small); } + +/* Make doc 
article images fill the content width */ +article img { + width: 100%; +} diff --git a/static/images/docs/subsample/lttb-gap.svg b/static/images/docs/subsample/lttb-gap.svg new file mode 100644 index 000000000..e45cac331 --- /dev/null +++ b/static/images/docs/subsample/lttb-gap.svg @@ -0,0 +1,60 @@ + +LTTB gap handling +Comparing LTTB with and without gap detection. + +LTTB without gap detection: line connects across the gap + + + + + + + + + + + + + + + + +LTTB with gap detection: each segment downsampled + + + + + + + + + + + + + + + + + +Raw data (45 points with gap) + +Selected points (12) + \ No newline at end of file diff --git a/static/images/docs/subsample/lttb.svg b/static/images/docs/subsample/lttb.svg new file mode 100644 index 000000000..400c3fc3e --- /dev/null +++ b/static/images/docs/subsample/lttb.svg @@ -0,0 +1,37 @@ + +LTTB downsampling +LTTB selects 8 points from 24. + +LTTB: 24 hourly bars reduced to 8 + + + + + + + + + + + +Raw data + +Selected points (8 of 24) + \ No newline at end of file diff --git a/static/images/docs/subsample/m4.svg b/static/images/docs/subsample/m4.svg new file mode 100644 index 000000000..959e25498 --- /dev/null +++ b/static/images/docs/subsample/m4.svg @@ -0,0 +1,41 @@ + +M4 downsampling +M4 selects 7 points from 24. + +M4: target 8, emitted 7 (2 time buckets) + + + + + + + + + + + +Raw data + +First / Last + +Min / Max + +Bucket boundary + \ No newline at end of file diff --git a/static/images/docs/subsample/minmax.svg b/static/images/docs/subsample/minmax.svg new file mode 100644 index 000000000..45aa7ad77 --- /dev/null +++ b/static/images/docs/subsample/minmax.svg @@ -0,0 +1,42 @@ + +MinMax downsampling +MinMax selects 8 points from 24. 
+ +MinMax: target 8, emitted 8 (4 time buckets) + + + + + + + + + + + + + + +Raw data + +Selected points (8 of 24) + +Bucket boundary + \ No newline at end of file diff --git a/static/images/docs/subsample/raw.svg b/static/images/docs/subsample/raw.svg new file mode 100644 index 000000000..31c108a40 --- /dev/null +++ b/static/images/docs/subsample/raw.svg @@ -0,0 +1,50 @@ + +Raw time series +24 hourly data points with a spike and a trough. + +Raw time series: 24 hourly bars + + + + + + + + + + + + + + + + + + + + + + + + + + +Hourly bars (24) + \ No newline at end of file From e1187bc8c2d9c2e270d2f24e03fe98b23ca7b59e Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 01:04:24 +0200 Subject: [PATCH 02/10] Scope full-width CSS rule to subsample diagrams only Avoid stretching all doc images by targeting only img[src*="/subsample/"] instead of article img. --- src/css/_global.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/css/_global.css b/src/css/_global.css index 172bebcc9..c82e2a1f9 100644 --- a/src/css/_global.css +++ b/src/css/_global.css @@ -486,7 +486,7 @@ html[data-theme="dark"] .DocSearch { font-size: var(--font-size-small); } -/* Make doc article images fill the content width */ -article img { +/* Make subsample diagram SVGs fill the content width */ +article img[src*="/subsample/"] { width: 100%; } From 33de85028e7f9cf367d05f0f2a61b6df285f1259 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 01:20:23 +0200 Subject: [PATCH 03/10] Improve SUBSAMPLE diagrams and gap-preserving LTTB section Split gap SVG into three separate charts with individual legends. Add small gap to dataset to illustrate threshold behavior. Use QuestDB color palette (pink lines, cyan titles, gray dots). Match inline SQL examples to chart storytelling (24 hourly bars). Add boundary markers distinguishing gaps from bucket boundaries. 
--- documentation/query/sql/subsample.md | 35 ++-- scripts/gen_subsample_svgs.py | 158 +++++++++++++----- static/images/docs/subsample/gap-detect.svg | 47 ++++++ .../images/docs/subsample/gap-no-detect.svg | 43 +++++ static/images/docs/subsample/gap-raw.svg | 74 ++++++++ static/images/docs/subsample/lttb-gap.svg | 60 ------- static/images/docs/subsample/lttb.svg | 6 +- static/images/docs/subsample/m4.svg | 6 +- static/images/docs/subsample/minmax.svg | 6 +- static/images/docs/subsample/raw.svg | 6 +- 10 files changed, 311 insertions(+), 130 deletions(-) create mode 100644 static/images/docs/subsample/gap-detect.svg create mode 100644 static/images/docs/subsample/gap-no-detect.svg create mode 100644 static/images/docs/subsample/gap-raw.svg delete mode 100644 static/images/docs/subsample/lttb-gap.svg diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index 706fcfafb..9744e021b 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -169,33 +169,44 @@ SUBSAMPLE minmax(avg_price, 8) ### Gap-preserving LTTB Standard LTTB divides data by row count, so it connects across time gaps. An -optional third parameter enables gap detection: +optional third parameter sets a gap threshold: ```questdb-sql -SUBSAMPLE lttb(price, 12, '1h') +SUBSAMPLE lttb(price, 12, '6h') ``` -When specified, LTTB splits data into contiguous segments where consecutive -timestamps are within the gap threshold. Each segment is downsampled -independently with its proportional share of the target points. Gaps between -segments are preserved in the output. +When specified, LTTB scans for gaps where consecutive timestamps are further +apart than the threshold. Gaps below the threshold are ignored - the data is +treated as continuous. Gaps above the threshold split the data into separate +segments, each downsampled independently with its proportional share of the +target points. 
-![LTTB gap handling comparison](/images/docs/subsample/lttb-gap.svg) +The diagrams below show a dataset with two gaps - a small one (3 hours) and +a large one (24 hours): -Without gap detection, LTTB draws a straight line across the gap. With gap -detection enabled, each segment is downsampled independently and the gap is -visible in the output. +![Raw data with gaps](/images/docs/subsample/gap-raw.svg) + +Without gap detection, LTTB treats all points as continuous and connects +across both gaps: + +![LTTB without gap detection](/images/docs/subsample/gap-no-detect.svg) + +With a threshold of `'6h'`, the small gap (3h) is below the threshold so +segments A and B are treated as continuous. The large gap (24h) exceeds the +threshold, so segment C is downsampled separately and the gap is preserved: + +![LTTB with gap detection](/images/docs/subsample/gap-detect.svg) Supported interval units: `s` (seconds), `m` (minutes), `h` (hours), `d` (days). Examples: `'30s'`, `'5m'`, `'1h'`, `'7d'` -```questdb-sql title="Preserve gaps larger than 1 hour in the output" demo +```questdb-sql title="Preserve gaps larger than 6 hours in the output" demo SELECT timestamp, price FROM fx_trades WHERE symbol = 'EURUSD' -SUBSAMPLE lttb(price, 12, '1h') +SUBSAMPLE lttb(price, 12, '6h') ``` :::note diff --git a/scripts/gen_subsample_svgs.py b/scripts/gen_subsample_svgs.py index 0df09e9dc..bd4ef4af5 100644 --- a/scripts/gen_subsample_svgs.py +++ b/scripts/gen_subsample_svgs.py @@ -21,8 +21,16 @@ 0.45, 0.50, 0.48, 0.46, ] -SEG_B_START = 48 -SEG_B = [ +# Gap dataset: 3 data segments, 1 small gap (3h), 1 big gap (24h) +# Seg A: i=0..10, Seg B: i=14..23 (small gap 11-13), Seg C: i=48..68 (big gap 24-47) +GAP_SEG_A_I = list(range(0, 11)) +GAP_SEG_A_V = [0.50, 0.55, 0.60, 0.65, 0.70, 0.95, 0.85, 0.70, 0.60, 0.55, 0.50] + +GAP_SEG_B_I = list(range(14, 24)) +GAP_SEG_B_V = [0.42, 0.38, 0.35, 0.28, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45] + +GAP_SEG_C_I = list(range(48, 69)) +GAP_SEG_C_V = [ 0.45, 0.50, 
0.55, 0.58, 0.60, 0.65, 0.70, 0.75, 0.70, 0.55, 0.40, 0.25, 0.15, 0.25, 0.40, 0.55, 0.60, 0.62, 0.60, 0.58, 0.55, ] @@ -34,11 +42,12 @@ YT, YB = 60, 240 # plot y range (180px tall) LY = 275 # legend baseline -# Gap SVG layout (two panels) -GH = 530 -G1T, G1B = 60, 210 # panel 1 plot area -G2T, G2B = 300, 450 # panel 2 plot area -GLY = 510 +# Gap SVG layout (three panels: raw, no-gap LTTB, gap LTTB) +GH = 600 +G0T, G0B = 50, 140 # panel 0: raw data with gap +G1T, G1B = 200, 310 # panel 1: LTTB without gap detection +G2T, G2B = 370, 480 # panel 2: LTTB with gap detection +GLY = 520 # legend # Intrinsic pixel width - set larger than container so max-width:100% fills it PX_W = 1400 @@ -56,17 +65,17 @@ .t {{ font-size: {TITLE_SZ}px; font-weight: 600; }} .l {{ font-size: {LEG_SZ}px; }} .ref {{ stroke-width: {REF_SW}; stroke-dasharray: 6 5; fill: none; }} - .bk {{ stroke-width: {BK_SW}; stroke-dasharray: 6 5; }} + .bk {{ stroke-width: {BK_SW}; stroke-dasharray: 2 3; }} .t {{ fill: {CYAN}; }} .l {{ fill: #64748b; }} .ref {{ stroke: #bbb; }} - .bk {{ stroke: #aaa; }} + .bk {{ stroke: #5a9aa8; }} .sep {{ stroke: #ccc; }} @media (prefers-color-scheme: dark) {{ .t {{ fill: {CYAN}; }} .l {{ fill: #b1b5d3; }} .ref {{ stroke: #555; }} - .bk {{ stroke: #4a4a4a; }} + .bk {{ stroke: #2a7a8a; }} .sep {{ stroke: #3a3a3a; }} }} """ @@ -215,48 +224,105 @@ def gen_minmax(): """ -def gen_lttb_gap(): - N_A = len(SEG_A) - N_B = len(SEG_B) - im, ix = 0, SEG_B_START + N_B - 1 # 0..68 - rai = list(range(N_A)) - rbi = list(range(SEG_B_START, SEG_B_START + N_B)) - # LTTB no gap, target 12 on 45 total points - li = [0,4,5,12,15,23,51,55,59,60,65,68] - lv = [.50,.70,.95,.40,.20,.46,.25,.55,.60,.62,.58,.55] - # LTTB with gap detection, 6 per segment - g1i = [0,4,5,15,19,23] - g1v = [.50,.70,.95,.20,.40,.46] - g2i = [48,53,55,60,65,68] - g2v = [.45,.65,.75,.15,.62,.55] - - def refs(yt, yb): - return f"{rpl(rai, SEG_A, im, ix, yt, yb)}\n{rpl(rbi, SEG_B, im, ix, yt, yb)}" - - total = N_A + N_B - 
return f"""{hdr(W, GH, "LTTB gap handling", "Comparing LTTB with and without gap detection.")} -LTTB without gap detection: line connects across the gap -{refs(G1T, G1B)} - -{cd(li,lv,im,ix,G1T,G1B,GRAY)} - -LTTB with gap detection: each segment downsampled -{refs(G2T, G2B)} - - -{cd(g1i,g1v,im,ix,G2T,G2B,GRAY)} -{cd(g2i,g2v,im,ix,G2T,G2B,GRAY)} - -Raw data ({total} points with gap) - -Selected points (12) +def _gap_helpers(): + """Shared helpers for the three gap SVGs.""" + im, ix = 0, 68 + raw_color = "#888" + small_gap_mid = 12 + big_gap_mid = 35.5 + + def raw_pls(yt, yb): + return (f"{rpl(GAP_SEG_A_I, GAP_SEG_A_V, im, ix, yt, yb)}\n" + f"{rpl(GAP_SEG_B_I, GAP_SEG_B_V, im, ix, yt, yb)}\n" + f"{rpl(GAP_SEG_C_I, GAP_SEG_C_V, im, ix, yt, yb)}") + + def raw_dots_str(yt, yb): + parts = [] + for si, sv in [(GAP_SEG_A_I, GAP_SEG_A_V), + (GAP_SEG_B_I, GAP_SEG_B_V), + (GAP_SEG_C_I, GAP_SEG_C_V)]: + parts.extend( + f'' + for i, v in zip(si, sv)) + return "\n".join(parts) + + def raw_lines_str(yt, yb): + return ( + f'\n' + f'\n' + f'') + + return im, ix, raw_color, small_gap_mid, big_gap_mid, raw_pls, raw_dots_str, raw_lines_str + + +def gen_gap_raw(): + """Raw data with gaps - shows where the gaps are.""" + im, ix, raw_color, sg, bg, _, raw_dots_str, raw_lines_str = _gap_helpers() + total = len(GAP_SEG_A_V) + len(GAP_SEG_B_V) + len(GAP_SEG_C_V) + return f"""{hdr(W, H, "Raw data with gaps", "42 points with a small and large gap.")} +Raw data: {total} points, small gap (3h) and large gap (24h) +{bkl([sg, bg], im, ix, YT, YB)} +{raw_lines_str(YT, YB)} +{raw_dots_str(YT, YB)} + +Data points ({total}) + +Gap boundary +""" + + +def gen_gap_no_detect(): + """LTTB without gap detection - connects across all gaps.""" + im, ix, _, sg, bg, raw_pls, _, _ = _gap_helpers() + ng_i = [0, 4, 5, 10, 18, 23, 51, 55, 60, 64, 67, 68] + ng_v = [.50, .70, .95, .50, .20, .45, .55, .75, .15, .55, .60, .55] + return f"""{hdr(W, H, "LTTB without gap detection", "LTTB connects across all gaps.")} 
+LTTB without gap detection: connects across all gaps +{raw_pls(YT, YB)} + +{cd(ng_i,ng_v,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points (12 of {len(GAP_SEG_A_V)+len(GAP_SEG_B_V)+len(GAP_SEG_C_V)}) +""" + + +def gen_gap_detect(): + """LTTB with gap detection - small gap connected, large gap preserved.""" + im, ix, _, sg, bg, raw_pls, _, _ = _gap_helpers() + g_ab_i = [0, 5, 10, 18, 22, 23] + g_ab_v = [.50, .95, .50, .20, .40, .45] + g_c_i = [48, 55, 58, 60, 65, 68] + g_c_v = [.45, .75, .55, .15, .60, .55] + return f"""{hdr(W, H, "LTTB with gap detection", "Small gap connected, large gap preserved.")} +LTTB with gap threshold '6h': small gap connected, large gap preserved +{bkl([bg], im, ix, YT, YB)} +{raw_pls(YT, YB)} + + +{cd(g_ab_i,g_ab_v,im,ix,YT,YB,GRAY)} +{cd(g_c_i,g_c_v,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points (12) + +Gap boundary """ if __name__ == "__main__": os.makedirs(OUT_DIR, exist_ok=True) for name, fn in [("raw.svg", gen_raw), ("lttb.svg", gen_lttb), ("m4.svg", gen_m4), - ("minmax.svg", gen_minmax), ("lttb-gap.svg", gen_lttb_gap)]: + ("minmax.svg", gen_minmax), + ("gap-raw.svg", gen_gap_raw), + ("gap-no-detect.svg", gen_gap_no_detect), + ("gap-detect.svg", gen_gap_detect)]: path = os.path.join(OUT_DIR, name) with open(path, "w") as f: f.write(fn()) diff --git a/static/images/docs/subsample/gap-detect.svg b/static/images/docs/subsample/gap-detect.svg new file mode 100644 index 000000000..8afadec02 --- /dev/null +++ b/static/images/docs/subsample/gap-detect.svg @@ -0,0 +1,47 @@ + +LTTB with gap detection +Small gap connected, large gap preserved. 
+ +LTTB with gap threshold '6h': small gap connected, large gap preserved + + + + + + + + + + + + + + + + + + + +Raw data + +Selected points (12) + +Gap boundary + \ No newline at end of file diff --git a/static/images/docs/subsample/gap-no-detect.svg b/static/images/docs/subsample/gap-no-detect.svg new file mode 100644 index 000000000..9d9c03e23 --- /dev/null +++ b/static/images/docs/subsample/gap-no-detect.svg @@ -0,0 +1,43 @@ + +LTTB without gap detection +LTTB connects across all gaps. + +LTTB without gap detection: connects across all gaps + + + + + + + + + + + + + + + + + +Raw data + +Selected points (12 of 42) + \ No newline at end of file diff --git a/static/images/docs/subsample/gap-raw.svg b/static/images/docs/subsample/gap-raw.svg new file mode 100644 index 000000000..26bf617a6 --- /dev/null +++ b/static/images/docs/subsample/gap-raw.svg @@ -0,0 +1,74 @@ + +Raw data with gaps +42 points with a small and large gap. + +Raw data: 42 points, small gap (3h) and large gap (24h) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Data points (42) + +Gap boundary + \ No newline at end of file diff --git a/static/images/docs/subsample/lttb-gap.svg b/static/images/docs/subsample/lttb-gap.svg deleted file mode 100644 index e45cac331..000000000 --- a/static/images/docs/subsample/lttb-gap.svg +++ /dev/null @@ -1,60 +0,0 @@ - -LTTB gap handling -Comparing LTTB with and without gap detection. 
- -LTTB without gap detection: line connects across the gap - - - - - - - - - - - - - - - - -LTTB with gap detection: each segment downsampled - - - - - - - - - - - - - - - - - -Raw data (45 points with gap) - -Selected points (12) - \ No newline at end of file diff --git a/static/images/docs/subsample/lttb.svg b/static/images/docs/subsample/lttb.svg index 400c3fc3e..693ece844 100644 --- a/static/images/docs/subsample/lttb.svg +++ b/static/images/docs/subsample/lttb.svg @@ -5,17 +5,17 @@ .t { font-size: 12px; font-weight: 600; } .l { font-size: 11px; } .ref { stroke-width: 1.0; stroke-dasharray: 6 5; fill: none; } - .bk { stroke-width: 0.8; stroke-dasharray: 6 5; } + .bk { stroke-width: 0.8; stroke-dasharray: 2 3; } .t { fill: #0cc0df; } .l { fill: #64748b; } .ref { stroke: #bbb; } - .bk { stroke: #aaa; } + .bk { stroke: #5a9aa8; } .sep { stroke: #ccc; } @media (prefers-color-scheme: dark) { .t { fill: #0cc0df; } .l { fill: #b1b5d3; } .ref { stroke: #555; } - .bk { stroke: #4a4a4a; } + .bk { stroke: #2a7a8a; } .sep { stroke: #3a3a3a; } } diff --git a/static/images/docs/subsample/m4.svg b/static/images/docs/subsample/m4.svg index 959e25498..9fcfaa6d9 100644 --- a/static/images/docs/subsample/m4.svg +++ b/static/images/docs/subsample/m4.svg @@ -5,17 +5,17 @@ .t { font-size: 12px; font-weight: 600; } .l { font-size: 11px; } .ref { stroke-width: 1.0; stroke-dasharray: 6 5; fill: none; } - .bk { stroke-width: 0.8; stroke-dasharray: 6 5; } + .bk { stroke-width: 0.8; stroke-dasharray: 2 3; } .t { fill: #0cc0df; } .l { fill: #64748b; } .ref { stroke: #bbb; } - .bk { stroke: #aaa; } + .bk { stroke: #5a9aa8; } .sep { stroke: #ccc; } @media (prefers-color-scheme: dark) { .t { fill: #0cc0df; } .l { fill: #b1b5d3; } .ref { stroke: #555; } - .bk { stroke: #4a4a4a; } + .bk { stroke: #2a7a8a; } .sep { stroke: #3a3a3a; } } diff --git a/static/images/docs/subsample/minmax.svg b/static/images/docs/subsample/minmax.svg index 45aa7ad77..0ab22a13b 100644 --- 
a/static/images/docs/subsample/minmax.svg +++ b/static/images/docs/subsample/minmax.svg @@ -5,17 +5,17 @@ .t { font-size: 12px; font-weight: 600; } .l { font-size: 11px; } .ref { stroke-width: 1.0; stroke-dasharray: 6 5; fill: none; } - .bk { stroke-width: 0.8; stroke-dasharray: 6 5; } + .bk { stroke-width: 0.8; stroke-dasharray: 2 3; } .t { fill: #0cc0df; } .l { fill: #64748b; } .ref { stroke: #bbb; } - .bk { stroke: #aaa; } + .bk { stroke: #5a9aa8; } .sep { stroke: #ccc; } @media (prefers-color-scheme: dark) { .t { fill: #0cc0df; } .l { fill: #b1b5d3; } .ref { stroke: #555; } - .bk { stroke: #4a4a4a; } + .bk { stroke: #2a7a8a; } .sep { stroke: #3a3a3a; } } diff --git a/static/images/docs/subsample/raw.svg b/static/images/docs/subsample/raw.svg index 31c108a40..c9fe3c6f8 100644 --- a/static/images/docs/subsample/raw.svg +++ b/static/images/docs/subsample/raw.svg @@ -5,17 +5,17 @@ .t { font-size: 12px; font-weight: 600; } .l { font-size: 11px; } .ref { stroke-width: 1.0; stroke-dasharray: 6 5; fill: none; } - .bk { stroke-width: 0.8; stroke-dasharray: 6 5; } + .bk { stroke-width: 0.8; stroke-dasharray: 2 3; } .t { fill: #0cc0df; } .l { fill: #64748b; } .ref { stroke: #bbb; } - .bk { stroke: #aaa; } + .bk { stroke: #5a9aa8; } .sep { stroke: #ccc; } @media (prefers-color-scheme: dark) { .t { fill: #0cc0df; } .l { fill: #b1b5d3; } .ref { stroke: #555; } - .bk { stroke: #4a4a4a; } + .bk { stroke: #2a7a8a; } .sep { stroke: #3a3a3a; } } From 6e724d4f4b12ead3f58a228af5f2ec4d1a838ee3 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 09:56:13 +0200 Subject: [PATCH 04/10] Reorder MinMax before M4 and improve algorithm explanations Tweak dataset so M4 visibly outperforms MinMax (late spike with pullback). Explain envelope, triangle method, and first/last advantage for users unfamiliar with downsampling. 
--- documentation/query/sql/subsample.md | 97 +++++++++++++------------ scripts/gen_subsample_svgs.py | 27 ++++--- static/images/docs/subsample/lttb.svg | 12 +-- static/images/docs/subsample/m4.svg | 10 +-- static/images/docs/subsample/minmax.svg | 10 +-- static/images/docs/subsample/raw.svg | 8 +- 6 files changed, 86 insertions(+), 78 deletions(-) diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index 9744e021b..a97e7c012 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -69,11 +69,15 @@ same 24-point series as input (think 24 hourly bars over one day): ### lttb - Largest Triangle Three Buckets Divides the data into equal-sized row-count buckets and selects the point in -each bucket that forms the largest triangle with its neighbors. The first and +each bucket that forms the largest triangle with its neighbors. The idea is +that points where the line changes direction sharply (a spike, a valley, a +sudden trend shift) form large triangles and get kept, while points in the +middle of a smooth trend form small triangles and get dropped. The first and last points are always kept. Output is exactly N points. -Best for line charts where preserving the visual shape (spikes, valleys, -trend changes) matters most. +Best for line charts where the visual shape matters most - a chart drawn +from the LTTB output looks nearly identical to one drawn from the full +dataset, despite using far fewer points. ![LTTB downsampling](/images/docs/subsample/lttb.svg) @@ -94,14 +98,45 @@ SAMPLE BY 1h SUBSAMPLE lttb(avg_price, 8) ``` +### minmax - Min/Max per time interval + +Divides the time range into equal time intervals and selects up to 2 points +per interval: the row with the minimum value and the row with the maximum +value. This creates a visual envelope - at any point on the chart, you can +see the full range the data covered during that interval. 
No spike or drop +is ever hidden, even under heavy compression. Empty intervals produce no +output, naturally preserving data gaps. + +![MinMax downsampling](/images/docs/subsample/minmax.svg) + +How it works: + +1. The total time range is divided into N/2 equal time intervals. +2. For each interval, up to 2 points are selected: min, max. +3. Duplicate points are removed (if min and max are the same row). +4. Empty intervals produce no output. + +Output is up to N points (N/2 buckets, up to 2 points each). + +```questdb-sql title="Hourly bars reduced to 8 with MinMax - min/max per bucket" demo +SELECT timestamp, avg(price) avg_price +FROM fx_trades +WHERE symbol = 'EURUSD' + AND timestamp IN '$today' +SAMPLE BY 1h +SUBSAMPLE minmax(avg_price, 8) +``` + ### m4 - Min/Max/First/Last per time interval -Divides the time range into equal time intervals and selects up to 4 points -per interval: the first, last, minimum, and maximum values. Empty intervals -produce no output, naturally preserving data gaps. +Builds on MinMax by also capturing the first and last rows in each time +interval. Where MinMax shows you the range of values in a bucket, M4 also +shows you where the data entered and exited - the opening and closing levels. +This matters when trends within a bucket are important: a price that opens +high, dips, then recovers looks different from one that opens low and climbs. +MinMax would show the same min/max range for both; M4 distinguishes them. -Best for monitoring dashboards where you must not miss spikes or drops. The -min/max envelope is pixel-accurate to the full dataset. +Empty intervals produce no output, naturally preserving data gaps. ![M4 downsampling](/images/docs/subsample/m4.svg) @@ -115,11 +150,11 @@ How it works: 4. Empty intervals produce no output. Output is up to N points (N/4 buckets, up to 4 points each). In the diagram -above, target 8 creates 2 time buckets. 
The first row happens to also be -the minimum in bucket 1, so each bucket emits 3-4 distinct rows instead of 4, -giving 7 total. +above, compare the right side with MinMax: M4 captures the exit at i=23 +(the pullback after the late spike), while MinMax ends at the peak. M4 +gives a more faithful picture of where the data actually settled. -```questdb-sql title="Hourly bars reduced to 8 with M4 - spike and trough guaranteed" demo +```questdb-sql title="Hourly bars reduced to 8 with M4 - captures entry/exit levels" demo SELECT timestamp, avg(price) avg_price FROM fx_trades WHERE symbol = 'EURUSD' @@ -136,36 +171,6 @@ the number of time buckets. A 1920-pixel-wide chart needs ::: -### minmax - Min/Max per time interval - -Divides the time range into equal time intervals and selects up to 2 points -per interval: the minimum and maximum values. Lighter than M4 (no first/last -tracking), producing roughly half the output. Empty intervals produce no -output. - -Best for simple envelope visualization where you only need the value range -per bucket, not entry/exit points. - -![MinMax downsampling](/images/docs/subsample/minmax.svg) - -How it works: - -1. The total time range is divided into N/2 equal time intervals. -2. For each interval, up to 2 points are selected: min, max. -3. Duplicate points are removed (if min and max are the same row). -4. Empty intervals produce no output. - -Output is up to N points (N/2 buckets, up to 2 points each). - -```questdb-sql title="Hourly bars reduced to 8 with MinMax - min/max per bucket" demo -SELECT timestamp, avg(price) avg_price -FROM fx_trades -WHERE symbol = 'EURUSD' - AND timestamp IN '$today' -SAMPLE BY 1h -SUBSAMPLE minmax(avg_price, 8) -``` - ### Gap-preserving LTTB Standard LTTB divides data by row count, so it connects across time gaps. An @@ -221,13 +226,13 @@ treat `targetPoints` as a hard maximum. 
### Algorithm comparison -| Property | lttb | m4 | minmax | -|----------|------|----|--------| +| Property | lttb | minmax | m4 | +|----------|------|--------|-----| | Bucket type | Equal row count | Equal time intervals | Equal time intervals | -| Points per bucket | Exactly 1 | Up to 4 (first, last, min, max) | Up to 2 (min, max) | +| Points per bucket | Exactly 1 | Up to 2 (min, max) | Up to 4 (first, last, min, max) | | Output count | Exactly N (non-gap mode) | Up to N | Up to N | | Gap handling | Connects across gaps (use 3rd parameter to preserve) | Naturally preserves gaps | Naturally preserves gaps | -| Best use case | Line charts, shape preservation | Monitoring, spike detection | Lightweight envelope | +| Best use case | Line charts, shape preservation | Quick value range overview | Dashboards, SLA compliance | ## Examples diff --git a/scripts/gen_subsample_svgs.py b/scripts/gen_subsample_svgs.py index bd4ef4af5..07c1e6bc9 100644 --- a/scripts/gen_subsample_svgs.py +++ b/scripts/gen_subsample_svgs.py @@ -15,10 +15,12 @@ GRAY = "#888" # default dots (real rows from raw data) # Segment A: 24 points, i=0..23 (represents 24 hourly bars) +# Late spike at i=22 (0.65) with pullback at i=23 (0.60) makes M4 visibly +# better than MinMax: M4 captures the exit at 0.60, MinMax only sees the peak. 
SEG_A = [ 0.50, 0.55, 0.60, 0.65, 0.70, 0.95, 0.85, 0.70, 0.60, 0.55, - 0.50, 0.45, 0.40, 0.35, 0.28, 0.20, 0.25, 0.30, 0.35, 0.40, - 0.45, 0.50, 0.48, 0.46, + 0.50, 0.45, 0.55, 0.35, 0.28, 0.20, 0.25, 0.30, 0.35, 0.40, + 0.45, 0.50, 0.65, 0.60, ] # Gap dataset: 3 data segments, 1 small gap (3h), 1 big gap (24h) @@ -154,7 +156,7 @@ def gen_lttb(): im, ix = 0, N - 1 ri = list(range(N)) # LTTB target 8: first + last always kept, 6 interior buckets - si = [0, 4, 5, 8, 15, 19, 22, 23] + si = [0, 4, 5, 11, 15, 18, 22, 23] sv = [SEG_A[i] for i in si] return f"""{hdr(W, H, "LTTB downsampling", "LTTB selects 8 points from 24.")} LTTB: 24 hourly bars reduced to 8 @@ -173,11 +175,12 @@ def gen_m4(): im, ix = 0, N - 1 ri = list(range(N)) # M4 target 8 -> 2 time buckets (0..11, 12..23) - # Bucket 1: first=0(.50), last=11(.45), min=0(.50)->dup, max=5(.95) -> 3 pts - # Bucket 2: first=12(.40), last=23(.46), min=15(.20), max=21(.50) -> 4 pts + # Bucket 1: first=0(.50), last=11(.45), min=11(.45)->dup last, max=5(.95) -> 3 pts + # Bucket 2: first=12(.55), last=23(.60), min=15(.20), max=22(.65) -> 4 pts + # Key: M4 catches the exit at i=23 (0.60) that MinMax misses m4 = [ (0,.50,GRAY),(5,.95,CYAN),(11,.45,GRAY), - (12,.40,GRAY),(15,.20,CYAN),(21,.50,CYAN),(23,.46,GRAY), + (12,.55,GRAY),(15,.20,CYAN),(22,.65,CYAN),(23,.60,GRAY), ] mi = [p[0] for p in m4] mv = [p[1] for p in m4] @@ -205,10 +208,10 @@ def gen_minmax(): # MinMax target 8 -> 4 time buckets of 6 (0..5, 6..11, 12..17, 18..23) # Bucket 1: min=0(.50), max=5(.95) # Bucket 2: min=11(.45), max=6(.85) - # Bucket 3: min=15(.20), max=12(.40) - # Bucket 4: min=23(.46), max=21(.50) - mi = [0, 5, 6, 11, 12, 15, 21, 23] - mv = [.50, .95, .85, .45, .40, .20, .50, .46] + # Bucket 3: min=15(.20), max=12(.55) + # Bucket 4: min=18(.35), max=22(.65) -- misses the exit at i=23 (0.60) + mi = [0, 5, 6, 11, 12, 15, 18, 22] + mv = [.50, .95, .85, .45, .55, .20, .35, .65] return f"""{hdr(W, H, "MinMax downsampling", "MinMax selects 8 points from 
24.")} MinMax: target 8, emitted 8 (4 time buckets) {bkl([6, 12, 18], im, ix, YT, YB)} @@ -318,8 +321,8 @@ def gen_gap_detect(): if __name__ == "__main__": os.makedirs(OUT_DIR, exist_ok=True) - for name, fn in [("raw.svg", gen_raw), ("lttb.svg", gen_lttb), ("m4.svg", gen_m4), - ("minmax.svg", gen_minmax), + for name, fn in [("raw.svg", gen_raw), ("lttb.svg", gen_lttb), + ("minmax.svg", gen_minmax), ("m4.svg", gen_m4), ("gap-raw.svg", gen_gap_raw), ("gap-no-detect.svg", gen_gap_no_detect), ("gap-detect.svg", gen_gap_detect)]: diff --git a/static/images/docs/subsample/lttb.svg b/static/images/docs/subsample/lttb.svg index 693ece844..03d7ee4c8 100644 --- a/static/images/docs/subsample/lttb.svg +++ b/static/images/docs/subsample/lttb.svg @@ -20,16 +20,16 @@ } LTTB: 24 hourly bars reduced to 8 - - + + - + - - - + + + Raw data diff --git a/static/images/docs/subsample/m4.svg b/static/images/docs/subsample/m4.svg index 9fcfaa6d9..180baec1e 100644 --- a/static/images/docs/subsample/m4.svg +++ b/static/images/docs/subsample/m4.svg @@ -21,15 +21,15 @@ M4: target 8, emitted 7 (2 time buckets) - - + + - + - - + + Raw data diff --git a/static/images/docs/subsample/minmax.svg b/static/images/docs/subsample/minmax.svg index 0ab22a13b..6e4acc54e 100644 --- a/static/images/docs/subsample/minmax.svg +++ b/static/images/docs/subsample/minmax.svg @@ -23,16 +23,16 @@ - - + + - + - - + + Raw data diff --git a/static/images/docs/subsample/raw.svg b/static/images/docs/subsample/raw.svg index c9fe3c6f8..8d9cb1add 100644 --- a/static/images/docs/subsample/raw.svg +++ b/static/images/docs/subsample/raw.svg @@ -20,7 +20,7 @@ } Raw time series: 24 hourly bars - + @@ -33,7 +33,7 @@ - + @@ -43,8 +43,8 @@ - - + + Hourly bars (24) \ No newline at end of file From 18efc4dd3660798c15f4f70f5502266c0ad76fd3 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 11:28:06 +0200 Subject: [PATCH 05/10] Add uniform and cadence algorithms to SUBSAMPLE page Move gap-preserving LTTB right after LTTB. 
Add uniform (evenly spaced rows) and cadence (every Nth row with optional random offset) sections. Update syntax block and comparison table. --- documentation/query/sql/subsample.md | 198 ++++++++++++++++++++------- 1 file changed, 145 insertions(+), 53 deletions(-) diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index a97e7c012..5de4ada90 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -20,24 +20,28 @@ Requires a [designated timestamp](/docs/concepts/designated-timestamp/) column. ## Syntax -```questdb-sql -SELECT columns -FROM table -[WHERE conditions] -[SAMPLE BY ...] +```questdb-sql title="Value-based algorithms" SUBSAMPLE { lttb | m4 | minmax }(valueColumn, targetPoints [, gapThreshold]) -[ORDER BY ...] -[LIMIT ...] +``` + +```questdb-sql title="Position-based algorithms" +SUBSAMPLE uniform(targetPoints) +SUBSAMPLE cadence(stride [, seed]) ``` Where: - **`valueColumn`** - the numeric column used to decide which points are - visually significant. All other columns pass through for selected rows. + visually significant. Required for `lttb`, `m4`, and `minmax`. Not used + by `uniform` or `cadence`. - **`targetPoints`** - target number of output rows. Supports integer literals, [DECLARE](/docs/query/sql/declare/) variables, and bind variables (`$1`). Must be at least 2. Maximum is 2,147,483,647. -- **`gapThreshold`** - (LTTB only) optional interval that enables +- **`stride`** - (`cadence` only) step distance between emitted rows. This + is not an output count: `cadence(500)` emits one row out of every 500. +- **`seed`** - (`cadence` only) optional integer seed or `NULL`. See + [cadence](#cadence---every-nth-row). +- **`gapThreshold`** - (`lttb` only) optional interval that enables gap-preserving mode. See [gap-preserving LTTB](#gap-preserving-lttb). ### Execution order @@ -60,9 +64,14 @@ downsampling. ## Algorithms -Three algorithms are available. 
Each one selects real rows from the input - -no values are ever interpolated or computed. The diagrams below all use the -same 24-point series as input (think 24 hourly bars over one day): +Five algorithms are available. The first three (`lttb`, `minmax`, `m4`) +inspect values to decide which rows are visually significant. The last two +(`uniform`, `cadence`) ignore values and select rows purely by position - +they are cheaper and useful when the input is dense or as a baseline. + +All five select real rows from the input - no values are ever interpolated +or computed. The diagrams below use a 24-point series as input (think 24 +hourly bars over one day): ![Raw time series](/images/docs/subsample/raw.svg) @@ -98,6 +107,59 @@ SAMPLE BY 1h SUBSAMPLE lttb(avg_price, 8) ``` +### Gap-preserving LTTB + +Standard LTTB divides data by row count, so it connects across time gaps. An +optional third parameter sets a gap threshold: + +```questdb-sql +SUBSAMPLE lttb(price, 12, '6h') +``` + +When specified, LTTB scans for gaps where consecutive timestamps are further +apart than the threshold. Gaps below the threshold are ignored - the data is +treated as continuous. Gaps above the threshold split the data into separate +segments, each downsampled independently with its proportional share of the +target points. + +The diagrams below show a dataset with two gaps - a small one (3 hours) and +a large one (24 hours): + +![Raw data with gaps](/images/docs/subsample/gap-raw.svg) + +Without gap detection, LTTB treats all points as continuous and connects +across both gaps: + +![LTTB without gap detection](/images/docs/subsample/gap-no-detect.svg) + +With a threshold of `'6h'`, the small gap (3h) is below the threshold so +segments A and B are treated as continuous. 
The large gap (24h) exceeds the +threshold, so segment C is downsampled separately and the gap is preserved: + +![LTTB with gap detection](/images/docs/subsample/gap-detect.svg) + +Supported interval units: `s` (seconds), `m` (minutes), `h` (hours), +`d` (days). + +Examples: `'30s'`, `'5m'`, `'1h'`, `'7d'` + +```questdb-sql title="Preserve gaps larger than 6 hours in the output" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 12, '6h') +``` + +:::note + +Gap-preserving LTTB uses a soft target. Each segment receives at least its +first and last points. When many segments are detected, the total output may +exceed `targetPoints`. This is by design so that the same query does not fail +for one time range and succeed for another. Non-gap LTTB, M4, and MinMax +treat `targetPoints` as a hard maximum. + +::: + ### minmax - Min/Max per time interval Divides the time range into equal time intervals and selects up to 2 points @@ -171,68 +233,98 @@ the number of time buckets. A 1920-pixel-wide chart needs ::: -### Gap-preserving LTTB +### uniform - Evenly spaced rows -Standard LTTB divides data by row count, so it connects across time gaps. An -optional third parameter sets a gap threshold: +Selects a target number of rows spaced evenly across the input. First and +last rows are always kept, interior rows are picked at regular positions +between them. Unlike the previous algorithms, `uniform` does not inspect +values - it reduces row count purely by position in the time-ordered input. -```questdb-sql -SUBSAMPLE lttb(price, 12, '6h') +Use `uniform` when the input is dense and you care about reducing transfer +size more than preserving spikes or troughs. For a line chart where visual +fidelity matters, `lttb` or `m4` produce better results at the same target +count. For a heatmap, scatter plot, or tabular display where every row looks +similar, `uniform` is faster and the output is indistinguishable from +value-aware methods. 
+ +How it works: + +1. First and last rows are always selected. +2. Remaining `targetPoints - 2` rows are selected at evenly spaced positions + between first and last. +3. Output is exactly `targetPoints` rows when the input is larger than the + target, otherwise all input rows are returned unchanged. + +```questdb-sql title="500 evenly spaced rows from a dense tick table" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE uniform(500) ``` -When specified, LTTB scans for gaps where consecutive timestamps are further -apart than the threshold. Gaps below the threshold are ignored - the data is -treated as continuous. Gaps above the threshold split the data into separate -segments, each downsampled independently with its proportional share of the -target points. +### cadence - Every Nth row -The diagrams below show a dataset with two gaps - a small one (3 hours) and -a large one (24 hours): +Selects one row out of every N, starting from a configurable offset. Like +`uniform`, `cadence` does not inspect values - it reduces row count by +stepping through the input at a fixed rhythm. -![Raw data with gaps](/images/docs/subsample/gap-raw.svg) +The `stride` parameter is the step distance, not the output count. To keep +500 rows, use `uniform(500)` or `lttb(col, 500)`. `cadence(500)` emits one +row out of every 500, which is a different (and input-dependent) number. -Without gap detection, LTTB treats all points as continuous and connects -across both gaps: +How it works: -![LTTB without gap detection](/images/docs/subsample/gap-no-detect.svg) +1. First and last rows are always selected (except when stride exceeds the + input size, in which case only the first row is emitted). +2. From the offset position, emit one row every `stride` rows. +3. Output is in timestamp-ascending order. -With a threshold of `'6h'`, the small gap (3h) is below the threshold so -segments A and B are treated as continuous. 
The large gap (24h) exceeds the -threshold, so segment C is downsampled separately and the gap is preserved: +| Form | Behavior | +|------|----------| +| `cadence(N)` | Every Nth row, deterministic, offset 0 | +| `cadence(N, seed)` | Random offset in [0, N), reproducible given seed | +| `cadence(N, NULL)` | Random offset in [0, N), fresh each run | -![LTTB with gap detection](/images/docs/subsample/gap-detect.svg) +The seeded and NULL forms exist to avoid phase-lock with periodic signals. +If the input has a 1000-row period and you stride by 1000 with offset 0, +every emitted row hits the same phase of the period and the chart loses the +periodic structure. A random offset breaks this alignment. -Supported interval units: `s` (seconds), `m` (minutes), `h` (hours), -`d` (days). +:::note -Examples: `'30s'`, `'5m'`, `'1h'`, `'7d'` +Randomizing the offset helps with aliasing on periodic signals, but it does +not make `cadence` a statistical sampler. It does not produce unbiased +estimates of aggregates like mean or percentile. For those, use +[SAMPLE BY](/docs/query/sql/sample-by/) with the appropriate aggregate +function. -```questdb-sql title="Preserve gaps larger than 6 hours in the output" demo +::: + +```questdb-sql title="Every 1000th row - simple decimation" demo SELECT timestamp, price FROM fx_trades WHERE symbol = 'EURUSD' -SUBSAMPLE lttb(price, 12, '6h') +SUBSAMPLE cadence(1000) ``` -:::note - -Gap-preserving LTTB uses a soft target. Each segment receives at least its -first and last points. When many segments are detected, the total output may -exceed `targetPoints`. This is by design so that the same query does not fail -for one time range and succeed for another. Non-gap LTTB, M4, and MinMax -treat `targetPoints` as a hard maximum. 
- -::: +```questdb-sql title="Anti-aliasing with reproducible seed" +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE cadence(1000, 42) +``` ### Algorithm comparison -| Property | lttb | minmax | m4 | -|----------|------|--------|-----| -| Bucket type | Equal row count | Equal time intervals | Equal time intervals | -| Points per bucket | Exactly 1 | Up to 2 (min, max) | Up to 4 (first, last, min, max) | -| Output count | Exactly N (non-gap mode) | Up to N | Up to N | -| Gap handling | Connects across gaps (use 3rd parameter to preserve) | Naturally preserves gaps | Naturally preserves gaps | -| Best use case | Line charts, shape preservation | Quick value range overview | Dashboards, SLA compliance | +| Property | lttb | minmax | m4 | uniform | cadence | +|----------|------|--------|-----|---------|---------| +| Parameter | targetPoints | targetPoints | targetPoints | targetPoints | stride | +| Inspects values | Yes | Yes | Yes | No | No | +| Bucket type | Equal row count | Equal time intervals | Equal time intervals | Equal row spacing | Fixed row stride | +| Points per bucket | Exactly 1 | Up to 2 (min, max) | Up to 4 (first, last, min, max) | N/A | N/A | +| Output count | Exactly N | Up to N | Up to N | Exactly N | ~rowCount/stride | +| Gap handling | Connects across (use threshold) | Naturally preserves | Naturally preserves | Connects across | Connects across | +| Best use case | Line charts | Value range overview | Dashboards, SLA | Dense uniform data | Decimation, anti-aliasing | ## Examples From 5bc7d4b4fee1cd86648ec09369f9d4b2f9b7b8b2 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 11:45:36 +0200 Subject: [PATCH 06/10] Add uniform and cadence algorithms, charts, and comparison table Add uniform (evenly spaced) and cadence (every Nth row) sections with SVG diagrams. Move gap-preserving LTTB under LTTB heading. Reorder MinMax before M4. Add relative cost row to comparison table. 
--- documentation/query/sql/subsample.md | 9 +++-- scripts/gen_subsample_svgs.py | 44 ++++++++++++++++++++++++ static/images/docs/subsample/cadence.svg | 38 ++++++++++++++++++++ static/images/docs/subsample/uniform.svg | 37 ++++++++++++++++++++ 4 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 static/images/docs/subsample/cadence.svg create mode 100644 static/images/docs/subsample/uniform.svg diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index 5de4ada90..24454c171 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -107,7 +107,7 @@ SAMPLE BY 1h SUBSAMPLE lttb(avg_price, 8) ``` -### Gap-preserving LTTB +#### Gap-preserving LTTB Standard LTTB divides data by row count, so it connects across time gaps. An optional third parameter sets a gap threshold: @@ -247,6 +247,8 @@ count. For a heatmap, scatter plot, or tabular display where every row looks similar, `uniform` is faster and the output is indistinguishable from value-aware methods. +![Uniform downsampling](/images/docs/subsample/uniform.svg) + How it works: 1. First and last rows are always selected. @@ -272,6 +274,8 @@ The `stride` parameter is the step distance, not the output count. To keep 500 rows, use `uniform(500)` or `lttb(col, 500)`. `cadence(500)` emits one row out of every 500, which is a different (and input-dependent) number. +![Cadence downsampling](/images/docs/subsample/cadence.svg) + How it works: 1. 
First and last rows are always selected (except when stride exceeds the @@ -322,9 +326,10 @@ SUBSAMPLE cadence(1000, 42) | Inspects values | Yes | Yes | Yes | No | No | | Bucket type | Equal row count | Equal time intervals | Equal time intervals | Equal row spacing | Fixed row stride | | Points per bucket | Exactly 1 | Up to 2 (min, max) | Up to 4 (first, last, min, max) | N/A | N/A | -| Output count | Exactly N | Up to N | Up to N | Exactly N | ~rowCount/stride | +| Output count | Exactly N (or all rows if fewer) | Up to N | Up to N | Exactly N (or all rows if fewer) | ~rowCount/stride | | Gap handling | Connects across (use threshold) | Naturally preserves | Naturally preserves | Connects across | Connects across | | Best use case | Line charts | Value range overview | Dashboards, SLA | Dense uniform data | Decimation, anti-aliasing | +| Relative cost | Higher: triangle area per point | Low: min/max per bucket | Medium: first/last/min/max per bucket | Lowest: position arithmetic | Lowest: stride arithmetic | ## Examples diff --git a/scripts/gen_subsample_svgs.py b/scripts/gen_subsample_svgs.py index 07c1e6bc9..aeacf3fe5 100644 --- a/scripts/gen_subsample_svgs.py +++ b/scripts/gen_subsample_svgs.py @@ -227,6 +227,49 @@ def gen_minmax(): """ +def gen_uniform(): + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + # uniform(8): evenly spaced, first and last pinned + # positions: round(i * 23 / 7) for i in 0..7 = 0, 3, 7, 10, 13, 16, 20, 23 + si = [round(i * (N - 1) / 7) for i in range(8)] + sv = [SEG_A[i] for i in si] + return f"""{hdr(W, H, "Uniform downsampling", "Uniform selects 8 evenly spaced points from 24.")} +Uniform: 8 evenly spaced from 24 +{rpl(ri, SEG_A, im, ix, YT, YB)} + +{cd(si,sv,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points (8 of 24) +""" + + +def gen_cadence(): + N = len(SEG_A) + im, ix = 0, N - 1 + ri = list(range(N)) + # cadence(3): every 3rd row from offset 0, plus last row pinned + # positions: 0, 3, 6, 9, 12, 15, 18, 21, 
23(pinned) + stride = 3 + si = list(range(0, N, stride)) + if si[-1] != N - 1: + si.append(N - 1) + sv = [SEG_A[i] for i in si] + return f"""{hdr(W, H, "Cadence downsampling", "Cadence selects every 3rd row from 24.")} +Cadence: stride 3, emitted {len(si)} from 24 +{rpl(ri, SEG_A, im, ix, YT, YB)} + +{cd(si,sv,im,ix,YT,YB,GRAY)} + +Raw data + +Selected points ({len(si)} of 24) +""" + + def _gap_helpers(): """Shared helpers for the three gap SVGs.""" im, ix = 0, 68 @@ -323,6 +366,7 @@ def gen_gap_detect(): os.makedirs(OUT_DIR, exist_ok=True) for name, fn in [("raw.svg", gen_raw), ("lttb.svg", gen_lttb), ("minmax.svg", gen_minmax), ("m4.svg", gen_m4), + ("uniform.svg", gen_uniform), ("cadence.svg", gen_cadence), ("gap-raw.svg", gen_gap_raw), ("gap-no-detect.svg", gen_gap_no_detect), ("gap-detect.svg", gen_gap_detect)]: diff --git a/static/images/docs/subsample/cadence.svg b/static/images/docs/subsample/cadence.svg new file mode 100644 index 000000000..1666f2dc8 --- /dev/null +++ b/static/images/docs/subsample/cadence.svg @@ -0,0 +1,38 @@ + +Cadence downsampling +Cadence selects every 3rd row from 24. + +Cadence: stride 3, emitted 9 from 24 + + + + + + + + + + + + +Raw data + +Selected points (9 of 24) + \ No newline at end of file diff --git a/static/images/docs/subsample/uniform.svg b/static/images/docs/subsample/uniform.svg new file mode 100644 index 000000000..adcf661bb --- /dev/null +++ b/static/images/docs/subsample/uniform.svg @@ -0,0 +1,37 @@ + +Uniform downsampling +Uniform selects 8 evenly spaced points from 24. 
+ +Uniform: 8 evenly spaced from 24 + + + + + + + + + + + +Raw data + +Selected points (8 of 24) + \ No newline at end of file From 4c18b238d679cd327ba299bfab293492bdfeb3c1 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 11:50:58 +0200 Subject: [PATCH 07/10] Mention optional offset parameter in cadence intro --- documentation/query/sql/subsample.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index 24454c171..627e53168 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -268,7 +268,9 @@ SUBSAMPLE uniform(500) Selects one row out of every N, starting from a configurable offset. Like `uniform`, `cadence` does not inspect values - it reduces row count by -stepping through the input at a fixed rhythm. +stepping through the input at a fixed rhythm. An optional second parameter +sets the starting offset, either as a fixed seed for reproducible results or +as `NULL` for a fresh random offset each run. The `stride` parameter is the step distance, not the output count. To keep 500 rows, use `uniform(500)` or `lttb(col, 500)`. `cadence(500)` emits one From 153a1d02155b7045d7574f2b3a631bdd5df76b05 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 12:01:05 +0200 Subject: [PATCH 08/10] Add chart-ready examples for all algorithms and minor fixes Add uniform, cadence, and gap-preserving LTTB to the chart-ready examples section. Make DECLARE example demoable. Replace Grafana reference with generic programmatic integration. 
--- documentation/query/sql/subsample.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index 627e53168..c12449867 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -344,6 +344,13 @@ WHERE symbol = 'EURUSD' SUBSAMPLE lttb(price, 500) ``` +```questdb-sql title="LTTB with gap detection: preserve gaps larger than 1 hour" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE lttb(price, 500, '1h') +``` + ```questdb-sql title="M4: pixel-accurate envelope for a 1920px-wide chart" demo SELECT timestamp, price FROM fx_trades @@ -358,6 +365,20 @@ WHERE symbol = 'EURUSD' SUBSAMPLE minmax(price, 500) ``` +```questdb-sql title="Uniform: 500 evenly spaced rows for a dense table" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE uniform(500) +``` + +```questdb-sql title="Cadence: every 1000th row for quick decimation" demo +SELECT timestamp, price +FROM fx_trades +WHERE symbol = 'EURUSD' +SUBSAMPLE cadence(1000) +``` + ### Composing with SAMPLE BY ```questdb-sql title="Aggregate to 1-minute bars, then downsample" demo @@ -396,7 +417,7 @@ the result, so the moving average values are accurate. 
### With DECLARE variable -```questdb-sql title="Parameterized target point count" +```questdb-sql title="Parameterized target point count" demo DECLARE @points := 500 SELECT timestamp, price FROM fx_trades @@ -406,7 +427,7 @@ SUBSAMPLE lttb(price, @points) ### With bind variable -```questdb-sql title="Grafana integration - screen width as bind variable" +```questdb-sql title="Programmatic integration - target as bind variable" SELECT timestamp, price FROM fx_trades WHERE symbol = 'EURUSD' From d77aa8548ed35c6609416d00022a66ac86a2f0e6 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 23 Apr 2026 12:03:57 +0200 Subject: [PATCH 09/10] Explain pass-through columns carry original values --- documentation/query/sql/subsample.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/documentation/query/sql/subsample.md b/documentation/query/sql/subsample.md index c12449867..54c2a8da5 100644 --- a/documentation/query/sql/subsample.md +++ b/documentation/query/sql/subsample.md @@ -395,6 +395,12 @@ complement each other: aggregate first, then reduce for display. ### Multiple columns pass through +Because `SUBSAMPLE` selects real rows rather than computing new ones, every +column in the output carries its original value from the source table. In +the query below, `side` and `quantity` are not involved in the downsampling +decision, but each output row is a real trade with the actual side and +quantity that occurred at that timestamp. 
+ ```questdb-sql title="LTTB selects rows by price; all columns emit" demo SELECT timestamp, symbol, side, price, quantity FROM fx_trades From 50db1af69f685a21ae652257f3e923f5238e1ce8 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 24 Apr 2026 11:14:12 +0200 Subject: [PATCH 10/10] Highlight first/last dots in M4 chart to distinguish from MinMax --- scripts/gen_subsample_svgs.py | 8 ++++---- static/images/docs/subsample/m4.svg | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/gen_subsample_svgs.py b/scripts/gen_subsample_svgs.py index aeacf3fe5..c9cdd681d 100644 --- a/scripts/gen_subsample_svgs.py +++ b/scripts/gen_subsample_svgs.py @@ -179,8 +179,8 @@ def gen_m4(): # Bucket 2: first=12(.55), last=23(.60), min=15(.20), max=22(.65) -> 4 pts # Key: M4 catches the exit at i=23 (0.60) that MinMax misses m4 = [ - (0,.50,GRAY),(5,.95,CYAN),(11,.45,GRAY), - (12,.55,GRAY),(15,.20,CYAN),(22,.65,CYAN),(23,.60,GRAY), + (0,.50,CYAN),(5,.95,GRAY),(11,.45,CYAN), + (12,.55,CYAN),(15,.20,GRAY),(22,.65,GRAY),(23,.60,CYAN), ] mi = [p[0] for p in m4] mv = [p[1] for p in m4] @@ -192,9 +192,9 @@ def gen_m4(): {cdm(m4, im, ix, YT, YB)} Raw data - + First / Last - + Min / Max Bucket boundary diff --git a/static/images/docs/subsample/m4.svg b/static/images/docs/subsample/m4.svg index 180baec1e..3e2ca661b 100644 --- a/static/images/docs/subsample/m4.svg +++ b/static/images/docs/subsample/m4.svg @@ -23,18 +23,18 @@ - - - - - - - + + + + + + + Raw data - + First / Last - + Min / Max Bucket boundary