Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 12 additions & 44 deletions packages/compiler/scripts/parseLiquid.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ import {fileURLToPath} from 'node:url';
import * as ohm from 'ohm-js-legacy';
import {Bench} from 'tinybench';

import {Grammar} from 'ohm-js';
import {compileGrammars} from '../src/api.ts';
import {unparse} from '../test/_helpers.js';
import {createReader} from '../../runtime/src/cstReader.ts';
import {compileGrammars} from '../src/api.ts';
import {Grammar} from '../../runtime/src/miniohm.ts';

const __dirname = dirname(fileURLToPath(import.meta.url));
const datadir = join(__dirname, '../test/data');
Expand All @@ -35,8 +34,7 @@ const positionalArgs = process.argv.slice(2).filter(a => !a.startsWith('--'));
// https://matklad.github.io/2024/03/22/basic-things.html
const smallSize = flags.has('--small-size');
const includeUnparse = flags.has('--include-unparse');
const useCstReader = flags.has('--cst-reader');
const useCstReaderPacked = flags.has('--cst-reader-packed');
const useCstView = flags.has('--use-cstview');

// Get pattern from command line arguments
const pattern = positionalArgs[0];
Expand Down Expand Up @@ -105,52 +103,26 @@ const pattern = positionalArgs[0];
opts
);

/**
 * Rebuild the matched text by walking the CST through a CstReader that hands
 * out raw handles. Because raw handles carry no position, the absolute start
 * index of every node is threaded through the traversal by hand.
 *
 * @param matchResult The match result passed to `createReader`.
 * @returns {string} The concatenated text of all terminal nodes, in visit order.
 */
function unparseCstReaderRaw(matchResult) {
  const reader = createReader(matchResult);
  const source = reader.input;
  const chunks = [];

  const visit = (node, pos) => {
    if (!reader.isTerminal(node)) {
      // Child offsets are relative to this node's start index.
      reader.forEachChild(
        node,
        (kid, _leadingSpaces, offset) => {
          visit(kid, pos + offset);
        },
        pos
      );
      return;
    }
    chunks.push(source.slice(pos, pos + reader.matchLength(node)));
  };

  visit(reader.rootHandle, reader.rootStartIdx);
  return chunks.join('');
}

// Walk CST using CstReader (handles with startIdx), collecting terminal text.
function unparseCstReaderPacked(matchResult) {
const reader = createReader(matchResult, {packStartIdx: true});
// Walk CST using CstView (handles with startIdx), collecting terminal text.
function unparseCstView(matchResult) {
const {cst} = matchResult;
let ans = '';
function walk(handle) {
if (reader.isTerminal(handle)) {
ans += reader.sourceString(handle);
if (cst.kind(handle) === '_terminal') {
ans += cst.sourceString(handle);
return;
}
reader.forEachChild(handle, (child, _leadingSpaces) => {
cst.forEachChild(handle, (child, _leadingSpaces) => {
walk(child);
});
}
walk(reader.root);
walk(cst.root);
return ans;
}

const wasmLabel = includeUnparse ? 'Wasm parse+unparse' : 'Wasm parse';
bench.add(
useCstReaderPacked
? `${wasmLabel} (CstReader packed)`
: useCstReader
? `${wasmLabel} (CstReader)`
: wasmLabel,
useCstView ? `${wasmLabel} (CstView)` : wasmLabel,
() => {
let overriddenDuration = 0;
for (const {input} of files) {
Expand All @@ -167,11 +139,7 @@ const pattern = positionalArgs[0];
peakWasmMemoryBytes,
exports.memory.buffer.byteLength
);
return useCstReaderPacked
? unparseCstReaderPacked(m)
: useCstReader
? unparseCstReaderRaw(m)
: unparse(g);
return useCstView ? unparseCstView(m) : unparse(g);
});
if (includeUnparse) overriddenDuration += bench.now() - start;
}
Expand Down
188 changes: 188 additions & 0 deletions packages/compiler/test/test-cstView.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import test from 'ava';
import {readFileSync} from 'node:fs';

import {CstKind} from 'ohm-js/cstView';

import {compileAndLoad, scriptRel} from './_helpers.js';

const jsonSource = readFileSync(scriptRel('../../lang-json/json.ohm'), 'utf-8');

// Node kinds whose JS value is fixed and does not depend on the node's children.
const FIXED_VALUE_KINDS = new Map([
  ['escapeSequence_doubleQuote', '"'],
  ['escapeSequence_reverseSolidus', '\\'],
  ['escapeSequence_solidus', '/'],
  ['escapeSequence_backspace', '\b'],
  ['escapeSequence_formfeed', '\f'],
  ['escapeSequence_newline', '\n'],
  ['escapeSequence_carriageReturn', '\r'],
  ['escapeSequence_horizontalTab', '\t'],
  ['True', true],
  ['False', false],
  ['Null', null],
]);

// Node kinds that are converted by handing their source text to Number().
const NUMERIC_KINDS = new Set([
  'numberLiteral_withExponent',
  'numberLiteral_withoutExponent',
  'decimal_withFract',
  'decimal_withoutFract',
]);

/**
 * Turn the CST node behind `handle` into a plain JS value, using only the
 * CstView API: `kind`, `sourceString`, `unpack`, `forEachGroup` and
 * `onlyChild`.
 *
 * @param cst The CstView used to inspect nodes.
 * @param handle Handle of the node to convert.
 * @returns The JS value (object, array, string, number, boolean or null)
 *   that the node represents.
 */
function toJS(cst, handle) {
  const kind = cst.kind(handle);

  if (FIXED_VALUE_KINDS.has(kind)) return FIXED_VALUE_KINDS.get(kind);
  if (NUMERIC_KINDS.has(kind)) return Number(cst.sourceString(handle));

  // Empty containers get a fresh instance on every call.
  if (kind === 'Object_empty') return {};
  if (kind === 'Array_empty') return [];

  if (kind === 'doubleStringCharacter_nonEscaped') return cst.sourceString(handle);

  if (kind === 'Object_nonEmpty') {
    // "{" Pair ("," Pair)* "}"
    return cst.unpack(handle, (_open, firstPair, restPairs, _close) => {
      const result = {};
      const addPair = pairHandle => {
        const [name, value] = parsePair(cst, pairHandle);
        result[name] = value;
      };
      addPair(firstPair);
      cst.forEachGroup(restPairs, (_comma, pair) => {
        addPair(pair);
      });
      return result;
    });
  }

  if (kind === 'Array_nonEmpty') {
    // "[" Value ("," Value)* "]"
    return cst.unpack(handle, (_open, firstValue, restValues, _close) => {
      const items = [toJS(cst, firstValue)];
      cst.forEachGroup(restValues, (_comma, value) => {
        items.push(toJS(cst, value));
      });
      return items;
    });
  }

  if (kind === 'stringLiteral') {
    // "\"" doubleStringCharacter* "\""
    return cst.unpack(handle, (_open, chars, _close) => {
      const pieces = [];
      cst.forEachGroup(chars, char => {
        pieces.push(toJS(cst, char));
      });
      return pieces.join('');
    });
  }

  if (kind === 'doubleStringCharacter_escaped') {
    // "\\" escapeSequence
    return cst.unpack(handle, (_backslash, escSeq) => toJS(cst, escSeq));
  }

  if (kind === 'escapeSequence_codePoint') {
    // "u" fourHexDigits
    return cst.unpack(handle, (_u, fourHex) => {
      const codeUnit = parseInt(cst.sourceString(fourHex), 16);
      return String.fromCharCode(codeUnit);
    });
  }

  // Everything else is either a terminal (use its text verbatim) or a wrapper
  // nonterminal (Value, Object, Array, String, Number, Pair,
  // doubleStringCharacter, escapeSequence, etc.) whose value is that of its
  // single child.
  if (kind === CstKind.Terminal) return cst.sourceString(handle);
  return toJS(cst, cst.onlyChild(handle));
}

/**
 * Convert a `Pair` node into a `[key, value]` tuple of JS values.
 *
 * @param cst The CstView used to inspect nodes.
 * @param handle Handle of the Pair node.
 * @returns {Array} Two-element array: the converted key, then the converted value.
 */
function parsePair(cst, handle) {
  // Pair = String ":" Value
  const toEntry = (key, _colon, value) => [toJS(cst, key), toJS(cst, value)];
  return cst.unpack(handle, toEntry);
}

/**
 * Match `input` against grammar `g` and convert the resulting CST to a JS
 * value with `toJS`. The conversion runs inside the match result's `use`
 * callback, and whatever that callback returns is returned from here.
 *
 * @param g Grammar object exposing `match(input)`.
 * @param {string} input Text to parse.
 * @returns The JS value produced by `toJS` for the CST root.
 * @throws {Error} With the match result's `message` when the match failed.
 */
function parse(g, input) {
  const convertResult = result => {
    if (result.failed()) {
      throw new Error(result.message);
    }
    return toJS(result.cst, result.cst.root);
  };
  return g.match(input).use(convertResult);
}

// Grammar under test. Assigned once in the `before` hook below and then shared
// by every test in this file.
let jsonGrammar;

// Compile and load the JSON grammar source before any test runs.
test.before(async () => {
jsonGrammar = await compileAndLoad(jsonSource);
});

// An empty object literal converts to an empty JS object.
test('empty object', t => {
t.deepEqual(parse(jsonGrammar, '{}'), {});
});

// An empty array literal converts to an empty JS array.
test('empty array', t => {
t.deepEqual(parse(jsonGrammar, '[]'), []);
});

// Plain string literals, including the empty string.
test('strings', t => {
t.is(parse(jsonGrammar, '"hello"'), 'hello');
t.is(parse(jsonGrammar, '""'), '');
});

// Zero, positive and negative integers, a fraction, and both exponent spellings.
test('numbers', t => {
t.is(parse(jsonGrammar, '0'), 0);
t.is(parse(jsonGrammar, '42'), 42);
t.is(parse(jsonGrammar, '-1'), -1);
t.is(parse(jsonGrammar, '3.14'), 3.14);
t.is(parse(jsonGrammar, '1e10'), 1e10);
t.is(parse(jsonGrammar, '2.5E-3'), 2.5e-3);
});

// The three keyword literals.
test('booleans and null', t => {
t.is(parse(jsonGrammar, 'true'), true);
t.is(parse(jsonGrammar, 'false'), false);
t.is(parse(jsonGrammar, 'null'), null);
});

// An object with more than one pair, so the repeated ("," Pair) part is used.
test('simple object', t => {
t.deepEqual(parse(jsonGrammar, '{"key": "value", "num": 42}'), {
key: 'value',
num: 42,
});
});

// Arrays inside objects and objects inside arrays.
test('nested structures', t => {
const input = '{"a": [1, 2, {"b": true}], "c": null}';
t.deepEqual(parse(jsonGrammar, input), {
a: [1, 2, {b: true}],
c: null,
});
});

// The inputs are JS string literals, so `\\n` reaches the grammar as a
// backslash followed by `n`, i.e. a JSON escape sequence.
test('string escape sequences', t => {
t.is(parse(jsonGrammar, '"hello\\nworld"'), 'hello\nworld');
t.is(parse(jsonGrammar, '"tab\\there"'), 'tab\there');
t.is(parse(jsonGrammar, '"quote\\"end"'), 'quote"end');
t.is(parse(jsonGrammar, '"slash\\\\end"'), 'slash\\end');
t.is(parse(jsonGrammar, '"\\u0041"'), 'A');
});

// One array holding a number, a string, a boolean, null and a nested array.
test('array with mixed types', t => {
t.deepEqual(parse(jsonGrammar, '[1, "two", true, null, [3]]'), [1, 'two', true, null, [3]]);
});

// Three levels of object nesting around an array.
test('deeply nested', t => {
const input = '{"a": {"b": {"c": [1, 2, 3]}}}';
t.deepEqual(parse(jsonGrammar, input), {
a: {b: {c: [1, 2, 3]}},
});
});
2 changes: 2 additions & 0 deletions packages/runtime/ohm-js.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ export interface SeqNode<TChildren extends CstNodeChildren = CstNodeChildren> ex

// @public (undocumented)
export class SucceededMatchResult extends MatchResult {
// Warning: (ae-forgotten-export) The symbol "CstView" needs to be exported by the entry point index.d.ts
get cst(): CstView;
// (undocumented)
getCstRoot(): CstNode;
}
Expand Down
6 changes: 3 additions & 3 deletions packages/runtime/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
"types": "./dist/src/unstableDebug.d.ts",
"default": "./dist/src/unstableDebug.js"
},
"./cstReader": {
"types": "./dist/src/cstReader.d.ts",
"default": "./dist/src/cstReader.js"
"./cstView": {
"types": "./dist/src/cstView.d.ts",
"default": "./dist/src/cstView.js"
}
},
"files": ["dist"],
Expand Down
Loading
Loading