From e24b9814eff85aa3458e8efc6d59abe3ce3a5831 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 17 Apr 2026 12:51:31 -0700 Subject: [PATCH] feat: add engine comparison benchmark and fix Flare package name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add examples/benchmark/ with a standalone HTML demo that benchmarks MLC (WebGPU), Transformers.js (ONNX/WASM), and Flare (GGUF/WASM) engines side-by-side on the same model and prompt. Measures model load time, TTFT, and tokens/sec with configurable runs and warmup. - Rename @aspect/flare to @sauravpanda/flare throughout the codebase — the package is published on npm as @sauravpanda/flare@0.2.0, not under the @aspect scope. Closes #299 Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/benchmark/index.html | 988 +++++++++++++++++++++++ examples/benchmark/serve.json | 12 + src/engines/flare-engine-wrapper.test.ts | 10 +- src/engines/flare-engine-wrapper.ts | 16 +- 4 files changed, 1013 insertions(+), 13 deletions(-) create mode 100644 examples/benchmark/index.html create mode 100644 examples/benchmark/serve.json diff --git a/examples/benchmark/index.html b/examples/benchmark/index.html new file mode 100644 index 0000000..fb1a5d4 --- /dev/null +++ b/examples/benchmark/index.html @@ -0,0 +1,988 @@ + + + + + + BrowserAI — Engine Benchmark + + + +
+ +
+

BrowserAI Engine Benchmark

+

+ Compare inference performance across MLC (WebGPU), Transformers.js (ONNX/WASM), and Flare (GGUF/WASM) engines. + All inference runs locally in your browser. +

+
+
+ + +
+

Configuration

+ +
+ + +
+ + +
+ + + +
+ + + + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + + +
+ + +
+ + +
+

Log

+
+
+ + + + + + +
+ + + + + + diff --git a/examples/benchmark/serve.json b/examples/benchmark/serve.json new file mode 100644 index 0000000..c6e9896 --- /dev/null +++ b/examples/benchmark/serve.json @@ -0,0 +1,12 @@ +{ + "headers": [ + { + "source": "**/*", + "headers": [ + { "key": "Cross-Origin-Opener-Policy", "value": "same-origin" }, + { "key": "Cross-Origin-Embedder-Policy", "value": "credentialless" }, + { "key": "Cross-Origin-Resource-Policy", "value": "cross-origin" } + ] + } + ] +} diff --git a/src/engines/flare-engine-wrapper.test.ts b/src/engines/flare-engine-wrapper.test.ts index fb50533..d9f47cb 100644 --- a/src/engines/flare-engine-wrapper.test.ts +++ b/src/engines/flare-engine-wrapper.test.ts @@ -1,8 +1,8 @@ /** * FlareEngineWrapper unit tests. These cover the pure-logic surface of the * engine (constructor, dispose, cache-key hashing, error guards) without - * hitting the @aspect/flare WASM runtime or network — those require a real - * browser environment and a published @aspect/flare package. + * hitting the @sauravpanda/flare WASM runtime or network — those require a real + * browser environment and a published @sauravpanda/flare package. */ import { FlareEngineWrapper } from './flare-engine-wrapper'; @@ -56,7 +56,7 @@ describe('FlareEngineWrapper', () => { expect(() => engine.dispose()).not.toThrow(); }); - test('loadModel() fails loudly when @aspect/flare is not installed', async () => { + test('loadModel() fails loudly when @sauravpanda/flare is not installed', async () => { const engine = new FlareEngineWrapper(); const cfg: FlareConfig = { engine: 'flare', @@ -67,9 +67,9 @@ describe('FlareEngineWrapper', () => { defaultQuantization: 'Q4_K_M', url: 'https://example.com/test.gguf', }; - // @aspect/flare isn't installed in the test env, so the dynamic import + // @sauravpanda/flare isn't installed in the test env, so the dynamic import // inside importFlare() should reject with a clear install instruction. 
- await expect(engine.loadModel(cfg)).rejects.toThrow(/@aspect\/flare/i); + await expect(engine.loadModel(cfg)).rejects.toThrow(/@sauravpanda\/flare/i); }); }); diff --git a/src/engines/flare-engine-wrapper.ts b/src/engines/flare-engine-wrapper.ts index d554d21..34b6ef3 100644 --- a/src/engines/flare-engine-wrapper.ts +++ b/src/engines/flare-engine-wrapper.ts @@ -5,15 +5,15 @@ * (no TVM compilation step). It supports WebGPU acceleration, OPFS caching for * instant repeat loads, LoRA adapter merging, and progressive model loading. * - * The `@aspect/flare` npm package must be installed for this engine to work: - * npm install @aspect/flare + * The `@sauravpanda/flare` npm package must be installed for this engine to work: + * npm install @sauravpanda/flare * * Resolves issues: #293 #295 #296 #297 #298 #300 */ import { FlareConfig } from '../config/models/types'; -// Flare WASM API types (from @aspect/flare) +// Flare WASM API types (from @sauravpanda/flare) interface FlareEngineWasm { load(bytes: Uint8Array): FlareEngineInstance; } @@ -272,7 +272,7 @@ export class FlareEngineWrapper { * On repeat calls: loads instantly from the OPFS cache (<100 ms). */ async loadModel(modelConfig: FlareConfig, options: FlareLoadOptions = {}): Promise { - // Dynamically import @aspect/flare — fails gracefully if not installed + // Dynamically import @sauravpanda/flare — fails gracefully if not installed this.flare = await this.importFlare(); const url = options.url ?? modelConfig.url; @@ -521,15 +521,15 @@ export class FlareEngineWrapper { private async importFlare(): Promise { try { // Dynamic import so the package is optional — BrowserAI still works - // without @aspect/flare as long as users don't select the Flare engine. - const mod = await import('@aspect/flare' as string); + // without @sauravpanda/flare as long as users don't select the Flare engine. 
+ const mod = await import('@sauravpanda/flare' as string); // Initialise the WASM module await (mod as unknown as { default: () => Promise }).default(); return mod as unknown as FlareModule; } catch (err) { throw new Error( - '[Flare] Could not load @aspect/flare. ' + - 'Install it with: npm install @aspect/flare\n' + + '[Flare] Could not load @sauravpanda/flare. ' + + 'Install it with: npm install @sauravpanda/flare\n' + `Original error: ${err}`, ); }