diff --git a/.github/workflows/pr-test.yaml b/.github/workflows/pr-test.yaml new file mode 100644 index 0000000..05cf822 --- /dev/null +++ b/.github/workflows/pr-test.yaml @@ -0,0 +1,44 @@ +name: PR Tests + +on: + pull_request: + branches: [ main ] + paths: + - 'packages/pdf-processor/**' + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Install dependencies and typecheck + run: | + cd packages/pdf-processor + bun install + bun tsc --noEmit + + - name: Build package + run: | + cd packages/pdf-processor + bun build src/index.ts --outdir dist --format esm --target node --external mammoth --external turndown --external jszip + bun tsc --project tsconfig.build.json + + - name: Run tests + run: | + cd packages/pdf-processor + bun test + env: + EK_MISTRAL_API_KEY: ${{ secrets.EK_MISTRAL_API_KEY }} diff --git a/packages/pdf-processor/data/receipt.pdf b/packages/pdf-processor/data/receipt.pdf new file mode 100644 index 0000000..29c5b77 Binary files /dev/null and b/packages/pdf-processor/data/receipt.pdf differ diff --git a/packages/pdf-processor/package.json b/packages/pdf-processor/package.json index 013b5c4..fa5280a 100644 --- a/packages/pdf-processor/package.json +++ b/packages/pdf-processor/package.json @@ -18,6 +18,7 @@ "build": "bun run clean && bun run build:esm && bun run build:types", "build:esm": "bun build src/index.ts --outdir dist --format esm --target node --external sharp --external ghostscript-node --external @ai-sdk/openai --external ai --external ollama-ai-provider --external pino --external zod", "build:types": "bun tsc --project tsconfig.build.json", + "typecheck": "bun tsc --noEmit", "test": "bun test", "prepublishOnly": "bun run build", "clean": "rm -rf dist" diff --git a/packages/pdf-processor/pnpm-lock.yaml b/packages/pdf-processor/pnpm-lock.yaml index 291833d..bb40a49 100644 --- a/packages/pdf-processor/pnpm-lock.yaml +++ b/packages/pdf-processor/pnpm-lock.yaml @@ -8,21 +8,15 @@ importers: .: dependencies: + '@ai-sdk/mistral': + specifier: ^2.0.2 + version: 2.0.12(zod@3.25.76) '@ai-sdk/openai': - specifier: ^1.0.0 - version: 1.3.24(zod@3.25.76) - '@types/bun': - specifier: ^1.2.19 - version: 1.2.19(@types/react@19.1.9) + specifier: 2.0.9 + version: 2.0.9(zod@3.25.76) ai: - specifier: ^4.1.0 - version: 4.3.19(react@19.1.1)(zod@3.25.76) - bun: - specifier: ^1.2.19 - version: 1.2.19 - bun-types: - specifier: latest - version: 1.2.9 + specifier: ^5.0.9 + version: 5.0.26(zod@3.25.76) ghostscript-node: specifier: ^1.5.0 version: 1.5.0 @@ -32,9 +26,6 @@ importers: pino: specifier: ^9.8.0 version: 9.8.0 - save-dev: - specifier: 0.0.1-security - version: 0.0.1-security sharp: specifier: ^0.34.1 version: 0.34.1 @@ -42,9 +33,18 @@ importers: specifier: ^3.23.8 version: 3.25.76 devDependencies: + '@types/bun': + specifier: ^1.2.19 + version: 1.2.19(@types/react@19.1.9) '@types/node': specifier: ^20.11.28 version: 20.17.30 + bun: + specifier: ^1.2.19 + version: 1.2.19 + bun-types: + specifier: latest + version: 1.2.9 pino-pretty: specifier: ^13.0.0 version: 13.1.1 @@ -54,11 +54,23 @@ importers: packages: - '@ai-sdk/openai@1.3.24': - resolution: {integrity: sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q==} + '@ai-sdk/gateway@1.0.15': + resolution: {integrity: sha512-xySXoQ29+KbGuGfmDnABx+O6vc7Gj7qugmj1kGpn0rW0rQNn6UKUuvscKMzWyv1Uv05GyC1vqHq8ZhEOLfXscQ==} engines: {node: '>=18'} peerDependencies: - zod: ^3.0.0 + zod: ^3.25.76 || ^4 + + '@ai-sdk/mistral@2.0.12': + resolution: {integrity: sha512-o5q253v7yGqB8YeyITEYe2Wt6iG3p1whdPdlZL2SfkE3bwWqjMzqx6GWqpXyGYeFUw5I8IF1squgHo+wWmU8IQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4 + + '@ai-sdk/openai@2.0.9': + resolution: {integrity: sha512-KLWQ4LWPgKH7V3KtbZ4n8pKkTyuujj+qLwphU5fg6d2WP86OQRJGMTi11UqALZ5LTABUVp3qH62CGGXzjnJV3w==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4 '@ai-sdk/provider-utils@2.2.8': resolution: {integrity: sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA==} @@ -66,25 +78,25 @@ packages: peerDependencies: zod: ^3.23.8 - '@ai-sdk/provider@1.1.3': - resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==} + '@ai-sdk/provider-utils@3.0.1': + resolution: {integrity: sha512-/iP1sKc6UdJgGH98OCly7sWJKv+J9G47PnTjIj40IJMUQKwDrUMyf7zOOfRtPwSuNifYhSoJQ4s1WltI65gJ/g==} engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4 - '@ai-sdk/react@1.2.12': - resolution: {integrity: sha512-jK1IZZ22evPZoQW3vlkZ7wvjYGYF+tRBKXtrcolduIkQ/m/sOAVcVeVDUDvh1T91xCnWCdUGCPZg2avZ90mv3g==} + '@ai-sdk/provider-utils@3.0.7': + resolution: {integrity: sha512-o3BS5/t8KnBL3ubP8k3w77AByOypLm+pkIL/DCw0qKkhDbvhCy+L3hRTGPikpdb8WHcylAeKsjgwOxhj4cqTUA==} engines: {node: '>=18'} peerDependencies: - react: ^18 || ^19 || ^19.0.0-rc - zod: ^3.23.8 - peerDependenciesMeta: - zod: - optional: true + zod: ^3.25.76 || ^4 - '@ai-sdk/ui-utils@1.2.11': - resolution: {integrity: sha512-3zcwCc8ezzFlwp3ZD15wAPjf2Au4s3vAbKsXQVyhxODHcmu0iyPO2Eua6D/vicq/AUm/BAo60r97O6HU+EI0+w==} + '@ai-sdk/provider@1.1.3': + resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==} + engines: {node: '>=18'} + + '@ai-sdk/provider@2.0.0': + resolution: {integrity: sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA==} engines: {node: '>=18'} - peerDependencies: - zod: ^3.23.8 '@emnapi/runtime@1.4.1': resolution: {integrity: sha512-LMshMVP0ZhACNjQNYXiU1iZJ6QCcv0lUdPDPugqGvCGXt5xtRVBPdtA0qU12pEXZzpWAhWlZYptfdAFq10DOVQ==} @@ -270,12 +282,12 @@ packages: cpu: [x64] os: [win32] + '@standard-schema/spec@1.0.0': + resolution: {integrity: sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==} + '@types/bun@1.2.19': resolution: {integrity: sha512-d9ZCmrH3CJ2uYKXQIUuZ/pUnTqIvLDS0SK7pFmbx8ma+ziH/FRMoAq5bYpRG7y+w1gl+HgyNZbtqgMq4W4e2Lg==} - '@types/diff-match-patch@1.0.36': - resolution: {integrity: sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==} - '@types/node@20.17.30': resolution: {integrity: sha512-7zf4YyHA+jvBNfVrk2Gtvs6x7E8V+YDW05bNfG2XkWDJfYRXrTiP/DsB2zSYTaHX0bGIujTBQdMVAhb+j7mwpg==} @@ -289,15 +301,11 @@ packages: resolution: {integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==} engines: {node: '>=8'} - ai@4.3.19: - resolution: {integrity: sha512-dIE2bfNpqHN3r6IINp9znguYdhIOheKW2LDigAMrgt/upT3B8eBGPSCblENvaZGoq+hxaN9fSMzjWpbqloP+7Q==} + ai@5.0.26: + resolution: {integrity: sha512-bGNtG+nYQ2U+5mzuLbxIg9WxGQJ2u5jv2gYgP8C+CJ1YI4qqIjvjOgGEZWzvNet8jiOGIlqstsht9aQefKzmBw==} engines: {node: '>=18'} peerDependencies: - react: ^18 || ^19 || ^19.0.0-rc - zod: ^3.23.8 - peerDependenciesMeta: - react: - optional: true + zod: ^3.25.76 || ^4 array-union@2.1.0: resolution: {integrity: sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==} @@ -331,10 +339,6 @@ packages: os: [darwin, linux, win32] hasBin: true - chalk@5.5.0: - resolution: {integrity: sha512-1tm8DTaJhPBG3bIkVeZt1iZM9GfSX2lzOeDVZH9R9ffRHpmHvxZ/QhgQH/aDTkswQVt+YHdXAdS/In/30OjCbg==} - engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} - clean-stack@2.2.0: resolution: {integrity: sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==} engines: {node: '>=6'} @@ -373,17 +377,10 @@ packages: resolution: {integrity: sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg==} engines: {node: '>=10'} - dequal@2.0.3: - resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} - engines: {node: '>=6'} - detect-libc@2.0.3: resolution: {integrity: sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==} engines: {node: '>=8'} - diff-match-patch@1.0.5: - resolution: {integrity: sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw==} - dir-glob@3.0.1: resolution: {integrity: sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==} engines: {node: '>=8'} @@ -391,6 +388,10 @@ packages: end-of-stream@1.4.5: resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} + eventsource-parser@3.0.5: + resolution: {integrity: sha512-bSRG85ZrMdmWtm7qkF9He9TNRzc/Bm99gEJMaQoHJ9E6Kv9QBbsldh2oMj7iXmYNEAVvNgvv5vPorG6W+XtBhQ==} + engines: {node: '>=20.0.0'} + fast-copy@3.0.2: resolution: {integrity: sha512-dl0O9Vhju8IrcLndv2eU4ldt1ftXMqqfgN4H1cpmGV7P6jeB9FwpN9a2c8DPGE1Ys88rNUJVYDHq73CGAGOPfQ==} @@ -489,11 +490,6 @@ packages: json-schema@0.4.0: resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} - jsondiffpatch@0.6.0: - resolution: {integrity: sha512-3QItJOXp2AP1uv7waBkao5nCvhEv+QmJAd38Ybq7wNI74Q+BBmnLn4EDKz6yI9xGAIQoUF87qHt+kc1IVxB4zQ==} - engines: {node: ^18.0.0 || >=20.0.0} - hasBin: true - jsonfile@6.1.0: resolution: {integrity: sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==} @@ -577,10 +573,6 @@ packages: quick-format-unescaped@4.0.4: resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==} - react@19.1.1: - resolution: {integrity: sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==} - engines: {node: '>=0.10.0'} - real-require@0.2.0: resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==} engines: {node: '>= 12.13.0'} @@ -601,9 +593,6 @@ packages: resolution: {integrity: sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==} engines: {node: '>=10'} - save-dev@0.0.1-security: - resolution: {integrity: sha512-k6knZTDNK8PKKbIqnvxiOveJinuw2LcQjqDoaorZWP9M5AR2EPsnpDeSbeoZZ0pHr5ze1uoaKdK8NBGQrJ34Uw==} - secure-json-parse@2.7.0: resolution: {integrity: sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==} @@ -637,11 +626,6 @@ packages: resolution: {integrity: sha512-1tB5mhVo7U+ETBKNf92xT4hrQa3pm0MZ0PQvuDnWgAAGHDsfp4lPSpiS6psrSiet87wyGPh9ft6wmhOMQ0hDiw==} engines: {node: '>=14.16'} - swr@2.3.4: - resolution: {integrity: sha512-bYd2lrhc+VarcpkgWclcUi92wYCpOgMws9Sd1hG1ntAu0NEy+14CbotuFjshBU2kt9rYj9TSmDcybpxpeTU1fg==} - peerDependencies: - react: ^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 - temp-dir@2.0.0: resolution: {integrity: sha512-aoBAniQmmwtcKp/7BzsH8Cxzv8OL736p7v1ihGb5e9DJ9kTwGWHrQrVB5+lfVDzfGrdRzXch+ig7LHaY1JTOrg==} engines: {node: '>=8'} @@ -653,10 +637,6 @@ packages: thread-stream@3.1.0: resolution: {integrity: sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A==} - throttleit@2.1.0: - resolution: {integrity: sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==} - engines: {node: '>=18'} - to-regex-range@5.0.1: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} @@ -684,11 +664,6 @@ packages: resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==} engines: {node: '>= 10.0.0'} - use-sync-external-store@1.5.0: - resolution: {integrity: sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==} - peerDependencies: - react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 - wrappy@1.0.2: resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} @@ -702,10 +677,22 @@ packages: snapshots: - '@ai-sdk/openai@1.3.24(zod@3.25.76)': + '@ai-sdk/gateway@1.0.15(zod@3.25.76)': dependencies: - '@ai-sdk/provider': 1.1.3 - '@ai-sdk/provider-utils': 2.2.8(zod@3.25.76) + '@ai-sdk/provider': 2.0.0 + '@ai-sdk/provider-utils': 3.0.7(zod@3.25.76) + zod: 3.25.76 + + '@ai-sdk/mistral@2.0.12(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 2.0.0 + '@ai-sdk/provider-utils': 3.0.7(zod@3.25.76) + zod: 3.25.76 + + '@ai-sdk/openai@2.0.9(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 2.0.0 + '@ai-sdk/provider-utils': 3.0.1(zod@3.25.76) zod: 3.25.76 '@ai-sdk/provider-utils@2.2.8(zod@3.25.76)': @@ -715,26 +702,28 @@ snapshots: secure-json-parse: 2.7.0 zod: 3.25.76 - '@ai-sdk/provider@1.1.3': + '@ai-sdk/provider-utils@3.0.1(zod@3.25.76)': dependencies: - json-schema: 0.4.0 + '@ai-sdk/provider': 2.0.0 + '@standard-schema/spec': 1.0.0 + eventsource-parser: 3.0.5 + zod: 3.25.76 + zod-to-json-schema: 3.24.6(zod@3.25.76) - '@ai-sdk/react@1.2.12(react@19.1.1)(zod@3.25.76)': + '@ai-sdk/provider-utils@3.0.7(zod@3.25.76)': dependencies: - '@ai-sdk/provider-utils': 2.2.8(zod@3.25.76) - '@ai-sdk/ui-utils': 1.2.11(zod@3.25.76) - react: 19.1.1 - swr: 2.3.4(react@19.1.1) - throttleit: 2.1.0 - optionalDependencies: + '@ai-sdk/provider': 2.0.0 + '@standard-schema/spec': 1.0.0 + eventsource-parser: 3.0.5 zod: 3.25.76 - '@ai-sdk/ui-utils@1.2.11(zod@3.25.76)': + '@ai-sdk/provider@1.1.3': dependencies: - '@ai-sdk/provider': 1.1.3 - '@ai-sdk/provider-utils': 2.2.8(zod@3.25.76) - zod: 3.25.76 - zod-to-json-schema: 3.24.6(zod@3.25.76) + json-schema: 0.4.0 + + '@ai-sdk/provider@2.0.0': + dependencies: + json-schema: 0.4.0 '@emnapi/runtime@1.4.1': dependencies: @@ -866,14 +855,14 @@ snapshots: '@oven/bun-windows-x64@1.2.19': optional: true + '@standard-schema/spec@1.0.0': {} + '@types/bun@1.2.19(@types/react@19.1.9)': dependencies: bun-types: 1.2.19(@types/react@19.1.9) transitivePeerDependencies: - '@types/react' - '@types/diff-match-patch@1.0.36': {} - '@types/node@20.17.30': dependencies: undici-types: 6.19.8 @@ -891,17 +880,13 @@ snapshots: clean-stack: 2.2.0 indent-string: 4.0.0 - ai@4.3.19(react@19.1.1)(zod@3.25.76): + ai@5.0.26(zod@3.25.76): dependencies: - '@ai-sdk/provider': 1.1.3 - '@ai-sdk/provider-utils': 2.2.8(zod@3.25.76) - '@ai-sdk/react': 1.2.12(react@19.1.1)(zod@3.25.76) - '@ai-sdk/ui-utils': 1.2.11(zod@3.25.76) + '@ai-sdk/gateway': 1.0.15(zod@3.25.76) + '@ai-sdk/provider': 2.0.0 + '@ai-sdk/provider-utils': 3.0.7(zod@3.25.76) '@opentelemetry/api': 1.9.0 - jsondiffpatch: 0.6.0 zod: 3.25.76 - optionalDependencies: - react: 19.1.1 array-union@2.1.0: {} @@ -942,8 +927,6 @@ snapshots: '@oven/bun-windows-x64': 1.2.19 '@oven/bun-windows-x64-baseline': 1.2.19 - chalk@5.5.0: {} - clean-stack@2.2.0: {} color-convert@2.0.1: @@ -983,12 +966,8 @@ snapshots: rimraf: 3.0.2 slash: 3.0.0 - dequal@2.0.3: {} - detect-libc@2.0.3: {} - diff-match-patch@1.0.5: {} - dir-glob@3.0.1: dependencies: path-type: 4.0.0 @@ -997,6 +976,8 @@ snapshots: dependencies: once: 1.4.0 + eventsource-parser@3.0.5: {} + fast-copy@3.0.2: {} fast-glob@3.3.3: @@ -1089,12 +1070,6 @@ snapshots: json-schema@0.4.0: {} - jsondiffpatch@0.6.0: - dependencies: - '@types/diff-match-patch': 1.0.36 - chalk: 5.5.0 - diff-match-patch: 1.0.5 - jsonfile@6.1.0: dependencies: universalify: 2.0.1 @@ -1189,8 +1164,6 @@ snapshots: quick-format-unescaped@4.0.4: {} - react@19.1.1: {} - real-require@0.2.0: {} reusify@1.1.0: {} @@ -1205,8 +1178,6 @@ snapshots: safe-stable-stringify@2.5.0: {} - save-dev@0.0.1-security: {} - secure-json-parse@2.7.0: {} secure-json-parse@4.0.0: {} @@ -1254,12 +1225,6 @@ snapshots: strip-json-comments@5.0.3: {} - swr@2.3.4(react@19.1.1): - dependencies: - dequal: 2.0.3 - react: 19.1.1 - use-sync-external-store: 1.5.0(react@19.1.1) - temp-dir@2.0.0: {} tempy@1.0.1: @@ -1274,8 +1239,6 @@ snapshots: dependencies: real-require: 0.2.0 - throttleit@2.1.0: {} - to-regex-range@5.0.1: dependencies: is-number: 7.0.0 @@ -1295,10 +1258,6 @@ snapshots: universalify@2.0.1: {} - use-sync-external-store@1.5.0(react@19.1.1): - dependencies: - react: 19.1.1 - wrappy@1.0.2: {} zod-to-json-schema@3.24.6(zod@3.25.76): diff --git a/packages/pdf-processor/src/api/handlers.ts b/packages/pdf-processor/src/api/handlers.ts index be8b5f7..35e8af7 100644 --- a/packages/pdf-processor/src/api/handlers.ts +++ b/packages/pdf-processor/src/api/handlers.ts @@ -1,6 +1,6 @@ import { aiVisionProcessor, extractInvoice, extractTables } from "../core/vision"; import { createModuleLogger } from "../utils/logger"; -import { validateExtractRequest, VisionExtractRequest } from './validation'; +import { validateExtractRequest } from './validation'; import { createTempFile } from './utils'; const logger = createModuleLogger('api-handlers'); @@ -57,7 +57,7 @@ export async function handleExtractRequest(req: Request, corsHeaders: Record t !== 'null'); + const nonNullType = prop.type.find((t: any) => t !== 'null'); return this.getZodTypeForString(nonNullType).nullable(); } } @@ -90,6 +90,8 @@ export class SchemaFactory { return TablesOnlySchema; case 'basic': + return ComprehensiveInvoiceSchema; + case 'simple': return ComprehensiveInvoiceSchema; diff --git a/packages/pdf-processor/src/core/schemas/invoice.ts b/packages/pdf-processor/src/core/schemas/invoice.ts index 574ed50..5c584c6 100644 --- a/packages/pdf-processor/src/core/schemas/invoice.ts +++ b/packages/pdf-processor/src/core/schemas/invoice.ts @@ -117,6 +117,8 @@ export const BasicReceiptSchema = z.object({ items: z.array(z.object({ name: z.string().nullable().optional(), price: z.number().nullable().optional(), + quantity: z.number().nullable().optional(), + total: z.number().nullable().optional(), })).optional(), }); diff --git a/packages/pdf-processor/src/core/vision/worker-manager.ts b/packages/pdf-processor/src/core/vision/worker-manager.ts index 368965e..22237c7 100644 --- a/packages/pdf-processor/src/core/vision/worker-manager.ts +++ b/packages/pdf-processor/src/core/vision/worker-manager.ts @@ -1,7 +1,7 @@ import path from "path"; import { createModuleLogger } from "../../utils/logger"; import { workerPoolManager } from "../workers/worker-pool-manager"; -import { AIVisionProcessingOptions } from './processor'; +import type { AIVisionProcessingOptions } from './processor'; import type { PdfExtractionTask, PdfExtractionResult diff --git a/packages/pdf-processor/tests/invoice-lib.test.ts b/packages/pdf-processor/tests/invoice-lib.test.ts index eaebf18..59185fe 100644 --- a/packages/pdf-processor/tests/invoice-lib.test.ts +++ b/packages/pdf-processor/tests/invoice-lib.test.ts @@ -9,43 +9,43 @@ import { describe, test, expect } from 'bun:test'; import path from 'path'; describe('Configuration pdfProcessor', () => { - const testPdfPath = path.join(__dirname, '../data/facture3.pdf'); - const testApiKey = process.env.TEST_SCALEWAY_KEY; + const testPdfPath = path.join(__dirname, '../data/invoice_1.pdf'); + const testApiKey = process.env.EK_MISTRAL_API_KEY; test('devrait extraire une facture avec configuration objet', async () => { // Skip si pas de clé API de test if (!testApiKey) { - console.warn('⚠️ TEST_SCALEWAY_KEY non définie, test ignoré'); + console.warn('⚠️ TEST API KEY non définie, test ignoré'); return; } // Configuration via objet (priorité sur les env vars) const pdfProcessor: PdfProcessorConfig = { providers: { - scaleway: { - model: "mistral-small-3.1-24b-instruct-2503", + mistral: { + model: 'mistral-medium-latest', apiKey: testApiKey, - baseURL: "https://api.scaleway.ai/v1" } } }; console.log('📄 Test d\'extraction avec configuration objet...'); - + const result = await extractInvoicePdf(testPdfPath, { - provider: 'scaleway', - pdfProcessor + provider: 'mistral', + pdfProcessor, + query: '*' }); // Vérifications de base expect(result).toBeDefined(); expect(typeof result).toBe('object'); - + // Vérifications du schéma ComprehensiveInvoice if (result.document_info) { expect(result.document_info.document_type).toBeDefined(); } - + if (result.seller_info) { expect(result.seller_info.name).toBeDefined(); } diff --git a/packages/pdf-processor/tests/receipt.test.ts b/packages/pdf-processor/tests/receipt.test.ts new file mode 100644 index 0000000..f492f37 --- /dev/null +++ b/packages/pdf-processor/tests/receipt.test.ts @@ -0,0 +1,90 @@ + +import { extractReceiptPdf } from '../src/lib'; +import type { PdfProcessorConfig } from '../src/lib'; +import { describe, test, expect } from 'bun:test'; +import path from 'path'; + + +describe('Configuration pdfProcessor', () => { + const testPdfPath = path.join(__dirname, '../data/receipt.pdf'); + const testApiKey = process.env.EK_MISTRAL_API_KEY; + + test('devrait extraire une facture avec configuration objet', async () => { + // Skip si pas de clé API de test + if (!testApiKey) { + console.warn('⚠️ TEST API KEY non définie, test ignoré'); + return; + } + + // Configuration via objet (priorité sur les env vars) + const pdfProcessor: PdfProcessorConfig = { + providers: { + mistral: { + model: 'mistral-medium-latest', + apiKey: testApiKey, + } + } + }; + + console.log('📄 Test d\'extraction avec configuration objet...'); + + const result = await extractReceiptPdf(testPdfPath, { + provider: 'mistral', + pdfProcessor, + }); + + // Vérifications de base + expect(result).toBeDefined(); + expect(typeof result).toBe('object'); + + // Vérifications des champs obligatoires + expect(result.merchant_name).toBeDefined(); + expect(result.merchant_name).not.toBeNull(); + expect(typeof result.merchant_name).toBe('string'); + expect(result.merchant_name!.length).toBeGreaterThan(0); + + expect(result.transaction_date).toBeDefined(); + expect(typeof result.transaction_date).toBe('string'); + + expect(result.total_amount).toBeDefined(); + expect(result.total_amount).not.toBeNull(); + expect(typeof result.total_amount).toBe('number'); + expect(result.total_amount!).toBeGreaterThan(0); + + expect(result.currency).toBeDefined(); + expect(typeof result.currency).toBe('string'); + + expect(result.items).toBeDefined(); + expect(result.items).not.toBeUndefined(); + expect(Array.isArray(result.items)).toBe(true); + expect(result.items!.length).toBeGreaterThan(0); + + // Vérifications des items + result.items!.forEach(item => { + expect(item.name).toBeDefined(); + expect(item.name).not.toBeNull(); + expect(typeof item.name).toBe('string'); + expect(item.name!.length).toBeGreaterThan(0); + + expect(item.price).toBeDefined(); + expect(typeof item.price).toBe('number'); + expect(item.price).toBeGreaterThan(0); + + expect(item.quantity).toBeDefined(); + expect(typeof item.quantity).toBe('number'); + expect(item.quantity).toBeGreaterThan(0); + + expect(item.total).toBeDefined(); + expect(item.total).not.toBeNull(); + expect(typeof item.total).toBe('number'); + expect(item.total!).toBeGreaterThan(0); + }); + + // Vérification de cohérence: somme des items = total + const itemsTotal = result.items!.reduce((sum, item) => sum + (item.total || 0), 0); + expect(Math.abs(itemsTotal - (result.total_amount || 0))).toBeLessThanOrEqual(0.01); + + console.log('✅ Extraction réussie avec configuration objet'); + console.log('📊 Résultat:', JSON.stringify(result, null, 2)); + }, 120000); // Timeout 120 secondes +}); \ No newline at end of file