diff --git a/package-lock.json b/package-lock.json
index 7b5b40f0b5..5d5d5bc242 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -37,6 +37,7 @@
         "match-sorter": "^8.0.0",
         "mathjax": "^3.2.2",
         "mime-types": "^2.1.35",
+        "mupdf": "github:edrlab/mupdf.js",
         "nanoid": "^5.0.8",
         "node-fetch": "^3.3.2",
         "proxy-agent": "^6.4.0",
@@ -19736,6 +19737,11 @@
         "multicast-dns": "cli.js"
       }
     },
+    "node_modules/mupdf": {
+      "version": "1.0.0",
+      "resolved": "git+ssh://git@github.com/edrlab/mupdf.js.git#928cad2fcf7db5e3bd46070f140a2713f873bb86",
+      "license": "AGPL-3.0-or-later"
+    },
     "node_modules/nanoid": {
       "version": "5.0.8",
       "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-5.0.8.tgz",
diff --git a/package.json b/package.json
index 3cf7c51d3c..39b2171ca2 100644
--- a/package.json
+++ b/package.json
@@ -291,6 +291,7 @@
     "match-sorter": "^8.0.0",
     "mathjax": "^3.2.2",
     "mime-types": "^2.1.35",
+    "mupdf": "github:edrlab/mupdf.js",
     "nanoid": "^5.0.8",
     "node-fetch": "^3.3.2",
     "proxy-agent": "^6.4.0",
diff --git a/src/main/pdf/extract.ts b/src/main/pdf/extract.ts
index 2fc18805d2..24715e9c97 100644
--- a/src/main/pdf/extract.ts
+++ b/src/main/pdf/extract.ts
@@ -13,11 +13,78 @@ import { encodeURIComponent_RFC3986 } from "@r2-utils-js/_utils/http/UrlUtils";
 
 import { IInfo } from "./extract.type";
 
+import { readFile } from "node:fs/promises";
+
+import * as mupdfjs from "mupdf";
+
 const debug = debug_("readium-desktop:main/pdf/extract/index.ts");
 debug("_");
 
 type TExtractPdfData = [data: IInfo | undefined, coverPNG: Buffer | undefined];
-export const extractPDFData =
+
+/**
+ * Reads a PDF file and extracts its document-information metadata together
+ * with a PNG rendering of the first page (returned as the cover image).
+ *
+ * @param pdfPath path of the PDF file on disk
+ * @returns `[info, coverPNG]`; `coverPNG` is `undefined` when the document has
+ * no page, and both entries are `undefined` when reading, parsing or rendering fails
+ */
+export const extractPDFData = async (pdfPath: string): Promise<TExtractPdfData> => {
+
+    try {
+        const pdfBuffer = await readFile(pdfPath);
+
+        const doc = mupdfjs.PDFDocument.openDocument(pdfBuffer, "application/pdf");
+        try {
+            const numberOfPages = doc.countPages();
+
+            const info: IInfo = {
+                Title: doc.getMetaData("info:Title"),
+                Subject: doc.getMetaData("info:Subject"),
+                Keywords: doc.getMetaData("info:Keywords"),
+                Author: doc.getMetaData("info:Author"),
+                Creator: doc.getMetaData("info:Creator"),
+                Producer: doc.getMetaData("info:Producer"),
+                CreationDate: doc.getMetaData("info:CreationDate"),
+                ModDate: doc.getMetaData("info:ModDate"),
+                numberOfPages,
+            };
+
+            // Without any page there is nothing to render: keep the metadata, skip the cover.
+            if (numberOfPages < 1) {
+                return [info, undefined];
+            }
+
+            const page = new mupdfjs.PDFPage(doc, 0);
+            try {
+                const pixmap = page.toPixmap(mupdfjs.Matrix.identity, mupdfjs.ColorSpace.DeviceRGB, false, true);
+                try {
+                    // Buffer.from() copies the PNG bytes, so the result does not depend on the pixmap afterwards.
+                    return [info, Buffer.from(pixmap.asPNG())];
+                } finally {
+                    pixmap.destroy();
+                }
+            } finally {
+                page.destroy();
+            }
+        } finally {
+            // Release the native MuPDF resources explicitly instead of waiting for garbage collection.
+            doc.destroy();
+        }
+    } catch (e) {
+
+        debug("####");
+        debug("####");
+        debug(e);
+        debug("####");
+        debug("####");
+    }
+
+    return [undefined, undefined];
+};
+
+export const extractPDFDataPdfjs =
     async (pdfPath: string)
     : Promise<TExtractPdfData> => {
 
diff --git a/src/main/pdf/extract.type.ts b/src/main/pdf/extract.type.ts
index f5cc73f683..cdf856a6ed 100644
--- a/src/main/pdf/extract.type.ts
+++ b/src/main/pdf/extract.type.ts
@@ -7,10 +7,10 @@
 
 export interface IInfo {
     PDFFormatVersion?: string;
-    IsAcroFormPresent?: boolean;
-    IsCollectionPresent?: boolean;
-    IsLinearized?: boolean;
-    IsXFAPresent?: boolean;
+    // IsAcroFormPresent?: boolean;
+    // IsCollectionPresent?: boolean;
+    // IsLinearized?: boolean;
+    // IsXFAPresent?: boolean;
     Title?: string;
     Subject?: string;
     Keywords?: string;
diff --git a/tsconfig.json b/tsconfig.json
index 90206474a7..1f75d587cc 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -44,7 +44,7 @@
         "removeComments": true,
         "skipLibCheck": false,
         "module": "ES2020",
-        "moduleResolution": "Node",
+        "moduleResolution": "node10",
         "lib": [
             "es2020",
             "dom",
diff --git a/webpack.config.main.js b/webpack.config.main.js
index 4402d982ca..ad221403d1 100644
--- a/webpack.config.main.js
+++ b/webpack.config.main.js
@@ -45,7 +45,7 @@ const _externalsCache = new Set();
 if (nodeEnv !== "production") {
     const nodeExternals = require("webpack-node-externals");
     const neFunc = nodeExternals({
-        allowlist: ["timeout-signal", "nanoid", "normalize-url", "node-fetch", "data-uri-to-buffer", /^fetch-blob/, /^formdata-polyfill/],
+        allowlist: ["timeout-signal", "nanoid", "normalize-url", "node-fetch", "mupdf", "data-uri-to-buffer", /^fetch-blob/, /^formdata-polyfill/],
         importType: function (moduleName) {
             if (!_externalsCache.has(moduleName)) {
                 console.log(`WEBPACK EXTERNAL (MAIN): [${moduleName}]`);