diff --git a/.changeset/add-markdown-for-agents.md b/.changeset/add-markdown-for-agents.md new file mode 100644 index 000000000..0d2c1b619 --- /dev/null +++ b/.changeset/add-markdown-for-agents.md @@ -0,0 +1,5 @@ +--- +'@hono/markdown-for-agents': minor +--- + +Add markdown-for-agents middleware diff --git a/packages/markdown-for-agents/README.md b/packages/markdown-for-agents/README.md new file mode 100644 index 000000000..c4296ca0a --- /dev/null +++ b/packages/markdown-for-agents/README.md @@ -0,0 +1,85 @@ +# @hono/markdown-for-agents + +Hono middleware for [markdown-for-agents](https://www.npmjs.com/package/markdown-for-agents) — converts HTML responses to clean, token-efficient Markdown for AI agents. + +> `markdown-for-agents` is an ESM-only dependency. + +When a client sends `Accept: text/markdown`, HTML responses are automatically converted to Markdown — typically saving 80–90% of tokens. Normal browser requests pass through untouched. + +## Install + +```bash +npm install @hono/markdown-for-agents +# yarn add @hono/markdown-for-agents +``` + +## Usage + +```ts +import { Hono } from 'hono' +import { markdown } from '@hono/markdown-for-agents' + +const app = new Hono() +app.use(markdown()) + +app.get('/', (c) => { + return c.html('

<h1>Hello</h1>

') +}) + +export default app +``` + +```bash +# Normal HTML response +curl http://localhost:3000 + +# Markdown response for AI agents +curl -H "Accept: text/markdown" http://localhost:3000 +``` + +## How it works + +The middleware uses content negotiation. When a client sends `Accept: text/markdown`, HTML responses are automatically converted to Markdown. The response includes: + +- `Content-Type: text/markdown; charset=utf-8` +- `x-markdown-tokens` header with the token count +- `ETag` header with a content hash for cache validation +- `Vary: Accept` header so CDNs cache HTML and Markdown separately +- `content-signal` header with publisher consent signals (when configured) + +## Options + +Accepts all [`markdown-for-agents` options](https://www.npmjs.com/package/markdown-for-agents#options): + +```ts +app.use( + markdown({ + // Strip nav, ads, sidebars, cookie banners + extract: true, + + // Resolve relative URLs + baseUrl: 'https://example.com', + + // Remove duplicate content blocks + deduplicate: true, + + // Custom token counter (e.g. 
tiktoken) + tokenCounter: (text) => ({ + tokens: enc.encode(text).length, + characters: text.length, + words: text.split(/\s+/).filter(Boolean).length, + }), + + // Publisher consent signal header + contentSignal: { aiTrain: true, search: true, aiInput: true }, + }) +) +``` + +## Author + +Konstantin Konstantinov + +## License + +MIT diff --git a/packages/markdown-for-agents/deno.json b/packages/markdown-for-agents/deno.json new file mode 100644 index 000000000..5d90a0375 --- /dev/null +++ b/packages/markdown-for-agents/deno.json @@ -0,0 +1,16 @@ +{ + "name": "@hono/markdown-for-agents", + "version": "0.1.0", + "license": "MIT", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "hono": "jsr:@hono/hono@^4.8.3", + "markdown-for-agents": "npm:markdown-for-agents@^1.3.1" + }, + "publish": { + "include": ["deno.json", "README.md", "src/**/*.ts"], + "exclude": ["src/**/*.test.ts"] + } +} diff --git a/packages/markdown-for-agents/package.json b/packages/markdown-for-agents/package.json new file mode 100644 index 000000000..0584861fb --- /dev/null +++ b/packages/markdown-for-agents/package.json @@ -0,0 +1,51 @@ +{ + "name": "@hono/markdown-for-agents", + "version": "0.1.0", + "description": "Hono middleware that converts HTML responses to Markdown for AI agents via Accept: text/markdown", + "type": "module", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "files": [ + "dist" + ], + "scripts": { + "build": "tsdown", + "format": "prettier --check . 
--ignore-path ../../.gitignore", + "lint": "eslint", + "typecheck": "tsc -b tsconfig.json", + "test": "vitest", + "version:jsr": "yarn version:set $npm_package_version" + }, + "exports": { + ".": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "license": "MIT", + "publishConfig": { + "registry": "https://registry.npmjs.org", + "access": "public", + "provenance": true + }, + "repository": { + "type": "git", + "url": "git+https://github.com/honojs/middleware.git", + "directory": "packages/markdown-for-agents" + }, + "homepage": "https://github.com/honojs/middleware", + "dependencies": { + "markdown-for-agents": "^1.3.1" + }, + "peerDependencies": { + "hono": ">=4.0.0" + }, + "devDependencies": { + "hono": "^4.11.5", + "tsdown": "^0.15.9", + "typescript": "^5.9.3", + "vitest": "^4.1.0-beta.1" + } +} diff --git a/packages/markdown-for-agents/src/index.test.ts b/packages/markdown-for-agents/src/index.test.ts new file mode 100644 index 000000000..d2db6c0ce --- /dev/null +++ b/packages/markdown-for-agents/src/index.test.ts @@ -0,0 +1,386 @@ +import { Hono } from 'hono' +import { describe, it, expect, vi } from 'vitest' +import { markdown } from './index' +import type { MiddlewareOptions } from './index' + +// --------------------------------------------------------------------------- +// Test harness helpers +// --------------------------------------------------------------------------- + +function createMockContext( + acceptHeader: string, + responseBody: string, + responseContentType: string +) { + const resHeaders = new Headers({ + 'content-type': responseContentType, + }) + + const context = { + req: { + header: (name: string): string | undefined => { + if (name === 'accept') { + return acceptHeader + } + return undefined + }, + }, + res: new Response(responseBody, { headers: resHeaders }), + } + + return context +} + +type MockContext = ReturnType + +interface HeaderTestHarness { + send: ( + options: MiddlewareOptions | 
undefined, + accept: string, + contentType: string, + body: string, + extraHeaders?: Record + ) => Promise<{ getHeader: (name: string) => string | null | undefined }> +} + +const honoHarness: HeaderTestHarness = { + async send(options, accept, contentType, body, extraHeaders) { + const mw = markdown(options) + const resHeaders = new Headers({ 'content-type': contentType }) + if (extraHeaders) { + for (const [k, v] of Object.entries(extraHeaders)) { + resHeaders.set(k, v) + } + } + const c: MockContext = { + req: { + header: (name: string): string | undefined => { + if (name === 'accept') { + return accept + } + return undefined + }, + }, + res: new Response(body, { headers: resHeaders }), + } + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + return { getHeader: (name: string) => c.res.headers.get(name) } + }, +} + +// --------------------------------------------------------------------------- +// Unit tests +// --------------------------------------------------------------------------- + +describe('hono middleware', () => { + it('converts HTML to markdown when Accept: text/markdown', async () => { + const mw = markdown() + const c = createMockContext('text/markdown', '

Title

Body

', 'text/html') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + const body = await c.res.text() + expect(body).toContain('# Title') + expect(body).toContain('Body') + expect(c.res.headers.get('content-type')).toBe('text/markdown; charset=utf-8') + expect(c.res.headers.get('x-markdown-tokens')).toBeTruthy() + }) + + it('passes through when Accept is not text/markdown', async () => { + const mw = markdown() + const c = createMockContext('text/html', '

Title

', 'text/html') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + expect(next).toHaveBeenCalled() + }) + + it('passes through non-HTML responses', async () => { + const mw = markdown() + const c = createMockContext('text/markdown', '{"ok":true}', 'application/json') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + expect(c.res.headers.get('content-type')).toBe('application/json') + }) + + it('supports custom token header', async () => { + const mw = markdown({ tokenHeader: 'x-tokens' }) + const c = createMockContext('text/markdown', '

Hello

', 'text/html') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + expect(c.res.headers.get('x-tokens')).toBeTruthy() + expect(c.res.headers.get('x-markdown-tokens')).toBeNull() + }) + + describe('ETag header', () => { + it('sets ETag on converted responses', async () => { + const mw = markdown() + const c = createMockContext('text/markdown', '

Title

', 'text/html') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + expect(c.res.headers.get('etag')).toMatch(/^".+"$/) + }) + + it('does not set ETag on pass-through responses', async () => { + const mw = markdown() + const c = createMockContext('text/html', '

Title

', 'text/html') + + const next = vi.fn().mockResolvedValue(undefined) + // @ts-expect-error -- mock context is intentionally partial + await mw(c, next) + + expect(c.res.headers.get('etag')).toBeNull() + }) + }) + + // ----------------------------------------------------------------------- + // Content-Signal header + // ----------------------------------------------------------------------- + + describe('Content-Signal header', () => { + it('sets content-signal on converted responses when configured', async () => { + const { getHeader } = await honoHarness.send( + { contentSignal: { aiTrain: true, search: true, aiInput: true } }, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('content-signal')).toBe('ai-train=yes, search=yes, ai-input=yes') + }) + + it('does not set content-signal when not configured', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('content-signal')).toBeFalsy() + }) + + it('does not set content-signal on pass-through responses', async () => { + const { getHeader } = await honoHarness.send( + { contentSignal: { aiTrain: true } }, + 'text/html', + 'text/html', + '

Title

' + ) + expect(getHeader('content-signal')).toBeFalsy() + }) + }) + + // ----------------------------------------------------------------------- + // Server-Timing header + // ----------------------------------------------------------------------- + + describe('Server-Timing header', () => { + it('includes mfa.convert timing when serverTiming is enabled', async () => { + const { getHeader } = await honoHarness.send( + { serverTiming: true }, + 'text/markdown', + 'text/html', + '

Title

' + ) + const timing = getHeader('server-timing') + expect(timing).toMatch(/mfa\.convert;dur=[\d.]+;desc="HTML to Markdown"/) + }) + + it('does not set Server-Timing when serverTiming is disabled', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('server-timing')).toBeFalsy() + }) + + it('does not set Server-Timing on pass-through responses', async () => { + const { getHeader } = await honoHarness.send( + { serverTiming: true }, + 'text/html', + 'text/html', + '

Title

' + ) + expect(getHeader('server-timing')).toBeFalsy() + }) + }) + + describe('x-markdown-timing header', () => { + it('sets x-markdown-timing alongside Server-Timing when serverTiming is enabled', async () => { + const { getHeader } = await honoHarness.send( + { serverTiming: true }, + 'text/markdown', + 'text/html', + '

Title

' + ) + const timing = getHeader('x-markdown-timing') + expect(timing).toMatch(/mfa\.convert;dur=[\d.]+;desc="HTML to Markdown"/) + }) + + it('does not set x-markdown-timing when serverTiming is disabled', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('x-markdown-timing')).toBeFalsy() + }) + + it('does not set x-markdown-timing on pass-through responses', async () => { + const { getHeader } = await honoHarness.send( + { serverTiming: true }, + 'text/html', + 'text/html', + '

Title

' + ) + expect(getHeader('x-markdown-timing')).toBeFalsy() + }) + + it('uses custom timingHeader name when provided', async () => { + const { getHeader } = await honoHarness.send( + { serverTiming: true, timingHeader: 'x-custom-timing' }, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('x-custom-timing')).toMatch( + /mfa\.convert;dur=[\d.]+;desc="HTML to Markdown"/ + ) + expect(getHeader('x-markdown-timing')).toBeFalsy() + }) + }) + + // ----------------------------------------------------------------------- + // Vary header + // ----------------------------------------------------------------------- + + describe('Vary header', () => { + it('sets Vary: Accept on converted responses', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/markdown', + 'text/html', + '

Title

' + ) + expect(getHeader('vary')).toContain('Accept') + }) + + it('sets Vary: Accept on pass-through responses', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/html', + 'text/html', + '

Title

' + ) + expect(getHeader('vary')).toContain('Accept') + }) + + it('appends to existing Vary header', async () => { + const { getHeader } = await honoHarness.send( + undefined, + 'text/markdown', + 'text/html', + '

Title

', + { vary: 'Accept-Encoding' } + ) + const vary = getHeader('vary') ?? '' + expect(vary).toContain('Accept-Encoding') + expect(vary).toContain('Accept') + }) + }) +}) + +// --------------------------------------------------------------------------- +// Integration tests +// --------------------------------------------------------------------------- + +function createApp(options?: Parameters[0]) { + const app = new Hono() + app.use('*', markdown(options)) + + app.get('/html', (c) => { + return c.html('

Hello World

This is bold text.

') + }) + + app.get('/json', (c) => { + return c.json({ message: 'hello' }) + }) + + app.get('/page', (c) => { + return c.html(` + +

Article

Content here.

+
Copyright
+ `) + }) + + return app +} + +describe('hono middleware integration', () => { + it('converts HTML to markdown via Hono request', async () => { + const app = createApp() + const res = await app.request('/html', { + headers: { accept: 'text/markdown' }, + }) + const body = await res.text() + + expect(res.headers.get('content-type')).toBe('text/markdown; charset=utf-8') + expect(body).toContain('# Hello World') + expect(body).toContain('**bold**') + + const tokens = Number(res.headers.get('x-markdown-tokens')) + expect(tokens).toBeGreaterThan(0) + }) + + it('returns HTML when Accept header does not request markdown', async () => { + const app = createApp() + const res = await app.request('/html', { + headers: { accept: 'text/html' }, + }) + const body = await res.text() + + expect(res.headers.get('content-type')).toContain('text/html') + expect(body).toContain('

') + }) + + it('does not interfere with JSON responses', async () => { + const app = createApp() + const res = await app.request('/json', { + headers: { accept: 'text/markdown' }, + }) + const data: unknown = await res.json() + + expect(data).toEqual({ message: 'hello' }) + }) + + it('supports extraction via options', async () => { + const app = createApp({ extract: true }) + const res = await app.request('/page', { + headers: { accept: 'text/markdown' }, + }) + const body = await res.text() + + expect(body).toContain('Article') + expect(body).toContain('Content here.') + expect(body).not.toContain('Home') + expect(body).not.toContain('Copyright') + }) +}) diff --git a/packages/markdown-for-agents/src/index.ts b/packages/markdown-for-agents/src/index.ts new file mode 100644 index 000000000..b6bdb6594 --- /dev/null +++ b/packages/markdown-for-agents/src/index.ts @@ -0,0 +1,81 @@ +/** + * Hono middleware that converts HTML responses to Markdown + * when the client sends an `Accept: text/markdown` header. + * + * ```ts + * import { Hono } from "hono"; + * import { markdown } from "@hono/markdown-for-agents"; + * + * const app = new Hono(); + * app.use("*", markdown()); + * ``` + * @module + */ + +import type { MiddlewareHandler } from 'hono' +import { convert, buildContentSignalHeader } from 'markdown-for-agents' +import type { MiddlewareOptions } from 'markdown-for-agents' + +export type { MiddlewareOptions } from 'markdown-for-agents' + +/** + * Hono middleware that converts HTML responses to markdown + * when the client sends an `Accept: text/markdown` header. + * + * @param options - Conversion and middleware options. + * @returns A Hono middleware handler. 
+ * + * @example + * ```ts + * import { Hono } from "hono"; + * import { markdown } from "@hono/markdown-for-agents"; + * + * const app = new Hono(); + * app.use("*", markdown()); + * ``` + */ +export function markdown(options?: MiddlewareOptions): MiddlewareHandler { + const tokenHeader = options?.tokenHeader ?? 'x-markdown-tokens' + const timingHeader = options?.timingHeader ?? 'x-markdown-timing' + + return async (c, next) => { + await next() + + // Always signal that responses vary by Accept so caches store + // separate entries for HTML and Markdown representations. + c.res.headers.append('vary', 'Accept') + + const accept = c.req.header('accept') ?? '' + if (!accept.includes('text/markdown')) { + return + } + + const contentType = c.res.headers.get('content-type') ?? '' + if (!contentType.includes('text/html')) { + return + } + + const html = await c.res.text() + + const { markdown: md, tokenEstimate, contentHash, convertDuration } = convert(html, options) + + c.res = new Response(md, { status: c.res.status, headers: c.res.headers }) + // The Markdown body differs in length from the HTML it replaces, so drop any + // upstream Content-Length and let the runtime frame the new body itself. + c.res.headers.delete('content-length') + c.res.headers.set('content-type', 'text/markdown; charset=utf-8') + c.res.headers.set(tokenHeader, String(tokenEstimate.tokens)) + c.res.headers.set('etag', `"${contentHash}"`) + if (convertDuration !== undefined) { + const timingValue = `mfa.convert;dur=${convertDuration.toFixed(1)};desc="HTML to Markdown"` + c.res.headers.set('server-timing', timingValue) + c.res.headers.set(timingHeader, timingValue) + } + if (options?.contentSignal) { + const signalValue = buildContentSignalHeader(options.contentSignal) + if (signalValue) { + c.res.headers.set('content-signal', signalValue) + } + } + } +} diff --git a/packages/markdown-for-agents/tsconfig.build.json b/packages/markdown-for-agents/tsconfig.build.json new file mode 100644 index 000000000..32ed1fd26 --- /dev/null +++ b/packages/markdown-for-agents/tsconfig.build.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.build.json", + "references": [] +} diff --git 
a/packages/markdown-for-agents/tsconfig.json b/packages/markdown-for-agents/tsconfig.json new file mode 100644 index 000000000..d4ad6cfa3 --- /dev/null +++ b/packages/markdown-for-agents/tsconfig.json @@ -0,0 +1,12 @@ +{ + "files": [], + "include": [], + "references": [ + { + "path": "./tsconfig.build.json" + }, + { + "path": "./tsconfig.spec.json" + } + ] +} diff --git a/packages/markdown-for-agents/tsconfig.spec.json b/packages/markdown-for-agents/tsconfig.spec.json new file mode 100644 index 000000000..9606be537 --- /dev/null +++ b/packages/markdown-for-agents/tsconfig.spec.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "../../dist/packages/markdown-for-agents", + "types": ["vitest/globals"] + }, + "include": ["src", "vitest.config.ts", "tsdown.config.ts"], + "references": [] +} diff --git a/packages/markdown-for-agents/tsdown.config.ts b/packages/markdown-for-agents/tsdown.config.ts new file mode 100644 index 000000000..52e4b39b4 --- /dev/null +++ b/packages/markdown-for-agents/tsdown.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from 'tsdown' + +export default defineConfig({ + attw: true, + clean: true, + dts: true, + entry: 'src/index.ts', + format: ['esm'], + publint: true, + tsconfig: 'tsconfig.build.json', +}) diff --git a/packages/markdown-for-agents/vitest.config.ts b/packages/markdown-for-agents/vitest.config.ts new file mode 100644 index 000000000..6f5da90b4 --- /dev/null +++ b/packages/markdown-for-agents/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineProject } from 'vitest/config' + +export default defineProject({ + test: { + globals: true, + include: ['src/**/*.test.ts'], + }, +}) diff --git a/yarn.lock b/yarn.lock index 14a4b321d..5300d7745 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2070,6 +2070,20 @@ __metadata: languageName: unknown linkType: soft +"@hono/markdown-for-agents@workspace:packages/markdown-for-agents": + version: 0.0.0-use.local + resolution: 
"@hono/markdown-for-agents@workspace:packages/markdown-for-agents" + dependencies: + hono: "npm:^4.11.5" + markdown-for-agents: "npm:^1.3.1" + tsdown: "npm:^0.15.9" + typescript: "npm:^5.9.3" + vitest: "npm:^4.1.0-beta.1" + peerDependencies: + hono: ">=4.0.0" + languageName: unknown + linkType: soft + "@hono/mcp@workspace:packages/mcp": version: 0.0.0-use.local resolution: "@hono/mcp@workspace:packages/mcp" @@ -7333,7 +7347,7 @@ __metadata: languageName: node linkType: hard -"domutils@npm:^3.0.1": +"domutils@npm:^3.0.1, domutils@npm:^3.2.2": version: 3.2.2 resolution: "domutils@npm:3.2.2" dependencies: @@ -7536,6 +7550,13 @@ __metadata: languageName: node linkType: hard +"entities@npm:^7.0.1": + version: 7.0.1 + resolution: "entities@npm:7.0.1" + checksum: 10c0/b4fb9937bb47ecb00aaaceb9db9cdd1cc0b0fb649c0e843d05cf5dbbd2e9d2df8f98721d8b1b286445689c72af7b54a7242fc2d63ef7c9739037a8c73363e7ca + languageName: node + linkType: hard + "env-paths@npm:^2.2.0": version: 2.2.1 resolution: "env-paths@npm:2.2.1" @@ -9663,6 +9684,18 @@ __metadata: languageName: node linkType: hard +"htmlparser2@npm:^10.1.0": + version: 10.1.0 + resolution: "htmlparser2@npm:10.1.0" + dependencies: + domelementtype: "npm:^2.3.0" + domhandler: "npm:^5.0.3" + domutils: "npm:^3.2.2" + entities: "npm:^7.0.1" + checksum: 10c0/36394e29b80cfcc5e78e0fa4d3aa21fdaac3e6778d23e5c933e625c290987cd9a724a2eb0753ab60ed0c69dfaba0ab115f0ee50fb112fd8f0c4d522e7e0089a2 + languageName: node + linkType: hard + "http-cache-semantics@npm:^4.1.1": version: 4.1.1 resolution: "http-cache-semantics@npm:4.1.1" @@ -10955,6 +10988,16 @@ __metadata: languageName: node linkType: hard +"markdown-for-agents@npm:^1.3.1": + version: 1.3.1 + resolution: "markdown-for-agents@npm:1.3.1" + dependencies: + domhandler: "npm:^5.0.3" + htmlparser2: "npm:^10.1.0" + checksum: 10c0/7dd7362ced0b887a8abed5712698d3ee44c7ce9a2266354a11f67db1d91ebf59cc4fd0aeab3800c5c1695c6624412b73dfe9c21277d3629270d05c8bb1dfa6de + languageName: node + linkType: hard + 
"marked-terminal@npm:^7.0.0, marked-terminal@npm:^7.1.0": version: 7.3.0 resolution: "marked-terminal@npm:7.3.0"