/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). */
export const MATH_CODE_DOLLAR_MASK = '\uE020';

/** One consumed code construct: its text with `$` masked, and the index just past it. */
type ConsumedCode = { text: string; end: number };

// Sticky so it can be anchored at an arbitrary offset without slicing the document.
// Only up to three *spaces* of indentation may precede a fence (tabs/newlines may not).
const FENCE_OPEN = / {0,3}(`{3,}|~{3,})/y;
const BACKTICK_FENCE_CLOSE = /^ {0,3}(`{3,})\s*$/;
const TILDE_FENCE_CLOSE = /^ {0,3}(~{3,})\s*$/;

/** Swaps every `$` in a code literal for the private-use placeholder. */
function maskDollars(code: string): string {
  return code.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
}

/**
 * Looks on the current line (starting at `from`) for a run of at least `minLen`
 * `tick` characters.
 *
 * @returns index of the first character of that run, or -1 when the line has none.
 */
function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
  let j = from;
  while (j < md.length && md[j] !== '\n') {
    if (md[j] !== tick) {
      j++;
      continue;
    }
    let run = 0;
    while (j + run < md.length && md[j + run] === tick) run++;
    if (run >= minLen) return j;
    j += run;
  }
  return -1;
}

/**
 * Scans whole lines from `contentStart` for a closing fence: a line holding only
 * (optionally indented) `tick` characters, at least `minLen` of them.
 *
 * @returns `contentEnd` (start of the closing line) and `blockEnd` (just past the
 *          closing line, including its newline), or `null` when the fence is unclosed.
 */
function findMultilineFenceEnd(
  md: string,
  contentStart: number,
  tick: string,
  minLen: number
): { blockEnd: number; contentEnd: number } | null {
  const closeRe = tick === '`' ? BACKTICK_FENCE_CLOSE : TILDE_FENCE_CLOSE;
  let lineStart = contentStart;
  while (lineStart <= md.length) {
    const nl = md.indexOf('\n', lineStart);
    const lineEnd = nl === -1 ? md.length : nl;
    const fenceRun = closeRe.exec(md.slice(lineStart, lineEnd))?.[1];
    if (fenceRun !== undefined && fenceRun.length >= minLen) {
      return { blockEnd: nl === -1 ? md.length : nl + 1, contentEnd: lineStart };
    }
    if (nl === -1) return null;
    lineStart = nl + 1;
  }
  return null;
}

/**
 * Consumes a fenced code construct that starts at line start `i`.
 *
 * Handles, in order:
 *  1. a fence run that is closed again on the same line (e.g. ```` ```$$x$$``` ````);
 *  2. a regular fenced block, with or without an info string (```` ```ts ````, `~~~js`)
 *     or trailing blanks after the opening run. An unclosed block extends to the
 *     end of the document.
 *
 * @returns the construct with `$` masked in its code content, or `null` when `i`
 *          does not start a fence (the caller may then try the inline-code path).
 */
function tryConsumeFence(md: string, i: number): ConsumedCode | null {
  if (i !== 0 && md[i - 1] !== '\n') return null;

  FENCE_OPEN.lastIndex = i;
  const open = FENCE_OPEN.exec(md);
  const fenceStr = open?.[1];
  if (!open || !fenceStr) return null;

  const tick = fenceStr.charAt(0);
  const afterOpen = i + open[0].length;
  const nl = md.indexOf('\n', afterOpen);
  const info = md.slice(afterOpen, nl === -1 ? md.length : nl);

  if (info.trim() !== '') {
    const closeIdx = findSameLineFenceClose(md, afterOpen, tick, fenceStr.length);
    if (closeIdx >= 0) {
      let closeEnd = closeIdx;
      while (closeEnd < md.length && md[closeEnd] === tick) closeEnd++;
      return {
        text:
          md.slice(i, afterOpen) +
          maskDollars(md.slice(afterOpen, closeIdx)) +
          md.slice(closeIdx, closeEnd),
        end: closeEnd,
      };
    }
    // A backtick fence's info string may not contain backticks; such a line is
    // inline code at best, so leave it to the inline-code path.
    if (tick === '`' && info.includes('`')) return null;
  }

  // Without a line break there is no block content to protect.
  if (nl === -1) return null;

  const contentStart = nl + 1;
  const close = findMultilineFenceEnd(md, contentStart, tick, fenceStr.length);
  const contentEnd = close ? close.contentEnd : md.length;
  const blockEnd = close ? close.blockEnd : md.length;
  return {
    // The opening line (including any info string) is kept verbatim; only code content is masked.
    text:
      md.slice(i, contentStart) +
      maskDollars(md.slice(contentStart, contentEnd)) +
      md.slice(contentEnd, blockEnd),
    end: blockEnd,
  };
}

/**
 * Consumes an inline code span opening at `i`: a backtick run closed by a later
 * run of exactly the same length.
 *
 * @returns the span with `$` masked in its content, or `null` when `i` is not a
 *          backtick or the span is never closed.
 */
function tryConsumeInlineCode(md: string, i: number): ConsumedCode | null {
  if (md[i] !== '`') return null;

  let run = 0;
  while (i + run < md.length && md[i + run] === '`') run++;

  const contentStart = i + run;
  let j = contentStart;
  while (j < md.length) {
    if (md[j] !== '`') {
      j++;
      continue;
    }
    let closing = 0;
    while (j + closing < md.length && md[j + closing] === '`') closing++;
    if (closing === run) {
      return {
        text:
          md.slice(i, contentStart) + maskDollars(md.slice(contentStart, j)) + md.slice(j, j + run),
        end: j + run,
      };
    }
    j += closing;
  }
  return null;
}

/**
 * Replaces `$` inside fenced and inline code so Matrix math extensions do not run on code literals.
 * {@link unmaskMathCodeDollarPlaceholders} must be applied to the final HTML.
 *
 * Line endings are normalised from CRLF to LF as part of the scan.
 *
 * @param markdown - Raw markdown source.
 * @returns The markdown with every `$` that sits inside code replaced by
 *          {@link MATH_CODE_DOLLAR_MASK}; text outside code is untouched.
 */
export function maskDollarSignsInsideMarkdownCode(markdown: string): string {
  const md = markdown.replace(/\r\n/g, '\n');
  const parts: string[] = [];
  let plainStart = 0;
  let i = 0;

  while (i < md.length) {
    // Fences are only tried at line start (checked inside the helper); inline spans anywhere.
    const consumed = tryConsumeFence(md, i) ?? tryConsumeInlineCode(md, i);
    if (consumed) {
      parts.push(md.slice(plainStart, i), consumed.text);
      i = consumed.end;
      plainStart = i;
    } else {
      i++;
    }
  }

  parts.push(md.slice(plainStart));
  return parts.join('');
}

/**
 * Restores the `$` characters hidden by {@link maskDollarSignsInsideMarkdownCode}.
 *
 * @param html - Rendered HTML that may still contain mask placeholders.
 * @returns The HTML with every placeholder turned back into `$`.
 */
export function unmaskMathCodeDollarPlaceholders(html: string): string {
  // split/join instead of replaceAll: no ES2021 requirement and no `$` replacement-pattern pitfalls.
  return html.split(MATH_CODE_DOLLAR_MASK).join('$');
}
expect(markdownToHtml('```\n$$test$$\n```')).not.toContain('data-mx-maths'); + expect(markdownToHtml('```\n$$test$$\n```')).toContain('$$test$$'); + }); + + it('does not parse dollars inside single-line fenced code as math', () => { + expect(markdownToHtml('```$$test$$```')).not.toContain('data-mx-maths'); + expect(markdownToHtml('```$$test$$```')).toContain('$$test$$'); + }); + + it('does not parse dollars inside inline code as math', () => { + expect(markdownToHtml('`$$test$$`')).not.toContain('data-mx-maths'); + expect(markdownToHtml('`$$test$$`')).toContain('$$test$$'); + }); + + it('does not parse inline math when dollars are only inside backticks in a sentence', () => { + const result = markdownToHtml('See `$$test$$` here.'); + expect(result).not.toContain('data-mx-maths'); + expect(result).toContain('$$test$$'); + }); + it('converts block math syntax', () => { const result = markdownToHtml('$$\\frac{a}{b}$$'); expect(result).toContain('data-mx-maths'); @@ -70,6 +91,24 @@ describe('markdownToHtml', () => { expect(result).toContain('not bold'); }); + it('does not treat >:3 as a block quote (requires space after >)', () => { + const result = markdownToHtml('>:3'); + expect(result).not.toContain('
'); + expect(result).toContain(':3'); + }); + + it('treats > followed by space as block quote', () => { + const result = markdownToHtml('> quoted'); + expect(result).toContain('
'); + expect(result).toContain('quoted'); + }); + + it('escapes block quote with a single backslash before >', () => { + const result = markdownToHtml('\\>:3'); + expect(result).not.toContain('
'); + expect(result).toContain(':3'); + }); + it('preserves img[data-mx-emoticon] tags with valid mxc URLs', () => { const html = ':blobcat:'; diff --git a/src/app/plugins/markdown/markdownToHtml.ts b/src/app/plugins/markdown/markdownToHtml.ts index 02338da9e..b0bad2cdc 100644 --- a/src/app/plugins/markdown/markdownToHtml.ts +++ b/src/app/plugins/markdown/markdownToHtml.ts @@ -1,10 +1,18 @@ import { marked } from 'marked'; import DOMPurify from 'dompurify'; import { matrixSpoilerExtension } from './extensions/matrix-spoiler'; -import { matrixMathExtension, matrixMathBlockExtension } from './extensions/matrix-math'; +import { + matrixMathExtension, + matrixMathBlockExtension, + maskDollarSignsInsideMarkdownCode, + unmaskMathCodeDollarPlaceholders, +} from './extensions/matrix-math'; import { matrixSubscriptExtension } from './extensions/matrix-subscript'; import { matrixEmoticonExtension, preprocessEmoticon } from './extensions/matrix-emoticon'; -import { unescapeMarkdownBlockSequences, unescapeMarkdownInlineSequences } from './utils'; +import { + escapeLineStartBlockquoteWithoutFollowingSpace, + unescapeMarkdownInlineSequences, +} from './utils'; // Configure marked with Matrix extensions const processor = marked.use({ @@ -50,13 +58,15 @@ export function markdownToHtml(markdown: string): string { // (e.g., < becomes < for link URLs) const decoded = decodeHtmlEntities(markdown); - // First unescape any block-level escape sequences (e.g., \>, \#) - const unescapedBlocks = unescapeMarkdownBlockSequences(decoded, (text) => text); + // Only treat `> ` as block quote, escape bare `>` at line start (e.g. 
`>:3`) + const blockquotePrefixed = escapeLineStartBlockquoteWithoutFollowingSpace(decoded); - const preprocessed = preprocessEmoticon(unescapedBlocks); + const preprocessed = preprocessEmoticon(blockquotePrefixed); + + const mathInput = maskDollarSignsInsideMarkdownCode(preprocessed); // Parse markdown to HTML using marked with our Matrix extensions - const html = processor.parse(preprocessed) as string; + const html = processor.parse(mathInput) as string; // Unescape inline sequences (e.g., \*, \_) after parsing const unescapedInline = unescapeMarkdownInlineSequences(html); @@ -136,5 +146,8 @@ export function markdownToHtml(markdown: string): string { DOMPurify.removeHook('afterSanitizeAttributes'); - return sanitized.replace(/
  • (

    <\/p>)?<\/li>/gi, '


  • '); + return unmaskMathCodeDollarPlaceholders(sanitized).replace( + /
  • (

    <\/p>)?<\/li>/gi, + '


  • ' + ); } diff --git a/src/app/plugins/markdown/utils.ts b/src/app/plugins/markdown/utils.ts index e15350e2d..e3cd00cce 100644 --- a/src/app/plugins/markdown/utils.ts +++ b/src/app/plugins/markdown/utils.ts @@ -1,10 +1,5 @@ import { findAndReplace } from '$utils/findAndReplace'; -// Regex patterns for block-level markdown escape sequences -// These match escaped markdown characters like \>, \#, \`, etc. -const ESC_BLOCK_SEQ = /^\\(\\*[#>[ `])/; -const UN_ESC_BLOCK_SEQ = /^\*[#>[ `]/; - // URL-aware pattern for inline sequences const URL_NEG_LB = '(? { }; /** - * Removes escape sequences from markdown block elements in the given plain-text. - * This function unescapes characters that are escaped with backslashes (e.g., `\>`, `\#`) - * in markdown syntax, returning the original plain-text with markdown characters in effect. - * - * @param {string} text - The input markdown plain-text containing escape characters (e.g., `\> block quote`). - * @param {function} processPart - It takes the plain-text as input and returns a modified version of it. - * @returns {string} The plain-text with markdown escape sequences removed and markdown formatting applied. + * CommonMark treats `>` at line start as a block quote marker even when not followed by + * space. We only start a block quote when `>` is followed by horizontal whitespace. + * Lines like `>:3` get a backslash so the `>` is literal. */ -export const unescapeMarkdownBlockSequences = ( - text: string, - processPart: (text: string) => string -): string => { - const match = text.match(ESC_BLOCK_SEQ); - - if (!match) return processPart(text); - - const [, g1] = match; - return text.replace(ESC_BLOCK_SEQ, g1 ?? ''); -}; - -/** - * Escapes markdown block elements by adding backslashes before markdown characters - * (e.g., `\>`, `\#`) that are normally interpreted as markdown syntax. - * - * @param {string} text - The input markdown plain-text that may contain markdown elements (e.g., `> block quote`). 
/**
 * CommonMark treats `>` at line start as a block quote marker even when it is not
 * followed by a space. Here a block quote only starts when `>` is followed by
 * horizontal whitespace; a line such as `>:3` gets a backslash so its `>` stays literal.
 *
 * Lines inside fenced code blocks (``` or ~~~) are left untouched: backslash escapes
 * are not processed in code, so an inserted `\` would show up in the rendered output.
 * An unclosed fence is treated as running to the end of the input.
 *
 * @param markdown - Raw markdown source.
 * @returns The markdown with bare line-start `>` markers escaped outside fenced code.
 */
export const escapeLineStartBlockquoteWithoutFollowingSpace = (markdown: string): string => {
  const fenceRun = /^ {0,3}(`{3,}|~{3,})/;
  const bareQuoteMarker = /^(\s*)>(?![ \t])/gm;

  // Set while inside a fenced block: the fence character and the opening run length.
  let openFence: { tick: string; length: number } | null = null;
  const out: string[] = [];

  for (const line of markdown.split('\n')) {
    const match = fenceRun.exec(line);
    const run = match?.[1];

    if (openFence) {
      // A closing fence uses the same character, is at least as long as the
      // opening run, and carries nothing but whitespace after it.
      if (
        match &&
        run &&
        run.charAt(0) === openFence.tick &&
        run.length >= openFence.length &&
        line.slice(match[0].length).trim() === ''
      ) {
        openFence = null;
      }
      out.push(line);
      continue;
    }

    if (match && run) {
      const tick = run.charAt(0);
      const rest = line.slice(match[0].length);
      // A backtick run followed by more backticks on the same line is inline
      // code, not a fence opener, so it must not switch on code mode.
      if (tick !== '`' || !rest.includes('`')) {
        openFence = { tick, length: run.length };
      }
      out.push(line);
      continue;
    }

    out.push(line.replace(bareQuoteMarker, '$1\\>'));
  }

  return out.join('\n');
};