diff --git a/crates/mdbook-html/src/html/mod.rs b/crates/mdbook-html/src/html/mod.rs index 8a70700f7e..c583c92762 100644 --- a/crates/mdbook-html/src/html/mod.rs +++ b/crates/mdbook-html/src/html/mod.rs @@ -72,8 +72,8 @@ pub(crate) fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> St /// Renders markdown to a [`Tree`]. fn build_tree(text: &str, options: &HtmlRenderOptions<'_>) -> Tree { - let events = new_cmark_parser(text, &options.markdown_options); - tree::MarkdownTreeBuilder::build(options, events) + let events = new_cmark_parser(text, &options.markdown_options).into_offset_iter(); + tree::MarkdownTreeBuilder::build(options, text, events) } /// The parsed chapter, and some information about the chapter. diff --git a/crates/mdbook-html/src/html/tree.rs b/crates/mdbook-html/src/html/tree.rs index 5cb97ce378..ae9356f3b1 100644 --- a/crates/mdbook-html/src/html/tree.rs +++ b/crates/mdbook-html/src/html/tree.rs @@ -21,6 +21,16 @@ use std::collections::{HashMap, HashSet}; use std::ops::Deref; use tracing::{trace, warn}; +/// Returns the 1-based line number for a byte offset in `source`. +fn line_number_at_offset(source: &str, offset: usize) -> usize { + let offset = offset.min(source.len()); + source.as_bytes()[..offset] + .iter() + .filter(|&&b| b == b'\n') + .count() + + 1 +} + /// Helper to create a [`QualName`]. macro_rules! attr_qual_name { ($name:expr) => { @@ -79,6 +89,8 @@ pub(crate) struct Element { pub(crate) self_closing: bool, /// True if this was raw HTML written in the markdown. pub(crate) was_raw: bool, + /// 1-based line number in the source markdown where this raw tag opened. + pub(crate) source_line: Option, } impl Element { @@ -90,6 +102,7 @@ impl Element { attrs: Attributes::new(), self_closing: false, was_raw: false, + source_line: None, } } @@ -199,15 +212,23 @@ pub(crate) struct MarkdownTreeBuilder<'opts, 'event, EventIter> { /// tag. After the document has been parsed, all the definitions are moved /// to the end of the document. footnote_defs: HashMap, NodeId>, + /// The markdown source being rendered. + source: &'opts str, + /// Byte offset of the most recently processed event. + current_offset: usize, } impl<'opts, 'event, EventIter> MarkdownTreeBuilder<'opts, 'event, EventIter> where - EventIter: Iterator>, + EventIter: Iterator, std::ops::Range)>, { /// Processes a [`pulldown_cmark`] iterator of [`pulldown_cmark::Event`] /// values, and generates a tree of [`Node`] values. - pub(crate) fn build(options: &'opts HtmlRenderOptions<'opts>, events: EventIter) -> Tree { + pub(crate) fn build( + options: &'opts HtmlRenderOptions<'opts>, + source: &'opts str, + events: EventIter, + ) -> Tree { let tree = Tree::new(Node::Fragment); let root = tree.root().id(); @@ -222,6 +243,8 @@ where table_cell_index: 0, footnote_numbers: HashMap::new(), footnote_defs: HashMap::new(), + source, + current_offset: 0, }; builder.process_events(); builder.add_header_links(); @@ -302,7 +325,8 @@ where /// The main processing loop. Processes all events until the end. fn process_events(&mut self) { - while let Some(event) = self.events.next() { + while let Some((event, range)) = self.events.next() { + self.current_offset = range.start; trace!("event={event:?}"); match event { Event::Start(tag) => self.start_tag(tag), @@ -446,7 +470,8 @@ where // To process the HTML correctly, this needs to // collect it all into a single string. let mut html = String::new(); - while let Some(event) = self.events.next() { + while let Some((event, range)) = self.events.next() { + self.current_offset = range.start; match event { Event::Html(text) | Event::Text(text) => html.push_str(&text), Event::End(TagEnd::HtmlBlock) => break, @@ -571,7 +596,8 @@ where } Tag::MetadataBlock(_) => { // Eat all events till the end of MetadataBlock. - while let Some(event) = self.events.next() { + while let Some((event, range)) = self.events.next() { + self.current_offset = range.start; if matches!(event, Event::End(TagEnd::MetadataBlock(_))) { break; } @@ -597,11 +623,17 @@ where if !el.was_raw { break; } + let open_line = el + .source_line + .map(|line| format!(" (opened at line {line})")) + .unwrap_or_default(); + let exit_line = line_number_at_offset(self.source, self.current_offset); warn!( - "unclosed HTML tag `<{}>` found in `{}` while exiting {tag:?}\n\ + "unclosed HTML tag `<{}>` found in `{}{}` while exiting {tag:?} at line {exit_line}\n\ HTML tags must be closed before exiting a markdown element.", el.name.local, self.options.path.display(), + open_line, ); self.pop(); } @@ -674,6 +706,7 @@ where attrs, self_closing: tag.self_closing, was_raw: true, + source_line: Some(line_number_at_offset(self.source, self.current_offset)), }; fix_html_link(&mut el); self.push(Node::Element(el)); @@ -718,7 +751,8 @@ where /// current nesting level. fn eat_till_end(&mut self) { let mut nest = 0; - while let Some(event) = self.events.next() { + while let Some((event, range)) = self.events.next() { + self.current_offset = range.start; match event { Event::Start(_) => nest += 1, Event::End(_) => { @@ -737,7 +771,8 @@ where fn text_for_img_alt(&mut self) -> String { let mut nest = 0; let mut output = String::new(); - while let Some(event) = self.events.next() { + while let Some((event, range)) = self.events.next() { + self.current_offset = range.start; match event { Event::Start(_) => nest += 1, Event::End(_) => { @@ -776,10 +811,16 @@ where Node::Fragment => {} Node::Element(el) => { if el.was_raw { + let open_line = el + .source_line + .map(|line| format!(" (opened at line {line})")) + .unwrap_or_default(); + let exit_line = line_number_at_offset(self.source, self.source.len()); warn!( - "unclosed HTML tag `<{}>` found in `{}`", + "unclosed HTML tag `<{}>` found in `{}{}` at end of document (line {exit_line})", el.name.local, - self.options.path.display() + self.options.path.display(), + open_line, ); } else { panic!( diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index c128829835..a568bf1bdf 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -270,9 +270,9 @@ fn unclosed_html_tags() { cmd.expect_stderr(str![[r#" INFO Book building has started INFO Running the html backend - WARN unclosed HTML tag `` found in `chapter_1.md` - WARN unclosed HTML tag `` found in `chapter_1.md` - WARN unclosed HTML tag `
` found in `chapter_1.md` + WARN unclosed HTML tag `` found in `chapter_1.md (opened at line 1)` at end of document (line 1) + WARN unclosed HTML tag `` found in `chapter_1.md (opened at line 1)` at end of document (line 1) + WARN unclosed HTML tag `
` found in `chapter_1.md (opened at line 1)` at end of document (line 1) INFO HTML book written to `[ROOT]/book` "#]]); @@ -283,6 +283,26 @@ fn unclosed_html_tags() { ); } +// Unclosed HTML tags report source line numbers. +#[test] +fn unclosed_html_tags_line_numbers() { + BookTest::init(|_| {}) + .change_file( + "src/chapter_1.md", + "Intro line\n\n
\n inner\n", + ) + .run("build", |cmd| { + cmd.expect_stderr(str![[r#" + INFO Book building has started + INFO Running the html backend + WARN unclosed HTML tag `` found in `chapter_1.md (opened at line 3)` at end of document (line 5) + WARN unclosed HTML tag `
` found in `chapter_1.md (opened at line 3)` at end of document (line 5) + INFO HTML book written to `[ROOT]/book` + +"#]]); + }); +} + // Test for HTML tags out of sync. #[test] fn unbalanced_html_tags() { @@ -294,7 +314,7 @@ fn unbalanced_html_tags() { INFO Running the html backend WARN unexpected HTML end tag `
` found in `chapter_1.md` Check that the HTML tags are properly balanced. - WARN unclosed HTML tag `
` found in `chapter_1.md` + WARN unclosed HTML tag `
` found in `chapter_1.md (opened at line 1)` at end of document (line 1) INFO HTML book written to `[ROOT]/book` "#]]); @@ -311,7 +331,7 @@ fn heading_with_unbalanced_html() { cmd.expect_stderr(str![[r#" INFO Book building has started INFO Running the html backend - WARN unclosed HTML tag `` found in `chapter_1.md` while exiting Heading(H3) + WARN unclosed HTML tag `` found in `chapter_1.md (opened at line 1)` while exiting Heading(H3) at line 1 HTML tags must be closed before exiting a markdown element. INFO HTML book written to `[ROOT]/book`