diff --git a/doc/_static/css/fuse-search.css b/doc/_static/css/fuse-search.css
new file mode 100644
index 000000000..7c55c40b9
--- /dev/null
+++ b/doc/_static/css/fuse-search.css
@@ -0,0 +1,170 @@
+/* =========================================================================
+ Fuse.js search page styles
+ ========================================================================= */
+
+/* Search input row -------------------------------------------------------- */
+
+.fuse-search-input-group {
+ display: flex;
+ align-items: center;
+ gap: 0.75rem;
+ margin-bottom: 1.25rem;
+}
+
+#fuse-search-input {
+ flex: 1 1 auto;
+ max-width: 42rem;
+ padding: 0.55rem 0.85rem;
+ font-size: 1rem;
+ border: 1px solid var(--pst-color-border);
+ border-radius: 4px;
+ background: var(--pst-color-surface);
+ color: var(--pst-color-text-base);
+ outline: none;
+ transition: border-color 0.15s;
+}
+
+/* The base rule sets `outline: none`, so the focus indicator relies on
+   border-color and box-shadow alone. Forced-colors (high-contrast) mode
+   overrides border colours and removes box-shadows, and the box-shadow
+   declaration is also dropped where color-mix() is unsupported. A transparent
+   outline is invisible in normal rendering but is repainted in a system
+   colour in forced-colors mode, so keyboard focus stays visible. */
+#fuse-search-input:focus {
+  outline: 2px solid transparent;
+  outline-offset: 2px;
+  border-color: var(--pst-color-primary);
+  box-shadow: 0 0 0 3px color-mix(in srgb, var(--pst-color-primary) 20%, transparent);
+}
+
+#fuse-search-count {
+ font-size: 0.85rem;
+ color: var(--pst-color-text-muted);
+ white-space: nowrap;
+}
+
+/* Type filter tabs -------------------------------------------------------- */
+
+.fuse-search-filters {
+ display: flex;
+ align-items: center;
+ gap: 0.5rem;
+ flex-wrap: wrap;
+ margin-bottom: 1.5rem;
+}
+
+.fuse-filter-label {
+ font-size: 0.85rem;
+ color: var(--pst-color-text-muted);
+ margin-right: 0.25rem;
+}
+
+.fuse-filter-btn {
+ cursor: pointer;
+ padding: 0.25rem 0.75rem;
+ border-radius: 20px;
+ border: 1px solid var(--pst-color-border);
+ background: var(--pst-color-surface);
+ color: var(--pst-color-text-base);
+ font-size: 0.85rem;
+ user-select: none;
+ transition: background 0.15s, border-color 0.15s, color 0.15s;
+}
+
+.fuse-filter-btn.active,
+.fuse-filter-btn:hover {
+ background: var(--pst-color-primary);
+ border-color: var(--pst-color-primary);
+ color: #fff;
+}
+
+/* Results list ------------------------------------------------------------ */
+
+.fuse-results-list {
+ list-style: none;
+ padding: 0;
+ margin: 0;
+}
+
+.fuse-result-item {
+ padding: 0.85rem 0;
+ border-bottom: 1px solid var(--pst-color-border-muted);
+}
+
+.fuse-result-item:last-child {
+ border-bottom: none;
+}
+
+.fuse-result-link {
+ display: flex;
+ align-items: baseline;
+ gap: 0.5rem;
+ text-decoration: none;
+ font-weight: 600;
+ color: var(--pst-color-primary);
+}
+
+.fuse-result-link:hover .fuse-result-title {
+ text-decoration: underline;
+}
+
+.fuse-result-breadcrumb {
+ display: block;
+ font-size: 0.8rem;
+ font-weight: 400;
+ color: var(--pst-color-text-muted);
+ margin-top: 0.15rem;
+}
+
+.fuse-result-item > p {
+ margin: 0.3rem 0 0 0;
+ font-size: 0.9rem;
+ color: var(--pst-color-text-muted);
+ line-height: 1.5;
+}
+
+/* Type badges */
+
+.fuse-badge {
+ display: inline-block;
+ padding: 0.1em 0.55em;
+ border-radius: 4px;
+ font-size: 0.72rem;
+ font-weight: 700;
+ text-transform: uppercase;
+ letter-spacing: 0.04em;
+ white-space: nowrap;
+}
+
+.fuse-badge--api {
+ background: color-mix(in srgb, var(--pst-color-secondary) 15%, transparent);
+ color: var(--pst-color-secondary);
+ border: 1px solid color-mix(in srgb, var(--pst-color-secondary) 30%, transparent);
+}
+
+.fuse-badge--guide {
+ background: color-mix(in srgb, var(--pst-color-success) 12%, transparent);
+ color: var(--pst-color-success);
+ border: 1px solid color-mix(in srgb, var(--pst-color-success) 30%, transparent);
+}
+
+.fuse-badge--section {
+ background: color-mix(in srgb, var(--pst-color-info) 12%, transparent);
+ color: var(--pst-color-info);
+ border: 1px solid color-mix(in srgb, var(--pst-color-info) 30%, transparent);
+}
+
+.fuse-badge--example {
+ background: color-mix(in srgb, var(--pst-color-warning) 18%, transparent);
+ color: var(--pst-color-warning);
+ border: 1px solid color-mix(in srgb, var(--pst-color-warning) 35%, transparent);
+}
+
+/* Highlight matches */
+
+/* NOTE(review): this is a bare element selector, so it restyles every <mark>
+   on every page that loads this stylesheet, not only search-result
+   highlights. Consider scoping it to the results container — TODO confirm
+   where the search script inserts its <mark> elements. */
+mark {
+ background: color-mix(in srgb, var(--pst-color-warning) 35%, transparent);
+ color: inherit;
+ border-radius: 2px;
+ padding: 0 0.1em;
+}
+
+/* Misc */
+
+.fuse-no-results,
+.fuse-loading {
+ color: var(--pst-color-text-muted);
+ margin-top: 1rem;
+}
diff --git a/doc/_static/scripts/fuse-search.js b/doc/_static/scripts/fuse-search.js
new file mode 100644
index 000000000..208f7878f
--- /dev/null
+++ b/doc/_static/scripts/fuse-search.js
@@ -0,0 +1,310 @@
+/**
+ * fuse-search.js — client-side Fuse.js search for skrub documentation.
+ *
+ * This script:
+ * 1. Reads the pre-built index from window.__FUSE_SEARCH_INDEX__ (assigned
+ * by _static/fuse-search-index.js, loaded via a
+{%- endblock %}
+
+{# Replace the search body with our Fuse.js-powered form and results area.
+ The rest of the page (navbar, sidebars, footer) is inherited unchanged. #}
+{% block docs_body %}
+
+{% endblock docs_body %}
+
+{# Sphinx's searchtools/searchindex are still loaded by the parent template
+ but are never initialised — Fuse.js handles everything instead. #}
diff --git a/doc/conf.py b/doc/conf.py
index 11d44b4a2..95b76b0da 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -75,6 +75,7 @@
"sphinx_copybutton",
"sphinx_gallery.gen_gallery",
"autoshortsummary",
+ "fuse_search_index",
]
try:
@@ -278,9 +279,13 @@
html_css_files = [
"css/custom.css",
+ "css/fuse-search.css",
]
html_js_files = [
"scripts/sg_plotly_resize.js",
+ # Fuse.js search — order matters: library before custom script
+ ("scripts/fuse.min.js", {"defer": "defer"}),
+ ("scripts/fuse-search.js", {"defer": "defer"}),
]
diff --git a/doc/sphinxext/fuse_search_index.py b/doc/sphinxext/fuse_search_index.py
new file mode 100644
index 000000000..6446171aa
--- /dev/null
+++ b/doc/sphinxext/fuse_search_index.py
@@ -0,0 +1,319 @@
+"""Sphinx extension that generates a Fuse.js-compatible JSON search index.
+
+After the HTML build completes, this extension walks all generated HTML pages,
+extracts their titles and body text, and writes
+``_static/fuse-search-index.js``. The JS file assigns a flat list of entry
+objects to ``window.__FUSE_SEARCH_INDEX__``::
+
+    [
+      {
+        "title": "...",
+        "content": "...",
+        "url": "path/to/page.html",  // or "page.html#section-id"
+        "type": "page"               // or "api", "example" or "section"
+      },
+      ...
+    ]
+
+For non-API pages in the user guide / examples, each ``<section>`` element
+with an ``id`` and a heading (h2–h4) also becomes its own entry (of type
+``"section"``, with an extra ``"page"`` key holding the parent page title) so
+that users can land directly on the relevant part of a page.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from html.parser import HTMLParser
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# HTML → plain text helpers
+# ---------------------------------------------------------------------------
+
+
+class _TextExtractor(HTMLParser):
+    """Accumulate the visible text of an HTML fragment.
+
+    Character data nested inside any tag named in ``_SKIP_TAGS`` (scripts,
+    styles, navigation chrome, buttons, ...) is discarded; every other text
+    node is kept.
+    """
+
+    _SKIP_TAGS = frozenset(
+        {"script", "style", "head", "noscript", "nav", "footer", "aside", "button"}
+    )
+
+    def __init__(self):
+        super().__init__()
+        # Stripped, non-empty text chunks in document order.
+        self._parts = []
+        # Number of currently open skipped elements; text is ignored while > 0.
+        self._skip_depth = 0
+
+    def handle_starttag(self, tag, attrs):
+        """Enter one more level of skipped markup when *tag* is a skip tag."""
+        if tag not in self._SKIP_TAGS:
+            return
+        self._skip_depth += 1
+
+    def handle_endtag(self, tag):
+        """Leave one level of skipped markup, never dropping below zero."""
+        if self._skip_depth and tag in self._SKIP_TAGS:
+            self._skip_depth -= 1
+
+    def handle_data(self, data):
+        """Record *data* unless it is blank or sits inside skipped markup."""
+        if not self._skip_depth:
+            chunk = data.strip()
+            if chunk:
+                self._parts.append(chunk)
+
+    def get_text(self, max_chars=3000):
+        """Return the collected text, whitespace-collapsed, cut to *max_chars*."""
+        joined = " ".join(self._parts)
+        return re.sub(r"\s+", " ", joined)[:max_chars]
+
+
+def _plain_text(html_fragment, max_chars=3000):
+    """Return the visible text of *html_fragment*, capped at *max_chars*.
+
+    The fragment is run through ``_TextExtractor``: text inside its skipped
+    tags is dropped and runs of whitespace are collapsed to single spaces.
+    """
+    ext = _TextExtractor()
+    ext.feed(html_fragment)
+    # ``feed()`` may hold back trailing data it cannot classify yet (for
+    # example text ending in a bare ``&`` such as "AT&T", which could be a
+    # character reference cut in half). ``close()`` forces that buffer to be
+    # processed so the tail of the fragment is not silently lost.
+    ext.close()
+    return ext.get_text(max_chars)
+
+
+def _extract_article_html(full_html):
+    """Return the inner HTML of the page's main content element.
+
+    The first ``<article>`` element is preferred; failing that, the first
+    ``<div role="main">``. When neither is present, *full_html* is returned
+    unchanged.
+    """
+    m = re.search(r"(<article[^>]*>)(.*?)</article>", full_html, re.DOTALL)
+    if m:
+        return m.group(2)
+    # NOTE: the lazy ``(.*?)</div>`` stops at the first closing ``</div>``, so
+    # a main container that holds nested divs is cut short at the first one.
+    m = re.search(
+        r'<div[^>]*role=["\']main["\'][^>]*>(.*?)</div>', full_html, re.DOTALL
+    )
+    if m:
+        return m.group(1)
+    return full_html
+
+
+def _strip_h1(article_html):
+    """Remove the first ``<h1>`` element so it isn't repeated in content snippets.
+
+    Matching is case-insensitive and spans newlines; only the first ``<h1>``
+    (with its content and closing tag) is removed. Input without an ``<h1>``
+    is returned unchanged.
+    """
+    return re.sub(
+        r"<h1[^>]*>.*?</h1>",
+        "",
+        article_html,
+        count=1,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+
+
+def _extract_title(full_html):
+    """Return the text of the page's ``<title>`` without the site suffix.
+
+    Character references in the title are decoded, then everything from a
+    dash (em dash, en dash or hyphen) followed by "skrub" to the end of the
+    title is removed. Returns ``""`` when the page has no ``<title>`` element.
+    """
+    from html import unescape
+
+    m = re.search(r"<title>(.*?)</title>", full_html, re.DOTALL)
+    if not m:
+        return ""
+    title = unescape(m.group(1).strip())
+    # Strip " — skrub X.Y.Z documentation" suffix
+    title = re.sub(r"\s*[—\u2014\u2013-]\s*skrub.*$", "", title, flags=re.IGNORECASE)
+    return title.strip()
+
+
+# ---------------------------------------------------------------------------
+# Section splitting
+# ---------------------------------------------------------------------------
+
+# Matches an opening ``<section ... id="...">`` tag (Sphinx wraps every heading
+# in one); group 1 captures the id value.
+_SECTION_OPEN_RE = re.compile(
+    r'<section[^>]*\bid=["\']([^"\']+)["\'][^>]*>', re.IGNORECASE
+)
+# Matches an h2–h4 heading; group 1 is the heading's inner HTML. The pattern is
+# not anchored, so callers choose where in a section they search for it.
+_HEADING_RE = re.compile(r"<h[2-4][^>]*>(.*?)</h[2-4]>", re.IGNORECASE | re.DOTALL)
+# Closing ``</section>`` tag
+_SECTION_CLOSE_RE = re.compile(r"</section>", re.IGNORECASE)
+
+
+def _split_sections(article_html):
+    """Return ``(section_id, heading_text, body_html)`` for each h2–h4 section.
+
+    Every ``<section id="...">`` in *article_html* is considered, whether
+    top-level or nested; depth tracking is only used to find the
+    ``</section>`` that closes each opening tag. A section is kept only when
+    its own content (the part before its first nested section) contains an
+    h2–h4 heading, so a page-level section headed by an ``<h1>`` is not
+    reported under the heading of one of its children.
+
+    ``heading_text`` is plain text with the permalink anchor removed, limited
+    to 200 characters. ``body_html`` is the raw HTML that follows the heading
+    up to the section's closing tag (nested sections included).
+    """
+    results = []
+    html = article_html
+
+    for open_m in _SECTION_OPEN_RE.finditer(html):
+        sec_id = open_m.group(1)
+        content_start = open_m.end()
+
+        # Find the matching </section> by tracking open/close depth
+        depth = 1
+        search_pos = content_start
+        close_pos = content_start
+        while depth > 0:
+            next_open = _SECTION_OPEN_RE.search(html, search_pos)
+            next_close = _SECTION_CLOSE_RE.search(html, search_pos)
+            if not next_close:
+                # Unbalanced markup: keep whatever boundary was found so far.
+                break
+            if next_open and next_open.start() < next_close.start():
+                depth += 1
+                search_pos = next_open.end()
+            else:
+                depth -= 1
+                close_pos = next_close.start()
+                search_pos = next_close.end()
+
+        section_html = html[content_start:close_pos]
+
+        # Only accept a heading that belongs to this section itself: stop the
+        # search at the first nested section so a child's h2–h4 is never taken
+        # for the heading of its (h1-level or heading-less) parent.
+        first_child = _SECTION_OPEN_RE.search(section_html)
+        own_end = first_child.start() if first_child else len(section_html)
+        heading_m = _HEADING_RE.search(section_html, 0, own_end)
+        if not heading_m:
+            continue
+
+        heading_html = heading_m.group(1)
+        # Strip anchor permalink (<a class="headerlink" ...>...</a>)
+        heading_clean = re.sub(
+            r'<a[^>]*class=["\'][^"\']*headerlink[^"\']*["\'][^>]*>.*?</a>',
+            "",
+            heading_html,
+            flags=re.DOTALL,
+        )
+        # The HTML parser behind _plain_text already decodes character
+        # references; unescaping again would corrupt headings that show a
+        # literal entity (``&amp;lt;`` must stay ``&lt;``, not become ``<``).
+        heading_text = _plain_text(heading_clean, 200)
+
+        # Body text = everything after the heading tag
+        body_html = section_html[heading_m.end() :]
+
+        results.append((sec_id, heading_text, body_html))
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Sphinx event handler
+# ---------------------------------------------------------------------------
+
+#: Exact relative paths that are always skipped (TOC / index pages).
+# Compared for equality with each page's POSIX-style path relative to the
+# build output directory.
+_SKIP_EXACT = frozenset(
+ {
+ "documentation.html",
+ "auto_examples/index.html",
+ "auto_tutorials/index.html",
+ "auto_examples/sg_execution_times.html",
+ "auto_examples/data_ops/index.html",
+ "auto_examples/data_ops/sg_execution_times.html",
+ "reference/index.html",
+ }
+)
+
+#: Relative path prefixes that are skipped (search indexes, generated indexes…)
+# Tested with ``str.startswith`` on the whole relative path, so "_" excludes
+# everything under underscore-prefixed top-level directories (e.g. _static)
+# and "search" excludes any top-level page whose name merely begins with it.
+_SKIP_PREFIXES = (
+ "_",
+ "genindex",
+ "search",
+ "py-modindex",
+ "sg_execution_times",
+)
+
+#: Relative path prefixes treated as API reference pages (no section splitting)
+_API_PREFIXES = ("reference/generated/",)
+
+#: Relative path prefixes treated as example-gallery pages
+# Pages under these prefixes get the entry type "example".
+_EXAMPLE_PREFIXES = (
+ "auto_examples/",
+ "auto_tutorials/",
+)
+
+
+def _build_finished(app, exception):
+ if exception:
+ return
+ if app.builder.name not in ("html", "dirhtml"):
+ return
+
+ outdir = Path(app.outdir)
+ static_dir = outdir / "_static"
+ static_dir.mkdir(exist_ok=True)
+
+ entries = []
+
+ for html_path in sorted(outdir.rglob("*.html")):
+ rel = html_path.relative_to(outdir)
+ rel_str = rel.as_posix()
+
+ # Skip utility / system pages
+ if any(rel_str.startswith(p) for p in _SKIP_PREFIXES):
+ continue
+ if rel_str in _SKIP_EXACT:
+ continue
+ # Skip reference index/TOC pages (e.g. reference/selectors.html) but
+ # keep the actual generated API entries under reference/generated/.
+ if rel_str.startswith("reference/") and not rel_str.startswith(
+ "reference/generated/"
+ ):
+ continue
+
+ try:
+ html = html_path.read_text(encoding="utf-8", errors="ignore")
+ except OSError:
+ continue
+
+ title = _extract_title(html)
+ if not title:
+ title = rel_str
+
+ article_html = _extract_article_html(html)
+
+ is_api = any(rel_str.startswith(p) for p in _API_PREFIXES)
+ is_example = any(rel_str.startswith(p) for p in _EXAMPLE_PREFIXES)
+
+ if is_api:
+ page_type = "api"
+ elif is_example:
+ page_type = "example"
+ else:
+ page_type = "page"
+
+ # Page-level entry (strip h1 from content — it's already in title)
+ entries.append(
+ {
+ "title": title,
+ "content": _plain_text(_strip_h1(article_html)),
+ "url": rel_str,
+ "type": page_type,
+ }
+ )
+
+ # Section-level entries (everything except raw API reference pages)
+ if not is_api:
+ for sec_id, heading_text, body_html in _split_sections(article_html):
+ entries.append(
+ {
+ "title": heading_text,
+ "page": title, # breadcrumb shown below the title
+ "content": _plain_text(body_html, 1500),
+ "url": f"{rel_str}#{sec_id}",
+ "type": "section",
+ }
+ )
+
+ # Write as a JS file (assigned to a global) so the search page can load
+ # it via a plain hello") == "hello"
+
+    def test_skips_style(self):
+        # Fixture markup reconstructed: text inside <style> must not be kept.
+        assert fsi._plain_text("<style>.x { color: red; }</style>text") == "text"
+
+    def test_skips_nav(self):
+        # Fixture markup reconstructed: text inside <nav> must not be kept.
+        assert fsi._plain_text("<nav>nav content</nav> body") == "body"
+
+    def test_collapses_whitespace(self):
+        # Runs of spaces, newlines and tabs collapse to single spaces.
+        assert fsi._plain_text("<p>a   b\n\tc</p>") == "a b c"
+
+    def test_max_chars(self):
+        # The extracted text is truncated to exactly max_chars characters.
+        result = fsi._plain_text("<p>" + "x" * 100 + "</p>", max_chars=10)
+        assert len(result) == 10
+
+    def test_nested_skip(self):
+        # Fixture markup reconstructed: a skipped tag nested in another skipped
+        # tag must not end the skip region when the inner tag closes.
+        html = "<nav><button>menu</button>links</nav> keep me"
+        assert fsi._plain_text(html) == "keep me"
+
+
+# ---------------------------------------------------------------------------
+# _extract_article_html
+# ---------------------------------------------------------------------------
+
+
+class TestExtractArticleHtml:
+    """``_extract_article_html``: <article>, then role="main", then the page."""
+
+    def test_article_tag(self):
+        html = "<article>content</article>"
+        assert fsi._extract_article_html(html) == "content"
+
+    def test_role_main_fallback(self):
+        html = '<div role="main">main content</div>'
+        assert fsi._extract_article_html(html) == "main content"
+
+    def test_full_page_fallback(self):
+        html = "<p>no special tag</p>"
+        assert fsi._extract_article_html(html) == html
+
+    def test_article_preferred_over_role_main(self):
+        html = '<div role="main">main</div><article>article</article>'
+        assert fsi._extract_article_html(html) == "article"
+
+
+# ---------------------------------------------------------------------------
+# _strip_h1
+# ---------------------------------------------------------------------------
+
+
+class TestStripH1:
+    """``_strip_h1`` removes the first <h1> element and nothing else."""
+
+    def test_removes_h1(self):
+        html = "<h1>Page title</h1><p>body</p>"
+        result = fsi._strip_h1(html)
+        assert "Page title" not in result
+        assert "<p>body</p>" in result
+
+    def test_removes_only_first_h1(self):
+        html = "<h1>First</h1><h1>Second</h1>"
+        result = fsi._strip_h1(html)
+        assert "First" not in result
+        assert "Second" in result
+
+    def test_no_h1(self):
+        html = "<p>no heading</p>"
+        assert fsi._strip_h1(html) == html
+
+    def test_case_insensitive(self):
+        html = "<H1>Title</H1><p>body</p>"
+        result = fsi._strip_h1(html)
+        assert "Title" not in result
+
+
+# ---------------------------------------------------------------------------
+# _extract_title
+# ---------------------------------------------------------------------------
+
+
+class TestExtractTitle:
+    """``_extract_title`` reads <title> and drops the "— skrub ..." suffix."""
+
+    def test_basic(self):
+        html = "<html><head><title>My Page</title></head></html>"
+        assert fsi._extract_title(html) == "My Page"
+
+    def test_strips_documentation_suffix_em_dash(self):
+        html = "<title>My Page \u2014 skrub 0.5.0 documentation</title>"
+        assert fsi._extract_title(html) == "My Page"
+
+    def test_strips_suffix_html_entity(self):
+        html = "<title>My Page &#8212; skrub 0.5 documentation</title>"
+        assert fsi._extract_title(html) == "My Page"
+
+    def test_strips_suffix_en_dash(self):
+        html = "<title>My Page \u2013 skrub docs</title>"
+        assert fsi._extract_title(html) == "My Page"
+
+    def test_no_title_tag(self):
+        assert fsi._extract_title("<html><body></body></html>") == ""
+
+    def test_decodes_entities_in_title(self):
+        html = "<title>filter() &amp; friends &#8212; skrub</title>"
+        assert fsi._extract_title(html) == "filter() & friends"
+
+
+# ---------------------------------------------------------------------------
+# _split_sections
+# ---------------------------------------------------------------------------
+
+
+class TestSplitSections:
+    """``_split_sections`` yields one entry per <section> headed by h2–h4."""
+
+    def _make(self, *sections):
+        """Build minimal article HTML with the given (id, heading, body) items."""
+        parts = []
+        for sec_id, heading, body in sections:
+            parts.append(f'<section id="{sec_id}"><h2>{heading}</h2>{body}</section>')
+        return "\n".join(parts)
+
+    def test_single_section(self):
+        html = self._make(("intro", "Introduction", "<p>body text</p>"))
+        results = fsi._split_sections(html)
+        assert len(results) == 1
+        sec_id, heading, body = results[0]
+        assert sec_id == "intro"
+        assert heading == "Introduction"
+        assert "body text" in body
+
+    def test_multiple_sections(self):
+        html = self._make(
+            ("s1", "First", "<p>one</p>"),
+            ("s2", "Second", "<p>two</p>"),
+        )
+        results = fsi._split_sections(html)
+        assert [r[0] for r in results] == ["s1", "s2"]
+
+    def test_skips_section_without_heading(self):
+        html = '<section id="plain"><p>no heading here</p></section>'
+        assert fsi._split_sections(html) == []
+
+    def test_h1_not_included(self):
+        # h1 is the page title; only h2–h4 should create section entries
+        html = '<section id="top"><h1>Title</h1><p>body</p></section>'
+        assert fsi._split_sections(html) == []
+
+    def test_headerlink_stripped_from_heading(self):
+        html = (
+            '<section id="s">'
+            '<h2>Heading<a class="headerlink" href="#s">\u00b6</a></h2>'
+            "<p>body</p>"
+            "</section>"
+        )
+        results = fsi._split_sections(html)
+        assert len(results) == 1
+        assert results[0][1] == "Heading"
+
+    def test_html_entities_decoded_in_heading(self):
+        html = '<section id="s"><h2>a &amp; b</h2><p>body</p></section>'
+        results = fsi._split_sections(html)
+        assert results[0][1] == "a & b"
+
+    def test_nested_sections_both_indexed(self):
+        # Both parent and child sections should appear as separate entries.
+        html = textwrap.dedent("""\
+            <section id="parent">
+            <h2>Parent</h2>
+            <p>parent body</p>
+            <section id="child">
+            <h3>Child</h3>
+            <p>child body</p>
+            </section>
+            </section>
+        """)
+        results = fsi._split_sections(html)
+        ids = [r[0] for r in results]
+        assert "parent" in ids
+        assert "child" in ids
+
+
+# ---------------------------------------------------------------------------
+# _build_finished — integration test
+# ---------------------------------------------------------------------------
+
+
+class TestBuildFinished:
+    """Test the full pipeline: HTML files → fuse-search-index.js."""
+
+    def _make_page(self, title, h2s=None):
+        """Return minimal Sphinx-like HTML for a page.
+
+        *h2s* is an optional list of ``(section_id, heading, body)`` tuples,
+        each rendered as a ``<section>`` with an ``<h2>`` heading.
+        """
+        h2_html = ""
+        if h2s:
+            for sec_id, heading, body in h2s:
+                h2_html += (
+                    f'<section id="{sec_id}"><h2>{heading}</h2><p>{body}</p></section>'
+                )
+        return textwrap.dedent(f"""\
+            <html><head>
+            <title>{title} — skrub 0.1 documentation</title>
+            </head><body>
+            <article>
+            <h1>{title}</h1>
+            <p>Intro paragraph.</p>
+            {h2_html}
+            </article>
+            </body></html>
+        """)
+
+    def _write_page(self, path, title, h2s=None):
+        """Write a stub page to *path*, creating parent directories as needed."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(self._make_page(title, h2s), encoding="utf-8")
+
+    def _run_build(self, outdir, builder="html", exception=None):
+        """Call the ``build-finished`` handler with a stub Sphinx application."""
+        (outdir / "_static").mkdir(exist_ok=True)
+        app = SimpleNamespace(
+            builder=SimpleNamespace(name=builder),
+            outdir=str(outdir),
+        )
+        fsi._build_finished(app, exception=exception)
+
+    def _load_index(self, outdir):
+        """Parse the list of entries back out of the generated JS file."""
+        # Always decode as UTF-8 so the result does not depend on the locale.
+        js = (outdir / "_static" / "fuse-search-index.js").read_text(encoding="utf-8")
+        return json.loads(js.split(" = ", 1)[1].rstrip(";\n"))
+
+    def test_generates_index_file(self, tmp_path):
+        self._write_page(tmp_path / "index.html", "Home")
+        self._run_build(tmp_path)
+
+        index_js = tmp_path / "_static" / "fuse-search-index.js"
+        assert index_js.exists()
+        content = index_js.read_text(encoding="utf-8")
+        assert content.startswith("window.__FUSE_SEARCH_INDEX__ = ")
+        assert content.endswith(";\n")
+
+    def test_index_contains_page_entry(self, tmp_path):
+        self._write_page(
+            tmp_path / "guide.html", "My Guide", [("s1", "Section One", "detail")]
+        )
+        self._run_build(tmp_path)
+        data = self._load_index(tmp_path)
+
+        urls = [e["url"] for e in data]
+        assert "guide.html" in urls
+
+        page_entry = next(e for e in data if e["url"] == "guide.html")
+        assert page_entry["title"] == "My Guide"
+        assert page_entry["type"] == "page"
+        # h1 should be stripped from content
+        assert "My Guide" not in page_entry["content"]
+
+    def test_index_contains_section_entry(self, tmp_path):
+        self._write_page(
+            tmp_path / "guide.html", "My Guide", [("s1", "Section One", "detail text")]
+        )
+        self._run_build(tmp_path)
+        data = self._load_index(tmp_path)
+
+        section_entry = next((e for e in data if e.get("url") == "guide.html#s1"), None)
+        assert section_entry is not None
+        assert section_entry["title"] == "Section One"
+        assert section_entry["page"] == "My Guide"
+        assert section_entry["type"] == "section"
+        assert "detail text" in section_entry["content"]
+
+    def test_skipped_pages_not_indexed(self, tmp_path):
+        for name in ("genindex.html", "search.html", "py-modindex.html"):
+            self._write_page(tmp_path / name, name)
+        # Also write a legitimate page so the index isn't empty
+        self._write_page(tmp_path / "real.html", "Real Page")
+        self._run_build(tmp_path)
+        urls = [e["url"] for e in self._load_index(tmp_path)]
+
+        assert "genindex.html" not in urls
+        assert "search.html" not in urls
+        assert "py-modindex.html" not in urls
+        assert "real.html" in urls
+
+    def test_api_pages_have_api_type(self, tmp_path):
+        self._write_page(
+            tmp_path / "reference" / "generated" / "skrub.Foo.html", "skrub.Foo"
+        )
+        self._run_build(tmp_path)
+        data = self._load_index(tmp_path)
+
+        api_entry = next(
+            e for e in data if e["url"] == "reference/generated/skrub.Foo.html"
+        )
+        assert api_entry["type"] == "api"
+        # API pages should not produce section entries
+        section_entries = [e for e in data if e.get("type") == "section"]
+        assert section_entries == []
+
+    def test_reference_toc_pages_skipped(self, tmp_path):
+        self._write_page(tmp_path / "reference" / "selectors.html", "Selectors")
+        self._run_build(tmp_path)
+        data = self._load_index(tmp_path)
+        assert not any(e["url"].startswith("reference/selectors") for e in data)
+
+    def test_example_pages_have_example_type(self, tmp_path):
+        self._write_page(
+            tmp_path / "auto_examples" / "0000_getting_started.html", "Getting Started"
+        )
+        self._run_build(tmp_path)
+        data = self._load_index(tmp_path)
+
+        ex = next(
+            e for e in data if e["url"] == "auto_examples/0000_getting_started.html"
+        )
+        assert ex["type"] == "example"
+
+    def test_no_exception_propagated(self, tmp_path):
+        """build-finished with exception= must do nothing."""
+        self._run_build(tmp_path, exception=RuntimeError("build failed"))
+        assert not (tmp_path / "_static" / "fuse-search-index.js").exists()
+
+    def test_non_html_builder_skipped(self, tmp_path):
+        self._run_build(tmp_path, builder="latex")
+        assert not (tmp_path / "_static" / "fuse-search-index.js").exists()
diff --git a/pyproject.toml b/pyproject.toml
index 8e235e12d..7359f7d2d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -358,7 +358,7 @@ doctest_optionflags = "NORMALIZE_WHITESPACE ELLIPSIS"
[tool.codespell]
# Ref: https://github.com/codespell-project/codespell#using-a-config-file
-skip = '.git*,*.svg,package-lock.json,*.lock,*.css,*-min.*,pyproject.toml'
+skip = '.git*,*.svg,package-lock.json,*.lock,*.css,*-min.*,pyproject.toml,doc/_static/scripts/fuse.min.js'
check-hidden = true
# ignore-regex = ''
ignore-words-list = 'ans,serie,fulfilment'