diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ee0902b..35c8086b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,10 +15,22 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - uses: actions/setup-node@v4 + with: + node-version: "21" - name: Install dependencies run: | python -m pip install --upgrade pip + # OpenMS-Insight (Phase-3) is not on PyPI: build it from the migration branch + # (Vue bundle + Python) and install from source -- mirrors the Dockerfiles. + git clone -b claude/kind-heisenberg-u6dVm --single-branch --depth 1 \ + https://github.com/t0mdavid-m/openms-insight.git /tmp/openms-insight + ( cd /tmp/openms-insight/js-component && npm install && npm run build ) + mkdir -p /tmp/openms-insight/openms_insight/js-component + cp -r /tmp/openms-insight/js-component/dist \ + /tmp/openms-insight/openms_insight/js-component/dist + pip install /tmp/openms-insight pip install -r requirements.txt # test with requirements file so can easily bump with dependabot pip install pytest fakeredis - name: Test diff --git a/Dockerfile b/Dockerfile index 2d1b5daf..071c4a2b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,6 +76,21 @@ RUN mkdir /thirdparty && \ chmod -R +x /thirdparty ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/Sirius:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Fido:/thirdparty/MaRaCluster:/thirdparty/MyriMatch:/thirdparty/OMSSA:/thirdparty/Percolator:/thirdparty/SpectraST:/thirdparty/XTandem:/thirdparty/crux:${PATH}" +# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch. +# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle +# here and sync it into the package tree; the compile-openms stage pip-installs it. 
FROM node:21 AS insight-build
ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git
ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm
# Fetch the branch's ref metadata ahead of the clone.
# NOTE(review): presumably a cache-buster (this layer changes when the branch head moves,
# forcing the clone below to re-run) -- confirm. The URL hard-codes the repository, so it
# does not follow an overridden INSIGHT_REPO.
ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json
# Shallow clone: only the branch tip is built and installed, so history is not needed.
# Variables are quoted so a value containing spaces or globs cannot split the command.
RUN git clone -b "${INSIGHT_BRANCH}" --single-branch --depth 1 "${INSIGHT_REPO}" /openms-insight
WORKDIR /openms-insight/js-component
RUN npm install && npm run build
# Sync the freshly built Vue bundle into the Python package tree, then drop node_modules
# so the later `COPY --from=insight-build /openms-insight` does not carry it along.
RUN mkdir -p /openms-insight/openms_insight/js-component \
    && rm -rf /openms-insight/openms_insight/js-component/dist \
    && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \
    && rm -rf /openms-insight/js-component/node_modules
# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle
# here and sync it into the package tree; the compile-openms stage pip-installs it.
FROM node:21 AS insight-build
ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git
ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm
# Fetch the branch's ref metadata ahead of the clone.
# NOTE(review): presumably a cache-buster (this layer changes when the branch head moves,
# forcing the clone below to re-run) -- confirm. The URL hard-codes the repository, so it
# does not follow an overridden INSIGHT_REPO.
ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json
# Shallow clone: only the branch tip is built and installed, so history is not needed.
# Variables are quoted so a value containing spaces or globs cannot split the command.
RUN git clone -b "${INSIGHT_BRANCH}" --single-branch --depth 1 "${INSIGHT_REPO}" /openms-insight
WORKDIR /openms-insight/js-component
RUN npm install && npm run build
# Sync the freshly built Vue bundle into the Python package tree, then drop node_modules
# so the later `COPY --from=insight-build /openms-insight` does not carry it along.
RUN mkdir -p /openms-insight/openms_insight/js-component \
    && rm -rf /openms-insight/openms_insight/js-component/dist \
    && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \
    && rm -rf /openms-insight/js-component/node_modules
# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch.
# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle
# here and sync it into the package tree; stage1 pip-installs it.
FROM node:21 AS insight-build
ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git
ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm
# Fetch the branch's ref metadata ahead of the clone.
# NOTE(review): presumably a cache-buster (this layer changes when the branch head moves,
# forcing the clone below to re-run) -- confirm. The URL hard-codes the repository, so it
# does not follow an overridden INSIGHT_REPO.
ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json
# Shallow clone: only the branch tip is built and installed, so history is not needed.
# Variables are quoted so a value containing spaces or globs cannot split the command.
RUN git clone -b "${INSIGHT_BRANCH}" --single-branch --depth 1 "${INSIGHT_REPO}" /openms-insight
WORKDIR /openms-insight/js-component
RUN npm install && npm run build
# Sync the freshly built Vue bundle into the Python package tree, then drop node_modules
# so the later `COPY --from=insight-build /openms-insight` does not carry it along.
RUN mkdir -p /openms-insight/openms_insight/js-component \
    && rm -rf /openms-insight/openms_insight/js-component/dist \
    && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \
    && rm -rf /openms-insight/js-component/node_modules
# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch.
# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle
# here and sync it into the package tree; stage1 pip-installs it.
FROM node:21 AS insight-build
ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git
ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm
# Fetch the branch's ref metadata ahead of the clone.
# NOTE(review): presumably a cache-buster (this layer changes when the branch head moves,
# forcing the clone below to re-run) -- confirm. The URL hard-codes the repository, so it
# does not follow an overridden INSIGHT_REPO.
ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json
# Shallow clone: only the branch tip is built and installed, so history is not needed.
# Variables are quoted so a value containing spaces or globs cannot split the command.
RUN git clone -b "${INSIGHT_BRANCH}" --single-branch --depth 1 "${INSIGHT_REPO}" /openms-insight
WORKDIR /openms-insight/js-component
RUN npm install && npm run build
# Sync the freshly built Vue bundle into the Python package tree, then drop node_modules
# so the later `COPY --from=insight-build /openms-insight` does not carry it along.
RUN mkdir -p /openms-insight/openms_insight/js-component \
    && rm -rf /openms-insight/openms_insight/js-component/dist \
    && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \
    && rm -rf /openms-insight/js-component/node_modules
+COPY --from=insight-build /openms-insight /tmp/openms-insight +RUN python -m pip install /tmp/openms-insight && rm -rf /tmp/openms-insight RUN python -m pip install -r requirements.txt # Pre-create bind-mount targets so apptainer/singularity has a real attach diff --git a/app.py b/app.py index d9b3d73d..64b5b70c 100644 --- a/app.py +++ b/app.py @@ -32,6 +32,10 @@ st.Page(Path("content", "run_example_workflow.py"), title="Run Workflow", icon="⚙️"), st.Page(Path("content", "download_section.py"), title="Download Results", icon="⬇️"), ], + "Visualization Template": [ + st.Page(Path("content", "visualization_template.py"), + title="Linked Grid Demo", icon="🔗"), + ], "Others Topics": [ st.Page(Path("content", "simple_workflow.py"), title="Simple Workflow", icon="⚙️"), st.Page(Path("content", "run_subprocess.py"), title="Run Subprocess", icon="🖥️"), diff --git a/content/visualization_template.py b/content/visualization_template.py new file mode 100644 index 00000000..fa45cb50 --- /dev/null +++ b/content/visualization_template.py @@ -0,0 +1,111 @@ +"""Linked Grid Demo — a self-contained showcase of the reusable OpenMS-Insight grid. + +Exercises the full visualization stack on small example parquet under +``example-data/insight/``: a ``Table <-> LinePlot <-> Heatmap <-> SequenceView`` linked grid, +the :class:`~src.view.grid.LayoutManager` (edit/save/upload the layout), and the +multi-experiment + side-by-side wrapping owned by +:func:`~src.common.common.show_linked_grid`. 
from pathlib import Path

import streamlit as st

from src.common.common import page_setup, save_params, show_linked_grid
from src.workflow.FileManager import FileManager
from src.view.grid import LayoutManager
from openms_insight import Table, LinePlot, Heatmap, SequenceView

# Standard page bootstrap; keep the returned params so the page can hand them back later.
params = page_setup()

st.title("🔗 Linked Grid Demo")
_INTRO = (
    "A demo of the reusable OpenMS-Insight linked grid built on the streamlit-template "
    "`src/view/grid.py` module. Click a row in the **Spectrum table** to drive the linked "
    "**Spectrum plot**, **Peak map** and **Sequence view**; click a peak to cross-highlight it."
)
st.markdown(_INTRO)

# Directory holding the example parquet fixtures that ship with the template.
DATA = Path("example-data") / "insight"

# Results store rooted at the current workspace, plus a separate Insight cache
# directory nested under the workspace's own cache folder.
_workspace_cache = Path(st.session_state.workspace) / "cache"
fm = FileManager(st.session_state.workspace, cache_path=_workspace_cache)
cache = str(_workspace_cache / "insight")

# Component vocabulary for the LayoutManager (human label <-> internal name).
OPTIONS = ["Spectrum table", "Spectrum plot", "Peak map", "Sequence view"]
NAMES = ["spectra_table", "spectrum_plot", "peak_map", "sequence_view"]


def builders():
    """Map each internal component name to a zero-argument factory for one experiment.

    Nothing is constructed here: every value is a callable that builds its
    OpenMS-Insight component only when invoked, reading the module-level ``DATA``
    (fixture directory) and ``cache`` (Insight cache path) at that moment.

    Returns:
        dict: ``comp_name -> () -> component`` for the four demo panels, in the
        same order as ``NAMES``.
    """

    def make_spectra_table():
        # Row click publishes the ``spectrum`` selection (value taken from ``scan_id``).
        return Table(
            cache_id="demo_spectra",
            data_path=str(DATA / "spectra.parquet"),
            cache_path=cache,
            interactivity={"spectrum": "scan_id"},
            index_field="scan_id",
            default_row=0,
            title="Spectrum Table",
        )

    def make_spectrum_plot():
        # Filtered by the selected spectrum; a peak click publishes ``peak``.
        return LinePlot(
            cache_id="demo_spectrum_plot",
            data_path=str(DATA / "peaks.parquet"),
            cache_path=cache,
            filters={"spectrum": "scan_id"},
            interactivity={"peak": "peak_id"},
            x_column="mass",
            y_column="intensity",
            highlight_column="is_annotated",
            annotation_column="ion_label",
            title="MS/MS Spectrum",
        )

    def make_peak_map():
        # Declares no filters; a click publishes both ``spectrum`` and ``peak``.
        return Heatmap(
            cache_id="demo_peak_map",
            data_path=str(DATA / "heat.parquet"),
            cache_path=cache,
            x_column="rt",
            y_column="mass",
            intensity_column="intensity",
            interactivity={"spectrum": "scan_id", "peak": "peak_id"},
            title="Peak Map",
        )

    def make_sequence_view():
        # Filtered by the selected spectrum; a click publishes ``peak``.
        return SequenceView(
            cache_id="demo_seq",
            sequence_data_path=str(DATA / "sequences.parquet"),
            peaks_data_path=str(DATA / "peaks.parquet"),
            cache_path=cache,
            filters={"spectrum": "scan_id"},
            interactivity={"peak": "peak_id"},
            deconvolved=True,
            title="Fragment Coverage",
        )

    return {
        "spectra_table": make_spectra_table,
        "spectrum_plot": make_spectrum_plot,
        "peak_map": make_peak_map,
        "sequence_view": make_sequence_view,
    }


# Default layout used when nothing is saved (one experiment, 2x2 grid).
DEFAULT_LAYOUT = [["spectra_table", "spectrum_plot"], ["peak_map", "sequence_view"]]

# Two tabs: the rendered grid, and the editor that persists the layout through `fm`.
tab_view, tab_layout = st.tabs(["Viewer", "Layout Manager"])

lm = LayoutManager(
    OPTIONS, NAMES, store=fm, layout_id="demo_layout", session_prefix="demo"
)

with tab_layout:
    lm.render()

with tab_view:
    # get_layout() returns None when nothing is stored, else (layout, side_by_side).
    saved = lm.get_layout()
    layout, side_by_side = saved if saved is not None else (None, False)
    # Test the layout itself rather than the tuple: a stored 2-tuple is truthy even
    # when its layout is empty, which would otherwise render a blank viewer instead
    # of the default grid.
    if not layout:
        layout = [DEFAULT_LAYOUT]
    show_linked_grid(layout, builders(), tool="demo", side_by_side=side_by_side)

save_params(params)
SEQUENCES = {
    1: ("PEPTIDEK", 2),
    3: ("ACDEFGHIK", 3),
    7: ("MNQRSTVWYK", 2),
}

N_SCANS = 20
PEAKS_PER_SCAN = 20  # -> 400 peak rows total


def _amino_acid_masses():
    """Return monoisotopic residue masses (Da) keyed by one-letter amino-acid code."""
    return {
        "A": 71.03711, "C": 103.00919, "D": 115.02694, "E": 129.04259,
        "F": 147.06841, "G": 57.02146, "H": 137.05891, "I": 113.08406,
        "K": 128.09496, "L": 113.08406, "M": 131.04049, "N": 114.04293,
        "P": 97.05276, "Q": 128.05858, "R": 156.10111, "S": 87.03203,
        "T": 101.04768, "V": 99.06841, "W": 186.07931, "Y": 163.06333,
    }


def _b_ion_ladder(sequence, residue_masses):
    """Return ``(masses, labels)`` of the b-ion-like ladder for ``sequence``.

    One entry per sequence prefix, excluding the full-length prefix. Each mass is the
    running residue sum plus 1.00794, rounded to 4 decimals; unknown residues count
    as 110.0. This is a fixture-grade approximation, not an exact ion mass.
    """
    masses = []
    labels = []
    running = 0.0
    for position, residue in enumerate(sequence[:-1]):
        running += residue_masses.get(residue, 110.0)
        masses.append(round(running + 1.00794, 4))
        labels.append(f"b{position + 1}")
    return masses, labels


def _peak_values(scan_id, j, ladder_masses, ladder_labels):
    """Return ``(mass, intensity, is_annotated, ion_label)`` for peak ``j`` of a scan.

    The first ``len(ladder_masses)`` peaks take the annotated ladder values; the rest
    are deterministic synthetic peaks derived from ``j`` and ``scan_id`` only.
    """
    if j < len(ladder_masses):
        intensity = round(5000.0 - j * 137.0 + scan_id * 11.0, 2)
        return ladder_masses[j], intensity, 1, ladder_labels[j]
    mass = round(150.0 + j * 97.3 + scan_id * 1.7, 4)
    intensity = round(
        1000.0 * (1.0 + math.sin(j * 0.7 + scan_id * 0.3)) + 200.0, 2
    )
    return mass, intensity, 0, ""


def _build_rows():
    """Synthesize the fixture rows as plain dicts (no polars, no I/O).

    Returns:
        tuple: ``(spectra_rows, peak_rows, heat_rows, seq_rows)``. ``peak_id`` is a
        single counter running across all scans, and each heat row reuses the
        ``scan_id``/``peak_id`` of the peak row it mirrors.
    """
    residue_masses = _amino_acid_masses()

    spectra_rows = []
    peak_rows = []
    heat_rows = []
    seq_rows = []

    peak_id = 0  # globally unique across scans (the cross-link click target)

    for scan_id in range(N_SCANS):
        rt = round(1.0 + scan_id * 0.5, 4)
        ms_level = 1 if scan_id % 4 == 0 else 2
        precursor_mz = round(400.0 + scan_id * 13.37, 4)

        # Only scans listed in SEQUENCES get a sequence row and an annotated ladder.
        seq_info = SEQUENCES.get(scan_id)
        if seq_info is None:
            ladder_masses, ladder_labels = [], []
        else:
            sequence, charge = seq_info
            seq_rows.append(
                {"scan_id": scan_id, "sequence": sequence, "precursor_charge": charge}
            )
            ladder_masses, ladder_labels = _b_ion_ladder(sequence, residue_masses)

        for j in range(PEAKS_PER_SCAN):
            mass, intensity, is_annotated, ion_label = _peak_values(
                scan_id, j, ladder_masses, ladder_labels
            )
            # Floor once and share the value between both tables.
            intensity = max(intensity, 1.0)

            peak_rows.append(
                {
                    "scan_id": scan_id,
                    "peak_id": peak_id,
                    "mass": mass,
                    "intensity": intensity,
                    "is_annotated": is_annotated,
                    "ion_label": ion_label,
                }
            )
            # Peak map row: reuse peak_id + scan_id so a heatmap click cross-links.
            heat_rows.append(
                {
                    "scan_id": scan_id,
                    "rt": rt,
                    "mass": mass,
                    "intensity": intensity,
                    "peak_id": peak_id,
                }
            )
            peak_id += 1

        spectra_rows.append(
            {
                "scan_id": scan_id,
                "rt": rt,
                "ms_level": ms_level,
                "precursor_mz": precursor_mz,
                "n_peaks": PEAKS_PER_SCAN,
            }
        )

    return spectra_rows, peak_rows, heat_rows, seq_rows


def build(out_dir=None):
    """Generate the four demo parquet fixtures and write them to disk.

    Args:
        out_dir: directory to write into. Defaults to ``HERE`` (this script's own
            directory), which is the original behavior. The directory is created
            if it does not exist.

    Writes ``spectra.parquet``, ``peaks.parquet``, ``heat.parquet`` and
    ``sequences.parquet``, then prints the row count of each.
    """
    target = HERE if out_dir is None else Path(out_dir)
    spectra_rows, peak_rows, heat_rows, seq_rows = _build_rows()

    # Explicit schemas keep column dtypes fixed regardless of the row contents.
    spectra = pl.DataFrame(
        spectra_rows,
        schema={
            "scan_id": pl.Int64,
            "rt": pl.Float64,
            "ms_level": pl.Int64,
            "precursor_mz": pl.Float64,
            "n_peaks": pl.Int64,
        },
    )
    peaks = pl.DataFrame(
        peak_rows,
        schema={
            "scan_id": pl.Int64,
            "peak_id": pl.Int64,
            "mass": pl.Float64,
            "intensity": pl.Float64,
            "is_annotated": pl.Int64,
            "ion_label": pl.Utf8,
        },
    )
    heat = pl.DataFrame(
        heat_rows,
        schema={
            "scan_id": pl.Int64,
            "rt": pl.Float64,
            "mass": pl.Float64,
            "intensity": pl.Float64,
            "peak_id": pl.Int64,
        },
    )
    sequences = pl.DataFrame(
        seq_rows,
        schema={
            "scan_id": pl.Int64,
            "sequence": pl.Utf8,
            "precursor_charge": pl.Int64,
        },
    )

    target.mkdir(parents=True, exist_ok=True)
    spectra.write_parquet(target / "spectra.parquet")
    peaks.write_parquet(target / "peaks.parquet")
    heat.write_parquet(target / "heat.parquet")
    sequences.write_parquet(target / "sequences.parquet")

    print(
        f"Wrote fixtures to {target}:\n"
        f" spectra.parquet {spectra.height} rows\n"
        f" peaks.parquet {peaks.height} rows\n"
        f" heat.parquet {heat.height} rows\n"
        f" sequences.parquet {sequences.height} rows"
    )


if __name__ == "__main__":
    build()
def show_linked_grid(
    layout,
    builders,
    *,
    tool,
    side_by_side=False,
    grid_key="linked_grid",
    height=None,
    column_heights=None,
):
    """Render every experiment in ``layout`` as its own linked grid.

    Wraps ``src.view.grid.render_linked_grid`` with the page-level handling of
    several experiments. Experiment ``i`` is rendered with
    ``state_key=f"{tool}__exp{i}"`` and ``grid_key=f"{grid_key}_{i}"``, so each
    experiment gets its own selection namespace and its own component keys.

    Placement: with exactly two experiments and ``side_by_side=True`` they go into
    two ``st.columns``; in every other case they are stacked in ``st.container()``
    blocks with an ``st.divider()`` before each experiment after the first.

    Args:
        layout (list): one entry per experiment; each entry is the nested rows list
            (``List[List[str]]``) passed on to ``render_linked_grid``.
        builders (dict): comp_name -> zero-argument component factory.
        tool (str): namespace prefix for the per-experiment ``state_key``.
        side_by_side (bool): place two experiments next to each other.
        grid_key (str): prefix for the per-experiment ``grid_key``.
        height (int, optional): forwarded unchanged to ``render_linked_grid``.
        column_heights (dict, optional): forwarded unchanged to ``render_linked_grid``.
    """
    # Imported at call time, as in the original, rather than at module import.
    from src.view.grid import render_linked_grid

    def render_experiment(index, rows, target):
        with target:
            render_linked_grid(
                rows,
                builders,
                state_key=f"{tool}__exp{index}",
                grid_key=f"{grid_key}_{index}",
                height=height,
                column_heights=column_heights,
            )

    if len(layout) == 2 and side_by_side:
        # Both columns are created up front, then filled left to right.
        for index, (rows, column) in enumerate(zip(layout, st.columns(2))):
            render_experiment(index, rows, column)
        return

    for index, rows in enumerate(layout):
        if index > 0:
            st.divider()
        render_experiment(index, rows, st.container())
Per row it opens + ``st.columns`` (clamped to <=3, the oracle invariant) and, per cell, constructs the + Insight component via the registered builder and renders it against one *shared* + ``StateManager`` so every panel cross-links. All data loading / hashing / filtering that + the oracle did Python-side now lives inside each Insight component (``filters`` / + ``interactivity`` + its own preprocessing), so the grid is pure layout + a shared + StateManager. +* The two near-identical ``FLASH*LayoutManager`` page modules -> :class:`LayoutManager`, + parameterized by the bits that differed between them (component vocabulary, storage keys, + session namespace). The UI, JSON format, ``<=3`` column cap, ``"(... needed)"`` dependency + validation, side-by-side option, and JSON download/upload behavior are preserved verbatim. + +The data store is accessed only through the small :class:`Store` ``Protocol`` so the template +never imports any concrete FileManager from a downstream app. +""" + +from __future__ import annotations + +import json +from typing import ( + Callable, + Dict, + List, + Optional, + Protocol, + Tuple, + runtime_checkable, +) + +import streamlit as st +from openms_insight import BaseComponent, StateManager + +# A layout is the trimmed nested list the LayoutManager persists: +# List[row], row = List[comp_name:str], <=3 entries per row. (one experiment) +Layout = List[List[str]] +# `builders` maps a comp_name -> a zero-arg factory returning a *constructed* BaseComponent. +# Zero-arg so the grid can lazily build only the panels a given layout references, and so the +# factory can close over the caller's (dataset, file_manager, cache_path) context. +BuilderMap = Dict[str, Callable[[], BaseComponent]] + +# Maximum number of columns per row. This is the oracle's hard cap, surfaced as a module +# constant so render_linked_grid and the default LayoutManager agree on the same value. 
MAX_COLUMNS = 3


def render_linked_grid(
    layout: Layout,
    builders: BuilderMap,
    state_key: str,
    *,
    grid_key: str = "linked_grid",
    height: Optional[int] = None,
    column_heights: Optional[Dict[str, int]] = None,
    on_missing: str = "warn",  # "warn" | "error" | "skip"
) -> StateManager:
    """Render one experiment's linked grid.

    For each non-empty row in ``layout``, open one ``st.columns`` cell per component
    (at most ``MAX_COLUMNS``; extra cells in a row are ignored). In each cell, call
    ``builders[comp_name]()`` to construct the component, then invoke it with a
    per-cell key ``f"{grid_key}_{r}_{c}"``, the shared
    ``StateManager(session_key=state_key)`` and the resolved height. Every component
    in the grid receives the same StateManager instance.

    Empty rows are skipped instead of being passed to ``st.columns``, which rejects
    a column count of zero. Row indices still come from the original ``layout``, so
    skipping a row does not renumber the keys of later rows.

    Args:
        layout: nested list (rows of comp_names) for ONE experiment.
        builders: comp_name -> () -> BaseComponent (factory; see BuilderMap).
        state_key: session key handed to ``StateManager``. Use a distinct value per
            (tool, experiment) so experiments shown together get separate managers.
        grid_key: prefix for per-cell component keys.
        height: default height passed to every component call (``None`` is
            forwarded as-is).
        column_heights: optional comp_name -> height override; takes precedence
            over ``height`` for that component.
        on_missing: behavior when a comp_name has no builder:
            ``"warn"`` (``st.warning`` in the cell, then skip; default),
            ``"error"`` (raise ``KeyError``), or ``"skip"`` (silently skip).

    Returns:
        The shared ``StateManager`` used for this experiment's grid.

    Raises:
        ValueError: if ``on_missing`` is not one of the three accepted values.
        KeyError: if a component has no builder and ``on_missing == "error"``.
    """
    if on_missing not in ("warn", "error", "skip"):
        raise ValueError(
            f"on_missing must be 'warn', 'error' or 'skip', got {on_missing!r}"
        )

    sm = StateManager(session_key=state_key)
    heights = column_heights or {}
    for r, row in enumerate(layout):
        # At most MAX_COLUMNS cells per row; any extra cells in a row are ignored.
        cells = row[:MAX_COLUMNS]
        if not cells:
            # st.columns() requires a positive column count, so an empty row would raise.
            continue
        cols = st.columns(len(cells))
        for c, comp_name in enumerate(cells):
            factory = builders.get(comp_name)
            if factory is None:
                if on_missing == "error":
                    raise KeyError(
                        f"No builder registered for component '{comp_name}'"
                    )
                if on_missing == "warn":
                    cols[c].warning(f"Unknown component: {comp_name}")
                continue
            h = heights.get(comp_name, height)
            with cols[c]:
                factory()(key=f"{grid_key}_{r}_{c}", state_manager=sm, height=h)
    return sm


@runtime_checkable
class Store(Protocol):
    """Minimal results-store interface the LayoutManager persists its layout through.

    Any object providing these four methods satisfies the protocol structurally, so
    this module never has to import a concrete FileManager.
    """

    def get_results(self, dataset_id: str, name_tags: list) -> dict:
        ...

    def store_data(self, dataset_id: str, name_tag: str, data) -> None:
        ...

    def result_exists(self, dataset_id: str, name_tag: str) -> bool:
        ...

    def remove_results(self, dataset_id: str) -> None:
        ...
It is parameterized by the things that differed between the two FLASH managers: + the component vocabulary (``component_options``/``component_names``), the FileManager + storage keys (``layout_id``/``layout_tag``), and the session-state namespace + (``session_prefix``). + + The persisted JSON is the *trimmed internal-name* nested list (so old saved layouts keep + loading), stored alongside the ``side_by_side`` flag exactly as the oracle did. + """ + + def __init__( + self, + component_options: List[str], # human labels, e.g. "Scan table" + component_names: List[str], # parallel internal names, e.g. "scan_table" + *, + store: Store, # object with get_results/store_data/result_exists/remove_results + layout_id: str = "layout", # store dataset_id for the saved layout + layout_tag: str = "layout", # store name_tag for the saved layout + max_columns: int = MAX_COLUMNS, + max_experiments: int = 5, + session_prefix: str = "lm", # namespaces all st.session_state keys + download_name: str = "layout_settings.json", + title: str = "Layout Manager", + ): + if len(component_options) != len(component_names): + raise ValueError( + "component_options and component_names must be the same length " + f"({len(component_options)} != {len(component_names)})" + ) + # Copy so add_options() does not mutate the caller's lists. 
+ self.component_options = list(component_options) + self.component_names = list(component_names) + self.store = store + self.layout_id = layout_id + self.layout_tag = layout_tag + self.max_columns = max_columns + self.max_experiments = max_experiments + self.session_prefix = session_prefix + self.download_name = download_name + self.title = title + + # ------------------------------------------------------------------ # + # session-state key helpers (namespaced by session_prefix) + # ------------------------------------------------------------------ # + def _k(self, name: str) -> str: + """Build a namespaced session_state key.""" + return f"{self.session_prefix}__{name}" + + # ------------------------------------------------------------------ # + # persistence (replaces set_layout/get_layout in both managers) + # ------------------------------------------------------------------ # + def get_layout(self) -> Optional[Tuple[list, bool]]: + """Return ``(layout_per_experiment, side_by_side)`` or ``None`` if unset. + + ``layout_per_experiment``: ``List[experiment]``, experiment = ``List[row]``, + row = ``List[comp_name]`` (trimmed internal names). 
def trim(self, expanded: list) -> list:
    """Convert a label-form layout into internal names, pruning everything empty.

    Falsy cells are skipped; rows left without cells and experiments left without
    rows are dropped. Each remaining label is replaced by the entry of
    ``component_names`` at the position the label has in ``component_options``.

    Raises:
        ValueError: from ``list.index`` when a label is not in ``component_options``.
    """
    labels = self.component_options
    names = self.component_names

    def _names_of(row):
        # Map one row of labels to internal names, ignoring empty cells.
        return [names[labels.index(cell)] for cell in row if cell]

    result = []
    for experiment in expanded:
        kept_rows = [cells for cells in map(_names_of, experiment) if cells]
        if kept_rows:
            result.append(kept_rows)
    return result
def validate(self, layout: Optional[list] = None) -> str:
    """Return ``''`` if the layout is OK, else a human-readable error message.

    Args:
        layout: Layout in *label* form (``List[experiment]``, experiment =
            ``List[row]``, row = ``List[label]``). When ``None``, the layout stored
            under the ``"layout"`` session-state key is validated instead.

    Returns:
        ``"Empty input"`` when there is no non-empty cell at all,
        ``"Required component is missing"`` when a ``"... (X needed)"`` label has no
        component starting with ``X`` in the *same* experiment, otherwise ``""``.
    """
    layout_setting = (
        layout if layout is not None else st.session_state.get(self._k("layout"))
    )
    if not layout_setting:
        return "Empty input"

    def _required_prefix(label):
        # Extract X from "... (X needed)". Only a parenthesised group that actually
        # contains "needed" counts, so a label that merely contains the word
        # "needed" (no "(") or has an unrelated earlier "(...)" group no longer
        # crashes with IndexError or yields a bogus requirement.
        for segment in label.split("(")[1:]:
            if "needed" in segment:
                return segment.split("needed")[0].rstrip()
        return None

    # Non-empty labels, grouped per experiment.
    experiments = [
        [col for row in exp for col in row if col] for exp in layout_setting
    ]

    # check if submitted layout is empty
    if not any(experiments):
        return "Empty input"

    # check if submitted layout contains "needed" components
    for submitted_components in experiments:
        for comp in submitted_components:
            if "needed" not in comp:
                continue
            required = _required_prefix(comp)
            if required is None:
                continue
            if not any(
                submitted.startswith(required) for submitted in submitted_components
            ):
                return "Required component is missing"
    return ""
def add_options(self, options: List[str], names: List[str]) -> None:
    """Register extra ``(label, name)`` component pairs at runtime.

    A pair is appended to ``component_options`` / ``component_names`` only when its
    internal name is not yet known, so calling this repeatedly with the same pairs
    leaves the vocabulary unchanged.

    Raises:
        ValueError: if ``options`` and ``names`` differ in length.
    """
    n_labels, n_names = len(options), len(names)
    if n_labels != n_names:
        raise ValueError(
            "options and names must be the same length "
            f"({n_labels} != {n_names})"
        )
    for label, name in zip(options, names):
        if name in self.component_names:
            continue
        self.component_options.append(label)
        self.component_names.append(name)
def _layout_editor_per_experiment(self, exp_index) -> None:
    """Draw the edit-mode grid for one experiment and apply add/delete clicks.

    Each row of the experiment's label layout becomes one ``st.columns`` strip:
    empty cells show the "add component" selector, filled cells show the label plus
    an "x" delete button. A trailing "+" button per row appends an empty cell (until
    ``max_columns`` is reached) and a final "+" button appends a new row.

    Args:
        exp_index: Index of the experiment inside the session-state layout.
    """
    # Rows of this experiment; the buttons below mutate this list in place.
    # NOTE(review): presumably the same object that session_state holds, so the
    # mutations persist across the rerun -- confirm.
    layout_info = st.session_state[self._k("layout")][exp_index]

    for row_index, row in enumerate(layout_info):
        # One extra column hosts the "new column" button while the row is not full.
        st_cols = st.columns(
            len(row) + 1 if len(row) < self.max_columns else len(row)
        )
        for col_index, col in enumerate(row):
            if not col:  # empty -> show the "add component" selector
                with st_cols[col_index].container():
                    self._container_for_new_component(
                        exp_index, row_index, col_index
                    )
            else:
                with st_cols[col_index]:
                    # label on the left, delete button on the right
                    c1, c2 = st.columns([5, 1])
                    c1.info(col)
                    if c2.button(
                        "x",
                        key=self._k(f"del_{exp_index}_{row_index}_{col_index}"),
                        type="primary",
                    ):
                        # drop the cell, then rerun so the grid is redrawn
                        layout_info[row_index].pop(col_index)
                        st.rerun()

        # new column button (capped at max_columns)
        if len(row) < self.max_columns:
            if st_cols[-1].button(
                "***+***", key=self._k(f"new_col_{exp_index}_{row_index}")
            ):
                layout_info[row_index].append("")
                st.rerun()

    # new row button
    if st.button("***+***", key=self._k(f"new_row_{exp_index}")):
        layout_info.append([""])
        st.rerun()
def render(self) -> None:
    """Draw the full Layout Manager page (edit/saved modes, buttons, upload/download, tips).

    Order of operations on every run:

    1. default ``edit_mode`` to True and process the button/upload state;
    2. seed the session layout from the store (or an empty default), or reset it
       when the selected experiment count no longer matches the layout;
    3. draw the title row (side-by-side checkbox, load / download / reset);
    4. draw either the read-only saved layout or the per-experiment editors;
    5. draw the Edit/Save buttons, pending messages and the tips box.
    """
    # default edit mode
    if st.session_state.get(self._k("edit_mode")) is None:
        st.session_state[self._k("edit_mode")] = True

    # handle button onclicks
    self._handle_setting_buttons()
    self._handle_edit_and_save_buttons()

    # initialize layout setting
    if self._k("layout") not in st.session_state:
        saved = self.get_layout()
        if saved is not None:
            st.session_state[self._k("layout")] = self.expand(saved[0])
            st.session_state[self._k("num_experiments")] = len(
                st.session_state[self._k("layout")]
            )
            st.session_state[self._k("side_by_side")] = saved[1]
            st.session_state[self._k("edit_mode")] = False
        else:
            self._reset_to_default()
    # the number of experiments changed -> reset to that count
    elif (
        self._k("num_experiments") in st.session_state
        and len(st.session_state[self._k("layout")])
        != st.session_state[self._k("num_experiments")]
    ):
        self._reset_to_default(st.session_state[self._k("num_experiments")])

    edit_mode = st.session_state[self._k("edit_mode")]
    # re-read: the handlers/reset above may have stored or removed the layout
    saved = self.get_layout()

    # title and setting buttons
    c1, c2, c3, c4, c5 = st.columns([6, 1, 1, 1, 1])
    c1.title(self.title)

    # side-by-side view option for exactly 2 experiments
    if self._k("side_by_side") not in st.session_state:
        st.session_state[self._k("side_by_side")] = False
    show_side_by_side = (
        st.session_state.get(self._k("num_experiments")) == 2
    ) or (not edit_mode and saved is not None and len(saved[0]) == 2)
    if show_side_by_side:
        self._v_space(1, c2)
        st.session_state[self._k("side_by_side")] = c2.checkbox(
            "Side-by-Side View",
            value=st.session_state[self._k("side_by_side")],
            help="If checked, experiments will be shown side-by-side",
            disabled=(not edit_mode),
        )

    # Load existing layout setting file
    self._v_space(1, c3)
    c3.button("Load Setting", key=self._k("load_clicked"))

    # Save current layout setting (JSON download of the trimmed layout)
    self._v_space(1, c4)
    c4.download_button(
        label="Save Setting",
        data=json.dumps(self.trim(st.session_state[self._k("layout")])),
        file_name=self.download_name,
        # "json" is not a MIME type; declare the registered JSON media type.
        mime="application/json",
        disabled=(self.validate() != ""),
    )

    # Reset settings to default
    self._v_space(1, c5)
    c5.button("Reset Setting", key=self._k("reset_clicked"))

    # File uploader, shown when "Load Setting" was clicked
    if st.session_state.get(self._k("load_clicked")):
        st.file_uploader(
            "Choose a json file", type="json", key=self._k("uploaded_json")
        )

    # Main part
    if (not edit_mode) and (saved is not None):
        # saved-mode: read-only view of the stored (internal-name) layout
        for exp_index in range(len(saved[0])):
            layout_per_exp = saved[0][exp_index]
            with st.expander("Experiment #%d" % (exp_index + 1), expanded=True):
                for row in layout_per_exp:
                    st_cols = st.columns(len(row))
                    for col_index, col in enumerate(row):
                        # show the human label for the stored internal name
                        st_cols[col_index].info(
                            self.component_options[
                                self.component_names.index(col)
                            ]
                        )
    else:
        # edit-mode
        st.selectbox(
            "**#Experiments to view at once**",
            list(range(1, self.max_experiments + 1)),
            key=self._k("num_experiments"),
        )
        for exp_index in range(st.session_state[self._k("num_experiments")]):
            with st.expander("Experiment #%d" % (exp_index + 1)):
                self._layout_editor_per_experiment(exp_index)

    # edit/save buttons
    _, edit_btn_col, save_btn_col = st.columns([9, 1, 1])
    edit_btn_col.button("Edit", key=self._k("edit_clicked"), disabled=edit_mode)
    save_btn_col.button(
        "Save", key=self._k("save_clicked"), disabled=(not edit_mode)
    )

    # error/success messages
    if self._k("save_error") in st.session_state and st.session_state.get(
        self._k("save_clicked")
    ):
        error_message = st.session_state[self._k("save_error")]
        if error_message:
            st.error("Error: " + error_message, icon="🚨")
        else:
            st.success("Layouts Saved", icon="✔️")
    if st.session_state.get(self._k("component_error")):
        st.error(
            "Error: " + st.session_state[self._k("component_error")], icon="🚨"
        )
        # one-shot message: clear it once shown
        del st.session_state[self._k("component_error")]

    # tips
    st.info(
        """
**💡 Tips**

- If nothing is set, the default layout will be used in the Viewer

- Don't forget to click "save" on the bottom-right corner to save your setting
"""
    )
+ In addition to the path helpers (``get_files``/``_set_type``/``_set_dir``), this manager + provides a SQLite-indexed results store keyed by ``(dataset_id, name_tag)`` with optimized + storage formats: + - Polars/Pandas DataFrames and LazyFrames: stored as parquet (``.pq``) for performance + - Other data structures: stored as compressed pickle (``.pkl.gz``) + + The store can return loaded frames (pandas by default, polars LazyFrame, or pyarrow + Dataset) OR -- via ``as_path=True`` -- the on-disk parquet PATH, which is exactly what an + OpenMS-Insight component's ``data_path=`` argument expects (subprocess preprocessing + + disk cache). Usage example:: + + from src.workflow.FileManager import FileManager + from openms_insight import Heatmap, StateManager + import polars as pl + + fm = FileManager(workspace_dir, cache_path=workspace_dir / "cache") + + # 1) store a (lazy) frame -> parquet, indexed by (dataset_id, name_tag) + fm.store_data("demo", "peaks", pl.scan_parquet("raw_peaks.parquet")) + + # 2) hand the parquet PATH to an Insight component (subprocess preprocessing + cache) + sm = StateManager(session_key="demo_grid") + Heatmap( + cache_id="demo_peaks_heatmap", + data_path=fm.result_path("demo", "peaks"), # <- the new path API + x_column="rt", y_column="mass", intensity_column="intensity", + cache_path=str(fm.cache_path / "insight"), # keep Insight caches in the workspace + )(state_manager=sm) + Methods: get_files: Returns a list of file paths as strings for the specified files, optionally with new file type and results subdirectory. - collect: Collects all files in a single list (e.g. to pass to tools which can handle multiple input files at once). + store_data: Stores data with automatic format detection (polars/pandas/pickle). + get_results: Retrieves data with proper format restoration (or the parquet path). + result_path: Returns the on-disk parquet path for a single ``(dataset_id, name_tag)``. 
""" def __init__( self, workflow_dir: Path, + cache_path: Path = None, ): """ Initializes the FileManager object with a the current workflow results directory. + + Args: + workflow_dir (Path): The current workflow results directory. + cache_path (Path, optional): Base directory for the results-store cache. Defaults + to ``/cache``. """ self.workflow_dir = workflow_dir + # Setup Caching + self.cache_path = cache_path if cache_path is not None else Path(workflow_dir, "cache") + Path(self.cache_path, 'files').mkdir(parents=True, exist_ok=True) + self._connect_to_sql() + + def _connect_to_sql(self): + self.cache_connection = sqlite3.connect( + Path(self.cache_path, 'cache.db'), isolation_level=None + ) + self.cache_cursor = self.cache_connection.cursor() + self.cache_cursor.execute(""" + CREATE TABLE IF NOT EXISTS stored_data ( + id TEXT PRIMARY KEY + ); + """) + self.cache_cursor.execute(""" + CREATE TABLE IF NOT EXISTS stored_files ( + id TEXT PRIMARY KEY + ); + """) + + # Add display_name column to both tables + self._add_column('stored_data', 'display_name') + self._add_column('stored_files', 'display_name') + + def __getstate__(self): + state = self.__dict__.copy() + del state['cache_connection'] + del state['cache_cursor'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._connect_to_sql() + def get_files( self, files: Union[List[Union[str, Path]], Path, str, List[List[str]]], @@ -177,3 +259,479 @@ def _create_results_sub_dir(self, name: str = "") -> str: path = Path(self.workflow_dir, "results", name) path.mkdir(exist_ok=True) return str(path) + + def _get_column_list(self, table_name: str) -> List[str]: + """ + Get a list of columns in the table. + + Args: + table_name (str): The name of the table. + + Returns: + columns (List): The columns in the table. 
+ """ + self.cache_cursor.execute(f"PRAGMA table_info({table_name});") + return [col[1] for col in self.cache_cursor.fetchall()] + + + def _add_column(self, table_name: str, column_name: str) -> None: + """ + Checks if a column is in the cache table and if it is not adds + it to the table. + + Args: + table_name (str): The name of the table + column_name (str): The name of the column + """ + + # Fetch list of columns + columns = self._get_column_list(table_name) + + # Add column to table if it does not exist + if column_name not in columns: + self.cache_cursor.execute( + f"ALTER TABLE {table_name} ADD COLUMN {column_name} TEXT;" + ) + + def _add_entry(self, table_name: str, dataset_id: str, + column_name: str, path: str) -> None: + """ + Adds an entry to the cache index. + + Args: + table_name (str): The name of the table + dataset_id (str): The name of the dataset the data is + attached to. + column_name (str): The name of the column + path (str): The path to be inserted + """ + + # Ensure column exists + self._add_column(table_name, column_name) + + # Store reference + self.cache_cursor.execute(f""" + INSERT INTO {table_name} (id, {column_name}) + VALUES ("{dataset_id}", "{path}") + ON CONFLICT(id) + DO UPDATE SET {column_name} = excluded.{column_name}; + """) + + def _store_data(self, dataset_id: str, name_tag: str, data, row_group_size=None) -> None: + """ + Stores data as a cached file. Pandas/Polars DataFrames are stored as + parquet files, while all other data structures are stored as + compressed pickle. + Args: + dataset_id (str): The name of the dataset the data is + attached to. + name_tag (str): The name of the associated data structure. + data: Any pickleable data structure. + row_group_size (int, optional): Row group size for parquet files. + If None, the library default is used. + + Returns: + file_path (Path): The file path of the stored file. 
+ """ + + path = Path(self.cache_path, 'files', dataset_id) + path.mkdir(parents=True, exist_ok=True) + + # Polars DataFrames and LazyFrames are stored as parquet + if isinstance(data, (pl.DataFrame, pl.LazyFrame)): + path = Path(path, f"{name_tag}.pq") + if isinstance(data, pl.LazyFrame): + # Keep the streaming sink when no bounded row groups are requested + # (default callers). Only materialize when row_group_size is set, + # since sink_parquet on this polars version rejects the kwarg. + if row_group_size is None: + data.sink_parquet(path) + else: + data.collect().write_parquet(path, row_group_size=row_group_size) + else: + data.write_parquet(path, row_group_size=row_group_size) + return path + # Pandas DataFrames are stored as parquet + elif isinstance(data, pd.DataFrame): + path = Path(path, f"{name_tag}.pq") + with open(path, 'wb') as f: + data.to_parquet(f, row_group_size=row_group_size) + return path + # Other data structures are stored as compressed pickle + else: + path = Path(path, f"{name_tag}.pkl.gz") + with gzip.open(path, 'wb') as f: + pkl.dump(data, f) + return path + + def store_data(self, dataset_id: str, name_tag: str, data, row_group_size=None) -> None: + """ + Stores a given data structure. + + Args: + dataset_id (str): The name of the dataset the data is + attached to. + name_tag (str): The name of the associated data structure. + data: Any pickleable data structure. + row_group_size (int, optional): Row group size for parquet files. + If None, the library default is used. 
@contextmanager
def parquet_sink(self, dataset_id, name_tag):
    """Context manager yielding a temporary parquet path to write into.

    The caller writes to the yielded ``<name_tag>.pq.tmp`` path. On a clean exit
    the file is renamed to ``<cache_path>/files/<dataset_id>/<name_tag>.pq`` and
    registered in the ``stored_data`` index; if anything raises an ``Exception``
    the temporary file is removed and the exception propagates.
    """
    target = Path(self.cache_path, 'files', dataset_id, f"{name_tag}.pq")
    target.parent.mkdir(parents=True, exist_ok=True)
    staging = target.with_suffix('.pq.tmp')
    try:
        yield staging
        staging.replace(target)
        index_value = target.relative_to(self.cache_path)
        self._add_entry('stored_data', dataset_id, name_tag, index_value)
    except Exception:
        staging.unlink(missing_ok=True)
        raise
def get_results(self, dataset_id, name_tags, partial=False,
                use_pyarrow=False, use_polars=False, as_path=False):
    """
    Retrieve stored results for a dataset, keyed by name_tag.

    Files stored via ``store_file`` are returned as ``Path`` objects. Data stored via
    ``store_data`` is returned according to the format flags below. For parquet (``.pq``)
    columns the precedence is ``as_path > use_pyarrow > use_polars > pandas``; pickle
    (``.pkl.gz``) columns always load and return the unpickled object (there is no path
    contract for non-tabular data).

    Args:
        dataset_id (str): The dataset whose results to fetch.
        name_tags (list): The name_tags to fetch. Tags that are not a column of
            either index table are ignored.
        partial (bool): If True, silently skip missing tags instead of raising KeyError.
        use_pyarrow (bool): For ``.pq`` columns, return a ``pyarrow.dataset.Dataset``.
        use_polars (bool): For ``.pq`` columns, return a polars ``LazyFrame``
            (``scan_parquet``).
        as_path (bool): For ``.pq`` columns, return the ``str`` path to the parquet file
            (NOT a loaded frame). Takes precedence over ``use_pyarrow``/``use_polars``.

    Returns:
        dict: Mapping of name_tag -> result (Path / DataFrame / LazyFrame / Dataset / str).

    Raises:
        KeyError: If ``partial`` is False and a requested tag has no value for
            ``dataset_id`` (including when the dataset has no row at all).
    """

    def _fetch_row(table, columns):
        # dataset_id is bound as a parameter (no quoting/injection problems).
        # A dataset without a row in this table is treated as "every column is
        # NULL" so it goes through the same partial/KeyError handling below
        # instead of crashing on zip(columns, None).
        self.cache_cursor.execute(
            f"SELECT {', '.join(columns)} FROM {table} WHERE id = ?;",
            (str(dataset_id),),
        )
        row = self.cache_cursor.fetchone()
        return row if row is not None else (None,) * len(columns)

    results = {}

    # Retrieve files as Path objects
    file_columns = [
        c for c in self._get_column_list('stored_files') if c in name_tags
    ]
    if len(file_columns) > 0:
        for c, r in zip(file_columns, _fetch_row('stored_files', file_columns)):
            if r is None:
                if partial:
                    continue
                raise KeyError(f"{c} does not exist for {dataset_id}")
            results[c] = Path(self.cache_path, r)

    # Retrieve data as Python objects
    data_columns = [
        c for c in self._get_column_list('stored_data') if c in name_tags
    ]
    if len(data_columns) > 0:
        for c, r in zip(data_columns, _fetch_row('stored_data', data_columns)):
            if r is None:
                if partial:
                    continue
                raise KeyError(f"{c} does not exist for {dataset_id}")
            file_path = Path(self.cache_path, r)
            if file_path.suffix == '.pq':
                if as_path:
                    # Return the parquet path for Insight data_path=
                    data = str(file_path)
                elif use_pyarrow:
                    data = ds.dataset(file_path, format="parquet")
                elif use_polars:
                    # Load as polars LazyFrame
                    data = pl.scan_parquet(file_path)
                else:
                    # Default to pandas for backward compatibility
                    data = pd.read_parquet(file_path)
            else:
                # NOTE: unpickling is only safe for cache files this application
                # wrote itself; never point cache_path at untrusted data.
                with gzip.open(file_path, 'rb') as f:
                    data = pkl.load(f)
            results[c] = data
    return results
def result_exists(self, dataset_id, name_tag):
    """
    Check whether a value is stored for ``(dataset_id, name_tag)``.

    Both index tables are consulted: a tag may be a column of ``stored_data``
    (because some other dataset stored data under it) while this dataset holds
    it as a file in ``stored_files``, so checking only the first table that has
    the column would miss it.

    Args:
        dataset_id (str): The dataset id.
        name_tag (str): The name_tag to look for.

    Returns:
        bool: True if a non-NULL entry exists in either table, else False.
    """
    for table in ('stored_data', 'stored_files'):
        # name_tag is only formatted into the SQL after it has been matched
        # against the table's real column names.
        if name_tag not in self._get_column_list(table):
            continue

        # Check if field value is set (dataset_id bound as a parameter)
        self.cache_cursor.execute(
            f"""
            SELECT 1
            FROM {table}
            WHERE id = ? AND {name_tag} IS NOT NULL
            LIMIT 1
            """,
            (str(dataset_id),),
        )
        if self.cache_cursor.fetchone() is not None:
            return True
    return False
def clear_cache(self):
    """
    Remove every cached file and reset both index tables to an empty state.

    The tables are recreated WITH the ``display_name`` column that
    ``get_display_name`` / ``rename_dataset`` query; recreating them with only
    ``id`` would make those methods fail with "no such column" until the
    manager is re-initialized. A missing ``files`` directory is tolerated.
    """
    files_dir = Path(self.cache_path, 'files')
    if files_dir.exists():
        shutil.rmtree(files_dir)
    files_dir.mkdir(parents=True, exist_ok=True)

    for table in ('stored_data', 'stored_files'):
        self.cache_cursor.execute(f"DROP TABLE IF EXISTS {table};")
        self.cache_cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {table} (
                id TEXT PRIMARY KEY,
                display_name TEXT
            );
        """)
+ """ + # Validation: non-empty name + if not new_display_name or not new_display_name.strip(): + return False + + # Validation: reasonable length limit (100 characters) + if len(new_display_name) > 100: + return False + + # Trim whitespace + new_display_name = new_display_name.strip() + + # Update display_name in stored_data table if entry exists + self.cache_cursor.execute(""" + UPDATE stored_data + SET display_name = ? + WHERE id = ? + """, (new_display_name, dataset_id)) + + # Update display_name in stored_files table if entry exists + self.cache_cursor.execute(""" + UPDATE stored_files + SET display_name = ? + WHERE id = ? + """, (new_display_name, dataset_id)) + + return True diff --git a/tests/test_view_grid.py b/tests/test_view_grid.py new file mode 100644 index 00000000..9b1b35aa --- /dev/null +++ b/tests/test_view_grid.py @@ -0,0 +1,433 @@ +"""Tests for the reusable linked-grid template stack (src/view/grid.py + helpers). + +Headless / no-browser: mirrors how OpenMS-Insight's own tests construct components +(``mock_streamlit`` patching ``st.session_state`` + a temp cache dir). Components are built +from the committed example parquet via ``data_path=`` and exercised through +``_prepare_vue_data`` / ``_get_component_args``. The grid / show_linked_grid / LayoutManager +are driven under a minimal mocked Streamlit context (each component's ``__call__`` is patched +to run the data path without the Vue bridge, since AppTest cannot spawn the preprocessing +subprocess). 
class MockSessionState(dict):
    """Stand-in for ``st.session_state``: a dict whose keys are also readable and writable as attributes."""

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails; a missing key is
        # reported as AttributeError (chained to the KeyError) so getattr()
        # defaults and hasattr() behave as they do for ordinary attributes.
        try:
            value = self[name]
        except KeyError as missing:
            raise AttributeError(name) from missing
        return value

    def __setattr__(self, name, value):
        # Attribute assignment is stored as a dict item, never in __dict__.
        self.__setitem__(name, value)
def _patch_component_calls(stack, fake_call):
    """Install the Streamlit stubs and replace ``__call__`` on each concrete component class.

    Every patch is entered on ``stack`` so that all of them are undone when the
    caller's ``ExitStack`` closes. ``__call__`` is patched class by class
    because the component classes do not all share one implementation.
    """
    from openms_insight import Heatmap, LinePlot, SequenceView, Table

    # Module-level Streamlit functions replaced by the fakes defined in this file.
    streamlit_stubs = (
        ("streamlit.columns", _columns),
        ("streamlit.container", _container),
        ("streamlit.warning", _noop),
        ("streamlit.divider", _noop),
    )
    for target, replacement in streamlit_stubs:
        stack.enter_context(patch(target, replacement))

    for component_cls in (Table, LinePlot, Heatmap, SequenceView):
        stack.enter_context(patch.object(component_cls, "__call__", fake_call))
def test_render_linked_grid_clamps_to_three_columns(mock_streamlit, cache_dir):
    """A four-cell row requests ``MAX_COLUMNS`` columns and renders ``MAX_COLUMNS`` cells."""
    from src.view.grid import MAX_COLUMNS, render_linked_grid

    components = _build_components(cache_dir)
    # Each builder returns its pre-built component; the default argument binds
    # the current component to the lambda at definition time.
    builders = {name: (lambda c=component: c) for name, component in components.items()}
    rendered_keys = []

    def record_key(self, key=None, state_manager=None, height=None):
        # Stand-in for the component __call__: just note which cell was drawn.
        rendered_keys.append(key)

    wide_row = ["spectra_table", "spectrum_plot", "peak_map", "sequence_view"]
    _COLS_RECORD.clear()
    with ExitStack() as stack:
        _patch_component_calls(stack, record_key)
        render_linked_grid([wide_row], builders, state_key="big", grid_key="b")

    assert _COLS_RECORD == [MAX_COLUMNS]
    assert len(rendered_keys) == MAX_COLUMNS
def test_show_linked_grid_one_state_manager_per_experiment(mock_streamlit, cache_dir):
    """Two experiments yield two distinct StateManager session keys, side by side or stacked."""
    from src.common.common import show_linked_grid

    components = _build_components(cache_dir)
    builders = {name: (lambda c=component: c) for name, component in components.items()}
    experiments = [[["spectra_table"]], [["peak_map"]]]

    def _make_fc(sink):
        def fc(self, key=None, state_manager=None, height=None):
            # Collect the StateManager's stable session key rather than its
            # id(): an unretained StateManager may be garbage-collected and
            # its id() reused within the same run, which would make an
            # id-based set flaky.
            sink.append(state_manager._session_key)

        return fc

    for side_by_side in (True, False):
        session_keys = []
        with ExitStack() as stack:
            _patch_component_calls(stack, _make_fc(session_keys))
            show_linked_grid(experiments, builders, tool="demo", side_by_side=side_by_side)
        # One StateManager per experiment means exactly two distinct keys.
        distinct_keys = set(session_keys)
        assert len(distinct_keys) == 2, f"side_by_side={side_by_side}: {session_keys}"
        assert distinct_keys == {"demo__exp0", "demo__exp1"}, session_keys
needed)" dependency validation + idempotent add_options + lm.add_options(["Sequence view (Spectrum table needed)"], ["seqdep"]) + before = len(lm.component_names) + lm.add_options(["Sequence view (Spectrum table needed)"], ["seqdep"]) + assert len(lm.component_names) == before + assert lm.validate([[["Sequence view (Spectrum table needed)"]]]) != "" + assert ( + lm.validate([[["Spectrum table", "Sequence view (Spectrum table needed)"]]]) + == "" + ) + + +def test_layout_manager_persistence_roundtrip(mock_streamlit): + from src.view.grid import LayoutManager + from src.workflow.FileManager import FileManager + + ws = Path(tempfile.mkdtemp(prefix="tmpl_lm_ws_")) + fm = FileManager(ws, cache_path=ws / "cache") + lm = LayoutManager( + ["Spectrum table"], + ["spectra_table"], + store=fm, + layout_id="demo_layout", + session_prefix="t2", + ) + assert lm.get_layout() is None + trimmed = [[["spectra_table"]]] + lm.set_layout(trimmed, side_by_side=True) + got = lm.get_layout() + assert got == (trimmed, True) + + +class _DummyStore: + """In-memory Store protocol impl for trim/expand/validate tests (no disk).""" + + def __init__(self): + self._d = {} + + def get_results(self, dataset_id, name_tags): + return {t: self._d[(dataset_id, t)] for t in name_tags} + + def store_data(self, dataset_id, name_tag, data): + self._d[(dataset_id, name_tag)] = data + + def result_exists(self, dataset_id, name_tag): + return (dataset_id, name_tag) in self._d + + def remove_results(self, dataset_id): + self._d = {k: v for k, v in self._d.items() if k[0] != dataset_id} + + +def test_store_protocol_satisfied_by_filemanager(): + """FileManager structurally satisfies the grid.Store protocol.""" + from src.view.grid import Store + from src.workflow.FileManager import FileManager + + ws = Path(tempfile.mkdtemp(prefix="tmpl_store_")) + fm = FileManager(ws, cache_path=ws / "cache") + assert isinstance(fm, Store) + + +def test_component_data_path_construction(): + """The demo page's ``data_path=`` path 
works end-to-end (subprocess preprocessing). + + Run in a clean interpreter via ``subprocess`` so it exercises the exact production path + (Insight spawns a preprocessing subprocess for ``data_path=``) without being affected by + the Streamlit ``AppTest`` GUI tests that may share this pytest session. + """ + import subprocess + import sys + import textwrap + + script = textwrap.dedent( + f""" + import tempfile + from pathlib import Path + from unittest.mock import patch + + class S(dict): + def __getattr__(s, k): + try: return s[k] + except KeyError as e: raise AttributeError(k) from e + def __setattr__(s, k, v): s[k] = v + + DATA = Path({str(DATA)!r}) + with patch("streamlit.session_state", S()): + from openms_insight import Table, LinePlot, Heatmap, SequenceView + cache = tempfile.mkdtemp() + Table(cache_id="dp_t", data_path=str(DATA/"spectra.parquet"), cache_path=cache, + interactivity={{"spectrum": "scan_id"}}, index_field="scan_id", default_row=0) + LinePlot(cache_id="dp_lp", data_path=str(DATA/"peaks.parquet"), cache_path=cache, + filters={{"spectrum": "scan_id"}}, interactivity={{"peak": "peak_id"}}, + x_column="mass", y_column="intensity") + Heatmap(cache_id="dp_h", data_path=str(DATA/"heat.parquet"), cache_path=cache, + x_column="rt", y_column="mass", intensity_column="intensity") + SequenceView(cache_id="dp_sv", sequence_data_path=str(DATA/"sequences.parquet"), + peaks_data_path=str(DATA/"peaks.parquet"), cache_path=cache, + filters={{"spectrum": "scan_id"}}, deconvolved=True) + print("DATA_PATH_OK") + """ + ) + proc = subprocess.run( + [sys.executable, "-c", script], + cwd=str(ROOT), + capture_output=True, + text=True, + timeout=180, + ) + assert "DATA_PATH_OK" in proc.stdout, ( + f"data_path construction failed:\nstdout={proc.stdout}\nstderr={proc.stderr[-2000:]}" + )