From 8f5fae64c33b0db5b0ce0b4ca361e0e4447f0098 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 12:04:43 +0000 Subject: [PATCH 1/9] Phase 3 Stage A: reusable visualization template (grid SSOT + FileManager + demo) - src/view/grid.py (NEW): tool-agnostic render_linked_grid(layout, builders, state_key, ...) + parameterized LayoutManager (folds the two FLASH*LayoutManager classes: <=3 cols, N rows/experiments, side-by-side, validation, JSON save/load). builders = comp_name -> zero-arg factory -> BaseComponent over one shared StateManager. No data/hashing (moved into Insight components). AST-verified: zero MS/FLASH identifiers. - src/view.py -> src/view/__init__.py (package; preserves raw_data_viewer's `from src import view`). - src/workflow/FileManager.py: ported the (dataset_id, name) results store + get_results(as_path=True) + result_path() returning the .pq path to feed Insight data_path=. - src/common/common.py: show_linked_grid(layout, builders, *, tool, side_by_side) one-liner (one StateManager per experiment). - content/visualization_template.py (NEW) + app.py registration: Table<->LinePlot<->Heatmap<-> SequenceView linked-grid demo + LayoutManager + side-by-side over example-data/insight/ parquet. - tests/test_view_grid.py (+13): construct-smoke for all 4 components, render_linked_grid, show_linked_grid, LayoutManager round-trip, and the data_path subprocess path. Verified: pytest 74 passed/2 skipped; parse + construct-smoke green. 
https://claude.ai/code/session_017kD4FyAsNvW6VFTZwVvSne --- app.py | 4 + content/visualization_template.py | 111 +++++ example-data/insight/_make_example.py | 185 ++++++++ example-data/insight/heat.parquet | Bin 0 -> 4938 bytes example-data/insight/peaks.parquet | Bin 0 -> 5327 bytes example-data/insight/sequences.parquet | Bin 0 -> 1332 bytes example-data/insight/spectra.parquet | Bin 0 -> 2133 bytes src/common/common.py | 53 +++ src/{view.py => view/__init__.py} | 0 src/view/grid.py | 595 +++++++++++++++++++++++++ src/workflow/FileManager.py | 564 ++++++++++++++++++++++- tests/test_view_grid.py | 418 +++++++++++++++++ 12 files changed, 1927 insertions(+), 3 deletions(-) create mode 100644 content/visualization_template.py create mode 100644 example-data/insight/_make_example.py create mode 100644 example-data/insight/heat.parquet create mode 100644 example-data/insight/peaks.parquet create mode 100644 example-data/insight/sequences.parquet create mode 100644 example-data/insight/spectra.parquet rename src/{view.py => view/__init__.py} (100%) create mode 100644 src/view/grid.py create mode 100644 tests/test_view_grid.py diff --git a/app.py b/app.py index d9b3d73d0..64b5b70c3 100644 --- a/app.py +++ b/app.py @@ -32,6 +32,10 @@ st.Page(Path("content", "run_example_workflow.py"), title="Run Workflow", icon="⚙️"), st.Page(Path("content", "download_section.py"), title="Download Results", icon="⬇️"), ], + "Visualization Template": [ + st.Page(Path("content", "visualization_template.py"), + title="Linked Grid Demo", icon="🔗"), + ], "Others Topics": [ st.Page(Path("content", "simple_workflow.py"), title="Simple Workflow", icon="⚙️"), st.Page(Path("content", "run_subprocess.py"), title="Run Subprocess", icon="🖥️"), diff --git a/content/visualization_template.py b/content/visualization_template.py new file mode 100644 index 000000000..fa45cb500 --- /dev/null +++ b/content/visualization_template.py @@ -0,0 +1,111 @@ +"""Linked Grid Demo — a self-contained showcase of the 
reusable OpenMS-Insight grid. + +Exercises the full visualization stack on small example parquet under +``example-data/insight/``: a ``Table <-> LinePlot <-> Heatmap <-> SequenceView`` linked grid, +the :class:`~src.view.grid.LayoutManager` (edit/save/upload the layout), and the +multi-experiment + side-by-side wrapping owned by +:func:`~src.common.common.show_linked_grid`. + +The four panels cross-link through one shared StateManager per experiment: +- click a row in the Spectrum table -> sets ``spectrum`` (= ``scan_id``) +- the Spectrum plot, Peak map and Sequence view all filter by ``spectrum`` +- clicking a peak (in the plot / heatmap / sequence view) sets ``peak`` (= ``peak_id``) +""" + +from pathlib import Path + +import streamlit as st + +from src.common.common import page_setup, save_params, show_linked_grid +from src.workflow.FileManager import FileManager +from src.view.grid import LayoutManager +from openms_insight import Table, LinePlot, Heatmap, SequenceView + +params = page_setup() + +st.title("🔗 Linked Grid Demo") +st.markdown( + "A demo of the reusable OpenMS-Insight linked grid built on the streamlit-template " + "`src/view/grid.py` module. Click a row in the **Spectrum table** to drive the linked " + "**Spectrum plot**, **Peak map** and **Sequence view**; click a peak to cross-highlight it." +) + +# Example fixtures shipped with the template. +DATA = Path("example-data", "insight") + +# Per-workspace results store + a dedicated Insight cache dir inside the workspace. +fm = FileManager( + st.session_state.workspace, cache_path=Path(st.session_state.workspace, "cache") +) +cache = str(Path(st.session_state.workspace, "cache", "insight")) + +# Component vocabulary for the LayoutManager (human label <-> internal name). 
+OPTIONS = ["Spectrum table", "Spectrum plot", "Peak map", "Sequence view"] +NAMES = ["spectra_table", "spectrum_plot", "peak_map", "sequence_view"] + + +def builders(): + """Return the comp_name -> () -> BaseComponent factory map for one experiment.""" + return { + "spectra_table": lambda: Table( + cache_id="demo_spectra", + data_path=str(DATA / "spectra.parquet"), + cache_path=cache, + interactivity={"spectrum": "scan_id"}, + index_field="scan_id", + default_row=0, + title="Spectrum Table", + ), + "spectrum_plot": lambda: LinePlot( + cache_id="demo_spectrum_plot", + data_path=str(DATA / "peaks.parquet"), + cache_path=cache, + filters={"spectrum": "scan_id"}, + interactivity={"peak": "peak_id"}, + x_column="mass", + y_column="intensity", + highlight_column="is_annotated", + annotation_column="ion_label", + title="MS/MS Spectrum", + ), + "peak_map": lambda: Heatmap( + cache_id="demo_peak_map", + data_path=str(DATA / "heat.parquet"), + cache_path=cache, + x_column="rt", + y_column="mass", + intensity_column="intensity", + interactivity={"spectrum": "scan_id", "peak": "peak_id"}, + title="Peak Map", + ), + "sequence_view": lambda: SequenceView( + cache_id="demo_seq", + sequence_data_path=str(DATA / "sequences.parquet"), + peaks_data_path=str(DATA / "peaks.parquet"), + cache_path=cache, + filters={"spectrum": "scan_id"}, + interactivity={"peak": "peak_id"}, + deconvolved=True, + title="Fragment Coverage", + ), + } + + +# Default layout used when nothing is saved (one experiment, 2x2 grid). 
+DEFAULT_LAYOUT = [["spectra_table", "spectrum_plot"], ["peak_map", "sequence_view"]] + +tab_view, tab_layout = st.tabs(["Viewer", "Layout Manager"]) + +lm = LayoutManager( + OPTIONS, NAMES, store=fm, layout_id="demo_layout", session_prefix="demo" +) + +with tab_layout: + lm.render() + +with tab_view: + saved = lm.get_layout() + layout, side_by_side = saved if saved else ([DEFAULT_LAYOUT], False) + show_linked_grid(layout, builders(), tool="demo", side_by_side=side_by_side) + +save_params(params) diff --git a/example-data/insight/_make_example.py b/example-data/insight/_make_example.py new file mode 100644 index 000000000..de6a83cba --- /dev/null +++ b/example-data/insight/_make_example.py @@ -0,0 +1,185 @@ +"""Generate small parquet fixtures for the Linked Grid Demo page. + +Run once to (re)create the ``.parquet`` files committed alongside this script:: + + python example-data/insight/_make_example.py + +The fixtures are intentionally tiny and hand-built, shaped like OpenMS-Insight's own test +fixtures (a few scans / peaks / a sequence), but with enough rows to exercise the +Table <-> LinePlot <-> Heatmap <-> SequenceView cross-linking on the demo page: + +- ``spectra.parquet`` master table : scan_id, rt, ms_level, precursor_mz, n_peaks +- ``peaks.parquet`` per-peak long : scan_id, peak_id, mass, intensity, is_annotated, ion_label +- ``heat.parquet`` peak map : scan_id, rt, mass, intensity, peak_id +- ``sequences.parquet`` per-scan seq : scan_id, sequence, precursor_charge + +IDs are stable and dataset-scoped: ``scan_id`` 0..N-1, ``peak_id`` globally unique across all +scans so a peak click selects exactly one peak. The same ``peak_id`` values are reused in +``heat.parquet`` so a heatmap click cross-links to the spectrum/sequence panels. +""" + +import math +from pathlib import Path + +import polars as pl + +HERE = Path(__file__).resolve().parent + +# A handful of one-letter sequences (only a few scans carry a sequence, per the plan). 
+SEQUENCES = { + 1: ("PEPTIDEK", 2), + 3: ("ACDEFGHIK", 3), + 7: ("MNQRSTVWYK", 2), +} + +N_SCANS = 20 +PEAKS_PER_SCAN = 20 # -> 400 peak rows total + + +def _amino_acid_masses(): + # Monoisotopic residue masses (Da) for fragment-like peak generation. + return { + "A": 71.03711, "C": 103.00919, "D": 115.02694, "E": 129.04259, + "F": 147.06841, "G": 57.02146, "H": 137.05891, "I": 113.08406, + "K": 128.09496, "L": 113.08406, "M": 131.04049, "N": 114.04293, + "P": 97.05276, "Q": 128.05858, "R": 156.10111, "S": 87.03203, + "T": 101.04768, "V": 99.06841, "W": 186.07931, "Y": 163.06333, + } + + +def build(): + aa = _amino_acid_masses() + + spectra_rows = [] + peak_rows = [] + heat_rows = [] + seq_rows = [] + + peak_id = 0 # globally unique across scans (the cross-link click target) + + for scan_id in range(N_SCANS): + rt = round(1.0 + scan_id * 0.5, 4) + ms_level = 1 if scan_id % 4 == 0 else 2 + precursor_mz = round(400.0 + scan_id * 13.37, 4) + + # Build this scan's peaks. If the scan has a sequence, lay down b-ion-like + # neutral masses for the first few peaks so the SequenceView fragment matching + # has something to annotate; fill the rest with deterministic synthetic peaks. 
+ seq_info = SEQUENCES.get(scan_id) + annotated_masses = [] + annotated_labels = [] + if seq_info is not None: + sequence, charge = seq_info + seq_rows.append( + {"scan_id": scan_id, "sequence": sequence, "precursor_charge": charge} + ) + running = 0.0 + for i, ch in enumerate(sequence[:-1]): + running += aa.get(ch, 110.0) + # b-ion neutral mass approximation (sum of residues; close enough for a fixture) + annotated_masses.append(round(running + 1.00794, 4)) + annotated_labels.append(f"b{i + 1}") + + for j in range(PEAKS_PER_SCAN): + if j < len(annotated_masses): + mass = annotated_masses[j] + intensity = round(5000.0 - j * 137.0 + scan_id * 11.0, 2) + is_annotated = 1 + ion_label = annotated_labels[j] + else: + # deterministic synthetic peak + mass = round(150.0 + j * 97.3 + scan_id * 1.7, 4) + intensity = round( + 1000.0 * (1.0 + math.sin(j * 0.7 + scan_id * 0.3)) + 200.0, 2 + ) + is_annotated = 0 + ion_label = "" + + peak_rows.append( + { + "scan_id": scan_id, + "peak_id": peak_id, + "mass": mass, + "intensity": max(intensity, 1.0), + "is_annotated": is_annotated, + "ion_label": ion_label, + } + ) + # Peak map row: reuse peak_id + scan_id so a heatmap click cross-links. 
+ heat_rows.append( + { + "scan_id": scan_id, + "rt": rt, + "mass": mass, + "intensity": max(intensity, 1.0), + "peak_id": peak_id, + } + ) + peak_id += 1 + + spectra_rows.append( + { + "scan_id": scan_id, + "rt": rt, + "ms_level": ms_level, + "precursor_mz": precursor_mz, + "n_peaks": PEAKS_PER_SCAN, + } + ) + + spectra = pl.DataFrame( + spectra_rows, + schema={ + "scan_id": pl.Int64, + "rt": pl.Float64, + "ms_level": pl.Int64, + "precursor_mz": pl.Float64, + "n_peaks": pl.Int64, + }, + ) + peaks = pl.DataFrame( + peak_rows, + schema={ + "scan_id": pl.Int64, + "peak_id": pl.Int64, + "mass": pl.Float64, + "intensity": pl.Float64, + "is_annotated": pl.Int64, + "ion_label": pl.Utf8, + }, + ) + heat = pl.DataFrame( + heat_rows, + schema={ + "scan_id": pl.Int64, + "rt": pl.Float64, + "mass": pl.Float64, + "intensity": pl.Float64, + "peak_id": pl.Int64, + }, + ) + sequences = pl.DataFrame( + seq_rows, + schema={ + "scan_id": pl.Int64, + "sequence": pl.Utf8, + "precursor_charge": pl.Int64, + }, + ) + + spectra.write_parquet(HERE / "spectra.parquet") + peaks.write_parquet(HERE / "peaks.parquet") + heat.write_parquet(HERE / "heat.parquet") + sequences.write_parquet(HERE / "sequences.parquet") + + print( + f"Wrote fixtures to {HERE}:\n" + f" spectra.parquet {spectra.height} rows\n" + f" peaks.parquet {peaks.height} rows\n" + f" heat.parquet {heat.height} rows\n" + f" sequences.parquet {sequences.height} rows" + ) + + +if __name__ == "__main__": + build() diff --git a/example-data/insight/heat.parquet b/example-data/insight/heat.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3b4026dcfb4519fc7173bd6ac8144e7954ddeef3 GIT binary patch literal 4938 zcmb7I3p|wD7XRj9yx(RT3`tC*l)*_7^^L@k3Ux~8!9<4fnh`<_hGD#Cyk^XpLGnoQ zNI7vXd4;4-Z@p8;sW>W5rIT~rZ%FmKzjJ@Td-lxiz4l&v{r6h?+k1U$jT_0s9F0VG z!O;n>XaX7l079$TUj~v^81MoP05AX!AOIvF1E2s|Kn{=x6aYm)2~Y-9098N@PzN*s zO-M*uH1|MrVQ2t-7)>`tOHgPvGzyKvSpfvOr72js#iTiph!?6C=*l&ssuHHIzfY=iAHL#2T*F-SXlt<)WPu&5w_B;`#aqh 
z&5)@5of#cs$c(jHaT8;35Ad5A>kWWjvn4ya{6GO8Dz8E_j)4JXaidJ|Maonfcy$;m zvl*ZzA{&*Lm9Ne#D$p&G6yy1)1w>)1$VLn@FM!EoDV|`>`m39EQfS~Wh7Ud^*hhi} zlqoNJUM#pv9}~3W1{0(LVM(kY7Wmvsr|SVd9mP@0R?g$*V|k(i!=mb9Q+~5xgRnzH z5}Q)?hIJjg754{oK1H6b#nI2!%H!wj^G+6+6g3oE^4kQPg*_s&7+f;REVL5V@ys9m zh&oi_x`GZ)Fj~={&11nm9;+l7O-W#2%WFpK@f48!ELLk>Hye~4(RRP@l4+W)n!Q8l>y%Gn>QVT3y(9w5f^ExjA~aaG*>#hzdffF9qmE$1O`}NhYm(NpK(@;svvW3` z4ud;y?SD)WgNjCkgsh~);6CrB&dKgj@Zx>WU;cj4;K7T|N^mX{{Cs6?$%xt`)bNBT-kyVjj1!O!H$5LyUWQD^JUzk+=QsG$qL`U35Xzw! zL2n{IC1Cs|=LW-$vvNw@??BQM;UD6FvQ$_9_VDXbFXH2q*sOfEkaNoOkspK+J(nz7 zmX+dvX976EDG=A5JM@A&1%V(+(^blNKl#5yMeCj@si?nj2?BZ_3zVcPl;&!c3EuiE zMXZRnI#JblvG*Dcf=NP^WT}@H>6S_G0fY$KX!jE>KM&k^$e2w)6s9YeaIvK#!)l*^ ztr4Elv=d!dZa;k;JI_)m%+)I4>zAH1Y2XDMj9^5kkKB9y`)m@5o2gb>ee-y+98y1_nX?I*p5w{3cE3WrvxJst{lScM$(`8;mG>U!CeKeaa)!8Tp> zgzFea_WIO_4xoQE|5a7( zC?4J8vN83rNny^)+oH9PC)23!-dOz_b=yOMb~1=7Jes-gzGLDVv)~(o(dOT6+e|LR zyh{st8#ecWn)1hF$S^zLPUO_oLq}|VXUKbYYmB$*zivkz?AUfOW4`c1$ne&=uWaKH}+U;PaijMHjxdUjn120&$8qa5^rM1u7t zpovdz_xsPq;0FRkWN^Uc5KvoMMVEz)6a=qJ24fw0Aav^+oUBA3uSFrLdA2ERyzxNe0ONW_Sus+gQ?4rl^;hyh zyN^i|zQN?kot#I-pgTqa_PmFd=a&8^iIsqp?vQFBe2|=y{UAD)3i|w(07fwK2TSk1 zk?f;Mh;#J%{4M^oy8U5#cRtqmNgaJO=P78hjJq5 zY*67~(Jry7X*sA>(>o$?&TEOwg;QH%M{;A%9s5aAAVB3I;{fX6IFr0)L8s)*pR3FL z#3zbQvww~-40T~~3s#mvkhz;I*}Ee;L)%%m3f#-e)~SnIK1;My@8rXngVfJay;+*U z{*ucxlINwoeEYXQE!OXIkD2Y5Nc{!Iq_}#nbZi{lnI#E-M6?gEhXmH!f8- z5@g-rD=dz9s%l8oozKpx{yMMKi_qPhvX}i!(Y@qShp86j!2~1tK|jWZjG)aQfn6`K zmZl`a`Fu@Zo!x0`n`Aa*_&oAcu6=oav=KR_w%s|hqf3Jo%^UFc(mn6^*~;|NRR&8& z<cY7tz zQoZ=VCY$qPe;9jgDRuwVH6(t6t&b5dr^>GJApV$x-NjoIzx%_p`n3(*m{(^44+v6H zCkMWEsp!WH$|5QVt@fU^^r-Ysi=6U5LqQ@e+jYK$pyFVv?M#1g1#tf+-N0 zz}(Gi>opi9-AZ_3;;SnMr|HEPhmF^TQLlL2pcge~8Jiw@HMF;xURdX9ym}XP$b(BS zs8BK{YyI+vSeWVHO%fTIOHpW|v|s7f8M*P!PmD;XKgzr4J8 zIwCiAc%}ZEz{@r_B68?iD{)U=_FFYaWCyvf)VUVeZ^4bo@>8O$-f7O+MH|!L#q=7g zn5S=CqV>rkT;6DeR8v*2p2UmQOxKS;kya*hx%Qr9Fjf#&p3|h|Kv9aUW~1 z#gEODeW+h$_nXPJnC=c6*Izd(^VTUn~a1f&y+r%#BaDzdzI!lBe+wK 
zUteu7S|3RF`OysH^tQbS%DS>IxN3`Jr)0lL`E+VoFBb*Vtl6Tz3`~* zlhKi5r)Gq|;*MH(j$RE3m=W9-2K*o%9rnl1l=R^Oh>4?D4vbG1wF>=B4~-7}cxt+! z3g=JQGCH^;V47PfJYuwbbYL5PIxiJ>#9;p3WxMfrImd()+>?9#>rcJQ^2brIo%j0I z2E5}Sl5%TSAm?Du-sq{nT<*O2T7TktW4_KZB=mMEl110aF1%g7S8s!Yq)wWhzkcd3${yBtiG)3!gEp1+dJCG77$nZaA-Rzc?5 zS8)p0cs2))c2{K*WP+6i*Fftqh^24sqE4X;bQ~*t<^cAO)=9iM|-Ms#aozg zc^!wEyz;H-8YW)7HTjiJd4|pchgxpc_7V%~{oYz$^)88hhR(LSg4*3R9<^S5b%m$* zHTn$-ww*3IT0Pq zu#U+YzcZa)Oz~v*~2{?z3u1g zM_U4K)*ZNV{>+05;m?Oe+b=XcYLA(D{piYtvrjsi2t|F@w#H{Y>6&W}4!1Quzx*wq z3-|X8{+iF_*kJCfdrA!kIXk!52d-mHcUuPPF-?Nu8 zizPM`{Cv+|ifxwIkf;2fy>Kj{3SuuQ_=SzbRKU>vFh~tR8;5y_K-a;)(63SGS+y_p z5EebH@`X;bKqt#dX*67$Mn=-Z!XnWM02*lw|C;IoXa%STrHYHhLKL@1e)I8PB))7W z0OiKoIBg$%G7|S~r~hf@j5%kA#$x_$gWe3_(@?mjDDwZ$Suw*>fR0U4fU#v?0scuC zig_W0_;UfGW7SuPZ-)P$FE`Ab6sOHrs>;)?-)+i2K7N>j%@EQDDH7)T9T>FJcsb07 zm6r60W0goA9=rD0FerikA->oSgqgGu-R(&v(l+QvvW|lKG44_l>Msm!Cy^MWtt1lE zx|7zpLHy`wDc_kyI{Ibak5KEdL)r4<)}4|9{Kx_$I%guakKo#d5(MQnb|G<5J{B%@!6RZF5){fvPpq{fc%b*&ii2 zEliP`Ms^`WMl&Fe*)HkCMWOvtlH?9;8??x_qmU`m)?K=f&}?Z1m5tK%AZe&9r~-+) uNO7T-EFGj+>7yr!|@AqS-!H|?`lrmV7sNcvkq(W_ZRUVBD<2B>8$a5IR+jtFwLGnoQ zNZGPWNT}5I);qPfVpVLat^NDo-=OyO|9t*?hu=N-o_oIMocp`y%(>%4aWTSTu)Sz( zyd#!`g&>I3Y52R2uoDG6LqiY>LPKH@1`>xPAW29Hl7?g;Sx63&hZG=1NC{GgR3KF# zgf!LNU@Zy@VZE?S1FTR2tAv%nN)XH-lGM@?ywt){(Nm;|oP+`jAV?^I5+^|<6bVfd zBVkD5BngrvNs1&*k|D{GZXu{k~&_J^6NJCiM|^nOZWe!hp|a#Sq?;)0LY zrq{rG0Wla>9#?`)8OXXD;cj520Ul zzs4+X&(bqE@M2_7JF}?CQE$yo`iM&bv#>@^kF@2*@D3)ES9WO?(U3mu;KD2@$Xc~( z`HLZ2V`e_rah0wjeaKpcnU^fLN@xE0h0QZzxiObkYQOToU~w%hhncmK@aXxVS$kM^ zfa6MyEB=GV1z}k}a=vSJ7;$$p##MOH1G)-E>073rD6pdYHAcy4hKwgNyQ2G4a>(rE zQ;#J~qR+|fASWv_9-~#Gd(jHy#QC|{ca2x##%C+%npa!>tbZlCZ?WvrlM?;fm%O6e=H=J&~%W1|sAX9YhI4w?6iT@Lb_ExRf3`>JZ} z5{)=pdY<4%P8hqmZ(^pnQ$RB~FgEhd(V4{C}u$p+26Cv;=Ji0Szdh|^ejo^##xpRJ<-y5!2Qf|Wv%qP^7S6a%?m)mc?GMHeh z$Jbbf0fA5~iEWbnODN=`7DC`}q2P59a#L7}1!)?Dmcqlr#3V5(6_RVOFeyb+Ntap* zS0|^+;*I_9^j4>E6m_iA@st{NhAPRW!S!5CYUVPs&rm6)HZ5Dzoca9Dxmr%{ayVH^ z-TFv+{z_^A$*u2525+@f4V7f4i)Ho 
z3^wyS4qdIWFSk8j)){!Goqlidc=_p&$9)-^b}fSMSkJFDdR%C!I2-kPe7M~1MCJFf z?`K}zyKthaCjkn@g%eMbcL9Y-&ho3mctAMU8XQU#~TXgQp0__3DCTeENEWaIUEr&@;! zht8&($6PxC9*lGzyM4l|pR2X)RP$Je|MjMQ7f&6(cRKXxNX53(Ef3B{&%S(c@$`vD zJ&9tn+KyeVPx{kU*Y3a6)%Ntlmoybh^A7x+rgAKBca@1J<%##jM0x6;4g@$mImTF_ zlIZL}pVO%5E%s3d>QbM zy7xZBTZh9szSHj3ycl?=HZEZMwM3W>g(R^9*zi*`joy3AbP29fwtQYeKAu-ms9RiL zV!&@N+aTz!pi~+7?g{CQxDoqn;(UrUM~$nUt(M2n*XA85)Gux+G2wTWZ5H%bP^;i2 zmkfhzQQc2y@H>eErH(6@@Fc4f`_U){-tDqln9-I5`**x#b$*)yQ=i1Bt?%Q&lB4R* zcO5bfvNiLB`FgyjLUM6ui3Pu>%tkO$v9pTr+aEF)AseTZq`_XrG30K{*3GNWH{i7u zniY4KQ22vo+XPoD_EcT<{ScxYNsQA?%@xR+1OI*Q2dxsxKHmbC>%&%_j=pf5u)8U62TW zyf{6))gcAeWkko+G1K5A%=uiS%nW$h*yy|K9=YJnR9;?VQDAl!9^7{Io4Z~=1yr#P z?g;gbjE=KSa$%$I*l*heN()=af3%0@9PAbFolmsw3@e=+($)adAl;c0NbI zJ?i?v2Vg{fN|vn5N}=7Fgbs5Hs~S%pct)QFAY$d|3YEO?Xm2EplIphPg&D4)pWYKF#xYp^sK7Ra>i(gX7(!Kav%r8T!-#{2mwKb;8?BZ?7yT94H*CAUHPF&w zXXSITS{3>RKh)LRR*mg;*qG|2UzD@*X2rUPQ)%=!ugrdmyy+stI1*4GIFz~mu5H3v z!@z50W9`3KcIlsvev=mTI^@$FJ>}P_pi3OTTjA5w_igdbJwb0dozWhOKirJm-~IKO zjQOIupqZb8v$x-VIFoY2lJ%f7D2`tY-k1kdOi%vq+0>3_V#9hC39>Re*RV3`dXR)V z9uwrvVl7lg6UMmrO2lTIB~pt4KOpEYNn|14lqvjEzPZ@}@=cd^>ZCJ7hk>Z5X5z?d z5umShKIfYPI99F2?#WdF><8L}N;T{PL;|&iu)b$*-`kHR@Ei#c8C-Zd0P0E$U&v2J zf$)(~E1BEplVF+KwV_sGJh6(s_-fchUg+0d0+Zyp3n>c|UKY4M8IG~#!652e?93`) zUWZIl`}?+#iPn9sL#(SAl_eOVMzv-Iti6&4TfIw~^bVwk@8CWtft_(exc@CsUr_$D zFh&SZIRn#z_%Jmk`(9KG9rpY=9u8yW50~G5CEUvplKH4?>!=obeZr}HNFe@R*w~>{ z(yox&_NkS1;{>6YShcQtV`W?6?F4peU@$j~$$@3|7w@c6G^mEv8U{wo?DINebJ6sU zn9a(+zfD7Zk3n1dzF#OgOv3 zdVi8&BPra3oN*tztj2y^P4WtP#%nF~D^YX0|^63Rpw%Jbn36^lO zx%{SMN54gTTN|mZ3a4?xTwdKc2RC@|8d?w)%zOQ9BY-O4g@-D5r!b*HIKhk82Yr^NfxLvu6ISh5W5+@#+FE`=YDd}*3! 
zay_VPE^Lw`Nmq(vaN90I-yq>A9Phz5h~lC_@wyLyxM@6 z=AM*2oF9wtB$wMvcgPRNuR`zlVQt6=*!&LK`3!GjKp~yVSM}D|mA0-;d_#uk10VUu z=cmS6G1E%h?8CczRoGFyArE)WQ??(?49;CDcUm!K{Nrx6)@IubI<$Sy)?z*OMaG+mDI2zp1Z6g_ZC0|0 zE!C$}A891=S2GRWyEA2P-buX-+9%AJ3542s)e z>A8xKQ)kt>pBQ0db>_z8FEn)4pt_Dz;^kTYePt=BQ$vQv_e}7W#{|Z+Hzl1=OTjKp z4-TgVv*_X9evbGV1!5NIOT_;rjKNJv2W#vIy2X&zQAz^TVq2_HA&?iySnI!3uuIIp zRj@kvbI?)&iy}c{2{0r!pNE1}P;+9?=W0lf&N|>792^qv9Zp-kx&45`jiZR&=ET<< zL?zKbX#kdnW8EFGYM%|Z&_h98h{52V3i%irn|x(zw!vI9*eHoX!@Vd848co(A8^6} z>-lFV&^{aSh^Xies;B??4vGP0uh>eH1eSx21B@3(dTFrqIq*Sl@!}~>4+%cN@b;lG z7O$RYTsoRKxQTep0&!pwU0oTi{$-N@xpWQ2Lh3jO`Wt(x68s~3sVZ6m0*U*N?4`WC z#0GEqk8H$6qyhHQp8kOirh%o469ilh3Ok4bc0uX{+(~(CveY8ojKjWHTBNf`*cpXI zy4xIEytICB6Sh!9W6|nyR17mDBpfRT-nkz7^Cf+8v{(y6SQ)?*eMc>jUrzAC!ap4o zu=8N`mtEI`90|ZsaO+nms56Mk7{ce{^{;;uz~#YMJg&VYMy&|{G7fO*6d*ml1*;3;y6REjtr4YgIp%7}NyE!gTwHeUwP5-B(}KM5r5Hn` zeRas1LZN&O{wd~>U>xm?kYFsD+D4(UC|fBM&^uGsIstxE6vDTsP!27wb3=M(@S&I> z)HY<^nv9S;kq5${*pLkYHWe8n+C=i;=_1kv2pe<66u<&X1Hsrj)W_J=&&SS|?!H%~ zFA(7arGJ)p7R?~Dz6gH-O=S0WWYXJpho9SC(+D3E7ypBHdk|m>#Wn`4H1|bF(aHrs ztSKTjU~)SX;RBZ`l&uJ$9U}tBM)(>t$TVbZFOpw!9!!YHFY-=H@@^FCLx>7Q$U5B3 zbw>;!C=|#4AuobK_`8ArjiRL@Q3_(Jb(H&FTc)cqBapIFB>xq%U|~NN^@9#5+lA-@ zz00CqqJ0qU|DX2rkOK~bPacLG0U F{uhMSa~l8v literal 0 HcmV?d00001 diff --git a/example-data/insight/sequences.parquet b/example-data/insight/sequences.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2551213e6833cfdd06804b6684c52b3b588cca18 GIT binary patch literal 1332 zcmbtU&rcIU6n?X9w_DOfHOyo;*#ieyNi+ef7)sDM&|*tVX)6KRkkGm|ph1dV`FYbn zgqw*+LewJ% z*ct~tVVP0SZEUSQe#GNK7spJTF=y2={>X*uioAYV-L(`019?lC(qb;1$;WUmXVRJ3 z``KK+4}7_W{QEYh4XFJU;x2Yt5V&W@vPD9QIO^NQw zhpGp7vVbn&PQ38DfiX7LHgSsECHb4iyk2!ob(dj6^U@LM+dgWD&I#Q zuxxP};)k8-d4%uMlxtQY8*aB+&$*6|V@T^h4B&B$x=kN~f=&69^4noS=$36wIj=Rl z+mU~KVB$@T_lxgQeixjRh|T}w*2Tvd7vi14?}2j~y-@UF~gsYwWt<}`c&TUq2E*-eZ)WJ%!*s89~ zZPgSqGeP-T;<(IL@foB{D;yydO}eHJYbwdL_Aetp`b`UR@T!=P2wNP4Jf9qY)K 
zeVu6qN|~bbj;cWIiluXOKJ-e8n=EcT+)QoPq`U*BUTSSN+MYFqZ_oe03H~5U_<#8m D=ZyA3 literal 0 HcmV?d00001 diff --git a/example-data/insight/spectra.parquet b/example-data/insight/spectra.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5979131b321c6f90ba6525ff012e52c29baaf555 GIT binary patch literal 2133 zcmb7GZD>o)gl}I%(av1ZA&qWJkS3Km;sY2PIvHg_CoO@ug~71wro}cb(Qv_vfuU>R-8K zYUC>hUDsw#1oz~f=8dzyy)Vz~A|9(6T$ZV(CIs4f%~(}J3)-8Ug{L7v>Vu@N6&H!d zpX03CgM;D&%P>fIBd1bl>e3nwMLphb$*JOASt&2_sE?-m67k7+!iJ_8x0TS6EA+2Z zF!{82Hj|6$MkYP?E+!k2r;1EwsLZ_ZjRVfP3sh!k$Bpfu{qb9dew}Ra+`F8mtKr@= zw{pj6%foEd)y@-i?n@lIKB0d`(NLSFXmv69`!qds>+f*y_n*?Ek(uvi=+AgKW9DXa zwCZZ}!|Y0~`y?Iu4IP!|{!mu-bvZIx##RlmNpb9JI0|`Ry#>PmUsmmQ2 zipFry0y*JgSKMQL$$0-qX#wZ7V*H?lp7=sHeWEEcLTN0kQne&P0Nl#gMZhvpP@;Z( zplDf5OV#qqpj!65R4rVPq?kh2QH04rcDXk=CHms})U5HIQ^ zuNC%Bd}M-eiE1TU9I5f~3F1M{tJUJG2jamKJ}6sUyu`MALq`ja1wz3HTLG&#bXNo|&4Z2Bo0OO?bh9*9Uw-Xf9> z^>6M*8xJ_9ulyX-mGX5=&wLS$H4f=M4`kfM-{z2ReCxprVFgSd>*zSr`9?Z65+Ch1 zm$e!`YFUIa)`VZSZxYWRNBI`dLi9FcY1YUXu2HtL9raUFTz`nM;ey?JxE}20hogLI z;S<}-CmxtJeuz%{q%jJ_HexSae-QQc+>)oj5yNlfSl6CNqPue<%ia?HTxcVb{sIOH z54ZC82oX{6sU?!>>1?ZwjkQh267>^tk*@I_G^#<+1yCG6j;OsLh$gNV>Fq2+G0@dE zG|>EhT~DW-Y>L;Pu|4@LlCma|mY;WW{-zGG$S`Xb#qiuMhj$~gG>?CT8`(~RFOeTf zY8u9*e2v<;WWai!54~~NRARL}E`|^+$LiWf1`_qj9>yBLbhVEs`crAEHvmxjf!i8D IJN}{g50e|B9smFU literal 0 HcmV?d00001 diff --git a/src/common/common.py b/src/common/common.py index 643a22471..fc28a4d2b 100644 --- a/src/common/common.py +++ b/src/common/common.py @@ -932,6 +932,59 @@ def show_fig( ) +def show_linked_grid( + layout, + builders, + *, + tool, + side_by_side=False, + grid_key="linked_grid", + height=None, + column_heights=None, +): + """Render an N-experiment linked grid of OpenMS-Insight components. + + Thin one-liner over ``src.view.grid.render_linked_grid`` that owns the multi-experiment + + side-by-side page concern, so any viewer collapses to a single call. 
``layout`` is + ``List[experiment]``; each experiment is the nested rows list consumed by + ``render_linked_grid``. One independent ``StateManager`` is created per experiment + (``session_key=f"{tool}__exp{i}"``) so experiments never cross-link. When exactly two + experiments and ``side_by_side=True``, render them in two ``st.columns``; otherwise stack + them with ``st.divider()`` between experiments. + + Args: + layout (list): ``List[experiment]``; experiment = nested rows list (``List[List[str]]``). + builders (dict): comp_name -> () -> BaseComponent factory map (see grid.BuilderMap). + tool (str): namespace used to build per-experiment StateManager session_keys. + side_by_side (bool): when exactly 2 experiments, render them side by side. + grid_key (str): prefix for per-cell component keys. + height (int, optional): default px height for every component. + column_heights (dict, optional): comp_name -> height override. + """ + from src.view.grid import render_linked_grid + + def _one(exp_idx, exp_layout, container): + with container: + render_linked_grid( + exp_layout, + builders, + state_key=f"{tool}__exp{exp_idx}", + grid_key=f"{grid_key}_{exp_idx}", + height=height, + column_heights=column_heights, + ) + + if len(layout) == 2 and side_by_side: + c1, c2 = st.columns(2) + _one(0, layout[0], c1) + _one(1, layout[1], c2) + else: + for i, exp_layout in enumerate(layout): + if i: + st.divider() + _one(i, exp_layout, st.container()) + + def reset_directory(path: Path) -> None: """ Remove the given directory and re-create it. diff --git a/src/view.py b/src/view/__init__.py similarity index 100% rename from src/view.py rename to src/view/__init__.py diff --git a/src/view/grid.py b/src/view/grid.py new file mode 100644 index 000000000..27c447a82 --- /dev/null +++ b/src/view/grid.py @@ -0,0 +1,595 @@ +"""Reusable, tool-agnostic linked-grid rendering for OpenMS-Insight components. 
+ +This module is the *single source of truth* for the cross-linked component grid used +by OpenMS-ecosystem viewers (FLASHDeconv, FLASHTnT, FLASHQuant, ...). It is deliberately +free of any tool/MS-specific knowledge (it knows nothing about scans, masses, proteins, +heatmaps, or any particular dataset): everything domain-specific is supplied by the caller +through ``builders`` (a ``comp_name -> () -> BaseComponent`` map) and a ``layout`` (a nested +list of component names). Because it is tool-agnostic it can be frozen and vendored into +downstream apps byte-for-byte unchanged. + +It distills two pieces of prior FLASHApp logic: + +* ``render.py::render_grid`` inner loop -> :func:`render_linked_grid`. Per row it opens + ``st.columns`` (clamped to <=3, the oracle invariant) and, per cell, constructs the + Insight component via the registered builder and renders it against one *shared* + ``StateManager`` so every panel cross-links. All data loading / hashing / filtering that + the oracle did Python-side now lives inside each Insight component (``filters`` / + ``interactivity`` + its own preprocessing), so the grid is pure layout + a shared + StateManager. +* The two near-identical ``FLASH*LayoutManager`` page modules -> :class:`LayoutManager`, + parameterized by the bits that differed between them (component vocabulary, storage keys, + session namespace). The UI, JSON format, ``<=3`` column cap, ``"(... needed)"`` dependency + validation, side-by-side option, and JSON download/upload behavior are preserved verbatim. + +The data store is accessed only through the small :class:`Store` ``Protocol`` so the template +never imports any concrete FileManager from a downstream app. 
+""" + +from __future__ import annotations + +import json +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Protocol, + Sequence, + Tuple, + runtime_checkable, +) + +import streamlit as st +from openms_insight import BaseComponent, StateManager + +# A layout is the trimmed nested list the LayoutManager persists: +# List[row], row = List[comp_name:str], <=3 entries per row. (one experiment) +Layout = List[List[str]] +# `builders` maps a comp_name -> a zero-arg factory returning a *constructed* BaseComponent. +# Zero-arg so the grid can lazily build only the panels a given layout references, and so the +# factory can close over the caller's (dataset, file_manager, cache_path) context. +BuilderMap = Dict[str, Callable[[], BaseComponent]] + +# Maximum number of columns per row. This is the oracle's hard cap, surfaced as a module +# constant so render_linked_grid and the default LayoutManager agree on the same value. +MAX_COLUMNS = 3 + + +def render_linked_grid( + layout: Layout, + builders: BuilderMap, + state_key: str, + *, + grid_key: str = "linked_grid", + height: Optional[int] = None, + column_heights: Optional[Dict[str, int]] = None, + on_missing: str = "warn", # "warn" | "error" | "skip" +) -> StateManager: + """Render one experiment's linked grid. + + For each row in ``layout``, open ``st.columns(len(row))`` (clamped to <=3, mirroring the + oracle's hard cap) and, in each column, call ``builders[comp_name]()`` to construct the + Insight component, then render it with a SHARED ``StateManager(session_key=state_key)`` and a + per-cell Streamlit key ``f"{grid_key}_{r}_{c}"``. The shared StateManager is what cross-links + every panel in the grid: clicks (``interactivity``) write selections, other panels read them + (``filters``). Returns the StateManager so callers can introspect/seed selections. + + Args: + layout: trimmed nested list (rows of comp_names) for ONE experiment. + builders: comp_name -> () -> BaseComponent (factory; see BuilderMap). 
+ state_key: StateManager session_key. MUST be unique per (tool, experiment) so two + experiments shown together do not share selections. ``StateManager`` stores its + state under ``st.session_state[state_key]``, so distinct ``state_key`` values are + fully independent. Baking a dataset identifier into ``state_key`` (and into each + builder's ``cache_id``) makes switching datasets yield a fresh StateManager + fresh + component caches automatically -- no manual reset needed here. + grid_key: prefix for per-cell component keys. + height: default px height passed to every comp's ``__call__`` (None -> Insight default). + column_heights: optional comp_name -> height override (e.g. heatmaps taller). + on_missing: behavior when a comp_name has no builder: + ``"warn"`` (st.warning + skip, default), ``"error"`` (raise KeyError), or + ``"skip"`` (silently skip). + + Returns: + The shared ``StateManager`` used for this experiment's grid. + """ + if on_missing not in ("warn", "error", "skip"): + raise ValueError( + f"on_missing must be 'warn', 'error' or 'skip', got {on_missing!r}" + ) + + sm = StateManager(session_key=state_key) + heights = column_heights or {} + for r, row in enumerate(layout): + # <=3 columns per row, the oracle invariant. Any extra cells in a row are ignored. + cols = st.columns(min(len(row), MAX_COLUMNS)) + for c, comp_name in enumerate(row[:MAX_COLUMNS]): + factory = builders.get(comp_name) + if factory is None: + if on_missing == "error": + raise KeyError( + f"No builder registered for component '{comp_name}'" + ) + if on_missing == "warn": + cols[c].warning(f"Unknown component: {comp_name}") + continue + h = heights.get(comp_name, height) + with cols[c]: + factory()(key=f"{grid_key}_{r}_{c}", state_manager=sm, height=h) + return sm + + +@runtime_checkable +class Store(Protocol): + """Minimal results-store interface the LayoutManager persists its layout through. 
+ + Any object implementing these four calls satisfies the protocol -- in particular the + template/FLASHApp ``FileManager``. The template never imports a concrete FileManager; + it only relies on this structural protocol. + """ + + def get_results(self, dataset_id: str, name_tags: list) -> dict: + ... + + def store_data(self, dataset_id: str, name_tag: str, data) -> None: + ... + + def result_exists(self, dataset_id: str, name_tag: str) -> bool: + ... + + def remove_results(self, dataset_id: str) -> None: + ... + + +class LayoutManager: + """Layout-editor UI + persistence for a linked grid (distillation of both FLASH managers). + + Owns the full "Layout Manager" page: an experiment-count selector, per-experiment + expanders with add-column(+)/add-row(+)/delete(x) controls, the ``<=max_columns`` cap, a + side-by-side checkbox (offered only when exactly two experiments), Save/Edit/Reset buttons, + JSON download (disabled while the layout is invalid) + JSON upload, and success/error + toasts. It is parameterized by the things that differed between the two FLASH managers: + the component vocabulary (``component_options``/``component_names``), the FileManager + storage keys (``layout_id``/``layout_tag``), and the session-state namespace + (``session_prefix``). + + The persisted JSON is the *trimmed internal-name* nested list (so old saved layouts keep + loading), stored alongside the ``side_by_side`` flag exactly as the oracle did. + """ + + def __init__( + self, + component_options: List[str], # human labels, e.g. "Scan table" + component_names: List[str], # parallel internal names, e.g. 
"scan_table" + *, + store: Store, # object with get_results/store_data/result_exists/remove_results + layout_id: str = "layout", # store dataset_id for the saved layout + layout_tag: str = "layout", # store name_tag for the saved layout + max_columns: int = MAX_COLUMNS, + max_experiments: int = 5, + session_prefix: str = "lm", # namespaces all st.session_state keys + download_name: str = "layout_settings.json", + title: str = "Layout Manager", + ): + if len(component_options) != len(component_names): + raise ValueError( + "component_options and component_names must be the same length " + f"({len(component_options)} != {len(component_names)})" + ) + # Copy so add_options() does not mutate the caller's lists. + self.component_options = list(component_options) + self.component_names = list(component_names) + self.store = store + self.layout_id = layout_id + self.layout_tag = layout_tag + self.max_columns = max_columns + self.max_experiments = max_experiments + self.session_prefix = session_prefix + self.download_name = download_name + self.title = title + + # ------------------------------------------------------------------ # + # session-state key helpers (namespaced by session_prefix) + # ------------------------------------------------------------------ # + def _k(self, name: str) -> str: + """Build a namespaced session_state key.""" + return f"{self.session_prefix}__{name}" + + # ------------------------------------------------------------------ # + # persistence (replaces set_layout/get_layout in both managers) + # ------------------------------------------------------------------ # + def get_layout(self) -> Optional[Tuple[list, bool]]: + """Return ``(layout_per_experiment, side_by_side)`` or ``None`` if unset. + + ``layout_per_experiment``: ``List[experiment]``, experiment = ``List[row]``, + row = ``List[comp_name]`` (trimmed internal names). 
def validate(self, layout: Optional[list] = None) -> str:
    """Check a label-form layout and return an error message (``''`` when valid).

    Args:
        layout: Nested list ``experiment -> row -> label``. When ``None``, the layout
            stored under the namespaced ``"layout"`` session-state key is validated.

    Returns:
        ``""`` if the layout is acceptable, ``"Empty input"`` if it holds no non-empty
        cell, or ``"Required component is missing"`` if a ``"... (X needed)"`` label has
        no component whose label starts with ``X`` in the same experiment.
    """
    layout_setting = (
        layout if layout is not None else st.session_state.get(self._k("layout"))
    )
    if not layout_setting:
        return "Empty input"

    # A layout made only of empty cells counts as empty, too.
    if not any(col for exp in layout_setting for row in exp for col in row):
        return "Empty input"

    # Dependencies are resolved per experiment, never across experiments.
    for exp in layout_setting:
        submitted_components = [col for row in exp for col in row if col]
        for comp in submitted_components:
            if "needed" not in comp:
                continue
            parts = comp.split("(")
            if len(parts) < 2:
                # "needed" is just part of the label text, not a "(X needed)" marker;
                # indexing parts[1] here used to raise IndexError.
                continue
            required = parts[1].split("needed")[0].rstrip()
            if not any(
                submitted.startswith(required)
                for submitted in submitted_components
            ):
                return "Required component is missing"
    return ""
def _reset_to_default(self, num_of_exp: int = 1) -> None:
    """Replace the edit-mode layout with blank experiments and re-enter edit mode.

    Writes the namespaced ``layout``, ``num_experiments`` and ``edit_mode`` session keys
    and, when the store reports a saved layout, removes the ``layout_id`` dataset.

    Args:
        num_of_exp: Number of blank experiments to create (at least one is always made).
    """
    # nesting: experiment -> row -> column; each cell holds a component label
    blank_layout = [[[""]]] + [[[""]] for _ in range(1, num_of_exp)]

    state = st.session_state
    state[self._k("layout")] = blank_layout
    state[self._k("num_experiments")] = num_of_exp

    has_saved_layout = self.store.result_exists(self.layout_id, self.layout_tag)
    if has_saved_layout:
        self.store.remove_results(self.layout_id)

    state[self._k("edit_mode")] = True
def _handle_setting_buttons(self) -> None:
    """Apply a pending reset and any uploaded layout JSON to the session state.

    Reads the namespaced ``reset_clicked`` and ``uploaded_json`` session keys. An
    uploaded file is expected to hold the trimmed (internal-name) nested list; it is
    expanded to labels and validated before it replaces the edit-mode layout. Any
    problem with the upload is reported through the ``component_error`` session key
    instead of raising.
    """
    if st.session_state.get(self._k("reset_clicked")):
        self._reset_to_default()

    uploaded = st.session_state.get(self._k("uploaded_json"))
    if uploaded is None:
        return

    try:
        uploaded_layout = json.load(uploaded)
        # uploaded layout is trimmed (internal names); expand to labels for validation/edit
        expanded = self.expand(uploaded_layout)
    except (ValueError, TypeError):
        # ValueError: malformed JSON / undecodable bytes / unknown component name.
        # TypeError: JSON that is not a nested list (e.g. a bare number).
        st.session_state[self._k("component_error")] = "Invalid layout file"
        return

    validated = self.validate(expanded)
    if validated != "":
        st.session_state[self._k("component_error")] = validated
    else:
        st.session_state[self._k("layout")] = expanded
        st.session_state[self._k("num_experiments")] = len(expanded)
+ # the number of experiments changed -> reset to that count + elif ( + self._k("num_experiments") in st.session_state + and len(st.session_state[self._k("layout")]) + != st.session_state[self._k("num_experiments")] + ): + self._reset_to_default(st.session_state[self._k("num_experiments")]) + + edit_mode = st.session_state[self._k("edit_mode")] + saved = self.get_layout() + + # title and setting buttons + c1, c2, c3, c4, c5 = st.columns([6, 1, 1, 1, 1]) + c1.title(self.title) + + # side-by-side view option for exactly 2 experiments + if self._k("side_by_side") not in st.session_state: + st.session_state[self._k("side_by_side")] = False + show_side_by_side = ( + st.session_state.get(self._k("num_experiments")) == 2 + ) or (not edit_mode and saved is not None and len(saved[0]) == 2) + if show_side_by_side: + self._v_space(1, c2) + st.session_state[self._k("side_by_side")] = c2.checkbox( + "Side-by-Side View", + value=st.session_state[self._k("side_by_side")], + help="If checked, experiments will be shown side-by-side", + disabled=(not edit_mode), + ) + + # Load existing layout setting file + self._v_space(1, c3) + c3.button("Load Setting", key=self._k("load_clicked")) + + # Save current layout setting (JSON download of the trimmed layout) + self._v_space(1, c4) + c4.download_button( + label="Save Setting", + data=json.dumps(self.trim(st.session_state[self._k("layout")])), + file_name=self.download_name, + mime="json", + disabled=(self.validate() != ""), + ) + + # Reset settings to default + self._v_space(1, c5) + c5.button("Reset Setting", key=self._k("reset_clicked")) + + # File uploader, shown when "Load Setting" was clicked + if st.session_state.get(self._k("load_clicked")): + st.file_uploader( + "Choose a json file", type="json", key=self._k("uploaded_json") + ) + + # Main part + if (not edit_mode) and (saved is not None): + # saved-mode + for exp_index in range(len(saved[0])): + layout_per_exp = saved[0][exp_index] + with st.expander("Experiment #%d" % 
(exp_index + 1), expanded=True): + for row in layout_per_exp: + st_cols = st.columns(len(row)) + for col_index, col in enumerate(row): + st_cols[col_index].info( + self.component_options[ + self.component_names.index(col) + ] + ) + else: + # edit-mode + st.selectbox( + "**#Experiments to view at once**", + list(range(1, self.max_experiments + 1)), + key=self._k("num_experiments"), + ) + for exp_index in range(st.session_state[self._k("num_experiments")]): + with st.expander("Experiment #%d" % (exp_index + 1)): + self._layout_editor_per_experiment(exp_index) + + # edit/save buttons + _, edit_btn_col, save_btn_col = st.columns([9, 1, 1]) + edit_btn_col.button("Edit", key=self._k("edit_clicked"), disabled=edit_mode) + save_btn_col.button( + "Save", key=self._k("save_clicked"), disabled=(not edit_mode) + ) + + # error/success messages + if self._k("save_error") in st.session_state and st.session_state.get( + self._k("save_clicked") + ): + error_message = st.session_state[self._k("save_error")] + if error_message: + st.error("Error: " + error_message, icon="🚨") + else: + st.success("Layouts Saved", icon="✔️") + if st.session_state.get(self._k("component_error")): + st.error( + "Error: " + st.session_state[self._k("component_error")], icon="🚨" + ) + del st.session_state[self._k("component_error")] + + # tips + st.info( + """ +**💡 Tips** + +- If nothing is set, the default layout will be used in the Viewer + +- Don't forget to click "save" on the bottom-right corner to save your setting +""" + ) + + # ------------------------------------------------------------------ # + # internal: vertical spacing helper (self-contained; no external import) + # ------------------------------------------------------------------ # + @staticmethod + def _v_space(n: int, col=None) -> None: + """Insert ``n`` blank lines (markdown ``#``) for vertical alignment of widgets.""" + target = col if col is not None else st + for _ in range(n): + target.markdown("#") diff --git 
a/src/workflow/FileManager.py b/src/workflow/FileManager.py index e49ef3bdb..eb23dd578 100644 --- a/src/workflow/FileManager.py +++ b/src/workflow/FileManager.py @@ -1,7 +1,17 @@ -from pathlib import Path +import gzip +import shutil import string import random -import shutil +import sqlite3 + +import pandas as pd +import polars as pl +import pickle as pkl +import pyarrow.dataset as ds + +from contextlib import contextmanager +from io import BytesIO +from pathlib import Path from typing import Union, List class FileManager: @@ -11,20 +21,92 @@ class FileManager: to be flexible for handling both individual files and lists of files, with integration into a Streamlit workflow. + In addition to the path helpers (``get_files``/``_set_type``/``_set_dir``), this manager + provides a SQLite-indexed results store keyed by ``(dataset_id, name_tag)`` with optimized + storage formats: + - Polars/Pandas DataFrames and LazyFrames: stored as parquet (``.pq``) for performance + - Other data structures: stored as compressed pickle (``.pkl.gz``) + + The store can return loaded frames (pandas by default, polars LazyFrame, or pyarrow + Dataset) OR -- via ``as_path=True`` -- the on-disk parquet PATH, which is exactly what an + OpenMS-Insight component's ``data_path=`` argument expects (subprocess preprocessing + + disk cache). 
Usage example:: + + from src.workflow.FileManager import FileManager + from openms_insight import Heatmap, StateManager + import polars as pl + + fm = FileManager(workspace_dir, cache_path=workspace_dir / "cache") + + # 1) store a (lazy) frame -> parquet, indexed by (dataset_id, name_tag) + fm.store_data("demo", "peaks", pl.scan_parquet("raw_peaks.parquet")) + + # 2) hand the parquet PATH to an Insight component (subprocess preprocessing + cache) + sm = StateManager(session_key="demo_grid") + Heatmap( + cache_id="demo_peaks_heatmap", + data_path=fm.result_path("demo", "peaks"), # <- the new path API + x_column="rt", y_column="mass", intensity_column="intensity", + cache_path=str(fm.cache_path / "insight"), # keep Insight caches in the workspace + )(state_manager=sm) + Methods: get_files: Returns a list of file paths as strings for the specified files, optionally with new file type and results subdirectory. - collect: Collects all files in a single list (e.g. to pass to tools which can handle multiple input files at once). + store_data: Stores data with automatic format detection (polars/pandas/pickle). + get_results: Retrieves data with proper format restoration (or the parquet path). + result_path: Returns the on-disk parquet path for a single ``(dataset_id, name_tag)``. """ def __init__( self, workflow_dir: Path, + cache_path: Path = None, ): """ Initializes the FileManager object with a the current workflow results directory. + + Args: + workflow_dir (Path): The current workflow results directory. + cache_path (Path, optional): Base directory for the results-store cache. Defaults + to ``/cache``. 
""" self.workflow_dir = workflow_dir + # Setup Caching + self.cache_path = cache_path if cache_path is not None else Path(workflow_dir, "cache") + Path(self.cache_path, 'files').mkdir(parents=True, exist_ok=True) + self._connect_to_sql() + + def _connect_to_sql(self): + self.cache_connection = sqlite3.connect( + Path(self.cache_path, 'cache.db'), isolation_level=None + ) + self.cache_cursor = self.cache_connection.cursor() + self.cache_cursor.execute(""" + CREATE TABLE IF NOT EXISTS stored_data ( + id TEXT PRIMARY KEY + ); + """) + self.cache_cursor.execute(""" + CREATE TABLE IF NOT EXISTS stored_files ( + id TEXT PRIMARY KEY + ); + """) + + # Add display_name column to both tables + self._add_column('stored_data', 'display_name') + self._add_column('stored_files', 'display_name') + + def __getstate__(self): + state = self.__dict__.copy() + del state['cache_connection'] + del state['cache_cursor'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._connect_to_sql() + def get_files( self, files: Union[List[Union[str, Path]], Path, str, List[List[str]]], @@ -177,3 +259,479 @@ def _create_results_sub_dir(self, name: str = "") -> str: path = Path(self.workflow_dir, "results", name) path.mkdir(exist_ok=True) return str(path) + + def _get_column_list(self, table_name: str) -> List[str]: + """ + Get a list of columns in the table. + + Args: + table_name (str): The name of the table. + + Returns: + columns (List): The columns in the table. + """ + self.cache_cursor.execute(f"PRAGMA table_info({table_name});") + return [col[1] for col in self.cache_cursor.fetchall()] + + + def _add_column(self, table_name: str, column_name: str) -> None: + """ + Checks if a column is in the cache table and if it is not adds + it to the table. 
def _add_entry(self, table_name: str, dataset_id: str,
               column_name: str, path: str) -> None:
    """
    Insert or update one entry of the cache index (upsert keyed on ``id``).

    Args:
        table_name (str): The name of the table.
        dataset_id (str): The name of the dataset the data is attached to.
        column_name (str): The name of the column (created if it does not exist).
        path (str): The path to be stored; path-like objects are converted with ``str``.
    """

    # Ensure column exists
    self._add_column(table_name, column_name)

    # Table/column names cannot be bound as parameters and stay interpolated; the
    # values are bound so quotes in dataset_id or path cannot break the statement.
    self.cache_cursor.execute(
        f"""
        INSERT INTO {table_name} (id, {column_name})
        VALUES (?, ?)
        ON CONFLICT(id)
        DO UPDATE SET {column_name} = excluded.{column_name};
        """,
        (dataset_id, str(path)),
    )
Only materialize when row_group_size is set, + # since sink_parquet on this polars version rejects the kwarg. + if row_group_size is None: + data.sink_parquet(path) + else: + data.collect().write_parquet(path, row_group_size=row_group_size) + else: + data.write_parquet(path, row_group_size=row_group_size) + return path + # Pandas DataFrames are stored as parquet + elif isinstance(data, pd.DataFrame): + path = Path(path, f"{name_tag}.pq") + with open(path, 'wb') as f: + data.to_parquet(f, row_group_size=row_group_size) + return path + # Other data structures are stored as compressed pickle + else: + path = Path(path, f"{name_tag}.pkl.gz") + with gzip.open(path, 'wb') as f: + pkl.dump(data, f) + return path + + def store_data(self, dataset_id: str, name_tag: str, data, row_group_size=None) -> None: + """ + Stores a given data structure. + + Args: + dataset_id (str): The name of the dataset the data is + attached to. + name_tag (str): The name of the associated data structure. + data: Any pickleable data structure. + row_group_size (int, optional): Row group size for parquet files. + If None, the library default is used. 
@contextmanager
def parquet_sink(self, dataset_id, name_tag):
    """Yield a temporary parquet path and publish it on clean exit.

    The caller writes to the yielded ``<name_tag>.pq.tmp`` path. When the ``with`` body
    finishes without raising, the file is renamed to ``<name_tag>.pq`` and registered in
    the ``stored_data`` index. If anything is raised, the temporary file is removed and
    the exception propagates.

    Args:
        dataset_id (str): The dataset the parquet file belongs to.
        name_tag (str): The name of the stored data structure.

    Yields:
        Path: The temporary path to write the parquet file to.
    """
    final_path = Path(self.cache_path, 'files', dataset_id, f"{name_tag}.pq")
    final_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = final_path.with_suffix('.pq.tmp')
    try:
        yield tmp_path
        tmp_path.replace(final_path)
        self._add_entry('stored_data', dataset_id, name_tag,
                        final_path.relative_to(self.cache_path))
    except BaseException:
        # BaseException (not just Exception) so that e.g. KeyboardInterrupt raised
        # inside the with-body does not leave a stale .tmp file behind.
        tmp_path.unlink(missing_ok=True)
        raise
def get_results(self, dataset_id, name_tags, partial=False,
                use_pyarrow=False, use_polars=False, as_path=False):
    """
    Retrieve stored results for a dataset, keyed by name_tag.

    Files stored via ``store_file`` are returned as ``Path`` objects. Data stored via
    ``store_data`` is returned according to the format flags below. For parquet
    (``.pq``) entries the precedence is ``as_path > use_pyarrow > use_polars > pandas``;
    every other entry is read as a gzip-compressed pickle and returned unpickled.

    Name tags that are not a column of either index table are skipped silently.

    Args:
        dataset_id (str): The dataset whose results to fetch.
        name_tags (list): The name_tags to fetch.
        partial (bool): If True, skip tags without a value instead of raising KeyError.
        use_pyarrow (bool): For ``.pq`` entries, return a ``pyarrow.dataset.Dataset``.
        use_polars (bool): For ``.pq`` entries, return a polars ``LazyFrame``.
        as_path (bool): For ``.pq`` entries, return the ``str`` path of the parquet
            file instead of loading it.

    Returns:
        dict: Mapping of name_tag -> result (Path / DataFrame / LazyFrame / Dataset /
        str / unpickled object).

    Raises:
        KeyError: If ``partial`` is False and a requested column has no value for
            ``dataset_id`` (including when the dataset has no row at all).
    """

    def _fetch_row(table, columns):
        # dataset_id is bound as a parameter; column names come from the table schema.
        self.cache_cursor.execute(
            f"SELECT {', '.join(columns)} FROM {table} WHERE id = ?;",
            (dataset_id,),
        )
        row = self.cache_cursor.fetchone()
        # An unknown dataset has no row: treat every requested column as unset
        # rather than failing with TypeError when zipping over None.
        return row if row is not None else (None,) * len(columns)

    results = {}

    # Retrieve files as Path objects
    file_columns = [
        c for c in self._get_column_list('stored_files') if c in name_tags
    ]
    if file_columns:
        for c, r in zip(file_columns, _fetch_row('stored_files', file_columns)):
            if r is None:
                if partial:
                    continue
                raise KeyError(f"{c} does not exist for {dataset_id}")
            results[c] = Path(self.cache_path, r)

    # Retrieve data as Python objects
    data_columns = [
        c for c in self._get_column_list('stored_data') if c in name_tags
    ]
    if data_columns:
        for c, r in zip(data_columns, _fetch_row('stored_data', data_columns)):
            if r is None:
                if partial:
                    continue
                raise KeyError(f"{c} does not exist for {dataset_id}")
            file_path = Path(self.cache_path, r)
            if file_path.suffix == '.pq':
                if as_path:
                    # Hand back the parquet path itself, not a loaded frame
                    data = str(file_path)
                elif use_pyarrow:
                    data = ds.dataset(file_path, format="parquet")
                elif use_polars:
                    # Load as polars LazyFrame
                    data = pl.scan_parquet(file_path)
                else:
                    # Default to pandas for backward compatibility
                    data = pd.read_parquet(file_path)
            else:
                # NOTE: unpickling executes arbitrary code; this is only safe while
                # the cache directory contains files written by this application.
                with gzip.open(file_path, 'rb') as f:
                    data = pkl.load(f)
            results[c] = data
    return results
def result_exists(self, dataset_id, name_tag):
    """
    Report whether a non-NULL entry is indexed for ``(dataset_id, name_tag)``.

    Both index tables (``stored_data`` and ``stored_files``) are consulted, so a tag
    that exists as a column in both is found regardless of which one holds the value.

    Args:
        dataset_id (str): The dataset id.
        name_tag (str): The name_tag to look up.

    Returns:
        bool: True if either table has a value for the pair, otherwise False.
    """
    for table in ('stored_data', 'stored_files'):
        # Only interpolate name_tag once it is known to be an existing column name.
        if name_tag not in self._get_column_list(table):
            continue
        self.cache_cursor.execute(
            f"SELECT 1 FROM {table} WHERE id = ? AND {name_tag} IS NOT NULL LIMIT 1",
            (dataset_id,),
        )
        if self.cache_cursor.fetchone() is not None:
            return True
    return False
def clear_cache(self):
    """
    Delete every cached file and reset both index tables to their initial schema.

    The ``files`` directory is emptied (and created if it is missing). Both
    ``stored_data`` and ``stored_files`` are dropped and recreated with the ``id`` and
    ``display_name`` columns, so display-name queries keep working after a clear.
    """
    files_dir = Path(self.cache_path, 'files')
    if files_dir.exists():
        shutil.rmtree(files_dir)
    files_dir.mkdir(parents=True, exist_ok=True)

    for table in ('stored_data', 'stored_files'):
        self.cache_cursor.execute(f"DROP TABLE IF EXISTS {table};")
        self.cache_cursor.execute(
            f"CREATE TABLE IF NOT EXISTS {table} (id TEXT PRIMARY KEY);"
        )
        # The freshly created table only has `id`; restore the display_name column
        # that the display-name getters/setters select from.
        self._add_column(table, 'display_name')
+ """ + # Validation: non-empty name + if not new_display_name or not new_display_name.strip(): + return False + + # Validation: reasonable length limit (100 characters) + if len(new_display_name) > 100: + return False + + # Trim whitespace + new_display_name = new_display_name.strip() + + # Update display_name in stored_data table if entry exists + self.cache_cursor.execute(""" + UPDATE stored_data + SET display_name = ? + WHERE id = ? + """, (new_display_name, dataset_id)) + + # Update display_name in stored_files table if entry exists + self.cache_cursor.execute(""" + UPDATE stored_files + SET display_name = ? + WHERE id = ? + """, (new_display_name, dataset_id)) + + return True diff --git a/tests/test_view_grid.py b/tests/test_view_grid.py new file mode 100644 index 000000000..e9f457735 --- /dev/null +++ b/tests/test_view_grid.py @@ -0,0 +1,418 @@ +"""Tests for the reusable linked-grid template stack (src/view/grid.py + helpers). + +Headless / no-browser: mirrors how OpenMS-Insight's own tests construct components +(``mock_streamlit`` patching ``st.session_state`` + a temp cache dir). Components are built +from the committed example parquet via ``data_path=`` and exercised through +``_prepare_vue_data`` / ``_get_component_args``. The grid / show_linked_grid / LayoutManager +are driven under a minimal mocked Streamlit context (each component's ``__call__`` is patched +to run the data path without the Vue bridge, since AppTest cannot spawn the preprocessing +subprocess). 
+""" + +import tempfile +from contextlib import ExitStack +from pathlib import Path +from unittest.mock import patch + +import polars as pl +import pytest + +ROOT = Path(__file__).resolve().parents[1] +DATA = ROOT / "example-data" / "insight" + + +class MockSessionState(dict): + """Dict with attribute access, like st.session_state.""" + + def __getattr__(self, k): + try: + return self[k] + except KeyError as e: + raise AttributeError(k) from e + + def __setattr__(self, k, v): + self[k] = v + + +class _Col: + """Fake st.columns() column / container: context manager + the widgets the grid uses.""" + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def warning(self, *a, **k): + pass + + def info(self, *a, **k): + pass + + def button(self, *a, **k): + return False + + +_COLS_RECORD = [] + + +def _columns(spec, **k): + n = spec if isinstance(spec, int) else len(spec) + _COLS_RECORD.append(n) + return [_Col() for _ in range(n)] + + +def _container(*a, **k): + return _Col() + + +def _noop(*a, **k): + return None + + +@pytest.fixture +def mock_streamlit(): + state = MockSessionState() + with patch("streamlit.session_state", state): + yield state + + +@pytest.fixture +def cache_dir(): + return tempfile.mkdtemp(prefix="tmpl_view_grid_") + + +def _build_components(cache): + """Construct the four demo components from the example parquet fixtures. + + Uses ``data=pl.scan_parquet(...)`` (in-process preprocessing) rather than ``data_path=`` + so construction does not spawn a subprocess. This mirrors OpenMS-Insight's own + construction tests (which build from ``data=`` LazyFrames) and keeps these tests robust + when run in the same pytest session as the Streamlit ``AppTest`` GUI tests (the spawn + subprocess used by ``data_path=`` crashes under that shared runner -- a known AppTest + limitation, not a code defect). 
The demo *page* deliberately uses ``data_path=`` for the + production memory-efficiency benefit; the ``data_path=`` path itself is covered by + :func:`test_component_data_path_construction`. + """ + from openms_insight import Heatmap, LinePlot, SequenceView, Table + + return { + "spectra_table": Table( + cache_id="t_spectra", + data=pl.scan_parquet(DATA / "spectra.parquet"), + cache_path=cache, + interactivity={"spectrum": "scan_id"}, + index_field="scan_id", + default_row=0, + title="Spectrum Table", + ), + "spectrum_plot": LinePlot( + cache_id="t_spectrum_plot", + data=pl.scan_parquet(DATA / "peaks.parquet"), + cache_path=cache, + filters={"spectrum": "scan_id"}, + interactivity={"peak": "peak_id"}, + x_column="mass", + y_column="intensity", + highlight_column="is_annotated", + annotation_column="ion_label", + title="MS/MS Spectrum", + ), + "peak_map": Heatmap( + cache_id="t_peak_map", + data=pl.scan_parquet(DATA / "heat.parquet"), + cache_path=cache, + x_column="rt", + y_column="mass", + intensity_column="intensity", + interactivity={"spectrum": "scan_id", "peak": "peak_id"}, + title="Peak Map", + ), + "sequence_view": SequenceView( + cache_id="t_seq", + sequence_data=pl.scan_parquet(DATA / "sequences.parquet"), + peaks_data=pl.scan_parquet(DATA / "peaks.parquet"), + cache_path=cache, + filters={"spectrum": "scan_id"}, + interactivity={"peak": "peak_id"}, + deconvolved=True, + title="Fragment Coverage", + ), + } + + +def _patch_component_calls(stack, fake_call): + """Patch ``__call__`` on every concrete component class (they don't all share it).""" + from openms_insight import Heatmap, LinePlot, SequenceView, Table + + stack.enter_context(patch("streamlit.columns", _columns)) + stack.enter_context(patch("streamlit.container", _container)) + stack.enter_context(patch("streamlit.warning", _noop)) + stack.enter_context(patch("streamlit.divider", _noop)) + for cls in (Table, LinePlot, Heatmap, SequenceView): + stack.enter_context(patch.object(cls, "__call__", 
fake_call)) + + +# --------------------------------------------------------------------------- # +# fixtures (the committed example parquet) load with the documented schema +# --------------------------------------------------------------------------- # +@pytest.mark.parametrize( + "name,cols", + [ + ("spectra", {"scan_id", "rt", "ms_level", "precursor_mz", "n_peaks"}), + ("peaks", {"scan_id", "peak_id", "mass", "intensity", "is_annotated", "ion_label"}), + ("heat", {"scan_id", "rt", "mass", "intensity", "peak_id"}), + ("sequences", {"scan_id", "sequence", "precursor_charge"}), + ], +) +def test_example_fixtures_load(name, cols): + df = pl.read_parquet(DATA / f"{name}.parquet") + assert df.height > 0 + assert cols.issubset(set(df.columns)) + + +# --------------------------------------------------------------------------- # +# every component constructs from data_path= and runs the two contract methods +# --------------------------------------------------------------------------- # +def test_components_construct_and_prepare(mock_streamlit, cache_dir): + comps = _build_components(cache_dir) + assert set(comps) == {"spectra_table", "spectrum_plot", "peak_map", "sequence_view"} + for comp in comps.values(): + for state in ({}, {"spectrum": 1, "peak": 21}): + vue = comp._prepare_vue_data(state) + assert isinstance(vue, dict) + args = comp._get_component_args() + assert isinstance(args, dict) and "componentType" in args + + +# --------------------------------------------------------------------------- # +# render_linked_grid wiring: shared StateManager, per-cell keys, <=3 columns +# --------------------------------------------------------------------------- # +def test_render_linked_grid_wiring(mock_streamlit, cache_dir): + from openms_insight import StateManager + + from src.view.grid import render_linked_grid + + comps = _build_components(cache_dir) + builders = {k: (lambda c=v: c) for k, v in comps.items()} + rendered = [] + + def fc(self, key=None, 
state_manager=None, height=None): + self._prepare_vue_data( + state_manager.get_all_selections() if state_manager else {} + ) + rendered.append((key, id(state_manager))) + return None + + layout = [["spectra_table", "spectrum_plot"], ["peak_map", "sequence_view"]] + _COLS_RECORD.clear() + with ExitStack() as stack: + _patch_component_calls(stack, fc) + sm = render_linked_grid(layout, builders, state_key="exp0", grid_key="g") + + assert isinstance(sm, StateManager) + assert sorted(r[0] for r in rendered) == ["g_0_0", "g_0_1", "g_1_0", "g_1_1"] + # all cells shared exactly one StateManager (cross-linking) + assert len({r[1] for r in rendered}) == 1 + assert _COLS_RECORD == [2, 2] + + +def test_render_linked_grid_clamps_to_three_columns(mock_streamlit, cache_dir): + from src.view.grid import MAX_COLUMNS, render_linked_grid + + comps = _build_components(cache_dir) + builders = {k: (lambda c=v: c) for k, v in comps.items()} + rendered = [] + + def fc(self, key=None, state_manager=None, height=None): + rendered.append(key) + return None + + big = [["spectra_table", "spectrum_plot", "peak_map", "sequence_view"]] + _COLS_RECORD.clear() + with ExitStack() as stack: + _patch_component_calls(stack, fc) + render_linked_grid(big, builders, state_key="big", grid_key="b") + + assert _COLS_RECORD == [MAX_COLUMNS] + assert len(rendered) == MAX_COLUMNS + + +def test_render_linked_grid_on_missing(mock_streamlit, cache_dir): + from src.view.grid import render_linked_grid + + comps = _build_components(cache_dir) + builders = {k: (lambda c=v: c) for k, v in comps.items()} + rendered = [] + + def fc(self, key=None, state_manager=None, height=None): + rendered.append(key) + return None + + with ExitStack() as stack: + _patch_component_calls(stack, fc) + # warn -> skip, no cell rendered, no raise + render_linked_grid([["nope"]], builders, state_key="m1") + assert rendered == [] + # error -> KeyError + with pytest.raises(KeyError): + render_linked_grid([["nope"]], builders, state_key="m2", 
on_missing="error") + # invalid on_missing rejected up-front + with pytest.raises(ValueError): + render_linked_grid([["spectra_table"]], builders, state_key="m3", on_missing="x") + + +# --------------------------------------------------------------------------- # +# show_linked_grid: one independent StateManager per experiment +# --------------------------------------------------------------------------- # +def test_show_linked_grid_one_state_manager_per_experiment(mock_streamlit, cache_dir): + from src.common.common import show_linked_grid + + comps = _build_components(cache_dir) + builders = {k: (lambda c=v: c) for k, v in comps.items()} + two_exp = [[["spectra_table"]], [["peak_map"]]] + + def _make_fc(sink): + def fc(self, key=None, state_manager=None, height=None): + sink.append(id(state_manager)) + return None + + return fc + + for side_by_side in (True, False): + seen = [] + with ExitStack() as stack: + _patch_component_calls(stack, _make_fc(seen)) + show_linked_grid(two_exp, builders, tool="demo", side_by_side=side_by_side) + assert len(set(seen)) == 2, f"side_by_side={side_by_side}" + + +# --------------------------------------------------------------------------- # +# LayoutManager: trim/expand/validate/dependency + persistence round-trip +# --------------------------------------------------------------------------- # +def test_layout_manager_trim_expand_validate(mock_streamlit): + from src.view.grid import LayoutManager + + options = ["Spectrum table", "Spectrum plot", "Peak map", "Sequence view"] + names = ["spectra_table", "spectrum_plot", "peak_map", "sequence_view"] + lm = LayoutManager(options, names, store=_DummyStore(), session_prefix="t") + + labels = [[["Spectrum table", "Spectrum plot"]], [["Peak map", ""]]] + trimmed = lm.trim(labels) + assert trimmed == [[["spectra_table", "spectrum_plot"]], [["peak_map"]]] + assert lm.expand(trimmed) == [[["Spectrum table", "Spectrum plot"]], [["Peak map"]]] + + assert lm.validate([[[""]]]) != "" # empty 
rejected + assert lm.validate(labels) == "" # valid accepted + + # "(... needed)" dependency validation + idempotent add_options + lm.add_options(["Sequence view (Spectrum table needed)"], ["seqdep"]) + before = len(lm.component_names) + lm.add_options(["Sequence view (Spectrum table needed)"], ["seqdep"]) + assert len(lm.component_names) == before + assert lm.validate([[["Sequence view (Spectrum table needed)"]]]) != "" + assert ( + lm.validate([[["Spectrum table", "Sequence view (Spectrum table needed)"]]]) + == "" + ) + + +def test_layout_manager_persistence_roundtrip(mock_streamlit): + from src.view.grid import LayoutManager + from src.workflow.FileManager import FileManager + + ws = Path(tempfile.mkdtemp(prefix="tmpl_lm_ws_")) + fm = FileManager(ws, cache_path=ws / "cache") + lm = LayoutManager( + ["Spectrum table"], + ["spectra_table"], + store=fm, + layout_id="demo_layout", + session_prefix="t2", + ) + assert lm.get_layout() is None + trimmed = [[["spectra_table"]]] + lm.set_layout(trimmed, side_by_side=True) + got = lm.get_layout() + assert got == (trimmed, True) + + +class _DummyStore: + """In-memory Store protocol impl for trim/expand/validate tests (no disk).""" + + def __init__(self): + self._d = {} + + def get_results(self, dataset_id, name_tags): + return {t: self._d[(dataset_id, t)] for t in name_tags} + + def store_data(self, dataset_id, name_tag, data): + self._d[(dataset_id, name_tag)] = data + + def result_exists(self, dataset_id, name_tag): + return (dataset_id, name_tag) in self._d + + def remove_results(self, dataset_id): + self._d = {k: v for k, v in self._d.items() if k[0] != dataset_id} + + +def test_store_protocol_satisfied_by_filemanager(): + """FileManager structurally satisfies the grid.Store protocol.""" + from src.view.grid import Store + from src.workflow.FileManager import FileManager + + ws = Path(tempfile.mkdtemp(prefix="tmpl_store_")) + fm = FileManager(ws, cache_path=ws / "cache") + assert isinstance(fm, Store) + + +def 
test_component_data_path_construction(): + """The demo page's ``data_path=`` path works end-to-end (subprocess preprocessing). + + Run in a clean interpreter via ``subprocess`` so it exercises the exact production path + (Insight spawns a preprocessing subprocess for ``data_path=``) without being affected by + the Streamlit ``AppTest`` GUI tests that may share this pytest session. + """ + import subprocess + import sys + import textwrap + + script = textwrap.dedent( + f""" + import tempfile + from pathlib import Path + from unittest.mock import patch + + class S(dict): + def __getattr__(s, k): + try: return s[k] + except KeyError as e: raise AttributeError(k) from e + def __setattr__(s, k, v): s[k] = v + + DATA = Path({str(DATA)!r}) + with patch("streamlit.session_state", S()): + from openms_insight import Table, LinePlot, Heatmap, SequenceView + cache = tempfile.mkdtemp() + Table(cache_id="dp_t", data_path=str(DATA/"spectra.parquet"), cache_path=cache, + interactivity={{"spectrum": "scan_id"}}, index_field="scan_id", default_row=0) + LinePlot(cache_id="dp_lp", data_path=str(DATA/"peaks.parquet"), cache_path=cache, + filters={{"spectrum": "scan_id"}}, interactivity={{"peak": "peak_id"}}, + x_column="mass", y_column="intensity") + Heatmap(cache_id="dp_h", data_path=str(DATA/"heat.parquet"), cache_path=cache, + x_column="rt", y_column="mass", intensity_column="intensity") + SequenceView(cache_id="dp_sv", sequence_data_path=str(DATA/"sequences.parquet"), + peaks_data_path=str(DATA/"peaks.parquet"), cache_path=cache, + filters={{"spectrum": "scan_id"}}, deconvolved=True) + print("DATA_PATH_OK") + """ + ) + proc = subprocess.run( + [sys.executable, "-c", script], + cwd=str(ROOT), + capture_output=True, + text=True, + timeout=180, + ) + assert "DATA_PATH_OK" in proc.stdout, ( + f"data_path construction failed:\nstdout={proc.stdout}\nstderr={proc.stderr[-2000:]}" + ) From 5b7a54500f3f20a702177f8799c3869d4653dd7d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 
13:05:14 +0000 Subject: [PATCH 2/9] Phase 3 r1 fixes: grid.py upload-validation parity + drop unused imports - _handle_setting_buttons: validate the trimmed (internal-name) uploaded layout BEFORE expanding, matching the oracle handleSettingButtons (the (... needed) dependency check is a no-op on the trimmed form; it fires later at Save time). Validating expanded labels wrongly rejected hand-crafted uploads. - drop unused typing imports (Any, Sequence). --- src/view/grid.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/view/grid.py b/src/view/grid.py index 27c447a82..f30fa4294 100644 --- a/src/view/grid.py +++ b/src/view/grid.py @@ -30,13 +30,11 @@ import json from typing import ( - Any, Callable, Dict, List, Optional, Protocol, - Sequence, Tuple, runtime_checkable, ) @@ -416,14 +414,18 @@ def _handle_setting_buttons(self) -> None: uploaded = st.session_state.get(self._k("uploaded_json")) if uploaded is not None: uploaded_layout = json.load(uploaded) - # uploaded layout is trimmed (internal names); expand to labels for validation/edit - expanded = self.expand(uploaded_layout) - validated = self.validate(expanded) + # Validate the uploaded (trimmed, internal-name) layout BEFORE expanding, + # matching the oracle handleSettingButtons: internal names never contain + # the "(... needed)" dependency labels, so only the empty-input check + # fires on upload (dependency validation happens later, at Save time). + # Validating the expanded labels here would wrongly reject hand-crafted + # uploads, diverging from the oracle. 
+ validated = self.validate(uploaded_layout) if validated != "": st.session_state[self._k("component_error")] = validated else: - st.session_state[self._k("layout")] = expanded - st.session_state[self._k("num_experiments")] = len(expanded) + st.session_state[self._k("layout")] = self.expand(uploaded_layout) + st.session_state[self._k("num_experiments")] = len(uploaded_layout) def _handle_edit_and_save_buttons(self) -> None: # "Edit" clicked: re-enter edit mode, seeded from the saved layout From cd5c8a46e50530ef03eac95f2413e1834002a1e8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 14:20:36 +0000 Subject: [PATCH 3/9] Harden flaky linked-grid test: assert session_key, not id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_show_linked_grid_one_state_manager_per_experiment compared id() of StateManagers it didn't retain, so GC could reuse an id within a run and make the id-based set flaky in full-session order (passed in isolation / the gate, but could report 1 distinct id instead of 2). Assert on the stable _session_key (demo__exp0 / demo__exp1) — the actual one-StateManager-per-experiment invariant. Verification harness only; no change to any reviewed unit. --- tests/test_view_grid.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_view_grid.py b/tests/test_view_grid.py index e9f457735..16a4feff3 100644 --- a/tests/test_view_grid.py +++ b/tests/test_view_grid.py @@ -274,7 +274,10 @@ def test_show_linked_grid_one_state_manager_per_experiment(mock_streamlit, cache def _make_fc(sink): def fc(self, key=None, state_manager=None, height=None): - sink.append(id(state_manager)) + # Record the StateManager's stable session_key (not id(): unretained + # StateManagers can be GC'd and have their id() reused within a run, + # making an id-based set flaky in the full-session test order). 
+ sink.append(state_manager._session_key) return None return fc @@ -284,7 +287,9 @@ def fc(self, key=None, state_manager=None, height=None): with ExitStack() as stack: _patch_component_calls(stack, _make_fc(seen)) show_linked_grid(two_exp, builders, tool="demo", side_by_side=side_by_side) - assert len(set(seen)) == 2, f"side_by_side={side_by_side}" + # one StateManager per experiment -> two distinct session keys. + assert len(set(seen)) == 2, f"side_by_side={side_by_side}: {seen}" + assert set(seen) == {"demo__exp0", "demo__exp1"}, seen # --------------------------------------------------------------------------- # From cdf1fb599b28a2c86b91550d26f7e5c8b3bb2620 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 14:40:40 +0000 Subject: [PATCH 4/9] grid: keep empty experiments on layout upload (oracle parity) Round-5 finding 3-grid-003: uploading a layout JSON with a wholly-empty experiment ([]) wiped the whole layout. expand() dropped the empty experiment, so len(layout) < num_experiments(=len(uploaded)) tripped the reset-on-count-mismatch and replaced the upload with blanks. The oracle handleSettingButtons inline-expand keeps an empty experiment as a [] stub, so the counts match and nothing is reset. Add expand(drop_empty_experiments=) and pass False on the upload path (edit mode still drops empties). Test added. --- src/view/grid.py | 19 +++++++++++++++---- tests/test_view_grid.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/view/grid.py b/src/view/grid.py index f30fa4294..3e1f7bf34 100644 --- a/src/view/grid.py +++ b/src/view/grid.py @@ -240,8 +240,15 @@ def trim(self, expanded: list) -> list: trimmed.append(rows) return trimmed - def expand(self, trimmed: list) -> list: - """internal names -> labels, dropping empty cells/rows/experiments.""" + def expand(self, trimmed: list, drop_empty_experiments: bool = True) -> list: + """internal names -> labels, dropping empty cells/rows. 
+ + ``drop_empty_experiments`` (default True, the edit-mode behavior) also drops + a wholly-empty experiment. The upload path passes False to match the oracle + ``handleSettingButtons``, whose inline expand keeps an empty experiment as a + ``[]`` stub so ``num_experiments`` stays ``len(uploaded)`` and the + reset-on-count-mismatch never fires (which would wipe the upload). + """ expanded = [] for exp in trimmed: rows = [] @@ -254,7 +261,7 @@ def expand(self, trimmed: list) -> list: ) if cols: rows.append(cols) - if rows: + if rows or not drop_empty_experiments: expanded.append(rows) return expanded @@ -424,7 +431,11 @@ def _handle_setting_buttons(self) -> None: if validated != "": st.session_state[self._k("component_error")] = validated else: - st.session_state[self._k("layout")] = self.expand(uploaded_layout) + # Keep empty experiments (oracle inline-expand) so num_experiments == + # len(uploaded) and the reset-on-count-mismatch never wipes the upload. + st.session_state[self._k("layout")] = self.expand( + uploaded_layout, drop_empty_experiments=False + ) st.session_state[self._k("num_experiments")] = len(uploaded_layout) def _handle_edit_and_save_buttons(self) -> None: diff --git a/tests/test_view_grid.py b/tests/test_view_grid.py index 16a4feff3..9b1b35aad 100644 --- a/tests/test_view_grid.py +++ b/tests/test_view_grid.py @@ -307,6 +307,16 @@ def test_layout_manager_trim_expand_validate(mock_streamlit): assert trimmed == [[["spectra_table", "spectrum_plot"]], [["peak_map"]]] assert lm.expand(trimmed) == [[["Spectrum table", "Spectrum plot"]], [["Peak map"]]] + # Upload path keeps a wholly-empty experiment (oracle parity): expand drops + # empty cells/rows but, with drop_empty_experiments=False, keeps the empty + # experiment as a [] stub so num_experiments == len(uploaded) and the upload + # is not wiped by the reset-on-count-mismatch. 
+ uploaded = [[["spectra_table"]], []] + assert lm.expand(uploaded) == [[["Spectrum table"]]] # default drops it + kept = lm.expand(uploaded, drop_empty_experiments=False) + assert kept == [[["Spectrum table"]], []] + assert len(kept) == len(uploaded) # count matches -> no spurious reset + assert lm.validate([[[""]]]) != "" # empty rejected assert lm.validate(labels) == "" # valid accepted From 66df8aeba7af4b94f926417cb1421f77c014a00d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 08:41:01 +0000 Subject: [PATCH 5/9] Docker: build + install openms-insight from the migration branch (all 4 Dockerfiles) The template viewer page (visualization_template.py) imports openms-insight, but it was not installed by any Dockerfile. Add an insight-build stage (node:21) to all four Dockerfiles (Dockerfile{,.arm}, Dockerfile_simple{,.arm}) that clones the openms-insight branch claude/kind-heisenberg-u6dVm, builds its Vue bundle, syncs dist into the package, then pip-installs the source tree (hatchling force-includes the bundled dist). Installs from source so no PyPI publish is required. --- Dockerfile | 18 ++++++++++++++++++ Dockerfile.arm | 18 ++++++++++++++++++ Dockerfile_simple | 17 +++++++++++++++++ Dockerfile_simple.arm | 17 +++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/Dockerfile b/Dockerfile index 2d1b5daf3..08eaf09bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,6 +76,19 @@ RUN mkdir /thirdparty && \ chmod -R +x /thirdparty ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/Sirius:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Fido:/thirdparty/MaRaCluster:/thirdparty/MyriMatch:/thirdparty/OMSSA:/thirdparty/Percolator:/thirdparty/SpectraST:/thirdparty/XTandem:/thirdparty/crux:${PATH}" +# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch. 
+# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle +# here and sync it into the package tree; the compile-openms stage pip-installs it. +FROM node:21 AS insight-build +ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git +ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm +ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json +RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight +WORKDIR /openms-insight/js-component +RUN npm install && npm run build +RUN rm -rf /openms-insight/openms_insight/js-component/dist \ + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + # Build OpenMS and pyOpenMS. FROM setup-build-system AS compile-openms WORKDIR / @@ -99,6 +112,11 @@ RUN pip install dist/*.whl # Install other dependencies (excluding pyopenms) COPY requirements.txt ./requirements.txt RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +# OpenMS-Insight: install from the migration branch built in the insight-build stage +# (with its Vue bundle). The template's viewer page (visualization_template.py) imports +# it; installing from source means no PyPI publish is required. +COPY --from=insight-build /openms-insight /tmp/openms-insight +RUN pip install /tmp/openms-insight && rm -rf /tmp/openms-insight RUN pip install -r requirements.txt WORKDIR / diff --git a/Dockerfile.arm b/Dockerfile.arm index 176598006..49dcfad86 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -78,6 +78,19 @@ RUN mkdir /thirdparty && \ chmod -R +x /thirdparty ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Percolator:/thirdparty/Sage:${PATH}" +# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch. 
+# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle +# here and sync it into the package tree; the compile-openms stage pip-installs it. +FROM node:21 AS insight-build +ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git +ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm +ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json +RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight +WORKDIR /openms-insight/js-component +RUN npm install && npm run build +RUN rm -rf /openms-insight/openms_insight/js-component/dist \ + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + # Build OpenMS and pyOpenMS. FROM setup-build-system AS compile-openms WORKDIR / @@ -127,6 +140,11 @@ RUN pip install dist/*.whl # Install other dependencies (excluding pyopenms) COPY requirements.txt ./requirements.txt RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +# OpenMS-Insight: install from the migration branch built in the insight-build stage +# (with its Vue bundle). The template's viewer page (visualization_template.py) imports +# it; installing from source means no PyPI publish is required. +COPY --from=insight-build /openms-insight /tmp/openms-insight +RUN pip install /tmp/openms-insight && rm -rf /tmp/openms-insight RUN pip install -r requirements.txt WORKDIR / diff --git a/Dockerfile_simple b/Dockerfile_simple index 163bcfe64..02d8f9cd6 100644 --- a/Dockerfile_simple +++ b/Dockerfile_simple @@ -7,6 +7,19 @@ # debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell # prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force +# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch. 
+# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle +# here and sync it into the package tree; stage1 pip-installs it. +FROM node:21 AS insight-build +ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git +ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm +ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json +RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight +WORKDIR /openms-insight/js-component +RUN npm install && npm run build +RUN rm -rf /openms-insight/openms_insight/js-component/dist \ + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + FROM ubuntu:22.04 AS stage1 ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git ARG OPENMS_BRANCH=develop @@ -60,6 +73,10 @@ SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] COPY requirements.txt requirements.txt RUN mamba install pip RUN python -m pip install --upgrade pip +# OpenMS-Insight: install from the migration branch built in the insight-build stage +# (with its Vue bundle); installing from source means no PyPI publish is required. +COPY --from=insight-build /openms-insight /tmp/openms-insight +RUN python -m pip install /tmp/openms-insight && rm -rf /tmp/openms-insight RUN python -m pip install -r requirements.txt # Pre-create bind-mount targets so apptainer/singularity has a real attach diff --git a/Dockerfile_simple.arm b/Dockerfile_simple.arm index be57317d2..7299d0fef 100644 --- a/Dockerfile_simple.arm +++ b/Dockerfile_simple.arm @@ -7,6 +7,19 @@ # debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell # prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force +# Build the OpenMS-Insight package (Python + Vue bundle) from the migration branch. 
+# Insight's Vue dist is gitignored and it has no pip build hook, so build the bundle +# here and sync it into the package tree; stage1 pip-installs it. +FROM node:21 AS insight-build +ARG INSIGHT_REPO=https://github.com/t0mdavid-m/openms-insight.git +ARG INSIGHT_BRANCH=claude/kind-heisenberg-u6dVm +ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSIGHT_BRANCH} insight-ref.json +RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight +WORKDIR /openms-insight/js-component +RUN npm install && npm run build +RUN rm -rf /openms-insight/openms_insight/js-component/dist \ + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + FROM ubuntu:22.04 AS stage1 ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git ARG OPENMS_BRANCH=develop @@ -60,6 +73,10 @@ SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] COPY requirements.txt requirements.txt RUN mamba install pip RUN python -m pip install --upgrade pip +# OpenMS-Insight: install from the migration branch built in the insight-build stage +# (with its Vue bundle); installing from source means no PyPI publish is required. 
+COPY --from=insight-build /openms-insight /tmp/openms-insight +RUN python -m pip install /tmp/openms-insight && rm -rf /tmp/openms-insight RUN python -m pip install -r requirements.txt # Pre-create bind-mount targets so apptainer/singularity has a real attach From 9be1502a28954f2f2bdc2cdd583ad93987a6daca Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 08:43:41 +0000 Subject: [PATCH 6/9] Docker: drop js-component/node_modules from the insight-build COPY (slim image) --- Dockerfile | 3 ++- Dockerfile.arm | 3 ++- Dockerfile_simple | 3 ++- Dockerfile_simple.arm | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 08eaf09bb..c42e395e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -87,7 +87,8 @@ RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insig WORKDIR /openms-insight/js-component RUN npm install && npm run build RUN rm -rf /openms-insight/openms_insight/js-component/dist \ - && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ + && rm -rf /openms-insight/js-component/node_modules # Build OpenMS and pyOpenMS. FROM setup-build-system AS compile-openms diff --git a/Dockerfile.arm b/Dockerfile.arm index 49dcfad86..7bca45ae5 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -89,7 +89,8 @@ RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insig WORKDIR /openms-insight/js-component RUN npm install && npm run build RUN rm -rf /openms-insight/openms_insight/js-component/dist \ - && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ + && rm -rf /openms-insight/js-component/node_modules # Build OpenMS and pyOpenMS. 
FROM setup-build-system AS compile-openms diff --git a/Dockerfile_simple b/Dockerfile_simple index 02d8f9cd6..5a7ea6219 100644 --- a/Dockerfile_simple +++ b/Dockerfile_simple @@ -18,7 +18,8 @@ RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insig WORKDIR /openms-insight/js-component RUN npm install && npm run build RUN rm -rf /openms-insight/openms_insight/js-component/dist \ - && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ + && rm -rf /openms-insight/js-component/node_modules FROM ubuntu:22.04 AS stage1 ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git diff --git a/Dockerfile_simple.arm b/Dockerfile_simple.arm index 7299d0fef..e8763c956 100644 --- a/Dockerfile_simple.arm +++ b/Dockerfile_simple.arm @@ -18,7 +18,8 @@ RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insig WORKDIR /openms-insight/js-component RUN npm install && npm run build RUN rm -rf /openms-insight/openms_insight/js-component/dist \ - && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist + && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ + && rm -rf /openms-insight/js-component/node_modules FROM ubuntu:22.04 AS stage1 ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git From a824864aa688cb29d9e5a658fb1348634de378be Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 08:47:17 +0000 Subject: [PATCH 7/9] Docker: mkdir -p the package js-component dir before syncing dist (fresh-clone fix) In a fresh clone openms_insight/js-component/ does not exist (only held the gitignored dist/), so cp had no parent dir. mkdir -p it first. Validated against a fresh clone: clone -> npm build -> sync -> pip wheel bundles the Vue dist (incl. index.js). 
--- Dockerfile | 3 ++- Dockerfile.arm | 3 ++- Dockerfile_simple | 3 ++- Dockerfile_simple.arm | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index c42e395e3..071c4a2b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -86,7 +86,8 @@ ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSI RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight WORKDIR /openms-insight/js-component RUN npm install && npm run build -RUN rm -rf /openms-insight/openms_insight/js-component/dist \ +RUN mkdir -p /openms-insight/openms_insight/js-component \ + && rm -rf /openms-insight/openms_insight/js-component/dist \ && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ && rm -rf /openms-insight/js-component/node_modules diff --git a/Dockerfile.arm b/Dockerfile.arm index 7bca45ae5..ccc0ebd38 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -88,7 +88,8 @@ ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSI RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight WORKDIR /openms-insight/js-component RUN npm install && npm run build -RUN rm -rf /openms-insight/openms_insight/js-component/dist \ +RUN mkdir -p /openms-insight/openms_insight/js-component \ + && rm -rf /openms-insight/openms_insight/js-component/dist \ && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ && rm -rf /openms-insight/js-component/node_modules diff --git a/Dockerfile_simple b/Dockerfile_simple index 5a7ea6219..8c95a0d76 100644 --- a/Dockerfile_simple +++ b/Dockerfile_simple @@ -17,7 +17,8 @@ ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSI RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight WORKDIR /openms-insight/js-component RUN npm install && npm run build -RUN rm -rf /openms-insight/openms_insight/js-component/dist \ +RUN 
mkdir -p /openms-insight/openms_insight/js-component \ + && rm -rf /openms-insight/openms_insight/js-component/dist \ && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ && rm -rf /openms-insight/js-component/node_modules diff --git a/Dockerfile_simple.arm b/Dockerfile_simple.arm index e8763c956..2bcdd890e 100644 --- a/Dockerfile_simple.arm +++ b/Dockerfile_simple.arm @@ -17,7 +17,8 @@ ADD https://api.github.com/repos/t0mdavid-m/openms-insight/git/refs/heads/${INSI RUN git clone -b ${INSIGHT_BRANCH} --single-branch ${INSIGHT_REPO} /openms-insight WORKDIR /openms-insight/js-component RUN npm install && npm run build -RUN rm -rf /openms-insight/openms_insight/js-component/dist \ +RUN mkdir -p /openms-insight/openms_insight/js-component \ + && rm -rf /openms-insight/openms_insight/js-component/dist \ && cp -r /openms-insight/js-component/dist /openms-insight/openms_insight/js-component/dist \ && rm -rf /openms-insight/js-component/node_modules From f09bfd02b33309dadf92f0b0f8b79ada7feb966c Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 09:34:26 +0000 Subject: [PATCH 8/9] CI fix: declare polars dependency (used by FileManager parquet data-layer) The migration added module-level "import polars as pl" to src/workflow/FileManager.py (parquet/as_path support for the Insight data-layer), but polars was not in requirements.txt -- only pyarrow (transitive via streamlit) was. The GUI test suite (pytest test_gui.py) installs requirements.txt and imports FileManager via the Workflow pages, so all 12 launches failed with "No module named 'polars'". Pin polars==1.41.2 (the proven version; FLASHApp uses polars>=1.0.0). Note: requirements.txt is pip-compiled; the external pyproject.toml source should also list polars so future recompiles keep it. 
--- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index dc7d7b18e..a085d9a0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -77,6 +77,8 @@ pillow==11.1.0 # streamlit plotly==5.22.0 # via src (pyproject.toml) +polars==1.41.2 + # via src (pyproject.toml) protobuf==6.32.0 # via streamlit psutil==7.0.0 From 4e64166c3a0b3293b78382598824c243e0ad1d70 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 12:35:34 +0000 Subject: [PATCH 9/9] CI: build openms-insight from source before tests (mirrors Dockerfile) --- .github/workflows/ci.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ee0902b5..35c8086b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,10 +15,22 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - uses: actions/setup-node@v4 + with: + node-version: "21" - name: Install dependencies run: | python -m pip install --upgrade pip + # OpenMS-Insight (Phase-3) is not on PyPI: build it from the migration branch + # (Vue bundle + Python) and install from source -- mirrors the Dockerfiles. + git clone -b claude/kind-heisenberg-u6dVm --single-branch --depth 1 \ + https://github.com/t0mdavid-m/openms-insight.git /tmp/openms-insight + ( cd /tmp/openms-insight/js-component && npm install && npm run build ) + mkdir -p /tmp/openms-insight/openms_insight/js-component + cp -r /tmp/openms-insight/js-component/dist \ + /tmp/openms-insight/openms_insight/js-component/dist + pip install /tmp/openms-insight pip install -r requirements.txt # test with requirements file so can easily bump with dependabot pip install pytest fakeredis - name: Test