Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
- switch changelog bot trigger only on comments ([#4241](https://github.com/nf-core/tools/pull/4241))
- fix indentation in generated api docs ([#4245](https://github.com/nf-core/tools/pull/4245))

### Linting

- use static EDAM.tsv and fix yaml comment loss ([#4242](https://github.com/nf-core/tools/pull/4242))

### Modules

- Allow task.ext.prefix2 in modules linting ([#4234](https://github.com/nf-core/tools/pull/4234))
Expand Down
62 changes: 62 additions & 0 deletions nf_core/assets/EDAM.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Class ID Preferred Label File extension
http://edamontology.org/format_1930 FASTQ fq|fastq
http://edamontology.org/format_2332 XML xml
http://edamontology.org/format_3256 N-Triples nt
http://edamontology.org/format_3257 Notation3 n3
http://edamontology.org/format_3261 RDF/XML rdf
http://edamontology.org/format_3464 JSON json
http://edamontology.org/format_3475 TSV tsv|tab
http://edamontology.org/format_3556 MHTML mhtml|mht|eml
http://edamontology.org/format_3665 K-mer countgraph oxlicg
http://edamontology.org/format_3682 imzML metadata file imzML
http://edamontology.org/format_3710 WIFF format wiff
http://edamontology.org/format_3746 BIOM format biom
http://edamontology.org/format_3749 JSON-LD jsonld
http://edamontology.org/format_3750 YAML yaml|yml
http://edamontology.org/format_3752 CSV csv
http://edamontology.org/format_3788 SQL sql
http://edamontology.org/format_3789 XQuery xq|xqy|xquery
http://edamontology.org/format_3839 ibd ibd
http://edamontology.org/format_3857 CWL cwl
http://edamontology.org/format_3911 msh msh
http://edamontology.org/format_3913 Loom loom
http://edamontology.org/format_3915 Zarr zarray|zgroup
http://edamontology.org/format_3916 MTX mtx
http://edamontology.org/format_3956 N-Quads nq
http://edamontology.org/format_3969 Vega json
http://edamontology.org/format_3970 Vega-lite json
http://edamontology.org/format_3972 BNGL bngl
http://edamontology.org/format_3973 Docker image format dockerfile
http://edamontology.org/format_3975 GFA 1 gfa
http://edamontology.org/format_3976 GFA 2 gfa
http://edamontology.org/format_3977 ObjTables xlsx
http://edamontology.org/format_3978 CONTIG contig
http://edamontology.org/format_3979 WEGO wego
http://edamontology.org/format_3980 RPKM rpkm
http://edamontology.org/format_3981 TAR format tar
http://edamontology.org/format_3982 CHAIN chain
http://edamontology.org/format_3983 NET net
http://edamontology.org/format_3984 QMAP qmap
http://edamontology.org/format_3985 gxformat2 ga
http://edamontology.org/format_3986 WMV wmv
http://edamontology.org/format_3987 ZIP format zip
http://edamontology.org/format_3988 LSM lsm
http://edamontology.org/format_3989 GZIP format gz|gzip
http://edamontology.org/format_3990 AVI avi
http://edamontology.org/format_3991 TrackDB trackDb
http://edamontology.org/format_3992 CIGAR format cigar
http://edamontology.org/format_3993 Stereolithography format stl
http://edamontology.org/format_3994 U3D u3d
http://edamontology.org/format_3995 Texture file format tex
http://edamontology.org/format_3996 Python script py
http://edamontology.org/format_3997 MPEG-4 mp4
http://edamontology.org/format_3998 Perl script pl
http://edamontology.org/format_3999 R script R
http://edamontology.org/format_4000 R markdown Rmd
http://edamontology.org/format_4001 NIFTI format nii
http://edamontology.org/format_4002 pickle pickle
http://edamontology.org/format_4003 NumPy format npy
http://edamontology.org/format_4004 SimTools repertoire file format repz
http://edamontology.org/format_4005 Configuration file format cfg
http://edamontology.org/format_4006 Zstandard format zst
http://edamontology.org/format_4007 MATLAB script m
4 changes: 4 additions & 0 deletions nf_core/assets/get_latest_edam.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Refresh the bundled EDAM table (nf_core/assets/EDAM.tsv) from edamontology.org.
#
# Run from the repository root (the output path below is relative to it):
#     bash nf_core/assets/get_latest_edam.sh
#
# Only rows with at least 15 columns and a non-empty 15th column are kept,
# and only columns 1, 2 and 15 are written, tab-separated.
set -euo pipefail

target="nf_core/assets/EDAM.tsv"

# Build the new table in a temporary file first: redirecting the pipeline
# straight into the target would truncate it before the download has even
# started, so a failed download would leave an empty table behind.
tmp="$(mktemp)"
trap 'rm -f "$tmp"' EXIT

# With pipefail, a curl failure (-f turns HTTP errors into a non-zero exit)
# aborts the script here instead of being masked by awk's exit status.
curl -4 -fsSL https://edamontology.org/EDAM.tsv |
    awk -F'\t' -v OFS='\t' 'NF>=15 && $15!="" {print $1,$2,$15}' > "$tmp"

# Refuse to replace the table with an empty result.
if [ ! -s "$tmp" ]; then
    echo "No rows with a non-empty 15th column were found; leaving ${target} untouched." >&2
    exit 1
fi

# Copy (rather than move) so the target keeps its existing file mode.
cp "$tmp" "$target"
28 changes: 23 additions & 5 deletions nf_core/modules/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,13 +611,21 @@ def _add_edam_ontologies(section, edam_formats, desc):
section["ontologies"] = []
log.debug(f"expected ontologies for {desc}: {expected_ontologies}")
log.debug(f"current ontologies for {desc}: {current_ontologies}")
for ontology, ext in expected_ontologies:
if ontology not in current_ontologies:
for ontology_url, ext in expected_ontologies:
comment_text = edam_formats[ext][1]
if ontology_url not in current_ontologies:
try:
section["ontologies"].append(ruamel.yaml.comments.CommentedMap({"edam": ontology}))
section["ontologies"][-1].yaml_add_eol_comment(f"{edam_formats[ext][1]}", "edam")
cm = ruamel.yaml.comments.CommentedMap()
cm["edam"] = ontology_url
cm.yaml_add_eol_comment(comment_text, key="edam")
section["ontologies"].append(cm)
except KeyError:
log.warning(f"Could not add ontologies in {desc}")
else:
for item in section["ontologies"]:
if isinstance(item, ruamel.yaml.comments.CommentedMap) and item.get("edam") == ontology_url:
item.yaml_add_eol_comment(comment_text, key="edam")
break

# EDAM ontologies
edam_formats = nf_core.modules.modules_utils.load_edam()
Expand Down Expand Up @@ -681,7 +689,17 @@ def _add_edam_ontologies(section, edam_formats, desc):

def _ensure_string_keys(obj):
"""Recursively ensure all dict keys are strings (e.g., convert 1.2 -> "1.2")"""
if isinstance(obj, dict):
if isinstance(obj, ruamel.yaml.comments.CommentedMap):
Comment thread
LouisLeNezet marked this conversation as resolved.
for key in list(obj.keys()):
value = obj.pop(key)
new_key = str(key) if not isinstance(key, str) else key
obj[new_key] = _ensure_string_keys(value)
return obj
elif isinstance(obj, ruamel.yaml.comments.CommentedSeq):
for i, item in enumerate(obj):
obj[i] = _ensure_string_keys(item)
return obj
elif isinstance(obj, dict):
return {str(k) if not isinstance(k, str) else k: _ensure_string_keys(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [_ensure_string_keys(item) for item in obj]
Expand Down
16 changes: 8 additions & 8 deletions nf_core/modules/modules_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
from pathlib import Path
from urllib.parse import urlparse

import requests

from ..components.nfcore_component import NFCoreComponent

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,15 +97,17 @@ def get_installed_modules(directory: Path, repo_type="modules") -> tuple[list[st
def load_edam():
"""Load the EDAM ontology from the nf-core repository"""
edam_formats = {}
local_path = Path(__file__).parent.parent / "assets" / "EDAM.tsv"
try:
response = requests.get("https://edamontology.org/EDAM.tsv")
Comment thread
LouisLeNezet marked this conversation as resolved.
except requests.exceptions.RequestException as e:
with local_path.open("rb") as f:
data_bytes = f.read()
except (FileNotFoundError, OSError) as e:
log.warning(f"Failed to load EDAM ontology: {e}")
return edam_formats
for line in response.content.splitlines():
return {}
for line in data_bytes.splitlines():
fields = line.decode("utf-8").split("\t")
if fields[0].split("/")[-1].startswith("format") and fields[14]: # We choose an already provided extension
extensions = fields[14].split("|")
if fields[0].split("/")[-1].startswith("format") and fields[2]: # We choose an already provided extension
extensions = fields[2].split("|")
for extension in extensions:
if extension not in edam_formats:
edam_formats[extension] = (fields[0], fields[1]) # URL, name
Expand Down
7 changes: 7 additions & 0 deletions tests/modules/test_modules_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,10 @@ def test_filter_modules_by_name_empty_list(self):

filtered = nf_core.modules.modules_utils.filter_modules_by_name(modules, "fastqc")
assert len(filtered) == 0

def test_load_edam(self):
    """Test that the EDAM ontology loads with the expected size and entry shape."""
    edam_formats = nf_core.modules.modules_utils.load_edam()
    # load_edam() keeps the first entry seen for each file extension, so the
    # result holds one item per unique extension in the bundled table.
    assert len(edam_formats) == 67
    # Iterating the dict itself yields the extension keys (plain strings), so
    # inspect the first *value*, which load_edam() stores as a (URL, name) tuple.
    first_value = next(iter(edam_formats.values()))
    assert len(first_value) == 2
Loading