Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tools/zooma/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
categories:
- Metabolomics
description: Query ZOOMA API with values from an input table column and return annotations as tabular output.
homepage_url: https://www.ebi.ac.uk/spot/zooma/docs/api
long_description: |
Wrapper around the ZOOMA API annotation endpoint. The tool reads an input tabular file,
queries ZOOMA for each non-empty value in a selected column, and writes returned annotations
to a tabular output.
name: zooma
owner: recetox
remote_repository_url: https://github.com/RECETOX/galaxytools/tree/master/tools/zooma
type: unrestricted
22 changes: 22 additions & 0 deletions tools/zooma/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<macros>
<token name="@TOOL_VERSION@">0.1.0</token>
<token name="@GALAXY_VERSION@">0</token>

<xml name="creator">
<creator>
<organization
url="https://www.recetox.muni.cz/"
email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
name="RECETOX MUNI" />
</creator>
</xml>

<token name="@HELP@"><![CDATA[
.. class:: infomark

**What this tool does**

Queries the ZOOMA API (https://www.ebi.ac.uk/spot/zooma/docs/api) with values from a selected column of
an input tabular file and returns a tabular annotation output.
]]></token>
</macros>
3 changes: 3 additions & 0 deletions tools/zooma/test-data/expected.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
query property_value property_type semantic_tags confidence source_name source_type study_type
asthma asthma disease http://www.ebi.ac.uk/efo/EFO_0000270 HIGH GWAS Catalog DATABASE http://purl.obolibrary.org/obo/OBI_0000070
unknown
3 changes: 3 additions & 0 deletions tools/zooma/test-data/input.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
term other
asthma row1
unknown row2
24 changes: 24 additions & 0 deletions tools/zooma/test-data/mock_response.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"asthma": [
{
"annotatedProperty": {
"propertyValue": "asthma",
"propertyType": "disease"
},
"semanticTags": [
"http://www.ebi.ac.uk/efo/EFO_0000270"
],
"confidence": "HIGH",
"derivedFrom": {
"provenance": {
"source": {
"name": "GWAS Catalog",
"type": "DATABASE",
"semanticTag": "http://purl.obolibrary.org/obo/OBI_0000070"
}
}
}
}
],
"unknown": []
}
134 changes: 134 additions & 0 deletions tools/zooma/zooma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import argparse
import csv
import json

import requests


# ZOOMA annotation endpoint used unless --api-url supplies a different one.
DEFAULT_API_URL = "https://www.ebi.ac.uk/spot/zooma/v2/api/services/annotate"


def parse_args():
    """Parse the command-line options of the ZOOMA query script.

    Returns:
        argparse.Namespace: parsed options exposing ``input``, ``output``,
        ``column``, ``has_header``, ``api_url``, ``timeout`` and
        ``mock_response``.
    """
    # Each entry is (flag, keyword arguments forwarded to add_argument).
    option_specs = (
        ("--input", {"required": True, "help": "Input tabular file path"}),
        ("--output", {"required": True, "help": "Output tabular file path"}),
        (
            "--column",
            {"required": True, "type": int, "help": "1-based input column index used for query terms"},
        ),
        ("--has-header", {"action": "store_true", "help": "Input tabular file contains a header row"}),
        ("--api-url", {"default": DEFAULT_API_URL, "help": "ZOOMA annotation endpoint URL"}),
        ("--timeout", {"type": int, "default": 30, "help": "HTTP request timeout in seconds"}),
        ("--mock-response", {"help": "Optional JSON file with mocked responses keyed by query term"}),
    )

    cli = argparse.ArgumentParser(description="Query ZOOMA API for values from a selected tabular column.")
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()


def get_nested_field(item, *path):
    """Walk ``path`` through nested dicts and return the value found as text.

    Args:
        item: Root object, normally a dict decoded from JSON.
        *path: Keys to follow, outermost first.

    Returns:
        str: ``""`` when a step is missing, is ``None``, or is reached through
        a non-dict; list values joined with ``"|"``; otherwise ``str(value)``.
    """
    node = item
    for key in path:
        # A non-dict cannot be descended into; treat it like a missing key.
        node = node.get(key) if isinstance(node, dict) else None
        if node is None:
            return ""

    if isinstance(node, list):
        return "|".join(map(str, node))
    return str(node)


def normalize_annotations(query_value, annotations):
    """Flatten ZOOMA annotations for one query term into output rows.

    Args:
        query_value: The term that was queried; copied into every row.
        annotations: Iterable of annotation dicts, or an empty/falsy value.

    Returns:
        list[dict]: One row per annotation. When ``annotations`` is falsy a
        single row is returned with only ``query`` filled in, so the term
        still appears in the output.
    """
    # Output column -> key path inside a single annotation object.
    field_paths = (
        ("property_value", ("annotatedProperty", "propertyValue")),
        ("property_type", ("annotatedProperty", "propertyType")),
        ("semantic_tags", ("semanticTags",)),
        ("confidence", ("confidence",)),
        ("source_name", ("derivedFrom", "provenance", "source", "name")),
        ("source_type", ("derivedFrom", "provenance", "source", "type")),
        ("study_type", ("derivedFrom", "provenance", "source", "semanticTag")),
    )

    if not annotations:
        placeholder = {"query": query_value}
        placeholder.update((column, "") for column, _ in field_paths)
        return [placeholder]

    return [
        {
            "query": query_value,
            **{column: get_nested_field(entry, *keys) for column, keys in field_paths},
        }
        for entry in annotations
    ]


def query_zooma(query_value, api_url, timeout, max_retries=3, backoff_seconds=1.0):
    """Fetch ZOOMA annotations for a single query term.

    The request is retried when the server answers HTTP 429 (rate limited),
    so one throttled call does not abort the annotation of a whole table.

    Args:
        query_value: Term sent as the ``propertyValue`` query parameter.
        api_url: URL of the annotation endpoint.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.
        max_retries: Number of additional attempts after a 429 response.
        backoff_seconds: Base delay for exponential backoff, used when the
            response carries no numeric ``Retry-After`` header.

    Returns:
        list: The decoded JSON payload; a single JSON object is wrapped in a
        one-element list.

    Raises:
        requests.HTTPError: The final response has an error status, including
            a 429 that persists after all retries.
        ValueError: The payload is neither a JSON array nor a JSON object.
    """
    # Imported locally so the block does not depend on a module-level import.
    import time

    max_delay_seconds = 60.0  # never stall a job longer than this per retry
    attempt = 0
    while True:
        response = requests.get(
            api_url,
            params={"propertyValue": query_value},
            headers={"Accept": "application/json"},
            timeout=timeout,
        )
        if response.status_code != 429 or attempt >= max_retries:
            break

        # Prefer the server's hint; Retry-After may also be an HTTP date, in
        # which case fall back to exponential backoff.
        try:
            delay = float(response.headers.get("Retry-After", ""))
        except ValueError:
            delay = backoff_seconds * (2 ** attempt)
        time.sleep(min(max(delay, 0.0), max_delay_seconds))
        attempt += 1

    response.raise_for_status()
    payload = response.json()
    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        return [payload]
    raise ValueError("Unexpected response payload type from ZOOMA API")


def run():
    """Annotate one column of a tab-separated file and write the result table.

    Reads the options from the command line, looks up every non-empty value
    of the selected column (in the mock-response mapping when one is given,
    otherwise through the ZOOMA API) and writes one output row per returned
    annotation, preceded by a header line.

    Raises:
        ValueError: ``--column`` is smaller than 1.
    """
    args = parse_args()

    zero_based_column = args.column - 1
    if zero_based_column < 0:
        raise ValueError("Column index must be a positive integer.")

    mocked = None
    if args.mock_response:
        with open(args.mock_response, "r", encoding="utf-8") as mock_file:
            mocked = json.load(mock_file)

    header = [
        "query",
        "property_value",
        "property_type",
        "semantic_tags",
        "confidence",
        "source_name",
        "source_type",
        "study_type",
    ]

    with open(args.input, "r", encoding="utf-8", newline="") as source, open(
        args.output, "w", encoding="utf-8", newline=""
    ) as sink:
        table_writer = csv.DictWriter(sink, fieldnames=header, delimiter="\t", lineterminator="\n")
        table_writer.writeheader()

        rows = csv.reader(source, delimiter="\t")
        if args.has_header:
            # Discard the header line; the default keeps an empty file harmless.
            next(rows, None)

        for cells in rows:
            # Rows too short to hold the column behave like rows with a blank value.
            term = cells[zero_based_column].strip() if zero_based_column < len(cells) else ""
            if not term:
                continue

            if mocked is None:
                annotations = query_zooma(term, args.api_url, args.timeout)
            else:
                annotations = mocked.get(term, [])

            table_writer.writerows(normalize_annotations(term, annotations))


# Execute the tool only when the file is run as a script, not when imported.
if __name__ == "__main__":
    run()
58 changes: 58 additions & 0 deletions tools/zooma/zooma.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<tool id="zooma" name="ZOOMA annotate" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please include a profile version

<macros>
<import>macros.xml</import>
</macros>
<expand macro="creator"/>

<requirements>
<requirement type="package" version="3.12">python</requirement>
<requirement type="package" version="2.32.3">requests</requirement>
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The requests package should already bring in a Python version, so you can drop the explicit python requirement.

</requirements>

<command detect_errors="aggressive"><![CDATA[
## An unset optional dataset stringifies to 'None', which is truthy, so the
## parameter itself is tested to avoid passing --mock-response 'None'.
python '$__tool_directory__/zooma.py'
    --input '$input'
    --output '$output'
    --column '$query_column'
    $has_header
    --api-url '$api_url'
    --timeout '$timeout'
    #if $mock_response
        --mock-response '$mock_response'
    #end if
]]></command>

<inputs>
<param name="input" type="data" format="tabular,csv,tsv" label="Input table" />
<param name="has_header" type="boolean" truevalue="--has-header" falsevalue="" checked="true" label="Input file contains a header row" />
<param name="query_column" type="data_column" data_ref="input" numerical="false" use_header_names="true" label="Column used for ZOOMA query" />
<param name="api_url" type="text" value="https://www.ebi.ac.uk/spot/zooma/v2/api/services/annotate" label="ZOOMA API URL">
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be hard-coded

<help>Override only if you need a different compatible endpoint.</help>
</param>
<param name="timeout" type="integer" value="30" min="1" label="HTTP request timeout (seconds)" />
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure this should be offered to a user.

<param name="mock_response" type="data" format="json" optional="true" label="Optional mocked JSON response file" help="Optional test/offline map: query term -> list of ZOOMA annotations." />
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uha

</inputs>

<outputs>
<data name="output" format="tabular" />
</outputs>

<tests>
<test>
<param name="input" value="input.tsv" ftype="tabular" />
<param name="has_header" value="true" />
<param name="query_column" value="1" />
<param name="timeout" value="5" />
<param name="mock_response" value="mock_response.json" ftype="json" />
<output name="output" file="expected.tsv" ftype="tabular" />
</test>
</tests>

<help><![CDATA[
@HELP@
]]></help>

<citations>
<citation type="doi">10.5281/zenodo.6035335</citation>
</citations>
</tool>