diff --git a/tools/zooma/.shed.yml b/tools/zooma/.shed.yml new file mode 100644 index 00000000..3affdbb0 --- /dev/null +++ b/tools/zooma/.shed.yml @@ -0,0 +1,12 @@ +categories: +- Metabolomics +description: Query ZOOMA API with values from an input table column and return annotations as tabular output. +homepage_url: https://www.ebi.ac.uk/spot/zooma/docs/api +long_description: | + Wrapper around the ZOOMA API annotation endpoint. The tool reads an input tabular file, + queries ZOOMA for each non-empty value in a selected column, and writes returned annotations + to a tabular output. +name: zooma +owner: recetox +remote_repository_url: https://github.com/RECETOX/galaxytools/tree/master/tools/zooma +type: unrestricted diff --git a/tools/zooma/macros.xml b/tools/zooma/macros.xml new file mode 100644 index 00000000..597c817b --- /dev/null +++ b/tools/zooma/macros.xml @@ -0,0 +1,22 @@ + + 0.1.0 + 0 + + + + + + + + + diff --git a/tools/zooma/test-data/expected.tsv b/tools/zooma/test-data/expected.tsv new file mode 100644 index 00000000..1aa58c62 --- /dev/null +++ b/tools/zooma/test-data/expected.tsv @@ -0,0 +1,3 @@ +query property_value property_type semantic_tags confidence source_name source_type study_type +asthma asthma disease http://www.ebi.ac.uk/efo/EFO_0000270 HIGH GWAS Catalog DATABASE http://purl.obolibrary.org/obo/OBI_0000070 +unknown diff --git a/tools/zooma/test-data/input.tsv b/tools/zooma/test-data/input.tsv new file mode 100644 index 00000000..8b5465a7 --- /dev/null +++ b/tools/zooma/test-data/input.tsv @@ -0,0 +1,3 @@ +term other +asthma row1 +unknown row2 diff --git a/tools/zooma/test-data/mock_response.json b/tools/zooma/test-data/mock_response.json new file mode 100644 index 00000000..2e41afec --- /dev/null +++ b/tools/zooma/test-data/mock_response.json @@ -0,0 +1,24 @@ +{ + "asthma": [ + { + "annotatedProperty": { + "propertyValue": "asthma", + "propertyType": "disease" + }, + "semanticTags": [ + "http://www.ebi.ac.uk/efo/EFO_0000270" + ], + 
"""Query the ZOOMA annotation API for the values of one tabular column.

The script reads a tab-separated input file, looks up every non-empty value of
the selected column, and writes one output row per returned annotation (or a
single placeholder row when nothing was returned for a value).
"""
import argparse
import csv
import json


DEFAULT_API_URL = "https://www.ebi.ac.uk/spot/zooma/v2/api/services/annotate"

# Single source of truth for the output schema: used for the header, for the
# writer's field order and for the placeholder row of unannotated terms.
OUTPUT_COLUMNS = [
    "query",
    "property_value",
    "property_type",
    "semantic_tags",
    "confidence",
    "source_name",
    "source_type",
    "study_type",
]


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            ``sys.argv[1:]`` is parsed, exactly as before.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="Query ZOOMA API for values from a selected tabular column.")
    parser.add_argument("--input", required=True, help="Input tabular file path")
    parser.add_argument("--output", required=True, help="Output tabular file path")
    parser.add_argument("--column", required=True, type=int, help="1-based input column index used for query terms")
    parser.add_argument("--has-header", action="store_true", help="Input tabular file contains a header row")
    parser.add_argument("--api-url", default=DEFAULT_API_URL, help="ZOOMA annotation endpoint URL")
    parser.add_argument("--timeout", type=int, default=30, help="HTTP request timeout in seconds")
    parser.add_argument("--mock-response", help="Optional JSON file with mocked responses keyed by query term")
    return parser.parse_args(argv)


def get_nested_field(item, *path):
    """Return the value found by following ``path`` through nested dicts, as text.

    Args:
        item: The object to start from (normally a dict).
        *path: Keys to follow, outermost first.

    Returns:
        ``""`` when any step is not a dict or the key is missing/``None``;
        the elements joined with ``"|"`` when the final value is a list;
        otherwise ``str()`` of the final value.
    """
    current = item
    for key in path:
        if not isinstance(current, dict):
            return ""
        current = current.get(key)
        if current is None:
            return ""
    if isinstance(current, list):
        return "|".join(str(value) for value in current)
    return str(current)


def normalize_annotations(query_value, annotations):
    """Flatten annotation objects into output rows for one query term.

    Args:
        query_value: The term that was looked up; copied into every row.
        annotations: Iterable of annotation dicts; may be empty or ``None``.

    Returns:
        A list of dicts keyed by ``OUTPUT_COLUMNS``. When ``annotations`` is
        falsy the list holds one row with only ``query`` filled in, so that
        unannotated terms still appear in the output.
    """
    if not annotations:
        placeholder = dict.fromkeys(OUTPUT_COLUMNS, "")
        placeholder["query"] = query_value
        return [placeholder]

    source_path = ("derivedFrom", "provenance", "source")
    return [
        {
            "query": query_value,
            "property_value": get_nested_field(annotation, "annotatedProperty", "propertyValue"),
            "property_type": get_nested_field(annotation, "annotatedProperty", "propertyType"),
            "semantic_tags": get_nested_field(annotation, "semanticTags"),
            "confidence": get_nested_field(annotation, "confidence"),
            "source_name": get_nested_field(annotation, *source_path, "name"),
            "source_type": get_nested_field(annotation, *source_path, "type"),
            "study_type": get_nested_field(annotation, *source_path, "semanticTag"),
        }
        for annotation in annotations
    ]


def query_zooma(query_value, api_url, timeout):
    """Request annotations for a single term from the ZOOMA endpoint.

    Args:
        query_value: Term sent as the ``propertyValue`` query parameter.
        api_url: Annotation endpoint URL.
        timeout: HTTP timeout in seconds.

    Returns:
        A list of annotation objects; a single JSON object is wrapped in a
        one-element list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the decoded payload is neither a list nor a dict.
    """
    # Imported here rather than at module level so that --mock-response runs
    # (which never reach this function) need neither the package nor network.
    import requests

    response = requests.get(
        api_url,
        params={"propertyValue": query_value},
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()
    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        return [payload]
    raise ValueError("Unexpected response payload type from ZOOMA API")


def run(argv=None):
    """Annotate the selected column of the input table and write the result.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``;
            ``None`` means "use the process command line".

    Raises:
        ValueError: If ``--column`` is smaller than 1, or if the mock response
            file does not contain a JSON object.
    """
    args = parse_args(argv)

    column_index = args.column - 1
    if column_index < 0:
        raise ValueError("Column index must be a positive integer.")

    mock_response = None
    if args.mock_response:
        with open(args.mock_response, "r", encoding="utf-8") as handle:
            mock_response = json.load(handle)
        # Fail with a clear message instead of an AttributeError on .get() below.
        if not isinstance(mock_response, dict):
            raise ValueError("Mock response file must contain a JSON object keyed by query term.")

    # Normalized rows per distinct term: a term repeated in the input costs
    # one lookup, not one HTTP request per occurrence.
    rows_by_term = {}

    with open(args.input, "r", encoding="utf-8", newline="") as infile, open(
        args.output, "w", encoding="utf-8", newline=""
    ) as outfile:
        reader = csv.reader(infile, delimiter="\t")
        writer = csv.DictWriter(outfile, fieldnames=OUTPUT_COLUMNS, delimiter="\t", lineterminator="\n")
        writer.writeheader()

        if args.has_header:
            # Discard the header line; the default keeps an empty file harmless.
            next(reader, None)

        for row in reader:
            # Short or blank lines have no value in the selected column.
            if column_index >= len(row):
                continue

            query_value = row[column_index].strip()
            if not query_value:
                continue

            if query_value not in rows_by_term:
                if mock_response is not None:
                    annotations = mock_response.get(query_value, [])
                else:
                    annotations = query_zooma(query_value, args.api_url, args.timeout)
                rows_by_term[query_value] = normalize_annotations(query_value, annotations)

            writer.writerows(rows_by_term[query_value])


if __name__ == "__main__":
    run()