RECETOX · rdurnik · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026
diff --git a/tools/aoptk/.shed.yml b/tools/aoptk/.shed.yml
@@ -4,13 +4,13 @@ remote_repository_url: "https://github.com/rdurnik/aoptk"
 homepage_url: "https://github.com/rdurnik/aoptk"
 categories:
   - Machine Learning
-description: "AOP-toolkit (aoptk) is a Python package designed to support the development of Adverse Outcome Pathways (AOPs) that require extensive data mining."
+description: "AOP-toolkit (aoptk) is a Python package designed to support data mining and analysis of toxicological outcomes."
 long_description: |
-  "AOP-toolkit (aoptk) is a Python package developed to support the construction of Adverse Outcome Pathways (AOPs) that require extensive mining and integration of toxicological data from heterogeneous sources. It enables researchers to collect literature from databases such as PubMed and Europe PMC, extract relevant information from full-text publications, and analyze complex, unstructured data using large language models. The toolkit also provides functionality for normalizing chemical names across publications, helping ensure consistency and interoperability."
+  AOP-toolkit (aoptk) is a Python package for mining and analyzing toxicological and biomedical literature. Originally developed to support the construction of Adverse Outcome Pathways (AOPs), it provides general-purpose tools for retrieving, processing, and analyzing scientific publications.
 auto_tool_repositories:
   name_template: "{{ tool_id }}"
   description_template: "{{ tool_name }} tool from the aoptk package"
 suite:
   name: suite_aoptk
-  description: AOP-toolkit (aoptk) is a Python package developed to support the construction of Adverse Outcome Pathways (AOPs) that require extensive mining and integration of toxicological data from heterogeneous sources.
+  description: AOP-toolkit (aoptk) is a Python package for mining and analyzing toxicological and biomedical literature. Originally developed to support the construction of Adverse Outcome Pathways (AOPs), it provides general-purpose tools for retrieving, processing, and analyzing scientific publications.
   type: repository_suite_definition
diff --git a/tools/aoptk/aoptk_chemical_identifier.xml b/tools/aoptk/aoptk_chemical_identifier.xml
@@ -6,19 +6,21 @@
 
     <requirements>
         <expand macro="requirements"/>
+        <expand macro="email_credentials"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
         chemical-identifier
             --query '$query'
-            --literature_database "$literature_database"
+            --literature_database "$literature_database_pubmed_europepmc"
             --chemical_database "$chemical_database"
             --outdir .
             \${EMAIL:+--email \$EMAIL}
     ]]></command>
 
     <inputs>
-        <expand macro="inputs"/>
+        <expand macro="query"/>
+        <expand macro="literature_database_pubmed_europepmc"/>
         <param argument="--chemical_database" type="data" format="xlsx" label="Chemical database" help="Custom chemical database with toxicologically relevant chemicals. Excel file with single column: chemical_name. Examples can be found in Citations." />
     </inputs>
 
@@ -31,13 +33,13 @@
         <!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. -->
         <test>
            <param name="query" value="hepg2 thioacetamide"/>
-           <param name="literature_database" value="pubmed"/>
+           <param name="literature_database_pubmed_europepmc" value="pubmed"/>
            <param name="chemical_database" location="https://zenodo.org/records/16532456/files/tg_gates.xlsx?download=1"/>
            <output name="Chemicals_per_publication" file="chemicals_per_publication_test.xlsx" compare="sim_size" delta="100"/>
         </test>
         <test>
             <param name="query" value="hepg2 thioacetamide spheroid"/>
-            <param name="literature_database" value="europepmc"/>
+            <param name="literature_database_pubmed_europepmc" value="europepmc"/>
             <param name="chemical_database" location="https://zenodo.org/records/16532456/files/tg_gates.xlsx?download=1"/>
             <output name="Publications_per_chemical" file="publications_per_chemical.xlsx" compare="sim_size" delta="100"/>
         </test>

diff --git a/tools/aoptk/aoptk_chemical_matching.xml b/tools/aoptk/aoptk_chemical_matching.xml
@@ -0,0 +1,59 @@
+<tool id="aoptk_match_chemicals" name="aoptk match chemicals" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.1" license="MIT">
+    <description>Match chemical entities.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+
+    <!-- Run the Python script that Galaxy renders from the configfile below. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${match_chemicals}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="match_chemicals">
+## Outer-join the two input tables on their shared "heading" column and
+## write the result to merged_chemicals.tsv in the working directory.
+import pandas as pd
+
+chemicals_df_1 = pd.read_csv("$input_file_1", sep="\t")
+chemicals_df_2 = pd.read_csv("$input_file_2", sep="\t")
+## how="outer" keeps headings that occur in only one of the two inputs.
+merged_files = chemicals_df_1.merge(chemicals_df_2, on="heading", how="outer")
+merged_files.to_csv("merged_chemicals.tsv", sep="\t", index=False)
+
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <param name="input_file_1" type="data" format="tabular" label="First TSV with heading column." help="Input tsv file with heading column." />
+        <param name="input_file_2" type="data" format="tabular" label="Second TSV with heading column." help="Input tsv file with heading column." />
+    </inputs>
+
+    <outputs>
+        <data name="merged_chemicals" format="tabular" from_work_dir="merged_chemicals.tsv" label="Merged chemicals with heading." />
+    </outputs>
+
+    <tests>
+        <!-- Test file names are given relative to the tool's test-data directory. -->
+        <test>
+            <param name="input_file_1" value="normalized.tsv"/>
+            <param name="input_file_2" value="normalized.tsv"/>
+            <output name="merged_chemicals" file="normalized.tsv" compare="sim_size" delta="100"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Chemical Matching
+===================
+
+Tool to match chemical entities.
+
+Both inputs are tab-separated files with a ``heading`` column. The tables are combined with an outer join on ``heading``, so rows whose heading occurs in only one input are kept.
+
+    ]]></help>
+
+</tool>
diff --git a/tools/aoptk/aoptk_chemical_normalization_llm.xml b/tools/aoptk/aoptk_chemical_normalization_llm.xml
@@ -0,0 +1,71 @@
+<tool id="aoptk_normalize_chemicals_llm" name="aoptk normalize chemicals llm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.1" license="MIT">
+    <description>Normalize chemical entities using LLMs.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <expand macro="requirements"/>
+        <expand macro="openai_api_key_credentials"/>
+    </requirements>
+
+    <!-- Run the Python script that Galaxy renders from the configfile below. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${normalize_chemicals_llm}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="normalize_chemicals_llm">
+## Match every entry of the "chemical" column against the supplied chemical
+## list with an LLM and store the resulting heading in a "heading" column.
+import os
+from aoptk.text_generation_api import TextGenerationAPI
+from aoptk.chemical import Chemical
+import pandas as pd
+
+## The API key is taken from the OPENAI_KEY environment variable.
+openai_key = os.environ.get("OPENAI_KEY")
+## Build the client once and reuse it for every row.
+text_generation_api = TextGenerationAPI(model="$llm_model", api_key=openai_key)
+chemical_list = pd.read_csv("$chemical_list", sep="\t")["chemical"].tolist()
+chemicals = pd.read_csv("$chemicals", sep="\t")
+chemicals["chemical"] = chemicals["chemical"].apply(
+    lambda x: text_generation_api.normalize_chemical(chemical=Chemical(x), chemical_list=chemical_list)
+)
+## NOTE(review): "chemical" now holds the normalized objects, so to_csv writes their string form; confirm this is intended.
+chemicals["heading"] = chemicals["chemical"].apply(lambda chem: chem.heading)
+chemicals.to_csv("normalized_chemicals.tsv", sep="\t", index=False)
+
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="llm_models"/>
+        <param name="chemicals" type="data" format="tabular" label="TSV with chemical column." help="Input tsv file with chemical column." />
+        <param name="chemical_list" type="data" format="tabular" label="TSV with chemical list." help="Input tsv file with chemical list." />
+    </inputs>
+
+    <outputs>
+        <data name="normalized_chemicals" format="tabular" from_work_dir="normalized_chemicals.tsv" label="Chemicals with heading generated." />
+    </outputs>
+
+    <tests>
+        <!-- Test file names are given relative to the tool's test-data directory. -->
+        <test>
+            <param name="chemicals" value="chemicals.tsv"/>
+            <param name="chemical_list" value="chemicals.tsv"/>
+            <output name="normalized_chemicals" file="normalized.tsv" compare="sim_size" delta="100"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Chemical Normalization LLMs
+===========================
+
+Tool to normalize chemical entities using LLMs. Using LLM to match a given chemical against a provided chemical list.
+
+Both inputs are tab-separated files; each must contain a ``chemical`` column.
+
+    ]]></help>
+
+</tool>
diff --git a/tools/aoptk/aoptk_chemical_normalization_mesh.xml b/tools/aoptk/aoptk_chemical_normalization_mesh.xml
@@ -0,0 +1,63 @@
+<tool id="aoptk_normalize_chemicals_mesh" name="aoptk normalize chemicals mesh" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.1" license="MIT">
+    <description>Normalize chemical entities using MeshTerms.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+
+    <!-- Run the Python script that Galaxy renders from the configfile below. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${normalize_chemicals_mesh}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="normalize_chemicals_mesh">
+## Normalize every entry of the "chemical" column with MeshTerms and store
+## the resulting heading in a new "heading" column.
+from aoptk.chemical import Chemical
+from aoptk.normalization.mesh_terms import MeshTerms
+import pandas as pd
+
+chemicals = pd.read_csv("$input_file", sep="\t")
+## Build the normalizer once and reuse it for every row.
+mesh_terms = MeshTerms()
+chemicals["chemical"] = chemicals["chemical"].apply(
+    lambda x: mesh_terms.normalize_chemical(Chemical(x))
+)
+## NOTE(review): "chemical" now holds the normalized objects, so to_csv writes their string form; confirm this is intended.
+chemicals["heading"] = chemicals["chemical"].apply(lambda chem: chem.heading)
+chemicals.to_csv("normalized_chemicals.tsv", sep="\t", index=False)
+
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <param name="input_file" type="data" format="tabular" label="TSV with chemical column." help="Input tsv file with chemical column." />
+    </inputs>
+
+    <outputs>
+        <data name="normalized_chemicals" format="tabular" from_work_dir="normalized_chemicals.tsv" label="Chemicals with heading generated." />
+    </outputs>
+
+    <tests>
+        <!-- Test file names are given relative to the tool's test-data directory. -->
+        <test>
+            <param name="input_file" value="chemicals.tsv"/>
+            <output name="normalized_chemicals" file="normalized.tsv" compare="sim_size" delta="100"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Chemical Normalization MeSH Terms
+=================================
+
+Tool to normalize chemical entities using MeSH Terms.
+
+The input is a tab-separated file with a ``chemical`` column; the output adds a ``heading`` column.
+
+    ]]></help>
+
+</tool>
diff --git a/tools/aoptk/aoptk_chemical_normalization_pubchem.xml b/tools/aoptk/aoptk_chemical_normalization_pubchem.xml
@@ -0,0 +1,63 @@
+<tool id="aoptk_normalize_chemicals_pubchem" name="aoptk normalize chemicals pubchem" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.1" license="MIT">
+    <description>Normalize chemical entities using PubChem API.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+
+    <!-- Run the Python script that Galaxy renders from the configfile below. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${normalize_chemicals_pubchem}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="normalize_chemicals_pubchem">
+## Normalize every entry of the "chemical" column with PubChemAPI and store
+## the resulting heading in a new "heading" column.
+from aoptk.normalization.pubchem_api import PubChemAPI
+from aoptk.chemical import Chemical
+import pandas as pd
+
+chemicals = pd.read_csv("$input_file", sep="\t")
+## Build the normalizer once and reuse it for every row.
+pubchem_api = PubChemAPI()
+chemicals["chemical"] = chemicals["chemical"].apply(
+    lambda x: pubchem_api.normalize_chemical(Chemical(x))
+)
+## NOTE(review): "chemical" now holds the normalized objects, so to_csv writes their string form; confirm this is intended.
+chemicals["heading"] = chemicals["chemical"].apply(lambda chem: chem.heading)
+chemicals.to_csv("normalized_chemicals.tsv", sep="\t", index=False)
+
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <param name="input_file" type="data" format="tabular" label="TSV with chemical column." help="Input tsv file with chemical column." />
+    </inputs>
+
+    <outputs>
+        <data name="normalized_chemicals" format="tabular" from_work_dir="normalized_chemicals.tsv" label="Chemicals with heading generated." />
+    </outputs>
+
+    <tests>
+        <!-- Test file names are given relative to the tool's test-data directory. -->
+        <test>
+            <param name="input_file" value="chemicals.tsv"/>
+            <output name="normalized_chemicals" file="normalized.tsv" compare="sim_size" delta="100"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Chemical Normalization PubChem
+==============================
+
+Tool to normalize chemical entities using PubChem API.
+
+The input is a tab-separated file with a ``chemical`` column; the output adds a ``heading`` column.
+
+    ]]></help>
+
+</tool>
diff --git a/tools/aoptk/aoptk_download_abstracts.xml b/tools/aoptk/aoptk_download_abstracts.xml
@@ -0,0 +1,85 @@
+<tool id="aoptk_download_abstracts" name="aoptk download abstracts" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.1" license="MIT">
+    <description>Download abstracts for a list of publication IDs.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <expand macro="requirements"/>
+        <expand macro="email_credentials"/>
+    </requirements>
+
+    <!-- Run the Python script that Galaxy renders from the configfile below. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${download_abstracts}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="download_abstracts">
+## Fetch the abstracts for the listed publication IDs from the selected
+## database and write each one to its own text file in the working directory.
+from aoptk.literature.databases.pubmed import PubMed
+from aoptk.literature.databases.europepmc import EuropePMC
+from aoptk.literature.abstract import Abstract
+from Bio import Entrez
+import os
+
+## Read one publication ID per line; blank lines are skipped.
+with open("$input_file", "r") as f:
+    ids = [line.strip() for line in f if line.strip()]
+email = os.environ.get("EMAIL")
+database = "${literature_database_pubmed_europepmc}"
+
+if database == "pubmed":
+    Entrez.email = email
+    ## NOTE(review): __new__ skips PubMed.__init__; presumably to avoid running a query, confirm no other attribute is required.
+    pubmed = PubMed.__new__(PubMed)
+    pubmed.id_list = ids
+    abstracts = pubmed.get_abstracts()
+elif database == "europepmc":
+    ## NOTE(review): constructed with an empty query and the ID list is overwritten afterwards; confirm the constructor does no search.
+    europepmc = EuropePMC("")
+    europepmc.id_list = ids
+    abstracts = europepmc.get_abstracts()
+else:
+    raise ValueError("Select valid database.")
+
+## One text file per abstract, named after its publication ID.
+for abstract in abstracts:
+    with open(f"{abstract.publication_id}.txt", "w", encoding="utf-8") as f:
+        f.write(abstract.text)
+
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="literature_database_pubmed_europepmc"/>
+        <param name="input_file" type="data" format="txt" label="List of IDs to search for." help="Input text file with IDs to search for." />
+    </inputs>
+
+    <outputs>
+        <!-- Collect only the .txt files from the working directory; the element name is the file name without the extension. -->
+        <collection name="abstracts" type="list" label="Downloaded abstracts">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" format="txt" visible="false" />
+        </collection>
+    </outputs>
+
+    <tests>
+        <!-- Test file names are given relative to the tool's test-data directory. -->
+        <test>
+            <param name="input_file" value="ids.txt"/>
+            <output_collection name="abstracts" type="list" count="2"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Download Abstracts
+===================
+
+Tool to download publication abstracts.
+
+The input is a text file with one publication ID per line. Each downloaded abstract becomes one element of the output collection.
+
+    ]]></help>
+
+</tool>