Skip to content
6 changes: 2 additions & 4 deletions docs/build_docs.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Rebuild the HTML documentation inside a freshly created conda environment.
# NOTE(review): the relative paths used here (build, ../., the Makefile behind
# `make html`) assume the script is started from the docs/ directory - confirm.

# Discard the output of any previous build.
rm -rf build

# Recreate the environment from scratch; -y keeps both steps non-interactive
# so an unattended run cannot stall on a confirmation prompt.
conda env remove -n alphabasedocs -y
conda create -n alphabasedocs python=3.9 -y
# conda create -n alphatimsinstaller python=3.9

# `conda activate` is a shell function that only exists once conda's shell
# integration has been loaded; a non-interactive script does not get it from
# the user's rc files, so load it explicitly before activating.
. "$(conda info --base)/etc/profile.d/conda.sh"
conda activate alphabasedocs
# call conda install git -y
# call pip install 'git+https://github.com/MannLabs/alphatims.git#egg=alphatims[gui]' --use-feature=2020-resolver
# brew install freetype

# Install the package from the parent directory with its "development" extra,
# then build the HTML docs.
pip install '../.[development]'
make html
conda deactivate
333 changes: 333 additions & 0 deletions nbs_tests/match/psm_match.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#---#| default_exp match.psm_match"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Match"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Peak-matching functionality: tests that match PSMs against MS2 spectra with `PepSpecMatch`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"from peptdeep.match.psm_match import PepSpecMatch\n",
"from alpharaw import register_all_readers\n",
"from alpharaw.ms_data_base import ms_reader_provider\n",
"\n",
"# NOTE(review): presumably this makes the 'mgf' reader type requested in the\n",
"# cells below available through ms_reader_provider - confirm against alpharaw.\n",
"register_all_readers()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"import io\n",
"import copy\n",
"\n",
"from alphabase.psm_reader.pfind_reader import pFindReader\n",
"from alphabase.peptide.fragment import create_fragment_mz_dataframe_by_sort_precursor\n",
"from alpharaw.legacy_msdata.mgf import MGFReader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2/2 [00:01<00:00, 1.41it/s]\n"
]
}
],
"source": [
"#| hide\n",
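"#unittest\n",
"# In-memory MGF fixture with five spectra (scans 31809, 23862, 23431, 32733, 23669);\n",
"# the last spectrum (scan 23669) contains no peaks.\n",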
"mgf = io.StringIO(\"\"\"\n",
"BEGIN IONS\n",
"TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.31809.31809.3.0.dta\n",
"CHARGE=3+\n",
"RTINSECONDS=0.5418930\n",
"PEPMASS=272.276336\n",
"103.92207 5457.3\n",
"104.20045 5051.4\n",
"108.70090 5891.7\n",
"113.94175 6442.6\n",
"116.92975 40506.3\n",
"116.93716 8945.5\n",
"128.37773 6427.8\n",
"131.95308 288352.6\n",
"133.93259 7344.6\n",
"138.44611 7326.1\n",
"139.00072 41556.8\n",
"140.00319 16738.8\n",
"140.99719 9493.8\n",
"145.93156 10209.3\n",
"145.94897 10497.8\n",
"147.94559 8206.3\n",
"147.96396 30552.8\n",
"148.95543 14654.7\n",
"149.96338 234207.8\n",
"150.95096 8306.0\n",
"157.01089 84638.9\n",
"158.01357 27925.7\n",
"159.00627 16084.7\n",
"163.94281 24751.1\n",
"163.95915 32203.3\n",
"165.95605 44458.0\n",
"165.97186 11530.2\n",
"166.99500 26432.2\n",
"167.97302 9216.7\n",
"181.95230 13858.8\n",
"191.95448 66152.7\n",
"192.95538 8408.9\n",
"193.07185 9092.8\n",
"193.95313 660574.9\n",
"194.95674 23452.8\n",
"194.99008 143940.9\n",
"200.00568 19510.8\n",
"200.99942 23678.7\n",
"204.30894 9406.1\n",
"209.96466 21853.6\n",
"211.96245 65351.0\n",
"218.90355 9149.6\n",
"223.91072 11300.2\n",
"238.89684 12108.8\n",
"243.93825 10150.2\n",
"243.97040 10987.7\n",
"244.94121 8744.2\n",
"246.90314 11556.3\n",
"271.93225 29430.0\n",
"271.99219 51184.4\n",
"272.19150 31960.4\n",
"272.98602 35844.1\n",
"273.94431 11031.8\n",
"284.47998 8191.3\n",
"290.00125 66212.4\n",
"290.99539 54064.7\n",
"293.89490 10005.0\n",
"407.06372 10838.2\n",
"464.36697 9715.4\n",
"633.40036 633.40036\n",
"698.81390 9711.7\n",
"707.301117 707.301117\n",
"END IONS\n",
"BEGIN IONS\n",
"TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23862.23862.2.0.dta\n",
"CHARGE=2+\n",
"RTINSECONDS=0.6455220\n",
"PEPMASS=287.427959\n",
"103.34669 5304.0\n",
"104.66884 5639.7\n",
"113.42419 6258.3\n",
"118.84039 5837.5\n",
"119.93203 13977.3\n",
"130.69589 6876.2\n",
"133.94824 43094.3\n",
"134.30524 7671.5\n",
"135.96359 9031.3\n",
"138.99994 8329.7\n",
"146.95573 31143.9\n",
"147.96323 12176.5\n",
"150.95151 65859.3\n",
"151.95818 24384.2\n",
"157.01105 19241.5\n",
"157.34985 7532.5\n",
"161.08838 7843.9\n",
"161.94234 20119.7\n",
"162.95146 60110.4\n",
"163.95877 183305.5\n",
"164.96657 13647.5\n",
"174.95139 150331.9\n",
"175.95258 21393.4\n",
"178.94460 11433.1\n",
"179.95316 13650.5\n",
"180.96204 15353.5\n",
"190.94572 30418.9\n",
"191.95422 61914.1\n",
"192.61461 8642.1\n",
"192.94395 12331.4\n",
"192.96207 132342.5\n",
"193.96318 19303.0\n",
"209.04164 25149.6\n",
"209.96368 154185.0\n",
"209.98361 12353.5\n",
"213.86244 11541.3\n",
"224.93071 12903.0\n",
"228.92879 8773.6\n",
"241.86043 135357.5\n",
"242.86113 20805.2\n",
"242.94327 26679.4\n",
"243.95219 29569.9\n",
"244.92361 12153.5\n",
"246.90300 16650.3\n",
"252.96521 73484.3\n",
"253.96646 11527.5\n",
"286.85858 10166.4\n",
"287.94186 18763.2\n",
"303.87665 39189.3\n",
"304.88116 11976.0\n",
"321.89087 97122.5\n",
"322.88867 28020.8\n",
"370.28696 9008.2\n",
"389.82578 13277.0\n",
"407.83545 12220.4\n",
"425.84872 13236.5\n",
"482.54852 10940.2\n",
"END IONS\n",
"BEGIN IONS\n",
"TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23431.23431.2.0.dta\n",
"CHARGE=2+\n",
"RTINSECONDS=0.6455220\n",
"PEPMASS=287.427959\n",
"103.34669 5304.0\n",
"104.66884 5639.7\n",
"END IONS\n",
"BEGIN IONS\n",
"TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.32733.32733.2.0.dta\n",
"CHARGE=2+\n",
"RTINSECONDS=0.6455220\n",
"PEPMASS=287.427959\n",
"103.34669 5304.0\n",
"104.66884 5639.7\n",
"402.705571 402.705571\n",
"END IONS\n",
"BEGIN IONS\n",
"TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23669.23669.2.0.dta\n",
"CHARGE=2+\n",
"RTINSECONDS=0.6455220\n",
"PEPMASS=287.427959\n",
"END IONS\n",
"\"\"\")\n",
"\n",
"# Both raw names are backed by the same MGF text; each gets its own copy of the stream.\n",
"ms_file_dict = {raw_name: copy.deepcopy(mgf) for raw_name in ('raw', 'raw1')}\n",
"\n",
"# PSM table read with pFindReader: five rows for 'raw' and four rows for 'raw1'.\n",
"psmlabel_str = '''File_Name\tSequence\tModification\tCharge\tScan_No\tProteins\tQ-value\tTarget/Decoy\tFinal_Score\n",
"raw.31809.31809.2.0.dta\tPSTDLLMLK\t2,Phospho[S];7,Oxidation[M];\t2\t31809\tProt\t0\ttarget\t100\n",
"raw.23862.23862.2.0.dta\tHTAYSDFLSDK\t\t2\t23862\tProt\t0\ttarget\t100\n",
"raw.23431.23431.2.0.dta\tHTAYSDFLSDK\t\t2\t23431\tProt\t0\ttarget\t100\n",
"raw.32733.32733.2.0.dta\tHFALFSTDVTK\t\t2\t32733\tProt\t0\ttarget\t100\n",
"raw.23669.23669.2.0.dta\tHTAYSDFLSDK\t\t2\t23669\tProt\t0\ttarget\t100\n",
"raw1.31809.31809.2.0.dta\tPSTDLLMLK\t2,Phospho[S];7,Oxidation[M];\t2\t31809\tProt\t0\ttarget\t100\n",
"raw1.23862.23862.2.0.dta\tHTAYSDFLSDK\t\t2\t23862\tProt\t0\ttarget\t100\n",
"raw1.23431.23431.2.0.dta\tHTAYSDFLSDK\t\t2\t23431\tProt\t0\ttarget\t100\n",
"raw1.32733.32733.2.0.dta\tHFALFSTDVTK\t\t2\t32733\tProt\t0\ttarget\t100\n",
"'''\n",
"psm_reader = pFindReader()\n",
"psm_reader.import_file(io.StringIO(psmlabel_str))\n",
"psm_df = psm_reader.psm_df\n",
"\n",
"matching = PepSpecMatch()\n",
"matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
"\n",
"# Expect exactly six entries with a non-infinite m/z error and six with a\n",
"# non-zero matched intensity across both raw names.\n",
"mz_errors = matching.matched_mz_err_df.values\n",
"assert np.count_nonzero(~np.isinf(mz_errors)) == 6\n",
"assert np.count_nonzero(matching.matched_intensity_df.values) == 6"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2/2 [00:00<00:00, 174.90it/s]\n"
]
}
],
"source": [
"#| hide\n",
"# Repeat the multi-raw match, this time passing reader objects that have\n",
"# already imported the MGF instead of passing the raw streams.\n",
"ms_file_dict = {}\n",
"for raw_name in ('raw', 'raw1'):\n",
"    loaded_reader = ms_reader_provider.get_reader('mgf')\n",
"    loaded_reader.import_raw(copy.deepcopy(mgf))\n",
"    ms_file_dict[raw_name] = loaded_reader\n",
"\n",
"matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
"\n",
"# Same expectations as for the stream-based run: six matched entries.\n",
"mz_errors = matching.matched_mz_err_df.values\n",
"assert np.count_nonzero(matching.matched_intensity_df.values) == 6\n",
"assert np.count_nonzero(~np.isinf(mz_errors)) == 6"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 111.73it/s]\n"
]
}
],
"source": [
"#| hide\n",
"# The frames returned by match_ms2_one_raw must equal the corresponding\n",
"# frames held on the matcher object.\n",
"ms_file_dict = {'raw': copy.deepcopy(mgf)}\n",
"\n",
"reader = pFindReader()\n",
"reader.import_file(io.StringIO(psmlabel_str))\n",
"psm_df = reader.psm_df\n",
"# Only 'raw' is supplied in this cell, so drop the PSMs whose raw name starts with 'raw1'.\n",
"from_raw1 = psm_df.raw_name.str.startswith('raw1')\n",
"psm_df = psm_df[~from_raw1].copy()\n",
"\n",
"matching = PepSpecMatch()\n",
"matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
"matching.load_ms_data(copy.deepcopy(mgf), 'mgf')\n",
"one_raw_result = matching.match_ms2_one_raw(psm_df)\n",
"df, frag_mz_df, frag_inten_df, frag_merr_df = one_raw_result\n",
"\n",
"for stored_df, returned_df in (\n",
"    (matching.fragment_mz_df, frag_mz_df),\n",
"    (matching.matched_intensity_df, frag_inten_df),\n",
"    (matching.matched_mz_err_df, frag_merr_df),\n",
"):\n",
"    assert (stored_df == returned_df).values.all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.3 ('base')",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading
Loading