diff --git a/docs/build_docs.sh b/docs/build_docs.sh
index ec9c83ca..0aefd331 100644
--- a/docs/build_docs.sh
+++ b/docs/build_docs.sh
@@ -1,11 +1,9 @@
 rm -rf build
 conda env remove -n alphabasedocs
 conda create -n alphabasedocs python=3.9 -y
-# conda create -n alphatimsinstaller python=3.9
+
 conda activate alphabasedocs
-# call conda install git -y
-# call pip install 'git+https://github.com/MannLabs/alphatims.git#egg=alphatims[gui]' --use-feature=2020-resolver
-# brew install freetype
+
 pip install '../.[development]'
 make html
 conda deactivate
diff --git a/nbs_tests/match/psm_match.ipynb b/nbs_tests/match/psm_match.ipynb
new file mode 100644
index 00000000..10599c6d
--- /dev/null
+++ b/nbs_tests/match/psm_match.ipynb
@@ -0,0 +1,333 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#---#| default_exp match.psm_match"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Match"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Peak matching functionalities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "from peptdeep.match.psm_match import PepSpecMatch\n",
+    "from alpharaw import register_all_readers\n",
+    "from alpharaw.ms_data_base import ms_reader_provider\n",
+    "\n",
+    "register_all_readers()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "import io\n",
+    "import copy\n",
+    "\n",
+    "from alphabase.psm_reader.pfind_reader import pFindReader\n",
+    "from alphabase.peptide.fragment import create_fragment_mz_dataframe_by_sort_precursor\n",
+    "from alpharaw.legacy_msdata.mgf import MGFReader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2/2 [00:01<00:00,  1.41it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#| hide\n",
+    "#unittest\n",
+    "mgf = io.StringIO(\"\"\"\n",
+    "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.31809.31809.3.0.dta\n",
+    "CHARGE=3+\n",
+    "RTINSECONDS=0.5418930\n",
+    "PEPMASS=272.276336\n",
+    "103.92207 5457.3\n",
+    "104.20045 5051.4\n",
+    "108.70090 5891.7\n",
+    "113.94175 6442.6\n",
+    "116.92975 40506.3\n",
+    "116.93716 8945.5\n",
+    "128.37773 6427.8\n",
+    "131.95308 288352.6\n",
+    "133.93259 7344.6\n",
+    "138.44611 7326.1\n",
+    "139.00072 41556.8\n",
+    "140.00319 16738.8\n",
+    "140.99719 9493.8\n",
+    "145.93156 10209.3\n",
+    "145.94897 10497.8\n",
+    "147.94559 8206.3\n",
+    "147.96396 30552.8\n",
+    "148.95543 14654.7\n",
+    "149.96338 234207.8\n",
+    "150.95096 8306.0\n",
+    "157.01089 84638.9\n",
+    "158.01357 27925.7\n",
+    "159.00627 16084.7\n",
+    "163.94281 24751.1\n",
+    "163.95915 32203.3\n",
+    "165.95605 44458.0\n",
+    "165.97186 11530.2\n",
+    "166.99500 26432.2\n",
+    "167.97302 9216.7\n",
+    "181.95230 13858.8\n",
+    "191.95448 66152.7\n",
+    "192.95538 8408.9\n",
+    "193.07185 9092.8\n",
+    "193.95313 660574.9\n",
+    "194.95674 23452.8\n",
+    "194.99008 143940.9\n",
+    "200.00568 19510.8\n",
+    "200.99942 23678.7\n",
+    "204.30894 9406.1\n",
+    "209.96466 21853.6\n",
+    "211.96245 65351.0\n",
+    "218.90355 9149.6\n",
+    "223.91072 11300.2\n",
+    "238.89684 12108.8\n",
+    "243.93825 10150.2\n",
+    "243.97040 10987.7\n",
+    "244.94121 8744.2\n",
+    "246.90314 11556.3\n",
+    "271.93225 29430.0\n",
+    "271.99219 51184.4\n",
+    "272.19150 31960.4\n",
+    "272.98602 35844.1\n",
+    "273.94431 11031.8\n",
+    "284.47998 8191.3\n",
+    "290.00125 66212.4\n",
+    "290.99539 54064.7\n",
+    "293.89490 10005.0\n",
+    "407.06372 10838.2\n",
+    "464.36697 9715.4\n",
+    "633.40036 633.40036\n",
+    "698.81390 9711.7\n",
+    "707.301117 707.301117\n",
+    "END IONS\n",
+    "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23862.23862.2.0.dta\n",
+    "CHARGE=2+\n",
+    "RTINSECONDS=0.6455220\n",
+    "PEPMASS=287.427959\n",
+    "103.34669 5304.0\n",
+    "104.66884 5639.7\n",
+    "113.42419 6258.3\n",
+    "118.84039 5837.5\n",
+    "119.93203 13977.3\n",
+    "130.69589 6876.2\n",
+    "133.94824 43094.3\n",
+    "134.30524 7671.5\n",
+    "135.96359 9031.3\n",
+    "138.99994 8329.7\n",
+    "146.95573 31143.9\n",
+    "147.96323 12176.5\n",
+    "150.95151 65859.3\n",
+    "151.95818 24384.2\n",
+    "157.01105 19241.5\n",
+    "157.34985 7532.5\n",
+    "161.08838 7843.9\n",
+    "161.94234 20119.7\n",
+    "162.95146 60110.4\n",
+    "163.95877 183305.5\n",
+    "164.96657 13647.5\n",
+    "174.95139 150331.9\n",
+    "175.95258 21393.4\n",
+    "178.94460 11433.1\n",
+    "179.95316 13650.5\n",
+    "180.96204 15353.5\n",
+    "190.94572 30418.9\n",
+    "191.95422 61914.1\n",
+    "192.61461 8642.1\n",
+    "192.94395 12331.4\n",
+    "192.96207 132342.5\n",
+    "193.96318 19303.0\n",
+    "209.04164 25149.6\n",
+    "209.96368 154185.0\n",
+    "209.98361 12353.5\n",
+    "213.86244 11541.3\n",
+    "224.93071 12903.0\n",
+    "228.92879 8773.6\n",
+    "241.86043 135357.5\n",
+    "242.86113 20805.2\n",
+    "242.94327 26679.4\n",
+    "243.95219 29569.9\n",
+    "244.92361 12153.5\n",
+    "246.90300 16650.3\n",
+    "252.96521 73484.3\n",
+    "253.96646 11527.5\n",
+    "286.85858 10166.4\n",
+    "287.94186 18763.2\n",
+    "303.87665 39189.3\n",
+    "304.88116 11976.0\n",
+    "321.89087 97122.5\n",
+    "322.88867 28020.8\n",
+    "370.28696 9008.2\n",
+    "389.82578 13277.0\n",
+    "407.83545 12220.4\n",
+    "425.84872 13236.5\n",
+    "482.54852 10940.2\n",
+    "END IONS\n",
+    "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23431.23431.2.0.dta\n",
+    "CHARGE=2+\n",
+    "RTINSECONDS=0.6455220\n",
+    "PEPMASS=287.427959\n",
+    "103.34669 5304.0\n",
+    "104.66884 5639.7\n",
+    "END IONS\n",
+    "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.32733.32733.2.0.dta\n",
+    "CHARGE=2+\n",
+    "RTINSECONDS=0.6455220\n",
+    "PEPMASS=287.427959\n",
+    "103.34669 5304.0\n",
+    "104.66884 5639.7\n",
+    "402.705571 402.705571\n",
+    "END IONS\n",
+    "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23669.23669.2.0.dta\n",
+    "CHARGE=2+\n",
+    "RTINSECONDS=0.6455220\n",
+    "PEPMASS=287.427959\n",
+    "END IONS\n",
+    "\"\"\")\n",
+    "\n",
+    "ms_file_dict = {\n",
+    "    'raw': copy.deepcopy(mgf),\n",
+    "    'raw1': copy.deepcopy(mgf),\n",
+    "}\n",
+    "\n",
+    "psmlabel_str = '''File_Name\tSequence\tModification\tCharge\tScan_No\tProteins\tQ-value\tTarget/Decoy\tFinal_Score\n",
+    "raw.31809.31809.2.0.dta\tPSTDLLMLK\t2,Phospho[S];7,Oxidation[M];\t2\t31809\tProt\t0\ttarget\t100\n",
+    "raw.23862.23862.2.0.dta\tHTAYSDFLSDK\t\t2\t23862\tProt\t0\ttarget\t100\n",
+    "raw.23431.23431.2.0.dta\tHTAYSDFLSDK\t\t2\t23431\tProt\t0\ttarget\t100\n",
+    "raw.32733.32733.2.0.dta\tHFALFSTDVTK\t\t2\t32733\tProt\t0\ttarget\t100\n",
+    "raw.23669.23669.2.0.dta\tHTAYSDFLSDK\t\t2\t23669\tProt\t0\ttarget\t100\n",
+    "raw1.31809.31809.2.0.dta\tPSTDLLMLK\t2,Phospho[S];7,Oxidation[M];\t2\t31809\tProt\t0\ttarget\t100\n",
+    "raw1.23862.23862.2.0.dta\tHTAYSDFLSDK\t\t2\t23862\tProt\t0\ttarget\t100\n",
+    "raw1.23431.23431.2.0.dta\tHTAYSDFLSDK\t\t2\t23431\tProt\t0\ttarget\t100\n",
+    "raw1.32733.32733.2.0.dta\tHFALFSTDVTK\t\t2\t32733\tProt\t0\ttarget\t100\n",
+    "'''\n",
+    "reader = pFindReader()\n",
+    "reader.import_file(io.StringIO(psmlabel_str))\n",
+    "psm_df = reader.psm_df\n",
+    "matching = PepSpecMatch()\n",
+    "matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
+    "merrs = matching.matched_mz_err_df.values\n",
+    "#np.sum(matching.matched_intensity_df.values!=0,axis=1)\n",
+    "assert len(merrs[~np.isinf(merrs)])==6\n",
+    "assert np.count_nonzero(matching.matched_intensity_df.values)==6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2/2 [00:00<00:00, 174.90it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#| hide\n",
+    "mgf_reader = ms_reader_provider.get_reader('mgf')\n",
+    "mgf_reader.import_raw(copy.deepcopy(mgf))\n",
+    "ms_file_dict = {'raw': mgf_reader}\n",
+    "mgf_reader1 = ms_reader_provider.get_reader('mgf')\n",
+    "mgf_reader1.import_raw(copy.deepcopy(mgf))\n",
+    "ms_file_dict['raw1'] = mgf_reader1\n",
+    "matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
+    "merrs = matching.matched_mz_err_df.values\n",
+    "assert np.count_nonzero(matching.matched_intensity_df.values) == 6\n",
+    "assert len(merrs[~np.isinf(merrs)]) == 6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:00<00:00, 111.73it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#| hide\n",
+    "ms_file_dict = {\n",
+    "    'raw': copy.deepcopy(mgf),\n",
+    "}\n",
+    "reader = pFindReader()\n",
+    "reader.import_file(io.StringIO(psmlabel_str))\n",
+    "psm_df = reader.psm_df\n",
+    "psm_df = psm_df[~psm_df.raw_name.str.startswith('raw1')].copy()\n",
+    "matching = PepSpecMatch()\n",
+    "matching.match_ms2_multi_raw(psm_df, ms_file_dict, 'mgf')\n",
+    "matching.load_ms_data(copy.deepcopy(mgf), 'mgf')\n",
+    "df, frag_mz_df, frag_inten_df, frag_merr_df = matching.match_ms2_one_raw(\n",
+    "    psm_df\n",
+    ")\n",
+    "assert (matching.fragment_mz_df==frag_mz_df).values.all()\n",
+    "assert (matching.matched_intensity_df==frag_inten_df).values.all()\n",
+    "assert (matching.matched_mz_err_df==frag_merr_df).values.all()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.3 ('base')",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nbs_tests/match/test_dia_matching.ipynb b/nbs_tests/match/test_dia_matching.ipynb
new file mode 100644
index 00000000..6e74716d
--- /dev/null
+++ b/nbs_tests/match/test_dia_matching.ipynb
@@ -0,0 +1,846 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:WARNING: Temp mmap arrays are written to /var/folders/fh/hf8t3l1x02d42ggk3b304_rh0000gn/T/temp_mmap_37vmr95k. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 624,114,790,400\n",
+      "WARNING:root:WARNING: No Bruker libraries are available for this operating system. Mobility and m/z values need to be estimated. While this estimation often returns acceptable results with errors < 0.02 Th, huge errors (e.g. offsets of 6 Th) have already been observed for some samples!\n"
+     ]
+    }
+   ],
+   "execution_count": 2,
+   "source": "from peptdeep.match.psm_match import PepSpecMatch_DIA"
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "psm_match = PepSpecMatch_DIA()\n",
+    "psm_match.min_frag_mz = 0\n",
+    "# psm_match.raw_data.save_hdf(\"/Users/wenfengzeng/data/orbi_dia/hla/20200317_QE_HFX2_LC3_DIA_RA957_R01.raw.hdf\")"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "from alphabase.psm_reader.dia_psm_reader import DiannReader\n",
+    "psm_df = DiannReader().import_file(\"/Users/wenfengzeng/data/orbi_dia/hla/20200317_QE_HFX2_LC3_DIA_RA957_R01.tsv\")\n",
+    "psm_df"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "ms_files = [\"/Users/wenfengzeng/data/orbi_dia/hla/20200317_QE_HFX2_LC3_DIA_RA957_R01.raw.hdf\"]\n",
+    "df,mz_df,inten_df,merr_df = psm_match.match_ms2_multi_raw(psm_df, ms_files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>raw_name</th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>charge</th>\n",
+       "      <th>rt</th>\n",
+       "      <th>rt_start</th>\n",
+       "      <th>rt_stop</th>\n",
+       "      <th>mobility</th>\n",
+       "      <th>proteins</th>\n",
+       "      <th>uniprot_ids</th>\n",
+       "      <th>genes</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fdr</th>\n",
+       "      <th>spec_idx</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>rt_norm</th>\n",
+       "      <th>precursor_mz</th>\n",
+       "      <th>ccs</th>\n",
+       "      <th>frag_start_idx</th>\n",
+       "      <th>frag_stop_idx</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>FPTSSPTL</td>\n",
+       "      <td>1</td>\n",
+       "      <td>77.6566</td>\n",
+       "      <td>77.4414</td>\n",
+       "      <td>77.8715</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>Q15696;Q15695</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.001595</td>\n",
+       "      <td>55017</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.648554</td>\n",
+       "      <td>849.435260</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>ANSAFVER</td>\n",
+       "      <td>1</td>\n",
+       "      <td>46.3195</td>\n",
+       "      <td>46.1352</td>\n",
+       "      <td>46.5349</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P08238;Q58FF7;P07900;P07900-2</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000097</td>\n",
+       "      <td>32622</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.386840</td>\n",
+       "      <td>893.447557</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>7</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>ANVAEHQR</td>\n",
+       "      <td>2</td>\n",
+       "      <td>23.8646</td>\n",
+       "      <td>23.7079</td>\n",
+       "      <td>24.0212</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>Q12929;Q12929-2</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.001492</td>\n",
+       "      <td>16745</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.199307</td>\n",
+       "      <td>462.735940</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>14</td>\n",
+       "      <td>21</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>APAAGEEG</td>\n",
+       "      <td>1</td>\n",
+       "      <td>19.4263</td>\n",
+       "      <td>19.2696</td>\n",
+       "      <td>19.5829</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>Q9BYE7-3;Q9BYE7</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.001219</td>\n",
+       "      <td>13631</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.162240</td>\n",
+       "      <td>701.310060</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>21</td>\n",
+       "      <td>28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>HRIVSISL</td>\n",
+       "      <td>2</td>\n",
+       "      <td>67.9021</td>\n",
+       "      <td>67.7177</td>\n",
+       "      <td>68.0252</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>Q5VY09</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000713</td>\n",
+       "      <td>48029</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>8</td>\n",
+       "      <td>0.567089</td>\n",
+       "      <td>462.784902</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>28</td>\n",
+       "      <td>35</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60421</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>TPSKQSNNKYAASSYLSLTPEQWK</td>\n",
+       "      <td>3</td>\n",
+       "      <td>78.7641</td>\n",
+       "      <td>78.6414</td>\n",
+       "      <td>78.8562</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P0DOY2;P0CF74;P0DOY3</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000097</td>\n",
+       "      <td>55810</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>24</td>\n",
+       "      <td>0.657804</td>\n",
+       "      <td>910.124079</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>534642</td>\n",
+       "      <td>534665</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60422</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>VDDTQFVRFDSDAASPRTEPRAPW</td>\n",
+       "      <td>3</td>\n",
+       "      <td>88.9499</td>\n",
+       "      <td>88.7348</td>\n",
+       "      <td>89.1031</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P30490;P30685;P30491;P10319;P18463</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000516</td>\n",
+       "      <td>63092</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>24</td>\n",
+       "      <td>0.742871</td>\n",
+       "      <td>921.775562</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>534665</td>\n",
+       "      <td>534688</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60423</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>VDDTQFVRFDSDAASPRGEPREPW</td>\n",
+       "      <td>3</td>\n",
+       "      <td>86.2703</td>\n",
+       "      <td>86.1779</td>\n",
+       "      <td>86.4238</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P30504</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000399</td>\n",
+       "      <td>61178</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>24</td>\n",
+       "      <td>0.720492</td>\n",
+       "      <td>926.435317</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>534688</td>\n",
+       "      <td>534711</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60424</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>ASGSGAQVGGPISSGSSASSVTVTR</td>\n",
+       "      <td>3</td>\n",
+       "      <td>48.6067</td>\n",
+       "      <td>48.4827</td>\n",
+       "      <td>48.6992</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P02545;P02545-3;P02545-5;P02545-4</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000713</td>\n",
+       "      <td>34246</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>25</td>\n",
+       "      <td>0.405942</td>\n",
+       "      <td>736.368132</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>534711</td>\n",
+       "      <td>534735</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60425</th>\n",
+       "      <td>20200317_QE_HFX2_LC3_DIA_RA957_R01</td>\n",
+       "      <td>SGGGGGGSSGGRGSGGGSSGGSIGG</td>\n",
+       "      <td>2</td>\n",
+       "      <td>18.2415</td>\n",
+       "      <td>18.1475</td>\n",
+       "      <td>18.3042</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
+       "      <td>P04264;CON__P04264</td>\n",
+       "      <td></td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000097</td>\n",
+       "      <td>12800</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>25</td>\n",
+       "      <td>0.152345</td>\n",
+       "      <td>905.388955</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>534735</td>\n",
+       "      <td>534759</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>60426 rows × 22 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                 raw_name                   sequence  charge  \\\n",
+       "0      20200317_QE_HFX2_LC3_DIA_RA957_R01                   FPTSSPTL       1   \n",
+       "1      20200317_QE_HFX2_LC3_DIA_RA957_R01                   ANSAFVER       1   \n",
+       "2      20200317_QE_HFX2_LC3_DIA_RA957_R01                   ANVAEHQR       2   \n",
+       "3      20200317_QE_HFX2_LC3_DIA_RA957_R01                   APAAGEEG       1   \n",
+       "4      20200317_QE_HFX2_LC3_DIA_RA957_R01                   HRIVSISL       2   \n",
+       "...                                   ...                        ...     ...   \n",
+       "60421  20200317_QE_HFX2_LC3_DIA_RA957_R01   TPSKQSNNKYAASSYLSLTPEQWK       3   \n",
+       "60422  20200317_QE_HFX2_LC3_DIA_RA957_R01   VDDTQFVRFDSDAASPRTEPRAPW       3   \n",
+       "60423  20200317_QE_HFX2_LC3_DIA_RA957_R01   VDDTQFVRFDSDAASPRGEPREPW       3   \n",
+       "60424  20200317_QE_HFX2_LC3_DIA_RA957_R01  ASGSGAQVGGPISSGSSASSVTVTR       3   \n",
+       "60425  20200317_QE_HFX2_LC3_DIA_RA957_R01  SGGGGGGSSGGRGSGGGSSGGSIGG       2   \n",
+       "\n",
+       "            rt  rt_start  rt_stop  mobility proteins  \\\n",
+       "0      77.6566   77.4414  77.8715         0            \n",
+       "1      46.3195   46.1352  46.5349         0            \n",
+       "2      23.8646   23.7079  24.0212         0            \n",
+       "3      19.4263   19.2696  19.5829         0            \n",
+       "4      67.9021   67.7177  68.0252         0            \n",
+       "...        ...       ...      ...       ...      ...   \n",
+       "60421  78.7641   78.6414  78.8562         0            \n",
+       "60422  88.9499   88.7348  89.1031         0            \n",
+       "60423  86.2703   86.1779  86.4238         0            \n",
+       "60424  48.6067   48.4827  48.6992         0            \n",
+       "60425  18.2415   18.1475  18.3042         0            \n",
+       "\n",
+       "                              uniprot_ids genes  ...       fdr  spec_idx  \\\n",
+       "0                           Q15696;Q15695        ...  0.001595     55017   \n",
+       "1           P08238;Q58FF7;P07900;P07900-2        ...  0.000097     32622   \n",
+       "2                         Q12929;Q12929-2        ...  0.001492     16745   \n",
+       "3                         Q9BYE7-3;Q9BYE7        ...  0.001219     13631   \n",
+       "4                                  Q5VY09        ...  0.000713     48029   \n",
+       "...                                   ...   ...  ...       ...       ...   \n",
+       "60421                P0DOY2;P0CF74;P0DOY3        ...  0.000097     55810   \n",
+       "60422  P30490;P30685;P30491;P10319;P18463        ...  0.000516     63092   \n",
+       "60423                              P30504        ...  0.000399     61178   \n",
+       "60424   P02545;P02545-3;P02545-5;P02545-4        ...  0.000713     34246   \n",
+       "60425                  P04264;CON__P04264        ...  0.000097     12800   \n",
+       "\n",
+       "       mods  mod_sites nAA   rt_norm  precursor_mz  ccs  frag_start_idx  \\\n",
+       "0                        8  0.648554    849.435260  0.0               0   \n",
+       "1                        8  0.386840    893.447557  0.0               7   \n",
+       "2                        8  0.199307    462.735940  0.0              14   \n",
+       "3                        8  0.162240    701.310060  0.0              21   \n",
+       "4                        8  0.567089    462.784902  0.0              28   \n",
+       "...     ...        ...  ..       ...           ...  ...             ...   \n",
+       "60421                   24  0.657804    910.124079  0.0          534642   \n",
+       "60422                   24  0.742871    921.775562  0.0          534665   \n",
+       "60423                   24  0.720492    926.435317  0.0          534688   \n",
+       "60424                   25  0.405942    736.368132  0.0          534711   \n",
+       "60425                   25  0.152345    905.388955  0.0          534735   \n",
+       "\n",
+       "       frag_stop_idx  \n",
+       "0                  7  \n",
+       "1                 14  \n",
+       "2                 21  \n",
+       "3                 28  \n",
+       "4                 35  \n",
+       "...              ...  \n",
+       "60421         534665  \n",
+       "60422         534688  \n",
+       "60423         534711  \n",
+       "60424         534735  \n",
+       "60425         534759  \n",
+       "\n",
+       "[60426 rows x 22 columns]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>b_z1</th>\n",
+       "      <th>b_z2</th>\n",
+       "      <th>y_z1</th>\n",
+       "      <th>y_z2</th>\n",
+       "      <th>b_modloss_z1</th>\n",
+       "      <th>b_modloss_z2</th>\n",
+       "      <th>y_modloss_z1</th>\n",
+       "      <th>y_modloss_z2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>190300.578125</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>37765.183594</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>534754</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>534755</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>534756</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>534757</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>534758</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>534759 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 b_z1  b_z2          y_z1  y_z2  b_modloss_z1  b_modloss_z2  \\\n",
+       "0            0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "1       190300.578125   0.0  37765.183594   0.0           0.0           0.0   \n",
+       "2            0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "3            0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "4            0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "...               ...   ...           ...   ...           ...           ...   \n",
+       "534754       0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "534755       0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "534756       0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "534757       0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "534758       0.000000   0.0      0.000000   0.0           0.0           0.0   \n",
+       "\n",
+       "        y_modloss_z1  y_modloss_z2  \n",
+       "0                0.0           0.0  \n",
+       "1                0.0           0.0  \n",
+       "2                0.0           0.0  \n",
+       "3                0.0           0.0  \n",
+       "4                0.0           0.0  \n",
+       "...              ...           ...  \n",
+       "534754           0.0           0.0  \n",
+       "534755           0.0           0.0  \n",
+       "534756           0.0           0.0  \n",
+       "534757           0.0           0.0  \n",
+       "534758           0.0           0.0  \n",
+       "\n",
+       "[534759 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inten_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PCC</th>\n",
+       "      <th>COS</th>\n",
+       "      <th>SA</th>\n",
+       "      <th>SPC</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>20142.000000</td>\n",
+       "      <td>20142.000000</td>\n",
+       "      <td>20142.000000</td>\n",
+       "      <td>20142.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>0.793184</td>\n",
+       "      <td>0.801761</td>\n",
+       "      <td>0.662681</td>\n",
+       "      <td>0.786549</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>0.259575</td>\n",
+       "      <td>0.252218</td>\n",
+       "      <td>0.251722</td>\n",
+       "      <td>0.137454</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-0.050093</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.026527</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>0.722388</td>\n",
+       "      <td>0.736639</td>\n",
+       "      <td>0.530222</td>\n",
+       "      <td>0.704215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>0.900511</td>\n",
+       "      <td>0.905750</td>\n",
+       "      <td>0.724400</td>\n",
+       "      <td>0.805559</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>0.972494</td>\n",
+       "      <td>0.973826</td>\n",
+       "      <td>0.855859</td>\n",
+       "      <td>0.894600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>1.000003</td>\n",
+       "      <td>1.000002</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>&gt;0.90</th>\n",
+       "      <td>0.510807</td>\n",
+       "      <td>0.522722</td>\n",
+       "      <td>0.167494</td>\n",
+       "      <td>0.260136</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>&gt;0.75</th>\n",
+       "      <td>0.726856</td>\n",
+       "      <td>0.739632</td>\n",
+       "      <td>0.461060</td>\n",
+       "      <td>0.635836</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                PCC           COS            SA           SPC\n",
+       "count  20142.000000  20142.000000  20142.000000  20142.000000\n",
+       "mean       0.793184      0.801761      0.662681      0.786549\n",
+       "std        0.259575      0.252218      0.251722      0.137454\n",
+       "min       -0.050093      0.000000      0.000000      0.026527\n",
+       "25%        0.722388      0.736639      0.530222      0.704215\n",
+       "50%        0.900511      0.905750      0.724400      0.805559\n",
+       "75%        0.972494      0.973826      0.855859      0.894600\n",
+       "max        1.000003      1.000002      1.000000      1.000000\n",
+       ">0.90      0.510807      0.522722      0.167494      0.260136\n",
+       ">0.75      0.726856      0.739632      0.461060      0.635836"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.model.ms2 import calc_ms2_similarity\n",
+    "metrics_list = []\n",
+    "frag_len = len(inten_df)//psm_match.max_spec_per_query\n",
+    "_df = df.iloc[:len(df)//psm_match.max_spec_per_query].copy()\n",
+    "for i in range(psm_match.max_spec_per_query):\n",
+    "    for j in range(i+1,psm_match.max_spec_per_query):\n",
+    "        _df,metrics_df = calc_ms2_similarity(\n",
+    "            _df,\n",
+    "            inten_df[i*frag_len:(i+1)*frag_len],\n",
+    "            inten_df[j*frag_len:(j+1)*frag_len],\n",
+    "        )\n",
+    "    metrics_list.append(metrics_df)\n",
+    "sum(metrics_list)/len(metrics_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/peptdeep/mass_spec/match.py b/peptdeep/mass_spec/match.py
index 7e9f8041..eff9cd85 100644
--- a/peptdeep/mass_spec/match.py
+++ b/peptdeep/mass_spec/match.py
@@ -213,7 +213,10 @@ def match_one_raw_with_numba(
 
 
 class PepSpecMatch(object):
-    """Main entry for peptide-spectrum matching"""
+    """Main entry for peptide-spectrum matching.
+
+    TODO: figure out relation with `peptdeep.match.psm_match.PepSpecMatch`.
+    """
 
     def __init__(
         self,
diff --git a/peptdeep/match/__init__.py b/peptdeep/match/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/peptdeep/match/normal_dia.py b/peptdeep/match/normal_dia.py
new file mode 100644
index 00000000..cce2ebf0
--- /dev/null
+++ b/peptdeep/match/normal_dia.py
@@ -0,0 +1,50 @@
+"""DIA grouping for normal DIA data, which is not acquired with ion mobility separation. Moved from alpharaw."""
+
+import typing
+from collections import defaultdict
+
+import numpy as np
+
+from alpharaw.ms_data_base import MSData_Base
+
+
+class NormalDIAGrouper:
+    def __init__(self, ms_data: MSData_Base):
+        self.ms_data = ms_data
+        self.ms_data.spectrum_df["dia_group"] = ms_data.spectrum_df.precursor_mz.astype(
+            int
+        )
+
+        self.dia_group_dfs = self.ms_data.spectrum_df.groupby("dia_group")
+        self.dia_isolation_dict = {}
+        for dia_group, df in self.dia_group_dfs:
+            if dia_group == -1:
+                continue
+            self.dia_isolation_dict[dia_group] = (
+                df.isolation_lower_mz.values[0],
+                df.isolation_upper_mz.values[0],
+            )
+        self.dia_groups = np.sort(list(self.dia_isolation_dict.keys()))
+
+    def assign_dia_groups(
+        self, precursor_mzs
+    ) -> typing.DefaultDict[typing.List, typing.List]:
+        dia_precursor_groups = defaultdict(list)
+        for i, mz in enumerate(precursor_mzs):
+            i_group = np.searchsorted(self.dia_groups, int(mz))
+            if i_group == 0:
+                i_group = 1
+            elif i_group == len(self.dia_groups):
+                i_group -= 1
+            dia_group = self.dia_groups[i_group]
+            if dia_group in self.dia_isolation_dict:
+                isolation_lower, isolation_upper = self.dia_isolation_dict[dia_group]
+                if mz >= isolation_lower and mz <= isolation_upper:
+                    dia_precursor_groups[dia_group].append(i)
+                    continue
+            dia_group = self.dia_groups[i_group - 1]
+            if dia_group in self.dia_isolation_dict:
+                isolation_lower, isolation_upper = self.dia_isolation_dict[dia_group]
+                if mz >= isolation_lower and mz <= isolation_upper:
+                    dia_precursor_groups[dia_group].append(i)
+        return dia_precursor_groups
diff --git a/peptdeep/match/psm_match.py b/peptdeep/match/psm_match.py
new file mode 100644
index 00000000..e34c1d09
--- /dev/null
+++ b/peptdeep/match/psm_match.py
@@ -0,0 +1,615 @@
+"""This module provides the `PepSpecMatch` class to match fragment ions of PSMs. Moved from alpharaw."""
+
+from typing import Tuple, Union
+
+import numba
+import numpy as np
+import pandas as pd
+import tqdm
+from alphabase.peptide.fragment import (
+    create_fragment_mz_dataframe,
+    get_charged_frag_types,
+)
+
+from peptdeep.match.utils import load_ms_data, match_one_raw_with_numba
+from peptdeep.match.normal_dia import NormalDIAGrouper
+from alpharaw import register_all_readers
+from alpharaw.match.match_utils import (
+    match_closest_peaks,
+    match_highest_peaks,
+)
+from alpharaw.match.spec_finder import find_dia_spec_idxes_same_window
+from alpharaw.ms_data_base import (
+    PEAK_INTENSITY_DTYPE,
+    PEAK_MZ_DTYPE,
+    MSData_Base,
+)
+from alpharaw.utils.ms_path_utils import parse_ms_files_to_dict
+
+register_all_readers()  # TODO remove this import side effect
+
+
+class PepSpecMatch:
+    """
+    Extract fragment ions from MS2 data.
+    The extracted information can be used for visualization of peak annotation or
+    PeptDeep transfer learnining for the MS2 model.
+
+    TODO: figure out relation with `peptdeep.mass_spec.match.PepSpecMatch`.
+    """
+
+    match_closest: bool = True
+    use_ppm: bool = True
+    #: matching mass tolerance
+    tolerance: float = 20.0
+    ms_loader_thread_num: int = 4
+
+    def __init__(
+        self,
+        charged_frag_types: list = None,
+        match_closest: bool = True,
+        use_ppm: bool = True,
+        tol_value: float = 20.0,
+    ):
+        """
+        Parameters
+        ----------
+        charged_frag_types : list, optional
+            fragment types with charge states,
+            e.g. ['b_z1', 'y_z2', 'b_modloss_z1', 'y_H2O_z2'].
+            Defaults to `get_charged_frag_types(['b','y','b_modloss','y_modloss'], 2)`.
+
+        match_closest : bool, optional
+            if True, match the closest peak for a m/z;
+            if False, matched the higest peak for a m/z in the tolerance range.
+            By default True.
+
+        use_ppm : bool, optional
+            If use ppm other wise Da, by default True.
+
+        tol_value : float, optional
+            Matching tolerance value (ppm or Da based on `use_ppm`)
+            for peak annotation, by default 20.0
+        """
+        self.charged_frag_types = (
+            get_charged_frag_types(["b", "y", "b_modloss", "y_modloss"], 2)
+            if charged_frag_types is None
+            else charged_frag_types
+        )
+        self.match_closest = match_closest
+        self.use_ppm = use_ppm
+        self.tolerance = tol_value
+
+    def get_fragment_mz_df(self) -> pd.DataFrame:
+        """
+        Call :func:`alphabase.peptide.fragment.create_fragment_mz_dataframe`
+        for :attr:`PepSpecMatch.psm_df` and :attr:`PepSpecMatch.charged_frag_types`.
+
+
+        Returns
+        -------
+        DataFrame
+            The fragment m/z dataframe in alphabase format.
+        """
+        return create_fragment_mz_dataframe(
+            self.psm_df,
+            self.charged_frag_types,
+            dtype=PEAK_MZ_DTYPE,
+        )
+
+    def _add_missing_columns_to_psm_df(
+        self, psm_df: pd.DataFrame, raw_data: MSData_Base = None
+    ):
+        """
+        Add missing "rt", "nce", "rt_norm", ("mobility") columns to `psm_df` inplace if missing.
+
+        Parameters
+        ----------
+        psm_df : pd.DataFrame
+            psm dataframe to be processed.
+        raw_data : MSData_Base, optional
+            The `MSData_Base`. If None, `self.raw_data`. by default None.
+
+        Returns
+        -------
+        DataFrame
+            The original `psm_df` with missing columns added.
+        """
+        if raw_data is None:
+            raw_data = self.raw_data
+        add_spec_info_list = []
+        if "rt" not in psm_df.columns:
+            add_spec_info_list.append("rt")
+
+        if "nce" in raw_data.spectrum_df.columns and "nce" not in psm_df.columns:
+            add_spec_info_list.append("nce")
+
+        if (
+            "mobility" not in psm_df.columns
+            and "mobility" in raw_data.spectrum_df.columns
+        ):
+            add_spec_info_list.append("mobility")
+
+        if len(add_spec_info_list) > 0:
+            # pfind does not report RT in the result file
+            psm_df = (
+                psm_df.reset_index()
+                .merge(
+                    raw_data.spectrum_df[["spec_idx"] + add_spec_info_list],
+                    how="left",
+                    on="spec_idx",
+                )
+                .set_index("index")
+            )
+
+            if "rt" in add_spec_info_list:
+                psm_df["rt_norm"] = psm_df.rt / raw_data.spectrum_df.rt.max()
+        # if 'rt_sec' not in psm_df.columns:
+        #     psm_df['rt_sec'] = psm_df.rt*60
+        return psm_df
+
+    def _prepare_matching_dfs(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Prepare empty `fragment_mz_df`, `matched_intensity_df`,
+        and `matched_mz_err_df` dataframes to extract peak matching information
+        for `self.psm_df`. These three dataframes will be only used internally
+        in :class:`PepSpecMatch` objects.
+
+        Returns
+        -------
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
+            pd.DataFrame: fragment mz dataframe.
+
+            pd.DataFrame: intensity dataframe to match.
+
+            pd.DataFrame: mz error dataframe to match.
+        """
+        fragment_mz_df = self.get_fragment_mz_df()
+
+        matched_intensity_df = pd.DataFrame(
+            np.zeros_like(fragment_mz_df.values, dtype=PEAK_INTENSITY_DTYPE),
+            columns=fragment_mz_df.columns,
+        )
+
+        matched_mz_err_df = pd.DataFrame(
+            np.full_like(fragment_mz_df.values, np.inf, dtype=PEAK_MZ_DTYPE),
+            columns=fragment_mz_df.columns,
+        )
+        return (fragment_mz_df, matched_intensity_df, matched_mz_err_df)
+
+    def load_ms_data(
+        self,
+        ms_file: Union[str, MSData_Base],
+        ms_file_type: str = "alpharaw_hdf",
+        process_count: int = 8,
+        **kwargs,
+    ):
+        """Load MS file to set `self.raw_data`.
+
+        Parameters
+        ----------
+        ms_file : str | MSData_Base
+            Absolute or relative path of the ms2 file.
+
+        ms_file_type : str, optional
+            ms2 file type, could be
+            ["alpharaw_hdf","thermo","sciex","alphapept_hdf","mgf"].
+            Default to 'alpharaw_hdf'.
+        """
+        self.raw_data = load_ms_data(ms_file, ms_file_type, process_count=process_count)
+
+    def get_peaks(self, spec_idx: int, **kwargs):
+        return self.raw_data.get_peaks(spec_idx)
+
+    def _match_one_psm(
+        self,
+        peak_mzs: np.ndarray,
+        peak_intensities: np.ndarray,
+        fragment_mz_df: pd.DataFrame,
+        matched_intensity_df: pd.DataFrame,
+        matched_mz_err_df: pd.DataFrame,
+        frag_start_idx: int,
+        frag_stop_idx: int,
+    ):
+        """
+        Match fragments of one precursor (located by `frag_start_idx` and `frag_stop_idx`)
+        against the corresponding `peak_mzs`.
+
+        Parameters
+        ----------
+        peak_mzs : np.ndarray
+            Peak m/z values to be matched.
+        peak_intensities : np.ndarray
+            Peak intensities to be matched.
+        fragment_mz_df : pd.DataFrame
+            fragment m/z dataframe to be matched.
+        matched_intensity_df : pd.DataFrame
+            The dataframe to store matched intensity values.
+        matched_mz_err_df : pd.DataFrame
+            The dataframe to store matched mz error values.
+        frag_start_idx : int
+            fragment start index of the given PSM.
+        frag_stop_idx : int
+            fragment stop index of the given PSM.
+        """
+        if len(peak_mzs) == 0:
+            return
+
+        peak_mzs = peak_mzs.astype(PEAK_MZ_DTYPE)
+
+        frag_mzs = fragment_mz_df.values[frag_start_idx:frag_stop_idx, :]
+
+        if self.use_ppm:
+            mz_tols = frag_mzs * self.tolerance * 1e-6
+        else:
+            mz_tols = np.full_like(frag_mzs, self.tolerance)
+
+        if self.match_closest:
+            matched_idxes = match_closest_peaks(
+                peak_mzs, peak_intensities, frag_mzs, mz_tols
+            )
+        else:
+            matched_idxes = match_highest_peaks(
+                peak_mzs,
+                peak_intensities,
+                frag_mzs,
+                mz_tols,
+            )
+
+        matched_intens = peak_intensities[matched_idxes]
+        matched_intens[matched_idxes == -1] = 0
+
+        matched_mz_errs = np.abs(peak_mzs[matched_idxes] - frag_mzs)
+        matched_mz_errs[matched_idxes == -1] = np.inf
+
+        matched_intensity_df.values[frag_start_idx:frag_stop_idx, :] = matched_intens
+
+        matched_mz_err_df.values[frag_start_idx:frag_stop_idx, :] = matched_mz_errs
+
+    def match_ms2_one_raw(
+        self,
+        psm_df_one_raw: pd.DataFrame,
+        verbose: bool = False,
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Matching psm_df_one_raw against self.raw_data
+        after `self.load_ms_data()`
+
+        Parameters
+        ----------
+        psm_df_one_raw : pd.DataFrame
+            psm dataframe
+            that contains only one raw file
+
+        Returns
+        -------
+        Tuple:
+            pd.DataFrame: psm dataframe with fragment index information.
+
+            pd.DataFrame: fragment mz dataframe.
+
+            pd.DataFrame: matched intensity dataframe.
+
+            pd.DataFrame: matched mass error dataframe.
+              np.inf if a fragment is not matched.
+        """
+        self.psm_df = psm_df_one_raw
+
+        psm_df_one_raw = self._add_missing_columns_to_psm_df(
+            psm_df_one_raw, self.raw_data
+        )
+
+        (
+            fragment_mz_df,
+            matched_intensity_df,
+            matched_mz_err_df,
+        ) = self._prepare_matching_dfs()
+
+        psm_iters = psm_df_one_raw[
+            ["spec_idx", "frag_start_idx", "frag_stop_idx"]
+        ].values
+        if verbose:
+            psm_iters = tqdm.tqdm(psm_iters)
+
+        for spec_idx, frag_start_idx, frag_stop_idx in psm_iters:
+            (spec_mzs, spec_intens) = self.get_peaks(spec_idx)
+
+            self._match_one_psm(
+                spec_mzs,
+                spec_intens,
+                fragment_mz_df,
+                matched_intensity_df,
+                matched_mz_err_df,
+                frag_start_idx,
+                frag_stop_idx,
+            )
+
+        return (psm_df_one_raw, fragment_mz_df, matched_intensity_df, matched_mz_err_df)
+
+    def _match_ms2_one_raw_numba(self, raw_name: str, psm_df_one_raw: pd.DataFrame):
+        if raw_name in self._ms_file_dict:
+            raw_data = load_ms_data(
+                self._ms_file_dict[raw_name],
+                self._ms_file_type,
+                process_count=self.ms_loader_thread_num,
+            )
+            if raw_data is None:
+                return
+
+            psm_df_one_raw = self._add_missing_columns_to_psm_df(
+                psm_df_one_raw, raw_data
+            )
+
+            if self.use_ppm:
+                all_frag_mz_tols = self.fragment_mz_df.values * self.tolerance * 1e-6
+            else:
+                all_frag_mz_tols = np.full_like(
+                    self.fragment_mz_df.values, self.tolerance
+                )
+
+            match_one_raw_with_numba(
+                psm_df_one_raw.spec_idx.values,
+                psm_df_one_raw.frag_start_idx.values,
+                psm_df_one_raw.frag_stop_idx.values,
+                self.fragment_mz_df.values,
+                all_frag_mz_tols,
+                raw_data.peak_df.mz.values,
+                raw_data.peak_df.intensity.values,
+                raw_data.spectrum_df.peak_start_idx.values,
+                raw_data.spectrum_df.peak_stop_idx.values,
+                self.matched_intensity_df.values,
+                self.matched_mz_err_df.values,
+                self.match_closest,
+            )
+        else:
+            print(f"`{raw_name}` is not found in ms_file_dict.")
+            return
+        return psm_df_one_raw
+
+    def match_ms2_multi_raw(
+        self,
+        psm_df: pd.DataFrame,
+        ms_files: Union[dict, list],
+        ms_file_type: str = "alpharaw_hdf",
+        process_num: int = 1,
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Matching PSM dataframe against the ms2 files in ms_files
+        This method will store matched values as attributes:
+        - self.psm_df
+        - self.fragment_mz_df
+        - self.matched_intensity_df
+        - self.matched_mz_err_df
+
+        Parameters
+        ----------
+        psm_df : pd.DataFrame
+            PSM dataframe
+
+        ms_files : dict | list
+            if dict: {raw_name: ms2 path}
+            if list: [ms2 path1, ms2 path2]
+
+        ms_file_type : str, optional
+            Could be 'alpharaw_hdf', 'mgf' or 'thermo', 'sciex', 'alphapept_hdf'.
+            Defaults to 'alphapept'.
+
+        Returns
+        -------
+        Tuple:
+            pd.DataFrame: psm dataframe with fragment index information.
+
+            pd.DataFrame: fragment mz dataframe.
+
+            pd.DataFrame: matched intensity dataframe.
+
+            pd.DataFrame: matched mass error dataframe.
+                np.inf if a fragment is not matched.
+
+        """
+        self.psm_df = psm_df
+
+        (
+            self.fragment_mz_df,
+            self.matched_intensity_df,
+            self.matched_mz_err_df,
+        ) = self._prepare_matching_dfs()
+
+        if isinstance(ms_files, dict):
+            self._ms_file_dict = ms_files
+        else:
+            self._ms_file_dict = parse_ms_files_to_dict(ms_files)
+
+        self._ms_file_type = ms_file_type
+
+        self.ms_loader_thread_num = process_num
+        # if process_num <= 1 or len(self._ms_file_dict) <= 1:
+        psm_df_list = []
+        for raw_name, df_group in tqdm.tqdm(self.psm_df.groupby("raw_name")):
+            _df = self._match_ms2_one_raw_numba(raw_name, df_group)
+            if _df is not None:
+                psm_df_list.append(_df)
+        # else:
+        #     with mp.get_context("spawn").Pool(processes=process_num) as p:
+        #         df_groupby = self.psm_df.groupby('raw_name')
+        #         def gen_group_df(df_groupby):
+        #             for raw_name, df_group in df_groupby:
+        #                 yield (raw_name, df_group)
+        #         process_bar(
+        #             p.imap_unordered(
+        #                 self._match_ms2_one_raw_numba,
+        #                 gen_group_df(df_groupby)
+        #             ), df_groupby.ngroups
+        #         )
+
+        self.psm_df = pd.concat(psm_df_list, ignore_index=True)
+        return (
+            self.psm_df,
+            self.fragment_mz_df,
+            self.matched_intensity_df,
+            self.matched_mz_err_df,
+        )
+
+
+class PepSpecMatch_DIA(PepSpecMatch):
+    """
+    Peak annotation for DIA data.
+    """
+
+    max_spec_per_query: int = 3
+    min_frag_mz: float = 200.0
+
+    def _add_missing_columns_to_psm_df(self, psm_df: pd.DataFrame, raw_data=None):
+        # DIA results do not have spec_idx/scan_num in psm_df, nothing to merge
+        return psm_df
+
+    def _prepare_matching_dfs(self):
+        fragment_mz_df = self.get_fragment_mz_df()
+        fragment_mz_df = pd.concat(
+            [fragment_mz_df] * self.max_spec_per_query, ignore_index=True
+        )
+        if self.use_ppm:
+            self.all_frag_mz_tols = fragment_mz_df.values * self.tolerance * 1e-6
+        else:
+            self.all_frag_mz_tols = np.full_like(fragment_mz_df.values, self.tolerance)
+
+        psm_df_list = []
+        len_frags = len(fragment_mz_df) // self.max_spec_per_query
+        for i in range(self.max_spec_per_query):
+            psm_df = self.psm_df.copy()
+            psm_df["frag_start_idx"] = psm_df.frag_start_idx + i * len_frags
+            psm_df["frag_stop_idx"] = psm_df.frag_stop_idx + i * len_frags
+            psm_df_list.append(psm_df)
+        self.psm_df = pd.concat(psm_df_list, ignore_index=True)
+
+        matched_intensity_df = pd.DataFrame(
+            np.zeros_like(fragment_mz_df.values, dtype=PEAK_INTENSITY_DTYPE),
+            columns=fragment_mz_df.columns,
+        )
+
+        matched_mz_err_df = pd.DataFrame(
+            np.zeros_like(fragment_mz_df.values, dtype=PEAK_MZ_DTYPE),
+            columns=fragment_mz_df.columns,
+        )
+
+        return (fragment_mz_df, matched_intensity_df, matched_mz_err_df)
+
+    def _match_ms2_one_raw_numba(
+        self, raw_name: str, psm_df_one_raw: pd.DataFrame
+    ) -> pd.DataFrame:
+        """
+        Internal method to extract peak information with numba as backend.
+
+        Parameters
+        ----------
+        raw_name : str
+            The raw name of the raw file. `psm_df_one_raw` dataframe should also
+            contain the same raw name in `raw_name` column.
+        psm_df_one_raw : pd.DataFrame
+            The dataframe for PSMs.
+
+        Returns
+        -------
+        pd.DataFrame
+            `psm_df_one_raw`
+        """
+        psm_df_one_raw = psm_df_one_raw.reset_index(drop=True)
+
+        if raw_name in self._ms_file_dict:
+            raw_data = load_ms_data(
+                self._ms_file_dict[raw_name],
+                self._ms_file_type,
+                process_count=self.ms_loader_thread_num,
+            )
+            if raw_data is None:
+                return
+
+            psm_origin_len = len(psm_df_one_raw) // self.max_spec_per_query
+
+            grouper = NormalDIAGrouper(raw_data)
+
+            psm_groups = grouper.assign_dia_groups(
+                psm_df_one_raw.precursor_mz.values[:psm_origin_len]
+            )
+
+            all_spec_idxes = np.full(len(psm_df_one_raw), -1, dtype=np.int32)
+
+            for dia_group, group_df in grouper.dia_group_dfs:
+                psm_idxes = psm_groups[dia_group]
+                if len(psm_idxes) == 0:
+                    continue
+                psm_idxes = np.array(psm_idxes, dtype=np.int32)
+                spec_idxes = find_dia_spec_idxes_same_window(
+                    group_df.rt.values,
+                    psm_df_one_raw.rt.values[psm_idxes],
+                    max_spec_per_query=self.max_spec_per_query,
+                )
+                for i in range(spec_idxes.shape[-1]):
+                    all_spec_idxes[psm_idxes + psm_origin_len * i] = spec_idxes[:, i]
+
+                match_one_raw_with_numba(
+                    all_spec_idxes,
+                    psm_df_one_raw.frag_start_idx.values,
+                    psm_df_one_raw.frag_stop_idx.values,
+                    self.fragment_mz_df.values,
+                    self.all_frag_mz_tols,
+                    raw_data.peak_df.mz.values,
+                    raw_data.peak_df.intensity.values,
+                    group_df.peak_start_idx.values,
+                    group_df.peak_stop_idx.values,
+                    self.matched_intensity_df.values,
+                    self.matched_mz_err_df.values,
+                    self.match_closest,
+                )
+        else:
+            print(f"`{raw_name}` is not found in ms_file_dict.")
+            return
+        return psm_df_one_raw
+
+    def match_ms2_multi_raw(
+        self,
+        psm_df: pd.DataFrame,
+        ms_files: Tuple[dict, list],
+        ms_file_type: str = "alpharaw_hdf",
+        process_num: int = 8,
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Match peaks for the given `psm_df` against the corresponding MS spectrum files.
+
+        Parameters
+        ----------
+        psm_df : pd.DataFrame
+            Peptide-spectrum matches in alphabase dataframe format.
+        ms_files : Tuple[dict, list]
+            The absolute or relative paths of MS files.
+            if the type is `dict`, the format will be
+            `{'raw_name1': 'raw_name1.raw', ...}` if `ms_file_type` is `thermo_raw`.
+        ms_file_type : str, optional
+            MS file type that is already registered in
+            :obj:`alpharaw.ms_data_base.ms_reader_provider`.
+            By default "alpharaw_hdf".
+        process_num : int, optional
+            Match peaks by using multiprocessing, by default 8
+
+        Returns
+        -------
+        Tuple
+            pd.DataFrame: the `psm_df`.
+
+            pd.DataFrame: fragment m/z dataframe in alphabase format.
+
+            pd.DataFrame: the matched fragment intensity dataframe in alphabase format.
+
+            pd.DataFrame: the matched mass error in the same dataframe format.
+        """
+        if isinstance(ms_files, list):
+            ms_files = parse_ms_files_to_dict(ms_files)
+        psm_df = psm_df[psm_df.raw_name.isin(ms_files)].reset_index(drop=True)
+        super().match_ms2_multi_raw(psm_df, ms_files, ms_file_type, process_num)
+
+        return (
+            self.psm_df,
+            self.fragment_mz_df,
+            self.matched_intensity_df,
+            self.matched_mz_err_df,
+        )
diff --git a/peptdeep/match/utils.py b/peptdeep/match/utils.py
new file mode 100644
index 00000000..4ed29727
--- /dev/null
+++ b/peptdeep/match/utils.py
@@ -0,0 +1,138 @@
+"""Utilities for psm matching. Moved from alpharaw."""
+
+from typing import Union
+
+import numba
+import numpy as np
+
+from alpharaw.match.match_utils import match_closest_peaks, match_highest_peaks
+from alpharaw.ms_data_base import MSData_Base, ms_reader_provider
+
+
+@numba.jit(nogil=True)
+def match_one_raw_with_numba(
+    spec_idxes: np.ndarray,
+    frag_start_idxes: np.ndarray,
+    frag_stop_idxes: np.ndarray,
+    all_frag_mzs: np.ndarray,
+    all_frag_mz_tols: np.ndarray,
+    all_spec_mzs: np.ndarray,
+    all_spec_intensities: np.ndarray,
+    peak_start_idxes: np.ndarray,
+    peak_stop_idxes: np.ndarray,
+    matched_intensities: np.ndarray,
+    matched_mz_errs: np.ndarray,
+    match_closest: bool = True,
+) -> None:
+    """
+    Internel function to match fragment mz values to spectrum mz values.
+    Matched_mz_errs[i] = np.inf if no peaks are matched.
+
+    Results will saved in place of matched_intensities
+    and matched_mz_errs.
+    """
+    for spec_idx, frag_start, frag_stop in zip(
+        spec_idxes, frag_start_idxes, frag_stop_idxes
+    ):
+        if spec_idx == -1:
+            continue
+        peak_start = peak_start_idxes[spec_idx]
+        peak_stop = peak_stop_idxes[spec_idx]
+        if peak_stop == peak_start:
+            continue
+        spec_mzs = all_spec_mzs[peak_start:peak_stop]
+        spec_intens = all_spec_intensities[peak_start:peak_stop]
+
+        frag_mzs = all_frag_mzs[frag_start:frag_stop, :].copy()
+        frag_mz_tols = all_frag_mz_tols[frag_start:frag_stop, :].copy()
+
+        if match_closest:
+            matched_idxes = match_closest_peaks(
+                spec_mzs, spec_intens, frag_mzs, frag_mz_tols
+            ).reshape(-1)
+        else:
+            matched_idxes = match_highest_peaks(
+                spec_mzs, spec_intens, frag_mzs, frag_mz_tols
+            ).reshape(-1)
+
+        matched_intens = spec_intens[matched_idxes]
+        matched_intens[matched_idxes == -1] = 0
+
+        matched_mass_errs = np.abs(
+            spec_mzs[matched_idxes.reshape(-1)] - frag_mzs.reshape(-1)
+        )
+        matched_mass_errs[matched_idxes == -1] = np.inf
+
+        matched_intensities[frag_start:frag_stop, :] = matched_intens.reshape(
+            frag_mzs.shape
+        )
+
+        matched_mz_errs[frag_start:frag_stop, :] = matched_mass_errs.reshape(
+            frag_mzs.shape
+        )
+
+
+def load_ms_data(
+    ms_file: Union[str, MSData_Base],
+    ms_file_type: str = "alpharaw_hdf",
+    process_count: int = 8,
+) -> MSData_Base:
+    """
+    Load MS file and get `MSData_Base` object.
+
+    Parameters
+    ----------
+    ms_file : str | MSData_Base
+        ms2 file path
+
+    ms_file_type : str, optional
+        ms2 file type, can be
+        ["alpharaw_hdf", "thermo", "sciex", "alphapept_hdf", "mgf"].
+        Default to 'alpharaw_hdf'.
+
+    Returns
+    -------
+    MSData_Base:
+        Instance of sub-class of `MSData_Base`.
+    """
+    if isinstance(ms_file, MSData_Base):
+        return ms_file
+    else:
+        raw_data = ms_reader_provider.get_reader(
+            ms_file_type, process_count=process_count
+        )
+        if raw_data is None:
+            print(
+                f"Raw file type '{ms_file_type}' is not registered in 'ms_reader_provider'"
+            )
+            return
+        raw_data.import_raw(ms_file)
+        return raw_data
+
+
+@numba.njit
+def get_ion_count_scores(
+    frag_mz_values: np.ndarray,
+    matched_intens: np.ndarray,
+    frag_start_idxes: np.ndarray,
+    frag_stop_idxes: np.ndarray,
+    min_mz: float = 200,
+):
+    """
+    TODO Deprecated
+    """
+    scores = []
+    for i in range(len(frag_start_idxes)):
+        scores.append(
+            np.count_nonzero(
+                matched_intens[frag_start_idxes[i] : frag_stop_idxes[i], :]
+                .copy()
+                .reshape(-1)[
+                    frag_mz_values[frag_start_idxes[i] : frag_stop_idxes[i], :]
+                    .copy()
+                    .reshape(-1)
+                    >= min_mz
+                ]
+            )
+        )
+    return np.array(scores, np.int32)
diff --git a/peptdeep/pipeline_api.py b/peptdeep/pipeline_api.py
index 03359d8e..8812d4e2 100644
--- a/peptdeep/pipeline_api.py
+++ b/peptdeep/pipeline_api.py
@@ -23,17 +23,14 @@
 
 from peptdeep.pretrained_models import ModelManager
 
-# from peptdeep.rescore.feature_extractor import match_one_raw
-from alpharaw.match.psm_match import (
-    PepSpecMatch,
-    PepSpecMatch_DIA,
-    parse_ms_files_to_dict,
-    get_ion_count_scores,
-)
 
 from peptdeep.model.ms2 import calc_ms2_similarity
 import peptdeep.model.rt as rt_module
 
+from peptdeep.match.psm_match import PepSpecMatch_DIA, PepSpecMatch
+from peptdeep.match.utils import get_ion_count_scores
+from alpharaw.utils.ms_path_utils import parse_ms_files_to_dict
+
 DIA_max_spec_per_query = 3
 DIA_min_ion_count = 6
 DIA_min_frag_mz = 200.0
@@ -108,10 +105,10 @@ def get_median_pccs_for_dia_psms(
     -----
     The `psm_df` contains `max_spec_per_query` copies per peptide, each matched against
     a different MS2 scan. The PSMs need to be sorted by spec index before processing because
-    `alpharaw.match.psm_match.PepSpecMatch.match_ms2_multi_raw`
+    `peptdeep.match.psm_match.PepSpecMatch.match_ms2_multi_raw`
     orders them by `raw_name` when multiple MS files are used.
 
-    See also (alpharaw.match.psm_match):
+    See also (peptdeep.match.psm_match):
         - `PepSpecMatch_DIA._prepare_matching_dfs`: creates replicated PSM/fragment structure
         - `PepSpecMatch_DIA._match_ms2_one_raw_numba`: finds nearby MS2 scans and extracts fragments
         - `PepSpecMatch.match_ms2_multi_raw`: processes multiple MS files (reorders by raw_name)
diff --git a/peptdeep/utils/__init__.py b/peptdeep/utils/__init__.py
index 3e98499e..14a7e50c 100644
--- a/peptdeep/utils/__init__.py
+++ b/peptdeep/utils/__init__.py
@@ -12,7 +12,6 @@
 import numpy as np
 
 
-# from alphatims
 def process_bar(iterator, len_iter):
     with tqdm.tqdm(total=len_iter) as bar:
         i = 0
diff --git a/peptdeep/utils/logger.py b/peptdeep/utils/logger.py
index 3e90ee19..91cf48cc 100644
--- a/peptdeep/utils/logger.py
+++ b/peptdeep/utils/logger.py
@@ -38,7 +38,7 @@ def set_logger(
         The file name to where the log is written.
         Folders are automatically created if needed.
         This is relative to the current path. When an empty string is provided,
-        a log is written to the AlphaTims "logs" folder with the name
+        a log is written to the PeptDeep "logs" folder with the name
         "log_yymmddhhmmss" (reversed timestamp year to seconds).
         If None, no log file is saved.
         Default is "".
@@ -158,7 +158,7 @@ def show_python_info() -> None:
     This is done in the following format:
     ```
         - [timestamp]> Python information:
-        - [timestamp]> alphatims          - [current_version]
+        - [timestamp]> peptdeep           - [current_version]
         - [timestamp]> [required package] - [current_version]
         - ...
         - [timestamp]> [required package] - [current_version]
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 6efaf9e7..14afcb6d 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -1,4 +1,6 @@
-TEST_NBS=$(find ../nbs_tests -name "*.ipynb")
+# ignore notebooks that requires local data
+TEST_NBS=$(find ../nbs_tests -name "*.ipynb" | grep -v "test_dia_matching.ipynb")
+
 TUTORIAL_NBS=$(find ../docs/tutorials -name "*.ipynb")
 UNIT_TESTS=$(find ../tests/unit -name "test_*.py")
 ALL_TESTS=$(echo $TEST_NBS$'\n'$TUTORIAL_NBS$'\n'$UNIT_TESTS)