diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cc2e358..e02148f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,10 +13,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install dependencies run: | diff --git a/.readthedocs.yml b/.readthedocs.yml index 3f8e86a..c35e2da 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -15,3 +15,4 @@ python: sphinx: builder: dirhtml + configuration: docs/source/conf.py diff --git a/README.rst b/README.rst index 4d5f9d1..a9ad36a 100644 --- a/README.rst +++ b/README.rst @@ -34,9 +34,9 @@ engine results in Python. :alt: GitHub :target: https://www.apache.org/licenses/LICENSE-2.0 -.. image:: https://img.shields.io/twitter/follow/CompOmics?style=flat-square - :alt: Twitter - :target: https://twitter.com/compomics +.. image:: https://img.shields.io/badge/follow-@compomics.com-blue?style=flat-square&logo=bluesky + :alt: Follow @compomics.com + :target: https://bsky.app/profile/compomics.com diff --git a/psm_utils/__init__.py b/psm_utils/__init__.py index b4d5d3b..9f53a14 100644 --- a/psm_utils/__init__.py +++ b/psm_utils/__init__.py @@ -1,6 +1,6 @@ """Common utilities for parsing and handling PSMs, and search engine results.""" -__version__ = "1.3.0" +__version__ = "1.4.1" __all__ = ["Peptidoform", "PSM", "PSMList"] from warnings import filterwarnings diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py index fefed62..045d09c 100644 --- a/psm_utils/io/percolator.py +++ b/psm_utils/io/percolator.py @@ -246,6 +246,8 @@ def __init__( f"Could not infer Percolator Tab style from file extension `{suffix}`. " "Please provide the `style` parameter." ) + else: + self.style = style if self.style == "pin": basic_features = ["PSMScore", "ChargeN"] if add_basic_features else [] diff --git a/psm_utils/io/sage.py b/psm_utils/io/sage.py index eb1fae9..2d62682 100644 --- a/psm_utils/io/sage.py +++ b/psm_utils/io/sage.py @@ -57,6 +57,9 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: rescoring_features[ft] = psm_dict[ft] except KeyError: continue + + ion_mobility_features = self._extract_ion_mobility_features(psm_dict) + rescoring_features.update(ion_mobility_features) return PSM( peptidoform=self._parse_peptidoform( @@ -70,6 +73,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: score=float(psm_dict[self.score_column]), precursor_mz=self._parse_precursor_mz(psm_dict["expmass"], psm_dict["charge"]), retention_time=float(psm_dict["rt"]), + ion_mobility=rescoring_features.get("ion_mobility", None), protein_list=psm_dict["proteins"].split(";"), source="sage", rank=int(float(psm_dict["rank"])), @@ -92,6 +96,24 @@ def _parse_precursor_mz(expmass: str, charge: Optional[str]) -> Optional[float]: return (expmass + (mass.nist_mass["H"][1][0] * charge)) / charge else: return None + + @staticmethod + def _extract_ion_mobility_features(psm_dict: dict) -> dict: + """ + Extract ion mobility features from the PSM dictionary if present and non-zero. + Returns a dict with the relevant keys or an empty dict. + """ + try: + ion_mob = float(psm_dict["ion_mobility"]) + if ion_mob: + return { + "ion_mobility": ion_mob, + "predicted_mobility": float(psm_dict["predicted_mobility"]), + "delta_mobility": float(psm_dict["delta_mobility"]), + } + except (KeyError, ValueError): + pass + return {} @classmethod def from_dataframe(cls, dataframe) -> PSMList: diff --git a/tests/test_data/resultsIM.sage.tsv b/tests/test_data/resultsIM.sage.tsv new file mode 100644 index 0000000..273d59e --- /dev/null +++ b/tests/test_data/resultsIM.sage.tsv @@ -0,0 +1,2 @@ +psm_id peptide proteins num_proteins filename scannr rank label expmass calcmass charge peptide_len missed_cleavages semi_enzymatic isotope_error precursor_ppm fragment_ppm hyperscore delta_next delta_best rt aligned_rt predicted_rt delta_rt_model ion_mobility predicted_mobility delta_mobility matched_peaks longest_b longest_y longest_y_pct matched_intensity_pct scored_candidates poisson sage_discriminant_score posterior_error spectrum_q peptide_q protein_q ms2_intensity +529791 YVDDTQFVRFDSDAASPR sp|P01889|HLAB_HUMAN;sp|P10321|HLAC_HUMAN 2 G220824_028_Slot2-34_1_6753.mzml index=45761 1 1 2086.9507 2087.9548 3 18 0 0 -1.00335 0.38332784 1.5193833 44.947527657385436 24.626872218188044 0.0 23.004148 0.5305706 0.5301316 0.0004390478 0.96470714 0.9236175 0.041089654 17 2 6 0.33333334 46.215378 33149 -14.997740845471464 0.47067946 -96.47973 0.0000107030855 0.00009881538 0.0006954081 26230.0 \ No newline at end of file diff --git a/tests/test_data/test_out_sage.idXML b/tests/test_data/test_out_sage.idXML index 060d91d..bb69262 100644 --- a/tests/test_data/test_out_sage.idXML +++ b/tests/test_data/test_out_sage.idXML @@ -27,6 +27,7 @@ + diff --git a/tests/test_io/test_idxml.py b/tests/test_io/test_idxml.py index dce34a0..dae34da 100644 --- a/tests/test_io/test_idxml.py +++ b/tests/test_io/test_idxml.py @@ -107,7 +107,7 @@ def test_write_file_with_pyopenms_objects(self): assert sha == expected_sha def test_write_file_without_pyopenms_objects(self): - expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd" + expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd" #TODO: can cause problems locally depending on dependency versions reader = SageTSVReader("./tests/test_data/results.sage.tsv") psm_list = reader.read_file() writer = IdXMLWriter("./tests/test_data/test_out_sage.idXML") diff --git a/tests/test_io/test_sage.py b/tests/test_io/test_sage.py index 60d87ba..e60d471 100644 --- a/tests/test_io/test_sage.py +++ b/tests/test_io/test_sage.py @@ -47,6 +47,51 @@ }, ) +test_psm_im = PSM( + peptidoform="YVDDTQFVRFDSDAASPR/3", + spectrum_id="index=45761", + run="G220824_028_Slot2-34_1_6753", + collection=None, + spectrum=None, + is_decoy=False, + score=0.47067946, + qvalue=0.0000107030855, + pep=None, + precursor_mz=696.6580583654032, + retention_time=23.004148, + ion_mobility=0.96470714, + protein_list=['sp|P01889|HLAB_HUMAN','sp|P10321|HLAC_HUMAN'], + rank=1, + source="sage", + metadata={}, + rescoring_features={ + "expmass": 2086.9507, + "calcmass": 2087.9548, + "peptide_len": 18.0, + "missed_cleavages": 0.0, + "isotope_error": -1.00335, + "precursor_ppm": 0.38332784, + "fragment_ppm": 1.5193833, + "hyperscore": 44.947527657385436, + "delta_next": 24.626872218188044, + "delta_best": 0.0, + "delta_rt_model": 0.0004390478, + "aligned_rt": 0.5305706, + "predicted_rt": 0.5301316, + "matched_peaks": 17.0, + "longest_b": 2.0, + "longest_y": 6.0, + "longest_y_pct": 0.33333334, + "matched_intensity_pct": 46.215378, + "scored_candidates": 33149.0, + "poisson": -14.997740845471464, + "ms2_intensity": 26230.0, + "ion_mobility": 0.96470714, + "predicted_mobility": 0.9236175, + "delta_mobility": 0.041089654, + } +) + class TestSageTSVReader: def test_iter(self): @@ -54,6 +99,10 @@ def test_iter(self): for psm in reader: psm.provenance_data = {} assert psm == test_psm + with SageTSVReader("./tests/test_data/resultsIM.sage.tsv") as reader: + for psm in reader: + psm.provenance_data = {} + assert psm == test_psm_im class TestSageParquetReader: