From 7fa55aa4bb2bb5917177931dad2a4817d05b47a0 Mon Sep 17 00:00:00 2001
From: CCranney <11773171+CCranney@users.noreply.github.com>
Date: Fri, 24 Nov 2023 10:06:39 -0800
Subject: [PATCH] added protein name reformatting for libraries with single,
 non-list proteins

---
Review note (ignored by git-am): format_proteins_into_list_format now treats a
name as already list-formatted only when it starts with an all-digit count
followed by "/". The earlier leading-digit test skipped bare names that merely
begin with a digit (e.g. "1433B_HUMAN"). Hunk sizes and the diffstat below were
adjusted to match; the post-image blob id on the index lines was not recomputed.

 .../loaders/library/libraryLoaderStrategy.py  | 19 +++++++++
 .../library/test_LibraryLoaderStrategy.py     | 41 +++++++++++++++++--
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/zodiaq/loaders/library/libraryLoaderStrategy.py b/src/zodiaq/loaders/library/libraryLoaderStrategy.py
index 558bd75..d7c7758 100644
--- a/src/zodiaq/loaders/library/libraryLoaderStrategy.py
+++ b/src/zodiaq/loaders/library/libraryLoaderStrategy.py
@@ -87,11 +87,30 @@ def load_zodiaq_library_dict_from_file(
         random.seed(0)
         self._load_raw_library_object_from_file(libraryFilePath)
         zodiaqLibDict = self._format_raw_library_object_into_zodiaq_library_dict()
+        format_proteins_into_list_format(zodiaqLibDict)
         if determine_if_decoys_should_be_generated(zodiaqLibDict) and not isTest:
             zodiaqLibDict = add_decoys_to_zodiaq_library(zodiaqLibDict)
         return zodiaqLibDict
 
 
+def format_proteins_into_list_format(zodiaqLibDict):
+    """Convert bare protein names, in place, to the "<count>/<name>" format.
+
+    Entries whose "proteinName" is empty, or already starts with an all-digit
+    count followed by "/" (e.g. "2/PROT_A/PROT_B"), are left unchanged. Any
+    other name is treated as a single protein and rewritten as "1/<name>".
+    """
+    for value in zodiaqLibDict.values():
+        proteinName = value["proteinName"]
+        if not proteinName:
+            continue
+        # Require the "<count>/" prefix rather than just a leading digit, so
+        # names that merely begin with a digit (e.g. "1433B_HUMAN") are wrapped.
+        count, separator, _ = proteinName.partition("/")
+        if not (separator and count.isdigit()):
+            value["proteinName"] = f"1/{proteinName}"
+
+
 def create_peaks_from_mz_intensity_lists_and_zodiaq_key_id(
     mz: list, intensities: list, id: int
 ) -> list:
diff --git a/tests/unit/loaders/library/test_LibraryLoaderStrategy.py b/tests/unit/loaders/library/test_LibraryLoaderStrategy.py
index 0016052..545a3f0 100644
--- a/tests/unit/loaders/library/test_LibraryLoaderStrategy.py
+++ b/tests/unit/loaders/library/test_LibraryLoaderStrategy.py
@@ -1,6 +1,7 @@
 from zodiaq.loaders.library.libraryLoaderStrategy import (
     create_peaks_from_mz_intensity_lists_and_zodiaq_key_id,
     remove_low_intensity_peaks_below_max_peak_num,
+    format_proteins_into_list_format,
 )
 
 import pytest
@@ -26,7 +27,7 @@ def testPeaks():
     ]
 
 
-def test__library_loader_strategy_traml__create_peaks_from_mz_intensity_lists_and_zodiaq_key_id(
+def test__library_loader_strategy__create_peaks_from_mz_intensity_lists_and_zodiaq_key_id(
     testPeaks,
 ):
     numPeaks = 15
@@ -39,7 +40,7 @@ def test__library_loader_strategy_traml__create_peaks_from_mz_intensity_lists_an
     assert peaks == testPeaks
 
 
-def test__library_loader_strategy_traml__remove_low_intensity_peaks_below_max_peak_num(
+def test__library_loader_strategy__remove_low_intensity_peaks_below_max_peak_num(
     testPeaks,
 ):
     maxPeakNum = 10
@@ -61,7 +62,7 @@ def test__library_loader_strategy_traml__remove_low_intensity_peaks_below_max_pe
     assert reducedTestPeaks == expectedReducedTestPeaks
 
 
-def test__library_loader_strategy_traml__remove_low_intensity_peaks_below_max_peak_num__all_peaks_returned_when_length_fewer_than_max_peak_num(
+def test__library_loader_strategy__remove_low_intensity_peaks_below_max_peak_num__all_peaks_returned_when_length_fewer_than_max_peak_num(
     testPeaks,
 ):
     maxPeakNum = 10
@@ -81,3 +82,37 @@ def test__library_loader_strategy_traml__remove_low_intensity_peaks_below_max_pe
         shortTestPeaks, maxPeakNum
     )
     assert reducedShortTestPeaks == expectedReducedShortTestPeaks
+
+
+def test__library_loader_strategy__format_proteins_into_list_format():
+    zodiaqLibraryDict = {
+        (100.0, "PEPTIDE"): {
+            "proteinName": "PROTEIN",
+        }
+    }
+    format_proteins_into_list_format(zodiaqLibraryDict)
+    assert zodiaqLibraryDict[(100.0, "PEPTIDE")]["proteinName"] == "1/PROTEIN"
+
+    zodiaqLibraryDict = {
+        (200.0, "PEPTIDE"): {
+            "proteinName": "",
+        }
+    }
+    format_proteins_into_list_format(zodiaqLibraryDict)
+    assert zodiaqLibraryDict[(200.0, "PEPTIDE")]["proteinName"] == ""
+
+    zodiaqLibraryDict = {
+        (300.0, "PEPTIDE"): {
+            "proteinName": "2/PROT_A/PROT_B",
+        }
+    }
+    format_proteins_into_list_format(zodiaqLibraryDict)
+    assert zodiaqLibraryDict[(300.0, "PEPTIDE")]["proteinName"] == "2/PROT_A/PROT_B"
+
+    zodiaqLibraryDict = {
+        (400.0, "PEPTIDE"): {
+            "proteinName": "1433B_HUMAN",
+        }
+    }
+    format_proteins_into_list_format(zodiaqLibraryDict)
+    assert zodiaqLibraryDict[(400.0, "PEPTIDE")]["proteinName"] == "1/1433B_HUMAN"