Skip to content

Commit

Permalink
Read RI values from comments field into RI field
Browse files Browse the repository at this point in the history
  • Loading branch information
wverastegui committed Dec 4, 2023
1 parent e471c66 commit 8cb2312
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
31 changes: 30 additions & 1 deletion RIAssigner/data/Data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from typing import Iterable, List, Optional
import pandas as pd

from pint import Quantity, UnitRegistry
from pint.unit import build_unit_class
Expand Down Expand Up @@ -125,4 +126,32 @@ def comment(self) -> Iterable[CommentFieldType]:
Returns:
Iterable[CommentFieldType]: Comment field values stored in data.
"""
...
...

def extract_ri_from_comment(self, content_comment, specific_string):
    """ Extract RI values from comment strings.

    Each comment is searched for the first occurrence of 'specific_string=RI_value',
    where RI_value is an unsigned integer or decimal number (e.g. 'RI=1350' or
    'RI=1350.5'). The key must not be directly preceded by a word character, so
    searching for 'RI' does not match inside 'SRI=900'.

    Parameters
    ----------
    content_comment:
        Iterable of comment field values. Entries that are not strings
        (e.g. None) are treated as comments without an RI value.
    specific_string:
        String that is expected in the comment directly before '=' and the RI value.
        It is matched literally; regex metacharacters in it have no special meaning.

    Returns
    -------
    List with one float per comment; NaN where no RI value was found.

    Raises
    ------
    ValueError
        If 'specific_string' is not a non-empty string.
    """
    # Local import keeps this method self-contained without touching the module imports.
    import re

    if not isinstance(specific_string, str) or not specific_string:
        # Without this guard a None key would be formatted into the pattern as the
        # text 'None' and silently produce a list of NaN values.
        raise ValueError("specific_string must be a non-empty string.")

    # re.escape: keys such as 'RI(DB5)' must be matched literally.
    # (?<!\w): same effect as a leading \b for keys starting with a word character,
    # but also works for keys that start with a non-word character.
    # (?:\.\d+)?: keep the decimal part instead of truncating 'RI=1350.5' to 1350.
    pattern = re.compile(rf'(?<!\w){re.escape(specific_string)}=(\d+(?:\.\d+)?)\b')

    missing = float('nan')
    ri_values = []
    for comment in content_comment:
        match = pattern.search(comment) if isinstance(comment, str) else None
        ri_values.append(float(match.group(1)) if match else missing)

    return ri_values

5 changes: 5 additions & 0 deletions RIAssigner/data/MatchMSData.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def comment(self) -> Iterable[Data.CommentFieldType]:
self.comment_keys = "comment"
content = [safe_read_comment_key(spectrum, self.comment_keys) for spectrum in self._spectra]
return content

def ri_from_comment(self, specific_string: str = None):
    """ Read RI values from the comment field and store them as retention indices.

    The values of 'self.comment' are passed together with 'specific_string' to
    'extract_ri_from_comment'; the returned list is assigned to 'self.retention_indices'.
    """
    comment_values = self.comment
    ri_values = self.extract_ri_from_comment(comment_values, specific_string)
    self.retention_indices = ri_values

def safe_read_comment_key(spectrum: Spectrum, key: str) -> Optional[str]:
""" Read key from spectrum and convert to str or return 'None'.
Expand Down
9 changes: 4 additions & 5 deletions RIAssigner/data/PandasData.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,7 @@ def comment(self) -> Iterable[Data.CommentFieldType]:
content = self._data[self._comment_keys].tolist()
return content

def extract_ri_from_comment(self, specific_string: str): # incomplete
    """ Extract RI from comment field.

    Builds a list of substrings and assigns it to 'self._data[self._comment_keys]'.
    The method is marked as incomplete by its author.

    NOTE(review): the comprehension iterates over 'specific_string' itself, so 's' is a
    single character of that string and not a comment value - presumably the stored
    comments were meant to be iterated instead; confirm before relying on this method.
    """
    # For every 's' that contains 'specific_string', keep the part of 's' that starts
    # at the first occurrence of 'specific_string'.
    extracted_strings = [s[s.find(specific_string):] for s in specific_string if specific_string in s]
    # Replaces the stored comment values with the extracted substrings.
    self._data[self._comment_keys] = extracted_strings
def ri_from_comment(self, specific_string: str = None):
    """ Read RI values from the comment field and store them as retention indices.

    The values of 'self.comment' are passed together with 'specific_string' to the
    parent class implementation of 'extract_ri_from_comment'; the returned list is
    assigned to 'self.retention_indices'.
    """
    comment_values = self.comment
    ri_values = super().extract_ri_from_comment(comment_values, specific_string)
    self.retention_indices = ri_values

0 comments on commit 8cb2312

Please sign in to comment.