diff --git a/idc_index/index.py b/idc_index/index.py index 2efea596..fe7fc952 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -100,19 +100,54 @@ def _filter_by_dicom_series_uid(df_index, dicom_series_uid): @staticmethod def get_idc_version(): + """ + Returns the version of IDC data used in idc-index + """ return f"v{idc_index_data.__version__}" def get_collections(self): + """ + Returns the collections present in IDC + """ unique_collections = self.index["collection_id"].unique() return unique_collections.tolist() def get_series_size(self, seriesInstanceUID): + """ + Gets cumulative size (MB) of the DICOM instances in a given SeriesInstanceUID. + + Args: + seriesInstanceUID (str): The DICOM SeriesInstanceUID. + + Returns: + float: The cumulative size of the DICOM instances in the given SeriesInstanceUID rounded to two digits, in MB. + + Raises: + ValueError: If the `seriesInstanceUID` does not exist. + """ + - resp = self.index[["SeriesInstanceUID"] == seriesInstanceUID][ - "series_size_MB" - ].iloc[0] - return resp + series_rows = self.index[self.index["SeriesInstanceUID"] == seriesInstanceUID] + if series_rows.empty: + raise ValueError("SeriesInstanceUID not found in IDC index.") + return series_rows["series_size_MB"].iloc[0] def get_patients(self, collection_id, outputFormat="dict"): + """ + Gets the patients in a collection. + + Args: + collection_id (str or a list of str): The collection id or list of collection ids. This should be in lower case separated by underscores. + For example, 'pdmr_texture_analysis' or ['pdmr_texture_analysis','nlst'] + outputFormat (str, optional): The format in which to return the patient IDs. Available options are 'dict', + 'df', and 'list'. Default is 'dict'. + + Returns: + dict or pandas.DataFrame or list: Patient IDs in the requested output format. By default, it returns a dictionary. + + Raises: + ValueError: If `outputFormat` is not one of 'dict', 'df', 'list'. 
+ """ + if not isinstance(collection_id, str) and not isinstance(collection_id, list): raise TypeError("collection_id must be a string or list of strings") @@ -149,7 +184,21 @@ def get_patients(self, collection_id, outputFormat="dict"): return response def get_dicom_studies(self, patientId, outputFormat="dict"): - """returns one row per distinct value of StudyInstanceUID""" + """ + Returns Studies for a given patient or list of patients. + + Args: + patientId (str or list of str): The patient Id or a list of patient Ids. + outputFormat (str, optional): The format in which to return the studies. Available options are 'dict', + 'df', and 'list'. Default is 'dict'. + + Returns: + dict or pandas.DataFrame or list: Studies in the requested output format. By default, it returns a dictionary. + + Raises: + ValueError: If `outputFormat` is not one of 'dict', 'df', 'list'. + ValueError: If any of the `patientId` does not exist. + """ if not isinstance(patientId, str) and not isinstance(patientId, list): raise TypeError("patientId must be a string or list of strings") @@ -211,7 +260,23 @@ def get_dicom_studies(self, patientId, outputFormat="dict"): return response - def get_dicom_series(self, studyInstanceUID=None, outputFormat="dict"): + def get_dicom_series(self, studyInstanceUID, outputFormat="dict"): + """ + Returns Series for a given study or list of studies. + + Args: + studyInstanceUID (str or list of str): The DICOM StudyInstanceUID or a list of StudyInstanceUIDs. + outputFormat (str, optional): The format in which to return the series. Available options are 'dict', + 'df', and 'list'. Default is 'dict'. + + Returns: + dict or pandas.DataFrame or list: Series in the requested output format. By default, it returns a dictionary. + + Raises: + ValueError: If `outputFormat` is not one of 'dict', 'df', 'list'. + ValueError: If any of the `studyInstanceUID` does not exist. 
+ """ + if not isinstance(studyInstanceUID, str) and not isinstance( studyInstanceUID, list ): @@ -357,7 +422,7 @@ def get_series_file_URLs(self, seriesInstanceUID): return file_names def get_viewer_URL( - self, seriesInstanceUID, studyInstanceUID=None, viewer_selector=None + self, seriesInstanceUID=None, studyInstanceUID=None, viewer_selector=None ): """ Get the URL of the IDC viewer for the given series or study in IDC based on @@ -376,7 +441,7 @@ def get_viewer_URL( available in IDC viewer_selector: string containing the name of the viewer to use. Must be one of the following: - ohif_v2, ohif_v2, or slim. If not provided, default viewers will be used. + ohif_v2, ohif_v3, or slim. If not provided, default viewers will be used. Returns: string containing the IDC viewer URL for the given SeriesInstanceUID @@ -387,7 +452,10 @@ def get_viewer_URL( "Either SeriesInstanceUID or StudyInstanceUID, or both, must be provided." ) - if seriesInstanceUID not in self.index["SeriesInstanceUID"].values: + if ( + seriesInstanceUID is not None + and seriesInstanceUID not in self.index["SeriesInstanceUID"].values + ): raise ValueError("SeriesInstanceUID not found in IDC index.") if (