From 3b63f485265ff135dff750eaf0901695319ed857 Mon Sep 17 00:00:00 2001 From: Vamsi Thiriveedhi Date: Mon, 29 Apr 2024 11:26:57 -0400 Subject: [PATCH 1/2] fix: return only .dcm links s5cmd ls sometimes returns directories as well; this update ensures only .dcm links are returned --- idc_index/index.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/idc_index/index.py b/idc_index/index.py index eb68cc45..fe282bf8 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -369,11 +369,11 @@ def get_series_file_URLs(self, seriesInstanceUID): # Query to get the S3 URL s3url_query = f""" SELECT - series_aws_url + series_aws_url FROM - index + index WHERE - SeriesInstanceUID='{seriesInstanceUID}' + SeriesInstanceUID='{seriesInstanceUID}' """ s3url_query_df = self.sql_query(s3url_query) s3_url = s3url_query_df.series_aws_url[0] @@ -391,7 +391,11 @@ def get_series_file_URLs(self, seriesInstanceUID): # Parse the output to get the file names lines = output.split("\n") - file_names = [s3_url + line.split()[-1] for line in lines if line] + file_names = [ + s3_url + line.split()[-1] + for line in lines + if line and line.split()[-1].endswith(".dcm") + ] return file_names From 7059786198662ba5c3a7b92fb8f6e96758d2c10e Mon Sep 17 00:00:00 2001 From: vkt1414 Date: Tue, 30 Apr 2024 09:48:35 -0400 Subject: [PATCH 2/2] enh: do not change nulls to empty strings --- idc_index/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idc_index/index.py b/idc_index/index.py index eb68cc45..139640d9 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -27,7 +27,7 @@ def __init__(self): # Read index file logger.debug(f"Reading index file v{idc_index_data.__version__}") self.index = pd.read_parquet(file_path) - self.index = self.index.astype(str).replace("nan", "") + # self.index = self.index.astype(str).replace("nan", "") self.index["series_size_MB"] = self.index["series_size_MB"].astype(float) self.collection_summary = 
self.index.groupby("collection_id").agg( {"Modality": pd.Series.unique, "series_size_MB": "sum"}