From e035eec81476cf96ad9c88d75e2c6cf48d84b89e Mon Sep 17 00:00:00 2001 From: Vamsi Thiriveedhi Date: Mon, 29 Apr 2024 11:26:57 -0400 Subject: [PATCH] fix: return only .dcm links s5cmd ls sometimes returns directories as well; this update ensures only .dcm links are returned --- idc_index/index.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/idc_index/index.py b/idc_index/index.py index 9c7aa1c9..3083dc48 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -331,11 +331,11 @@ def get_series_file_URLs(self, seriesInstanceUID): # Query to get the S3 URL s3url_query = f""" SELECT - series_aws_url + series_aws_url FROM - index + index WHERE - SeriesInstanceUID='{seriesInstanceUID}' + SeriesInstanceUID='{seriesInstanceUID}' """ s3url_query_df = self.sql_query(s3url_query) s3_url = s3url_query_df.series_aws_url[0] @@ -353,7 +353,11 @@ def get_series_file_URLs(self, seriesInstanceUID): # Parse the output to get the file names lines = output.split("\n") - file_names = [s3_url + line.split()[-1] for line in lines if line] + file_names = [ + s3_url + line.split()[-1] + for line in lines + if line and line.split()[-1].endswith(".dcm") + ] return file_names