From 986e0049e0974556787a5207fef415b6a21b2844 Mon Sep 17 00:00:00 2001 From: Andrey Fedorov Date: Thu, 8 Aug 2024 10:34:30 -0400 Subject: [PATCH] ENH: add clinical_index also added checks for existence of the URLs containing remote indices --- idc_index/index.py | 9 ++++++++- tests/idcindex.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/idc_index/index.py b/idc_index/index.py index 4fff30d7..8b47d9a6 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -41,7 +41,7 @@ class IDCClient: CITATION_FORMAT_BIBTEX = "application/x-bibtex" # Singleton pattern - # NOTE: In the future, one may want to use multiple clients e.g. for sub-datasets so a attribute-singleton as shown bewlo seems a better option. + # NOTE: In the future, one may want to use multiple clients e.g. for sub-datasets so a attribute-singleton as shown below seems a better option. # _instance: IDCClient # def __new__(cls): # if not hasattr(cls, "_instance") or getattr(cls, "_instance") is None: @@ -74,6 +74,8 @@ def __init__(self): {"Modality": pd.Series.unique, "series_size_MB": "sum"} ) + idc_version = f"v{Version(idc_index_data.__version__).major}" + self.indices_overview = { "index": { "description": "Main index containing one row per DICOM series.", @@ -95,6 +97,11 @@ def __init__(self): "installed": False, "url": f"{asset_endpoint_url}/sm_instance_index.parquet", }, + "clinical_index": { + "description": "Index of clinical data accompanying the available images.", + "installed": False, + "url": f"https://idc-open-metadata.s3.amazonaws.com/bigquery_export/idc_{idc_version}_clinical/column_metadata/000000000000.parquet", + }, } # Lookup s5cmd diff --git a/tests/idcindex.py b/tests/idcindex.py index 1ce45acb..7752b968 100644 --- a/tests/idcindex.py +++ b/tests/idcindex.py @@ -9,6 +9,7 @@ import pandas as pd import pytest +import requests from click.testing import CliRunner from idc_index import IDCClient, cli @@ -18,6 +19,17 @@ logging.basicConfig(level=logging.DEBUG) +def remote_file_exists(url): + try: + response = requests.head(url, allow_redirects=True) + # Check if the status code indicates success + return response.status_code == 200 + except requests.RequestException as e: + # Handle any exceptions (e.g., network issues) + print(f"An error occurred: {e}") + return False + + @pytest.fixture(autouse=True) def _change_test_dir(request, monkeypatch): monkeypatch.chdir(request.fspath.dirname) @@ -494,6 +506,12 @@ def test_fetch_index(self): assert i.indices_overview["sm_index"]["installed"] is True assert hasattr(i, "sm_index") + def test_indices_urls(self): + i = IDCClient() + for index in i.indices_overview: + if i.indices_overview[index]["url"] is not None: + assert remote_file_exists(i.indices_overview[index]["url"]) + if __name__ == "__main__": unittest.main()