diff --git a/cumulus_etl/fhir/__init__.py b/cumulus_etl/fhir/__init__.py index b5c51fe..c5d93e9 100644 --- a/cumulus_etl/fhir/__init__.py +++ b/cumulus_etl/fhir/__init__.py @@ -2,10 +2,10 @@ from .fhir_client import FhirClient, create_fhir_client_for_cli from .fhir_utils import ( + FhirUrl, download_reference, get_docref_note, parse_datetime, - parse_group_from_url, ref_resource, unref_resource, ) diff --git a/cumulus_etl/fhir/fhir_client.py b/cumulus_etl/fhir/fhir_client.py index b2124ee..1ac218d 100644 --- a/cumulus_etl/fhir/fhir_client.py +++ b/cumulus_etl/fhir/fhir_client.py @@ -2,15 +2,13 @@ import argparse import enum -import re -import sys from collections.abc import Iterable from json import JSONDecodeError import httpx from cumulus_etl import common, errors, store -from cumulus_etl.fhir import fhir_auth +from cumulus_etl.fhir import fhir_auth, fhir_utils class ServerType(enum.Enum): @@ -255,18 +253,15 @@ def create_fhir_client_for_cli( client_base_url = getattr(args, "fhir_url", None) if root_input.protocol in {"http", "https"}: if client_base_url and not root_input.path.startswith(client_base_url): - print( - "You provided both an input FHIR server and a different --fhir-url. Try dropping --fhir-url.", - file=sys.stderr, + errors.fatal( + "You provided both an input FHIR server and a different --fhir-url. " + "Try dropping --fhir-url.", + errors.ARGS_CONFLICT, ) - raise SystemExit(errors.ARGS_CONFLICT) elif not client_base_url: # Use the input URL as the base URL. But note that it may not be the server root. # For example, it may be a Group export URL. Let's try to find the actual root. - client_base_url = root_input.path - client_base_url = re.sub(r"/\$export(\?.*)?$", "/", client_base_url) - client_base_url = re.sub(r"/Patient/?$", "/", client_base_url) - client_base_url = re.sub(r"/Group/[^/]+/?$", "/", client_base_url) + client_base_url = fhir_utils.FhirUrl(root_input.path).root_url try: try: diff --git a/cumulus_etl/fhir/fhir_utils.py b/cumulus_etl/fhir/fhir_utils.py index 91eecfb..5e7c729 100644 --- a/cumulus_etl/fhir/fhir_utils.py +++ b/cumulus_etl/fhir/fhir_utils.py @@ -5,11 +5,14 @@ import email.message import re import urllib.parse +from typing import TYPE_CHECKING import inscriptis -from cumulus_etl import common -from cumulus_etl.fhir.fhir_client import FhirClient +from cumulus_etl import common, errors + +if TYPE_CHECKING: + from cumulus_etl.fhir.fhir_client import FhirClient # A relative reference is something like Patient/123 or Patient?identifier=http://hl7.org/fhir/sid/us-npi|9999999299 # (vs a contained reference that starts with # or an absolute URL reference like http://example.org/Patient/123) @@ -110,27 +113,43 @@ def parse_datetime(value: str | None) -> datetime.datetime | None: return None -def parse_group_from_url(url: str) -> str: +class FhirUrl: """ - Parses the group out of a FHIR URL. - - These URLS look something like: - - https://hostname/root/Group/my-group <- group name of `my-group` - - https://hostname/root/Group/my-group/$export <- group name of `my-group` - - https://hostname/root <- no group name + Parses a FHIR URL into relevant parts. + + Example URL parsing: + - https://hostname/root/Group/my-group + - Group: `my-group`, Base URL: https://hostname/root + - https://hostname/root/Group/my-group/$export?_type=Patient + - Group: `my-group`, Base URL: https://hostname/root + - https://hostname/root + - Group: ``, Base URL: https://hostname/root """ - parsed = urllib.parse.urlparse(url) - if not parsed.scheme: - raise ValueError(f"Could not parse URL '{url}'") - pieces = parsed.path.split("/Group/", 2) - match len(pieces): - case 2: - return pieces[1].split("/")[0] - case _: - # Global exports don't seem realistic, but if the user does do them, - # we'll use the empty string as the default group name for that. - return "" + def __init__(self, url: str): + parsed = urllib.parse.urlsplit(url) + if not parsed.scheme: + errors.fatal(f"Could not parse URL '{url}'", errors.ARGS_INVALID) + + # Strip off common bulk-export-suffixes that the user might give us, to get the base path + root_path = parsed.path + root_path = root_path.removesuffix("/") + root_path = root_path.removesuffix("/$export") + root_path = root_path.removesuffix("/Patient") # all-patient export + group_pieces = root_path.split("/Group/", 2) + root_path = group_pieces[0] + + # Original unmodified URL + self.full_url = url + + # The root of the FHIR server (i.e. https://host/api/FHIR/R4/) + root_parsed = parsed._replace(path=root_path, query="", fragment="") + self.root_url = urllib.parse.urlunsplit(root_parsed) + + # When exporting, the lack of a group means a global (system or all-patient) export, + # which doesn't seem super realistic, but is possible for the user to request. + # An empty-string group indicates there was no group. + self.group = group_pieces[1] if len(group_pieces) == 2 else "" ###################################################################################################################### @@ -140,7 +159,7 @@ def parse_group_from_url(url: str) -> str: ###################################################################################################################### -async def download_reference(client: FhirClient, reference: str) -> dict | None: +async def download_reference(client: "FhirClient", reference: str) -> dict | None: """ Downloads a resource, given a FHIR reference. @@ -187,7 +206,7 @@ def _mimetype_priority(mimetype: str) -> int: return 0 -async def _get_docref_note_from_attachment(client: FhirClient, attachment: dict) -> str: +async def _get_docref_note_from_attachment(client: "FhirClient", attachment: dict) -> str: """ Decodes or downloads a note from an attachment. @@ -232,7 +251,7 @@ def _save_cached_docref_note(docref: dict, note: str) -> None: common.write_text(note_path, note) -async def get_docref_note(client: FhirClient, docref: dict) -> str: +async def get_docref_note(client: "FhirClient", docref: dict) -> str: """ Returns the clinical note contained in or referenced by the given docref. diff --git a/cumulus_etl/loaders/fhir/bulk_export.py b/cumulus_etl/loaders/fhir/bulk_export.py index d5b467c..c6b387c 100644 --- a/cumulus_etl/loaders/fhir/bulk_export.py +++ b/cumulus_etl/loaders/fhir/bulk_export.py @@ -74,7 +74,7 @@ def __init__( # Public properties, to be read after the export: self.export_datetime = None - self.group_name = fhir.parse_group_from_url(self._url) + self.group_name = fhir.FhirUrl(self._url).group self.export_url = self._url def format_kickoff_url( diff --git a/cumulus_etl/loaders/fhir/export_log.py b/cumulus_etl/loaders/fhir/export_log.py index 9864385..0f8f1a0 100644 --- a/cumulus_etl/loaders/fhir/export_log.py +++ b/cumulus_etl/loaders/fhir/export_log.py @@ -76,7 +76,7 @@ def _parse(self, root: store.Root, path: str) -> None: def _parse_kickoff(self, row: dict) -> None: details = row["eventDetail"] - self.group_name = fhir.parse_group_from_url(details["exportUrl"]) + self.group_name = fhir.FhirUrl(details["exportUrl"]).group self.export_url = details["exportUrl"] def _parse_status_complete(self, row: dict) -> None: diff --git a/tests/fhir/test_fhir_client.py b/tests/fhir/test_fhir_client.py index 15ac7b2..ae9197c 100644 --- a/tests/fhir/test_fhir_client.py +++ b/tests/fhir/test_fhir_client.py @@ -419,16 +419,8 @@ def test_file_read_error(self): fhir.create_fhir_client_for_cli(args, store.Root("/tmp"), []) self.assertEqual(errors.ARGS_INVALID, cm.exception.code) - @ddt.data( - "http://example.invalid/root/", - "http://example.invalid/root/$export?", - "http://example.invalid/root/Group/xxx", - "http://example.invalid/root/Group/xxx/$export?_type=Patient", - "http://example.invalid/root/Patient", - "http://example.invalid/root/Patient/$export", - ) @mock.patch("cumulus_etl.fhir.fhir_client.FhirClient") - def test_can_find_auth_root(self, input_url, mock_client): + def test_can_find_auth_root(self, mock_client): """Verify that we detect the auth root for an input URL""" args = argparse.Namespace( fhir_url=None, @@ -439,8 +431,10 @@ def test_can_find_auth_root(self, input_url, mock_client): basic_passwd=None, bearer_token=None, ) - fhir.create_fhir_client_for_cli(args, store.Root(input_url), []) - self.assertEqual("http://example.invalid/root/", mock_client.call_args[0][0]) + fhir.create_fhir_client_for_cli( + args, store.Root("http://example.invalid/root/Group/xxx/$export?_type=Patient"), [] + ) + self.assertEqual("http://example.invalid/root", mock_client.call_args[0][0]) async def test_must_be_context_manager(self): """Verify that FHIRClient enforces its use as a context manager.""" diff --git a/tests/fhir/test_fhir_utils.py b/tests/fhir/test_fhir_utils.py index 9c432bb..e3dbf58 100644 --- a/tests/fhir/test_fhir_utils.py +++ b/tests/fhir/test_fhir_utils.py @@ -93,7 +93,7 @@ class TestUrlParsing(utils.AsyncTestCase): """Tests for URL parsing""" @ddt.data( - ("//host", ValueError), + ("//host", SystemExit), ("https://host", ""), ("https://host/root", ""), ("https://Group/MyGroup", ""), # Group is hostname here @@ -107,11 +107,26 @@ class TestUrlParsing(utils.AsyncTestCase): @ddt.unpack def test_parse_group_from_url(self, url, expected_group): if isinstance(expected_group, str): - group = fhir.parse_group_from_url(url) + group = fhir.FhirUrl(url).group assert expected_group == group else: with self.assertRaises(expected_group): - fhir.parse_group_from_url(url) + fhir.FhirUrl(url) + + @ddt.data( + "http://host/root/", + "http://host/root/$export?", + "http://host/root/Group/xxx", + "http://host/root/Group/xxx#fragment", + "http://host/root/Group/xxx/$export?_type=Patient", + "http://host/root/Patient", + "http://host/root/Patient/$export", + ) + def test_parse_base_url_from_url(self, url): + """Verify that we detect the auth root for an input URL""" + parsed = fhir.FhirUrl(url) + self.assertEqual(parsed.full_url, url) + self.assertEqual(parsed.root_url, "http://host/root") @ddt.ddt diff --git a/tests/loaders/ndjson/test_ndjson_loader.py b/tests/loaders/ndjson/test_ndjson_loader.py index 0f65513..705c2b0 100644 --- a/tests/loaders/ndjson/test_ndjson_loader.py +++ b/tests/loaders/ndjson/test_ndjson_loader.py @@ -207,7 +207,7 @@ async def test_fhir_url(self, mock_client): "--skip-init-checks", ] ) - self.assertEqual("https://example.com/hello1/", mock_client.call_args[0][0]) + self.assertEqual("https://example.com/hello1", mock_client.call_args[0][0]) # Confirm that we don't allow conflicting URLs with self.assertRaises(SystemExit):