From a9cdbd4402058de2118cc259326dbfac9ba9a7c8 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Mon, 30 Oct 2023 09:00:40 -0400 Subject: [PATCH 1/2] docs: fix link to source that changed after a refactor --- docs/deid.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deid.md b/docs/deid.md index 3b27629a..5dc0fc22 100644 --- a/docs/deid.md +++ b/docs/deid.md @@ -74,7 +74,7 @@ But the devil is in the details, because it can be configured to do nothing at a or redact everything. Cumulus ETL uses a -[custom configuration](https://github.com/smart-on-fhir/cumulus-etl/blob/main/cumulus/deid/ms-config.json), +[custom configuration](https://github.com/smart-on-fhir/cumulus-etl/blob/main/cumulus_etl/deid/ms-config.json), designed to remove everything by default, and only allow specifically mentioned fields (i.e. an allow-list or whitelist). From 53d243ef2f26bda66bed6e09501bf5803eef6374 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Mon, 30 Oct 2023 09:01:10 -0400 Subject: [PATCH 2/2] fix: change overwrite detection to also allow .meta files Our ndjson format's new overwrite warning only looked for .ndjson files but we can also write .meta files for NLP jobs. So allow that sort of file too. --- cumulus_etl/formats/batched_files.py | 2 +- tests/formats/test_ndjson.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cumulus_etl/formats/batched_files.py b/cumulus_etl/formats/batched_files.py index de855ddf..2a48fc75 100644 --- a/cumulus_etl/formats/batched_files.py +++ b/cumulus_etl/formats/batched_files.py @@ -61,7 +61,7 @@ def _confirm_no_unknown_files_exist(self, folder: str) -> None: except FileNotFoundError: return # folder doesn't exist, we're good! - allowed_pattern = re.compile(rf"{self.dbname}\.[0-9]+\.{self.suffix}") + allowed_pattern = re.compile(rf"{self.dbname}\.[0-9]+\.({self.suffix}|meta)") if not all(map(allowed_pattern.fullmatch, filenames)): errors.fatal( f"There are unexpected files in the output folder '{folder}'.\n" diff --git a/tests/formats/test_ndjson.py b/tests/formats/test_ndjson.py index 593c45b2..78d99a98 100644 --- a/tests/formats/test_ndjson.py +++ b/tests/formats/test_ndjson.py @@ -49,6 +49,7 @@ def store( (None, True), ([], True), (["condition.1234.ndjson", "condition.22.ndjson"], True), + (["condition.000.meta"], True), (["condition.ndjson"], False), (["condition.000.parquet"], False), (["patient.000.ndjson"], False),