Docs cleanup, query tests

smart-on-fhir · Dec 4, 2023 · 4f270e1 · 4f270e1
1 parent c391b4f
commit 4f270e1
Show file tree

Hide file tree

Showing 5 changed files with 174 additions and 62 deletions.
diff --git a/cumulus_library/template_sql/statistics/psm_templates.py b/cumulus_library/template_sql/statistics/psm_templates.py
@@ -9,37 +9,6 @@
 from cumulus_library.errors import CumulusLibraryError
 
 
-class ExtensionConfig(object):
-    """convenience class for holding parameters for generating extension tables.
-
-    :param source_table: the table to extract extensions from
-    :param source_id: the id column to treat as a foreign key
-    :param target_table: the name of the table to create
-    :param target_col_prefix: the string to prepend code/display column names with
-    :param fhir_extension: the URL of the FHIR resource to select
-    :param code_systems: a list of codes, in preference order, to use to select data
-    :param is_array: a boolean indicating if the targeted field is an array type
-    """
-
-    def __init__(
-        self,
-        source_table: str,
-        source_id: str,
-        target_table: str,
-        target_col_prefix: str,
-        fhir_extension: str,
-        ext_systems: List[str],
-        is_array: bool = False,
-    ):
-        self.source_table = source_table
-        self.source_id = source_id
-        self.target_table = target_table
-        self.target_col_prefix = target_col_prefix
-        self.fhir_extension = fhir_extension
-        self.ext_systems = ext_systems
-        self.is_array = is_array
-
-
 def get_distinct_ids(
     columns: list[str], source_table: str, join_id: str = None, filter_table: str = None
 ) -> str:
@@ -79,7 +48,7 @@ def get_create_covariate_table(
     neg_source_table: str,
     primary_ref: str,
     dependent_variable: str,
-    join_cols_by_table: dict = {},
+    join_cols_by_table: dict,
     count_ref: str = None,
     count_table: str = None,
 ) -> str:
@@ -116,29 +85,3 @@ def get_create_covariate_table(
             count_table=count_table,
             join_cols_by_table=join_cols_by_table,
         )
-
-
-def get_extension_denormalize_query(config: ExtensionConfig) -> str:
-    """extracts target extension from a table into a denormalized table
-
-    This function is targeted at a complex extension element that is at the root
-    of a FHIR resource - as an example, see the 5 codes at the root node of
-    http://hl7.org/fhir/us/core/STU6/StructureDefinition-us-core-patient.html.
-    The template will create a new table with the extension data, in arrays,
-    mapped 1-1 to the table id. You can specify multiple systems
-    in the ExtensionConfig passed to this function. For each patient, we'll
-    take the data from the first extension coding system we find for each patient.
-
-    :param config: An instance of ExtensionConfig.
-    """
-    path = Path(__file__).parent
-    with open(f"{path}/extension_denormalize.sql.jinja") as extension_denormalize:
-        return Template(extension_denormalize.read()).render(
-            source_table=config.source_table,
-            source_id=config.source_id,
-            target_table=config.target_table,
-            target_col_prefix=config.target_col_prefix,
-            fhir_extension=config.fhir_extension,
-            ext_systems=config.ext_systems,
-            is_array=config.is_array,
-        )
diff --git a/cumulus_mhg_dev_db b/cumulus_mhg_dev_db
diff --git a/docs/third-party-software-citations.md b/docs/third-party-software-citations.md
@@ -0,0 +1,17 @@
+---
+title: Third party software citations
+parent: Library
+nav_order: 8
+# audience: Clinical researchers interested in publications
+# type: reference
+---
+
+# Third party software citations
+
+This file lists third-party software libraries used by Cumulus library that
+are associated with publications. These are generally specific to the
+clinical research domain.
+
+## [PsmPy](https://github.com/adriennekline/psmpy)
+
+A. Kline and Y. Luo, PsmPy: A Package for Retrospective Cohort Matching in Python, 2022 44th Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC), 2022, pp. 1354-1357, doi: 10.1109/EMBC48229.2022.9871333.
diff --git a/tests/test_psm_templates.py b/tests/test_psm_templates.py
@@ -0,0 +1,153 @@
+from contextlib import nullcontext as does_not_raise
+
+import pytest
+
+from cumulus_library.errors import CumulusLibraryError
+from cumulus_library.template_sql.statistics.psm_templates import (
+    get_distinct_ids,
+    get_create_covariate_table,
+)
+
+
+@pytest.mark.parametrize(
+    "columns,source_table,join_id,filter_table,expected,raises",
+    [
+        (
+            ["a", "b"],
+            "source",
+            "ref_id",
+            "filter",
+            """SELECT DISTINCT
+    "source"."a",
+    "source"."b"
+FROM source
+WHERE "source"."ref_id" NOT IN (
+    SELECT "filter"."ref_id"
+    FROM filter
+)""",
+            does_not_raise(),
+        ),
+        (
+            ["a", "b"],
+            "source",
+            None,
+            None,
+            """SELECT DISTINCT
+    "source"."a",
+    "source"."b"
+FROM source""",
+            does_not_raise(),
+        ),
+        (["a", "b"], "source", "ref_id", None, "", pytest.raises(CumulusLibraryError)),
+    ],
+)
+def test_get_distinct_ids(
+    columns, source_table, join_id, filter_table, expected, raises
+):
+    with raises:
+        query = get_distinct_ids(columns, source_table, join_id, filter_table)
+        assert query == expected
+
+
+@pytest.mark.parametrize(
+    "target,pos_source,neg_source,primary_ref,dep_var,join_cols_by_table,count_ref,count_table,expected,raises",
+    [
+        (
+            "target",
+            "pos_table",
+            "neg_table",
+            "subject_id",
+            "has_flu",
+            {},
+            None,
+            None,
+            """CREATE TABLE target AS (
+    SELECT 
+        DISTINCT sample_cohort."subject_id",
+        sample_cohort."has_flu",
+        neg_table.code
+    FROM "pos_table_sampled_ids" AS sample_cohort, 
+        "neg_table",
+    WHERE sample_cohort."subject_id" = "neg_table"."subject_id"
+    -- AND c.recordeddate <= sample_cohort.enc_end_date
+    ORDER BY sample_cohort."subject_id"
+)""",
+            does_not_raise(),
+        ),
+        (
+            "target",
+            "pos_table",
+            "neg_table",
+            "subject_id",
+            "has_flu",
+            {
+                "join_table": {
+                    "join_id": "enc_ref",
+                    "included_cols": [["a"], ["b", "c"]],
+                }
+            },
+            "enc_ref",
+            "join_table",
+            """CREATE TABLE target AS (
+    SELECT 
+        DISTINCT sample_cohort."subject_id",
+        sample_cohort."has_flu",
+        (
+            SELECT COUNT( DISTINCT subject_id )
+            FROM "join_table"
+            WHERE sample_cohort."enc_ref" = "join_table"."enc_ref"
+            --AND sample_cohort.enc_end_date >= "join_table".recordeddate
+        ) AS instance_count,
+        "join_table"."a",
+        "join_table"."b" AS "c",
+        neg_table.code
+    FROM "pos_table_sampled_ids" AS sample_cohort, 
+        "neg_table",
+        "join_table"
+    WHERE sample_cohort."subject_id" = "neg_table"."subject_id"
+        AND sample_cohort."enc_ref" = "join_table"."enc_ref"
+    -- AND c.recordeddate <= sample_cohort.enc_end_date
+    ORDER BY sample_cohort."subject_id"
+)""",
+            does_not_raise(),
+        ),
+        (
+            "target",
+            "pos_table",
+            "neg_table",
+            "subject_id",
+            "has_flu",
+            {},
+            "join_table",
+            None,
+            "",
+            pytest.raises(CumulusLibraryError),
+        ),
+    ],
+)
+def test_create_covariate_table(
+    target,
+    pos_source,
+    neg_source,
+    primary_ref,
+    dep_var,
+    join_cols_by_table,
+    count_ref,
+    count_table,
+    expected,
+    raises,
+):
+    with raises:
+        query = get_create_covariate_table(
+            target,
+            pos_source,
+            neg_source,
+            primary_ref,
+            dep_var,
+            join_cols_by_table,
+            count_ref,
+            count_table,
+        )
+        with open("output.sql", "w") as f:
+            f.write(query)
+        assert query == expected
diff --git a/tests/test_templates.py b/tests/test_templates.py
@@ -368,11 +368,10 @@ def test_ctas_query_creation():
     expected = """CREATE TABLE "test_schema"."test_table" AS (
     SELECT * FROM (
         VALUES
-        ((cast('foo' AS varchar),cast('foo' AS varchar))),
-        ((cast('bar' AS varchar),cast('bar' AS varchar)))
+        (cast('foo' AS varchar),cast('foo' AS varchar)),
+        (cast('bar' AS varchar),cast('bar' AS varchar))
     )
-        AS t
-        ("a","b")
+        AS t ("a","b")
 );"""
     query = get_ctas_query(
         schema_name="test_schema",