Skip to content

Commit

Permalink
Docs cleanup, query tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed Dec 4, 2023
1 parent c391b4f commit 4f270e1
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 62 deletions.
59 changes: 1 addition & 58 deletions cumulus_library/template_sql/statistics/psm_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,6 @@
from cumulus_library.errors import CumulusLibraryError


class ExtensionConfig:
    """Convenience class for holding parameters for generating extension tables.

    Instances are plain value holders: every constructor argument is stored
    unchanged on the attribute of the same name.

    :param source_table: the table to extract extensions from
    :param source_id: the id column to treat as a foreign key
    :param target_table: the name of the table to create
    :param target_col_prefix: the string to prepend code/display column names with
    :param fhir_extension: the URL of the FHIR resource to select
    :param ext_systems: a list of code systems, in preference order, to use to
        select data
    :param is_array: a boolean indicating if the targeted field is an array type
    """

    def __init__(
        self,
        source_table: str,
        source_id: str,
        target_table: str,
        target_col_prefix: str,
        fhir_extension: str,
        ext_systems: list[str],
        is_array: bool = False,
    ):
        self.source_table = source_table
        self.source_id = source_id
        self.target_table = target_table
        self.target_col_prefix = target_col_prefix
        self.fhir_extension = fhir_extension
        # Order matters: earlier systems are preferred over later ones.
        self.ext_systems = ext_systems
        self.is_array = is_array


def get_distinct_ids(
columns: list[str], source_table: str, join_id: str = None, filter_table: str = None
) -> str:
Expand Down Expand Up @@ -79,7 +48,7 @@ def get_create_covariate_table(
neg_source_table: str,
primary_ref: str,
dependent_variable: str,
join_cols_by_table: dict = {},
join_cols_by_table: dict,
count_ref: str = None,
count_table: str = None,
) -> str:
Expand Down Expand Up @@ -116,29 +85,3 @@ def get_create_covariate_table(
count_table=count_table,
join_cols_by_table=join_cols_by_table,
)


def get_extension_denormalize_query(config: ExtensionConfig) -> str:
    """Render the query that extracts an extension into a denormalized table.

    The target is a complex extension element located at the root of a FHIR
    resource - for an example, see the 5 codes at the root node of
    http://hl7.org/fhir/us/core/STU6/StructureDefinition-us-core-patient.html.
    The rendered query creates a new table holding the extension data, in
    arrays, mapped 1-1 to the table id. Several systems may be listed in the
    ExtensionConfig; for each patient, the data from the first extension
    coding system found is the one used.

    :param config: An instance of ExtensionConfig.
    :returns: the rendered SQL query string
    """
    template_dir = Path(__file__).parent
    # Every ExtensionConfig field is forwarded to the template under its own name.
    render_args = {
        "source_table": config.source_table,
        "source_id": config.source_id,
        "target_table": config.target_table,
        "target_col_prefix": config.target_col_prefix,
        "fhir_extension": config.fhir_extension,
        "ext_systems": config.ext_systems,
        "is_array": config.is_array,
    }
    with open(f"{template_dir}/extension_denormalize.sql.jinja") as template_file:
        template = Template(template_file.read())
    return template.render(**render_args)
Binary file added cumulus_mhg_dev_db
Binary file not shown.
17 changes: 17 additions & 0 deletions docs/third-party-software-citations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
title: Third party software citations
parent: Library
nav_order: 8
# audience: Clinical researchers interested in publications
# type: reference
---

# Third party software citations

This file lists the third party software libraries used by Cumulus library
that have associated publications. These libraries are generally specific
to the clinical research domain.

## [PsmPy](https://github.com/adriennekline/psmpy)

A. Kline and Y. Luo, PsmPy: A Package for Retrospective Cohort Matching in Python, 2022 44th Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC), 2022, pp. 1354-1357, doi: 10.1109/EMBC48229.2022.9871333.
153 changes: 153 additions & 0 deletions tests/test_psm_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from contextlib import nullcontext as does_not_raise

import pytest

from cumulus_library.errors import CumulusLibraryError
from cumulus_library.template_sql.statistics.psm_templates import (
get_distinct_ids,
get_create_covariate_table,
)


@pytest.mark.parametrize(
    "columns,source_table,join_id,filter_table,expected,raises",
    [
        # join_id and filter_table both supplied: a NOT IN filter is expected.
        (
            ["a", "b"],
            "source",
            "ref_id",
            "filter",
            """SELECT DISTINCT
"source"."a",
"source"."b"
FROM source
WHERE "source"."ref_id" NOT IN (
SELECT "filter"."ref_id"
FROM filter
)""",
            does_not_raise(),
        ),
        # Neither join_id nor filter_table: a plain SELECT DISTINCT is expected.
        (
            ["a", "b"],
            "source",
            None,
            None,
            """SELECT DISTINCT
"source"."a",
"source"."b"
FROM source""",
            does_not_raise(),
        ),
        # join_id without filter_table: a CumulusLibraryError is expected.
        (["a", "b"], "source", "ref_id", None, "", pytest.raises(CumulusLibraryError)),
    ],
)
def test_get_distinct_ids(
    columns, source_table, join_id, filter_table, expected, raises
):
    """Check the SQL rendered by get_distinct_ids against each expected case.

    ``raises`` is a context manager: either a no-op for cases that should
    render successfully, or ``pytest.raises`` for cases that should fail.
    """
    with raises:
        rendered = get_distinct_ids(
            columns=columns,
            source_table=source_table,
            join_id=join_id,
            filter_table=filter_table,
        )
        assert rendered == expected


@pytest.mark.parametrize(
    "target,pos_source,neg_source,primary_ref,dep_var,join_cols_by_table,count_ref,count_table,expected,raises",
    [
        # No join tables and no count columns.
        # NOTE(review): the trailing comma after "neg_table" mirrors what the
        # test currently expects from the template - confirm it is intended.
        (
            "target",
            "pos_table",
            "neg_table",
            "subject_id",
            "has_flu",
            {},
            None,
            None,
            """CREATE TABLE target AS (
SELECT
DISTINCT sample_cohort."subject_id",
sample_cohort."has_flu",
neg_table.code
FROM "pos_table_sampled_ids" AS sample_cohort,
"neg_table",
WHERE sample_cohort."subject_id" = "neg_table"."subject_id"
-- AND c.recordeddate <= sample_cohort.enc_end_date
ORDER BY sample_cohort."subject_id"
)""",
            does_not_raise(),
        ),
        # One join table with a plain column and an aliased column, plus a
        # count subquery driven by count_ref/count_table.
        (
            "target",
            "pos_table",
            "neg_table",
            "subject_id",
            "has_flu",
            {
                "join_table": {
                    "join_id": "enc_ref",
                    "included_cols": [["a"], ["b", "c"]],
                }
            },
            "enc_ref",
            "join_table",
            """CREATE TABLE target AS (
SELECT
DISTINCT sample_cohort."subject_id",
sample_cohort."has_flu",
(
SELECT COUNT( DISTINCT subject_id )
FROM "join_table"
WHERE sample_cohort."enc_ref" = "join_table"."enc_ref"
--AND sample_cohort.enc_end_date >= "join_table".recordeddate
) AS instance_count,
"join_table"."a",
"join_table"."b" AS "c",
neg_table.code
FROM "pos_table_sampled_ids" AS sample_cohort,
"neg_table",
"join_table"
WHERE sample_cohort."subject_id" = "neg_table"."subject_id"
AND sample_cohort."enc_ref" = "join_table"."enc_ref"
-- AND c.recordeddate <= sample_cohort.enc_end_date
ORDER BY sample_cohort."subject_id"
)""",
            does_not_raise(),
        ),
        # count_ref without count_table: a CumulusLibraryError is expected.
        (
            "target",
            "pos_table",
            "neg_table",
            "subject_id",
            "has_flu",
            {},
            "join_table",
            None,
            "",
            pytest.raises(CumulusLibraryError),
        ),
    ],
)
def test_create_covariate_table(
    target,
    pos_source,
    neg_source,
    primary_ref,
    dep_var,
    join_cols_by_table,
    count_ref,
    count_table,
    expected,
    raises,
):
    """Check the SQL rendered by get_create_covariate_table for each case.

    ``raises`` is a context manager: either a no-op for cases that should
    render successfully, or ``pytest.raises`` for cases that should fail.
    The test has no filesystem side effects; it only compares strings.
    """
    with raises:
        query = get_create_covariate_table(
            target,
            pos_source,
            neg_source,
            primary_ref,
            dep_var,
            join_cols_by_table,
            count_ref,
            count_table,
        )
        assert query == expected
7 changes: 3 additions & 4 deletions tests/test_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,10 @@ def test_ctas_query_creation():
expected = """CREATE TABLE "test_schema"."test_table" AS (
SELECT * FROM (
VALUES
((cast('foo' AS varchar),cast('foo' AS varchar))),
((cast('bar' AS varchar),cast('bar' AS varchar)))
(cast('foo' AS varchar),cast('foo' AS varchar)),
(cast('bar' AS varchar),cast('bar' AS varchar))
)
AS t
("a","b")
AS t ("a","b")
);"""
query = get_ctas_query(
schema_name="test_schema",
Expand Down

0 comments on commit 4f270e1

Please sign in to comment.