Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Uprev to 3.10, namespace clarification #173

Merged
merged 2 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ jobs:
name: unit tests
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
python-version: '3.10'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small point here: we're only testing on 3.10, but as a command line CLI that doesn't ship with its own python in a docker image (i.e. unlike the ETL) - we should probably test on all supported pythons

Doesn't need to happen on this PR - I filed #174 for it.


- name: Install dependencies
run: |
Expand All @@ -35,7 +35,11 @@ jobs:
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install linters
run: |
python -m pip install --upgrade pip
Expand All @@ -52,7 +56,11 @@ jobs:
permissions:
id-token: write # This is required for requesting the JWT
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
8 changes: 5 additions & 3 deletions cumulus_library/base_table_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import final

from cumulus_library.databases import DatabaseCursor
from cumulus_library import helper
from cumulus_library import base_utils


class BaseTableBuilder(ABC):
Expand Down Expand Up @@ -77,15 +77,17 @@ def execute_queries(
table_names.append(table_name)
for table_name in table_names:
cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
with helper.get_progress_bar(disable=verbose) as progress:
with base_utils.get_progress_bar(disable=verbose) as progress:
task = progress.add_task(
self.display_text,
total=len(self.queries),
visible=not verbose,
)
for query in self.queries:
try:
with helper.query_console_output(verbose, query, progress, task):
with base_utils.query_console_output(
verbose, query, progress, task
):
cursor.execute(query)
except Exception as e: # pylint: disable=broad-exception-caught
sys.exit(e)
Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
databases,
enums,
errors,
helper,
base_utils,
protected_table_builder,
study_parser,
upload,
)
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import base_templates


class StudyRunner:
Expand All @@ -41,15 +41,15 @@ def __init__(self, db: databases.DatabaseBackend, data_path: str):
def update_transactions(self, prefix: str, status: str):
"""Adds a record to a study's transactions table"""
self.cursor.execute(
templates.get_insert_into_query(
base_templates.get_insert_into_query(
f"{prefix}__{enums.ProtectedTables.TRANSACTIONS.value}",
protected_table_builder.TRANSACTIONS_COLS,
[
[
prefix,
__version__,
status,
helper.get_utc_datetime(),
base_utils.get_utc_datetime(),
]
],
{"event_time": "TIMESTAMP"},
Expand Down
8 changes: 4 additions & 4 deletions cumulus_library/parsers/fhir_valueset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

from fhirclient.models.coding import Coding

from cumulus_library.helper import load_json
from cumulus_library.template_sql.templates import get_create_view_query
from cumulus_library import base_utils
from cumulus_library.template_sql import base_templates


def get_include_coding(valueset_json) -> List[Coding]:
Expand All @@ -20,7 +20,7 @@ def get_include_coding(valueset_json) -> List[Coding]:
:param valueset_json: ValueSet file, expecially those provided by NLM/ONC/VSAC
:return: list of codeable concepts (system, code, display) to include
"""
valueset = load_json(valueset_json)
valueset = base_utils.load_json(valueset_json)
parsed = []

for include in valueset["compose"]["include"]:
Expand All @@ -42,7 +42,7 @@ def create_view_sql(view_name: str, concept_list: List[Coding]) -> str:
content = []
for concept in concept_list:
content.append([concept.system, concept.code, concept.display])
return get_create_view_query(
return base_templates.get_create_view_query(
view_name=view_name, dataset=content, view_cols=["system", "code", "display"]
)

Expand Down
17 changes: 7 additions & 10 deletions cumulus_library/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
""" Builder for creating tables for tracking state/logging changes"""

from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.enums import ProtectedTables
from cumulus_library.template_sql.templates import (
get_ctas_empty_query,
)
from cumulus_library import base_table_builder, enums
from cumulus_library.template_sql import base_templates

TRANSACTIONS_COLS = ["study_name", "library_version", "status", "event_time"]
TRANSACTION_COLS_TYPES = ["varchar", "varchar", "varchar", "timestamp"]
Expand All @@ -28,7 +25,7 @@
]


class ProtectedTableBuilder(BaseTableBuilder):
class ProtectedTableBuilder(base_table_builder.BaseTableBuilder):
"""Builder for tables that persist across study clean/build actions"""

display_text = "Creating/updating system tables..."
Expand All @@ -43,18 +40,18 @@ def prepare_queries(
**kwargs,
):
self.queries.append(
get_ctas_empty_query(
base_templates.get_ctas_empty_query(
schema,
f"{study_name}__{ProtectedTables.TRANSACTIONS.value}",
f"{study_name}__{enums.ProtectedTables.TRANSACTIONS.value}",
TRANSACTIONS_COLS,
TRANSACTION_COLS_TYPES,
)
)
if study_stats:
self.queries.append(
get_ctas_empty_query(
base_templates.get_ctas_empty_query(
schema,
f"{study_name}__{ProtectedTables.STATISTICS.value}",
f"{study_name}__{enums.ProtectedTables.STATISTICS.value}",
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
Expand Down
40 changes: 19 additions & 21 deletions cumulus_library/statistics/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import json
import os
import pathlib
import sys
import warnings

from pathlib import PosixPath
from dataclasses import dataclass

import pandas
Expand All @@ -18,14 +18,10 @@
import matplotlib.pyplot as plt
import seaborn as sns

from cumulus_library.databases import DatabaseCursor
from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.template_sql import templates

from cumulus_library.template_sql.statistics.psm_templates import (
get_distinct_ids,
get_create_covariate_table,
)
from cumulus_library import databases
from cumulus_library import base_table_builder
from cumulus_library.template_sql import base_templates
from cumulus_library.template_sql.statistics import psm_templates


@dataclass
Expand Down Expand Up @@ -56,12 +52,12 @@ class PsmConfig:
seed: int


class PsmBuilder(BaseTableBuilder):
class PsmBuilder(base_table_builder.BaseTableBuilder):
"""TableBuilder for creating PSM tables"""

display_text = "Building PSM tables..."

def __init__(self, toml_config_path: str, data_path: PosixPath):
def __init__(self, toml_config_path: str, data_path: pathlib.Path):
"""Loads PSM job details from a PSM configuration file"""
super().__init__()
# We're stashing the toml path for error reporting later
Expand All @@ -75,7 +71,7 @@ def __init__(self, toml_config_path: str, data_path: PosixPath):
sys.exit(f"PSM configuration not found at {self.toml_path}")
try:
self.config = PsmConfig(
classification_json=f"{PosixPath(self.toml_path).parent}/{toml_config['classification_json']}",
classification_json=f"{pathlib.Path(self.toml_path).parent}/{toml_config['classification_json']}",
pos_source_table=toml_config["pos_source_table"],
neg_source_table=toml_config["neg_source_table"],
target_table=toml_config["target_table"],
Expand Down Expand Up @@ -103,7 +99,7 @@ def _get_symptoms_dict(self, path: str) -> dict:

def _get_sampled_ids(
self,
cursor: DatabaseCursor,
cursor: databases.DatabaseCursor,
schema: str,
query: str,
sample_size: int,
Expand Down Expand Up @@ -135,12 +131,14 @@ def _get_sampled_ids(
return df

def _create_covariate_table(
self, cursor: DatabaseCursor, schema: str, table_suffix: str
self, cursor: databases.DatabaseCursor, schema: str, table_suffix: str
):
"""Creates a covariate table from the loaded toml config"""
# checks for primary & link ref being the same
source_refs = list({self.config.primary_ref, self.config.count_ref} - {None})
pos_query = get_distinct_ids(source_refs, self.config.pos_source_table)
pos_query = psm_templates.get_distinct_ids(
source_refs, self.config.pos_source_table
)
pos = self._get_sampled_ids(
cursor,
schema,
Expand All @@ -149,7 +147,7 @@ def _create_covariate_table(
self.config.dependent_variable,
1,
)
neg_query = get_distinct_ids(
neg_query = psm_templates.get_distinct_ids(
source_refs,
self.config.neg_source_table,
join_id=self.config.primary_ref,
Expand All @@ -165,14 +163,14 @@ def _create_covariate_table(
)

cohort = pandas.concat([pos, neg])
ctas_query = templates.get_ctas_query_from_df(
ctas_query = base_templates.get_ctas_query_from_df(
schema,
f"{self.config.pos_source_table}_sampled_ids_{table_suffix}",
cohort,
)
self.queries.append(ctas_query)

dataset_query = get_create_covariate_table(
dataset_query = psm_templates.get_create_covariate_table(
target_table=f"{self.config.target_table}_{table_suffix}",
pos_source_table=self.config.pos_source_table,
neg_source_table=self.config.neg_source_table,
Expand Down Expand Up @@ -261,15 +259,15 @@ def psm_effect_size_plot(
sns_plot.figure.savefig(filename, dpi=250, bbox_inches="tight")

def generate_psm_analysis(
self, cursor: DatabaseCursor, schema: str, table_suffix: str
self, cursor: databases.DatabaseCursor, schema: str, table_suffix: str
):
stats_table = f"{self.config.target_table}_{table_suffix}"
"""Runs PSM statistics on generated tables"""
cursor.execute(
templates.get_alias_table_query(stats_table, self.config.target_table)
base_templates.get_alias_table_query(stats_table, self.config.target_table)
)
df = cursor.execute(
templates.get_select_all_query(self.config.target_table)
base_templates.get_select_all_query(self.config.target_table)
).as_pandas()
symptoms_dict = self._get_symptoms_dict(self.config.classification_json)
for dependent_variable, codes in symptoms_dict.items():
Expand Down
10 changes: 5 additions & 5 deletions cumulus_library/studies/core/builder_condition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates, utils
from cumulus_library.template_sql import base_templates, sql_utils


expected_table_cols = {
Expand Down Expand Up @@ -45,7 +45,7 @@ class CoreConditionBuilder(base_table_builder.BaseTableBuilder):
display_text = "Creating Condition tables..."

def denormalize_codes(self):
preferred_config = utils.CodeableConceptConfig(
preferred_config = sql_utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -59,10 +59,10 @@ def denormalize_codes(self):
],
)
self.queries.append(
templates.get_codeable_concept_denormalize_query(preferred_config)
base_templates.get_codeable_concept_denormalize_query(preferred_config)
)

all_config = utils.CodeableConceptConfig(
all_config = sql_utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -71,7 +71,7 @@ def denormalize_codes(self):
filter_priority=False,
)
self.queries.append(
templates.get_codeable_concept_denormalize_query(all_config)
base_templates.get_codeable_concept_denormalize_query(all_config)
)

def prepare_queries(
Expand Down
6 changes: 3 additions & 3 deletions cumulus_library/studies/core/builder_documentreference.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates, utils
from cumulus_library.template_sql import sql_utils


expected_table_cols = {
Expand All @@ -28,11 +28,11 @@ def prepare_queries(
parser: databases.DatabaseParser = None,
**kwargs,
):
self.queries += utils.denormalize_codes(
self.queries += sql_utils.denormalize_codes(
schema,
cursor,
[
utils.CodeableConceptConfig(
sql_utils.CodeableConceptConfig(
source_table="documentreference",
source_id="id",
column_name="type",
Expand Down
7 changes: 3 additions & 4 deletions cumulus_library/studies/core/builder_encounter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import utils
from cumulus_library.template_sql import sql_utils


expected_table_cols = {
Expand Down Expand Up @@ -81,7 +80,7 @@ def denormalize_codes(self, schema, cursor):
code_configs = []
for code_source in code_sources:
code_configs.append(
utils.CodeableConceptConfig(
sql_utils.CodeableConceptConfig(
source_table="encounter",
source_id="id",
column_name=code_source["column_name"],
Expand All @@ -91,7 +90,7 @@ def denormalize_codes(self, schema, cursor):
target_table=f"core__encounter_dn_{code_source['column_name']}",
)
)
self.queries += utils.denormalize_codes(schema, cursor, code_configs)
self.queries += sql_utils.denormalize_codes(schema, cursor, code_configs)

def prepare_queries(
self,
Expand Down
Loading
Loading