Skip to content

Commit

Permalink
Uprev to 3.11, namespace clarification
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed Feb 5, 2024
1 parent cf0b614 commit 811f514
Show file tree
Hide file tree
Showing 23 changed files with 167 additions and 155 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
name: unit tests
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
Expand All @@ -35,7 +35,7 @@ jobs:
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install linters
run: |
python -m pip install --upgrade pip
Expand All @@ -52,19 +52,19 @@ jobs:
permissions:
id-token: write # This is required for requesting the JWT
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .
# See https://github.com/aws-actions/configure-aws-credentials for configuring
# See https://github.com/Moulick/configure-multiple-aws-roles for configuring
# the aws side of this - the below action is just a light wrapper
- name: Configure AWS Credentials
uses: mcblair/configure-aws-[email protected]
uses: Moulick/configure-multiple-aws-roles@v4
with:
role-arn: arn:aws:iam::${{secrets.AWS_ACCOUNT}}:role/cumulus-library-ci
region: us-east-1
profile-name: cumulus-library-ci
profile: cumulus-library-ci
- name: Rebuild tables
env:
PROFILE: cumulus-library-ci
Expand Down
8 changes: 5 additions & 3 deletions cumulus_library/base_table_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import final

from cumulus_library.databases import DatabaseCursor
from cumulus_library import helper
from cumulus_library import base_utils


class BaseTableBuilder(ABC):
Expand Down Expand Up @@ -77,15 +77,17 @@ def execute_queries(
table_names.append(table_name)
for table_name in table_names:
cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
with helper.get_progress_bar(disable=verbose) as progress:
with base_utils.get_progress_bar(disable=verbose) as progress:
task = progress.add_task(
self.display_text,
total=len(self.queries),
visible=not verbose,
)
for query in self.queries:
try:
with helper.query_console_output(verbose, query, progress, task):
with base_utils.query_console_output(
verbose, query, progress, task
):
cursor.execute(query)
except Exception as e: # pylint: disable=broad-exception-caught
sys.exit(e)
Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
databases,
enums,
errors,
helper,
base_utils,
protected_table_builder,
study_parser,
upload,
)
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import base_templates


class StudyRunner:
Expand All @@ -41,15 +41,15 @@ def __init__(self, db: databases.DatabaseBackend, data_path: str):
def update_transactions(self, prefix: str, status: str):
"""Adds a record to a study's transactions table"""
self.cursor.execute(
templates.get_insert_into_query(
base_templates.get_insert_into_query(
f"{prefix}__{enums.ProtectedTables.TRANSACTIONS.value}",
protected_table_builder.TRANSACTIONS_COLS,
[
[
prefix,
__version__,
status,
helper.get_utc_datetime(),
base_utils.get_utc_datetime(),
]
],
{"event_time": "TIMESTAMP"},
Expand Down
8 changes: 4 additions & 4 deletions cumulus_library/parsers/fhir_valueset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

from fhirclient.models.coding import Coding

from cumulus_library.helper import load_json
from cumulus_library.template_sql.templates import get_create_view_query
from cumulus_library import base_utils
from cumulus_library.template_sql import base_templates


def get_include_coding(valueset_json) -> List[Coding]:
Expand All @@ -20,7 +20,7 @@ def get_include_coding(valueset_json) -> List[Coding]:
:param valueset_json: ValueSet file, expecially those provided by NLM/ONC/VSAC
:return: list of codeable concepts (system, code, display) to include
"""
valueset = load_json(valueset_json)
valueset = base_utils.load_json(valueset_json)
parsed = []

for include in valueset["compose"]["include"]:
Expand All @@ -42,7 +42,7 @@ def create_view_sql(view_name: str, concept_list: List[Coding]) -> str:
content = []
for concept in concept_list:
content.append([concept.system, concept.code, concept.display])
return get_create_view_query(
return base_templates.get_create_view_query(
view_name=view_name, dataset=content, view_cols=["system", "code", "display"]
)

Expand Down
17 changes: 7 additions & 10 deletions cumulus_library/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
""" Builder for creating tables for tracking state/logging changes"""

from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.enums import ProtectedTables
from cumulus_library.template_sql.templates import (
get_ctas_empty_query,
)
from cumulus_library import base_table_builder, enums
from cumulus_library.template_sql import base_templates

TRANSACTIONS_COLS = ["study_name", "library_version", "status", "event_time"]
TRANSACTION_COLS_TYPES = ["varchar", "varchar", "varchar", "timestamp"]
Expand All @@ -28,7 +25,7 @@
]


class ProtectedTableBuilder(BaseTableBuilder):
class ProtectedTableBuilder(base_table_builder.BaseTableBuilder):
"""Builder for tables that persist across study clean/build actions"""

display_text = "Creating/updating system tables..."
Expand All @@ -43,18 +40,18 @@ def prepare_queries(
**kwargs,
):
self.queries.append(
get_ctas_empty_query(
base_templates.get_ctas_empty_query(
schema,
f"{study_name}__{ProtectedTables.TRANSACTIONS.value}",
f"{study_name}__{enums.ProtectedTables.TRANSACTIONS.value}",
TRANSACTIONS_COLS,
TRANSACTION_COLS_TYPES,
)
)
if study_stats:
self.queries.append(
get_ctas_empty_query(
base_templates.get_ctas_empty_query(
schema,
f"{study_name}__{ProtectedTables.STATISTICS.value}",
f"{study_name}__{enums.ProtectedTables.STATISTICS.value}",
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
Expand Down
40 changes: 19 additions & 21 deletions cumulus_library/statistics/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import json
import os
import pathlib
import sys
import warnings

from pathlib import PosixPath
from dataclasses import dataclass

import pandas
Expand All @@ -18,14 +18,10 @@
import matplotlib.pyplot as plt
import seaborn as sns

from cumulus_library.databases import DatabaseCursor
from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.template_sql import templates

from cumulus_library.template_sql.statistics.psm_templates import (
get_distinct_ids,
get_create_covariate_table,
)
from cumulus_library import databases
from cumulus_library import base_table_builder
from cumulus_library.template_sql import base_templates
from cumulus_library.template_sql.statistics import psm_templates


@dataclass
Expand Down Expand Up @@ -56,12 +52,12 @@ class PsmConfig:
seed: int


class PsmBuilder(BaseTableBuilder):
class PsmBuilder(base_table_builder.BaseTableBuilder):
"""TableBuilder for creating PSM tables"""

display_text = "Building PSM tables..."

def __init__(self, toml_config_path: str, data_path: PosixPath):
def __init__(self, toml_config_path: str, data_path: pathlib.Path):
"""Loads PSM job details from a PSM configuration file"""
super().__init__()
# We're stashing the toml path for error reporting later
Expand All @@ -75,7 +71,7 @@ def __init__(self, toml_config_path: str, data_path: PosixPath):
sys.exit(f"PSM configuration not found at {self.toml_path}")
try:
self.config = PsmConfig(
classification_json=f"{PosixPath(self.toml_path).parent}/{toml_config['classification_json']}",
classification_json=f"{pathlib.Path(self.toml_path).parent}/{toml_config['classification_json']}",
pos_source_table=toml_config["pos_source_table"],
neg_source_table=toml_config["neg_source_table"],
target_table=toml_config["target_table"],
Expand Down Expand Up @@ -103,7 +99,7 @@ def _get_symptoms_dict(self, path: str) -> dict:

def _get_sampled_ids(
self,
cursor: DatabaseCursor,
cursor: databases.DatabaseCursor,
schema: str,
query: str,
sample_size: int,
Expand Down Expand Up @@ -135,12 +131,14 @@ def _get_sampled_ids(
return df

def _create_covariate_table(
self, cursor: DatabaseCursor, schema: str, table_suffix: str
self, cursor: databases.DatabaseCursor, schema: str, table_suffix: str
):
"""Creates a covariate table from the loaded toml config"""
# checks for primary & link ref being the same
source_refs = list({self.config.primary_ref, self.config.count_ref} - {None})
pos_query = get_distinct_ids(source_refs, self.config.pos_source_table)
pos_query = psm_templates.get_distinct_ids(
source_refs, self.config.pos_source_table
)
pos = self._get_sampled_ids(
cursor,
schema,
Expand All @@ -149,7 +147,7 @@ def _create_covariate_table(
self.config.dependent_variable,
1,
)
neg_query = get_distinct_ids(
neg_query = psm_templates.get_distinct_ids(
source_refs,
self.config.neg_source_table,
join_id=self.config.primary_ref,
Expand All @@ -165,14 +163,14 @@ def _create_covariate_table(
)

cohort = pandas.concat([pos, neg])
ctas_query = templates.get_ctas_query_from_df(
ctas_query = base_templates.get_ctas_query_from_df(
schema,
f"{self.config.pos_source_table}_sampled_ids_{table_suffix}",
cohort,
)
self.queries.append(ctas_query)

dataset_query = get_create_covariate_table(
dataset_query = psm_templates.get_create_covariate_table(
target_table=f"{self.config.target_table}_{table_suffix}",
pos_source_table=self.config.pos_source_table,
neg_source_table=self.config.neg_source_table,
Expand Down Expand Up @@ -261,15 +259,15 @@ def psm_effect_size_plot(
sns_plot.figure.savefig(filename, dpi=250, bbox_inches="tight")

def generate_psm_analysis(
self, cursor: DatabaseCursor, schema: str, table_suffix: str
self, cursor: databases.DatabaseCursor, schema: str, table_suffix: str
):
stats_table = f"{self.config.target_table}_{table_suffix}"
"""Runs PSM statistics on generated tables"""
cursor.execute(
templates.get_alias_table_query(stats_table, self.config.target_table)
base_templates.get_alias_table_query(stats_table, self.config.target_table)
)
df = cursor.execute(
templates.get_select_all_query(self.config.target_table)
base_templates.get_select_all_query(self.config.target_table)
).as_pandas()
symptoms_dict = self._get_symptoms_dict(self.config.classification_json)
for dependent_variable, codes in symptoms_dict.items():
Expand Down
10 changes: 5 additions & 5 deletions cumulus_library/studies/core/builder_condition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates, utils
from cumulus_library.template_sql import base_templates, sql_utils


expected_table_cols = {
Expand Down Expand Up @@ -45,7 +45,7 @@ class CoreConditionBuilder(base_table_builder.BaseTableBuilder):
display_text = "Creating Condition tables..."

def denormalize_codes(self):
preferred_config = utils.CodeableConceptConfig(
preferred_config = sql_utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -59,10 +59,10 @@ def denormalize_codes(self):
],
)
self.queries.append(
templates.get_codeable_concept_denormalize_query(preferred_config)
base_templates.get_codeable_concept_denormalize_query(preferred_config)
)

all_config = utils.CodeableConceptConfig(
all_config = sql_utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -71,7 +71,7 @@ def denormalize_codes(self):
filter_priority=False,
)
self.queries.append(
templates.get_codeable_concept_denormalize_query(all_config)
base_templates.get_codeable_concept_denormalize_query(all_config)
)

def prepare_queries(
Expand Down
6 changes: 3 additions & 3 deletions cumulus_library/studies/core/builder_documentreference.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates, utils
from cumulus_library.template_sql import sql_utils


expected_table_cols = {
Expand All @@ -28,11 +28,11 @@ def prepare_queries(
parser: databases.DatabaseParser = None,
**kwargs,
):
self.queries += utils.denormalize_codes(
self.queries += sql_utils.denormalize_codes(
schema,
cursor,
[
utils.CodeableConceptConfig(
sql_utils.CodeableConceptConfig(
source_table="documentreference",
source_id="id",
column_name="type",
Expand Down
7 changes: 3 additions & 4 deletions cumulus_library/studies/core/builder_encounter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import utils
from cumulus_library.template_sql import sql_utils


expected_table_cols = {
Expand Down Expand Up @@ -81,7 +80,7 @@ def denormalize_codes(self, schema, cursor):
code_configs = []
for code_source in code_sources:
code_configs.append(
utils.CodeableConceptConfig(
sql_utils.CodeableConceptConfig(
source_table="encounter",
source_id="id",
column_name=code_source["column_name"],
Expand All @@ -91,7 +90,7 @@ def denormalize_codes(self, schema, cursor):
target_table=f"core__encounter_dn_{code_source['column_name']}",
)
)
self.queries += utils.denormalize_codes(schema, cursor, code_configs)
self.queries += sql_utils.denormalize_codes(schema, cursor, code_configs)

def prepare_queries(
self,
Expand Down
Loading

0 comments on commit 811f514

Please sign in to comment.