Skip to content

Commit

Permalink
Better duckdb schema, manifest/template entrypoint tweaks (#342)
Browse files Browse the repository at this point in the history
* Better duckdb schema, manifest/template entrypoint tweaks

* PR feedback

* cleanup of countsbuilder tests
  • Loading branch information
dogversioning authored Jan 21, 2025
1 parent 0dc79ae commit 60209ac
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 44 deletions.
13 changes: 12 additions & 1 deletion cumulus_library/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@
from cumulus_library.builders.base_table_builder import BaseTableBuilder
from cumulus_library.builders.counts import CountsBuilder
from cumulus_library.study_manifest import StudyManifest
from cumulus_library.template_sql.base_templates import get_template

__all__ = ["BaseTableBuilder", "CountsBuilder", "StudyConfig", "StudyManifest"]
# A note about the `get_template` function:
# As of this writing, the get_template function is used internally by the
# Cumulus team across multiple study repos for creating SQL in databases, both
# from the templates in this repo and in the various other projects (while leveraging
# some of our syntax helper macros, which are auto loaded by this function).
# Breaking changes to these templates will result in a major version bump.

# This API should be usable for your own study efforts - but the documentation
# is all code level. See template_sql for more information.

__all__ = ["BaseTableBuilder", "CountsBuilder", "StudyConfig", "StudyManifest", "get_template"]
__version__ = "4.2.0"
6 changes: 1 addition & 5 deletions cumulus_library/actions/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

from cumulus_library import (
BaseTableBuilder,
CountsBuilder,
base_utils,
databases,
enums,
Expand Down Expand Up @@ -97,10 +96,7 @@ def _load_and_execute_builder(
# remove it so it doesn't interfere with the next python module to
# execute, since the subclass would otherwise hang around.
table_builder_class = table_builder_subclasses[0]
if issubclass(table_builder_class, CountsBuilder):
table_builder = table_builder_class(study_prefix=manifest.get_study_prefix())
else:
table_builder = table_builder_class()
table_builder = table_builder_class(manifest=manifest)
if write_reference_sql:
prefix = manifest.get_study_prefix()
table_builder.prepare_queries(config=config, manifest=manifest, parser=db_parser)
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/builders/base_table_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class BaseTableBuilder(abc.ABC):

display_text = "Building custom tables..."

def __init__(self):
def __init__(self, manifest: study_manifest.StudyManifest | None = None):
self.queries = []

@abc.abstractmethod
Expand Down
30 changes: 20 additions & 10 deletions cumulus_library/builders/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from pathlib import Path

from rich.console import Console

from cumulus_library import BaseTableBuilder, errors, study_manifest
from cumulus_library.builders.statistics_templates import counts_templates

Expand All @@ -12,21 +14,30 @@
class CountsBuilder(BaseTableBuilder):
"""Extends BaseTableBuilder for counts-related use cases"""

def __init__(self, study_prefix: str | None = None):
def __init__(
self, study_prefix: str | None = None, manifest: study_manifest.StudyManifest | None = None
):
super().__init__()
self.study_prefix = study_prefix
if manifest:
self.study_prefix = manifest.get_study_prefix()
elif study_prefix:
console = Console()
console.print(
"[yellow]Warning: providing study_prefix to a CountsBuilder is deprecated"
" and will be removed in a future version"
)
self.study_prefix = study_prefix
else:
raise errors.CountsBuilderError(
"CountsBuilder should be initiated with a valid manifest.toml"
)

def get_table_name(self, table_name: str, duration=None) -> str:
"""Convenience method for constructing table name
:param table_name: table name to add after the study prefix
:param duration: a time period reflecting the table binning strategy
"""
if not self.study_prefix:
raise errors.CountsBuilderError(
"CountsBuilder must be either initiated with a study prefix, "
"or be in a directory with a valid manifest.toml"
)
if duration:
return f"{self.study_prefix}__{table_name}_{duration}"
else:
Expand Down Expand Up @@ -64,7 +75,7 @@ def get_count_query(
:keyword min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
:keyword fhir_resource: The type of FHIR resource to count (see
statistics/statistics_templates/count_templates.CountableFhirResource)
builders/statistics_templates/count_templates.CountableFhirResource)
"""
if not table_name or not source_table or not table_cols:
raise errors.CountsBuilderError(
Expand Down Expand Up @@ -345,5 +356,4 @@ def prepare_queries(
This should be overridden in any count generator. See studies/core/count_core.py
for an example
"""
if manifest and not self.study_prefix:
self.study_prefix = manifest.get_study_prefix()
pass
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_count_query(
table_col_classed.append(CountColumn(name=item, db_type="varchar", alias=None))
table_cols = table_col_classed

query = base_templates.get_base_template(
query = base_templates.get_template(
"count",
path,
table_name=table_name,
Expand Down
9 changes: 8 additions & 1 deletion cumulus_library/template_sql/base_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ class TableView(enum.Enum):
VIEW = "VIEW"


def get_base_template(filename_stem: str, path: pathlib.Path | None = None, **kwargs: dict) -> str:
def get_base_template(*args, **kwargs):
"""Legacy wrapper for get_template"""
# TODO: Isolate this as sole library entrypoint after cutting over studies to use
# the public get_template
return get_template(*args, **kwargs)


def get_template(filename_stem: str, path: pathlib.Path | None = None, **kwargs: dict) -> str:
"""Abstract renderer for jinja templates
You can use this renderer directly, but if you are designing a function
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/template_sql/ctas_from_parquet.sql.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{%- if db_type == 'athena' -%}
CREATE EXTERNAL TABLE IF NOT EXISTS `{{ schema_name }}`.`{{ table_name }}` (
{%- elif db_type == 'duckdb' -%}
CREATE TABLE IF NOT EXISTS "{{ table_name }}" AS SELECT
CREATE TABLE IF NOT EXISTS "{{schema_name}}"."{{ table_name }}" AS SELECT
{%- endif %}
{%- for col in table_cols %}
{%- if db_type == 'athena' %} {{ col }} {{ remote_table_cols_types[loop.index0] }}
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/template_sql/insert_into.sql.jinja
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{%- import 'syntax.sql.jinja' as syntax -%}
INSERT INTO {{ schema_name }}.{{ table_name }}
INSERT INTO "{{ schema_name }}"."{{ table_name }}"
(
{%- for col in table_cols -%}
"{{ col }}"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def test_import_study(tmp_path, mock_db_config):
archive.write(tmp_path / "archive/test__table.csv")
(tmp_path / "archive/test__table.parquet").unlink()
(tmp_path / "archive/test__table.csv").unlink()
mock_db_config.schema = "schema_name"
mock_db_config.schema = "main"
importer.import_archive(config=mock_db_config, archive_path=tmp_path / "archive/test.zip")
assert len(list((tmp_path / "archive").glob("*"))) == 1
test_table = mock_db_config.db.cursor().execute("SELECT * FROM test__table").fetchall()
Expand Down
6 changes: 3 additions & 3 deletions tests/test_base_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ def test_ctas_empty_query_creation(expected, schema, table, cols, types):
["String", "Int"],
),
(
"""CREATE TABLE IF NOT EXISTS "local_table" AS SELECT "a", "b"
"""CREATE TABLE IF NOT EXISTS "test_duckdb"."local_table" AS SELECT "a", "b"
FROM read_parquet('./tests/test_data/*.parquet')""",
"duckdb",
"test_duckdb",
Expand Down Expand Up @@ -728,7 +728,7 @@ def test_extension_denormalize_creation():


def test_insert_into_query_creation():
expected = """INSERT INTO test.test_table
expected = """INSERT INTO "test"."test_table"
("a","b")
VALUES
('foo','foo'),
Expand All @@ -740,7 +740,7 @@ def test_insert_into_query_creation():
dataset=[["foo", "foo"], ["bar", "bar"]],
)
assert query == expected
expected = """INSERT INTO test.test_table
expected = """INSERT INTO "test"."test_table"
("a","b")
VALUES
('foo',VARCHAR 'foo'),
Expand Down
15 changes: 2 additions & 13 deletions tests/test_counts_builder.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""tests for outputs of counts_builder module"""

import pathlib
from contextlib import nullcontext as does_not_raise
from unittest import mock

import pytest

from cumulus_library import StudyManifest, errors
from cumulus_library import errors
from cumulus_library.builders import counts

TEST_PREFIX = "test"
Expand Down Expand Up @@ -201,18 +200,8 @@ def test_count_wrappers(


def test_null_study_prefix():
builder = counts.CountsBuilder()
with pytest.raises(errors.CountsBuilderError):
builder.get_table_name("table") # needs study_prefix to work


def test_implicit_study_prefix():
builder = counts.CountsBuilder()
manifest = StudyManifest(pathlib.Path("./tests/test_data/study_python_valid"))

assert builder.study_prefix is None
builder.prepare_queries(manifest=manifest)
assert builder.study_prefix == "study_python_valid"
counts.CountsBuilder()


def test_write_queries(tmp_path):
Expand Down
3 changes: 0 additions & 3 deletions tests/test_data/study_python_counts_valid/module1.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
class ModuleOneRunner(cumulus_library.CountsBuilder):
display_text = "module1"

def __init__(self, study_prefix):
super().__init__(study_prefix="study_python_counts_valid")

def prepare_queries(self, *args, **kwargs):
self.queries.append(
"CREATE TABLE IF NOT EXISTS study_python_counts_valid__table1 (test int);"
Expand Down
3 changes: 0 additions & 3 deletions tests/test_data/study_python_counts_valid/module2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
class ModuleTwoRunner(cumulus_library.CountsBuilder):
display_text = "module2"

def __init__(self, study_prefix):
super().__init__(study_prefix="study_python_counts_valid")

def prepare_queries(self, *args, **kwargs):
self.queries.append(
"CREATE TABLE IF NOT EXISTS study_python_counts_valid__table2 (test int);"
Expand Down

0 comments on commit 60209ac

Please sign in to comment.