Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
troyraen committed Jan 4, 2025
1 parent 42a87f7 commit 7b2fb2f
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 45 deletions.
133 changes: 128 additions & 5 deletions tests/hats_import/verification/test_run_verification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import pytest

import hats_import.verification.run_verification as runner
Expand All @@ -14,12 +15,134 @@ def test_bad_args():
runner.run(args)


def test_no_implementation(tmp_path, small_sky_object_catalog):
"""Womp womp. Test that we don't have a verification pipeline implemented"""
def test_runner(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
"""Runner should execute all tests and write a report to file."""
result_cols = ["datetime", "passed", "test", "target"]

args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
verifier = runner.run(args, write_mode="w")
all_passed = verifier.results_df.passed.all()
assert all_passed, "valid catalog failed"
# # [FIXME] pandas metadata is unexpectedly missing hats columns
# if not all_passed:
# _test = verifier.results_df.test == "schema consistency"
# _target = verifier.results_df.target == "constructed_truth_schema"
# assert verifier.results_df.loc[~(_test & _target)].passed.all()
written_results = pd.read_csv(args.output_path / args.output_filename)
assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
verifier = runner.run(args, write_mode="w")
assert not verifier.results_df.passed.all(), "invalid catalog passed"
written_results = pd.read_csv(args.output_path / args.output_filename)
assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"


def test_test_file_sets(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
"""File set tests should fail if files listed in _metadata don't match the actual data files."""
args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
verifier = runner.Verifier.from_args(args)
passed = verifier.test_file_sets()
assert passed, "valid catalog failed"

args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
verifier = runner.Verifier.from_args(args)
passed = verifier.test_file_sets()
assert not passed, "invalid catalog passed"
bad_files = {"Norder=0/Dir=0/Npix=11.extra_file.parquet", "Norder=0/Dir=0/Npix=11.missing_file.parquet"}
assert bad_files == set(verifier.results_df.bad_files.squeeze()), "bad_files failed"


def test_test_is_valid_catalog(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
"""`hats.is_valid_catalog` should pass for valid catalogs, fail for catalogs without ancillary files."""
args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
verifier = runner.Verifier.from_args(args)
passed = verifier.test_is_valid_catalog()
assert passed, "valid catalog failed"

args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
verifier = runner.Verifier.from_args(args)
passed = verifier.test_is_valid_catalog()
assert not passed, "invalid catalog passed"


def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
"""Row count tests should pass if all row counts match, else fail."""
args = VerificationArguments(
input_catalog_path=small_sky_object_catalog, output_path=tmp_path, truth_total_rows=131
)
verifier = runner.Verifier.from_args(args)
verifier.test_num_rows()
all_passed = verifier.results_df.passed.all()
assert all_passed, "valid catalog failed"

args = VerificationArguments(
input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path, truth_total_rows=131
)
verifier = runner.Verifier.from_args(args)
verifier.test_num_rows()
results = verifier.results_df
all_failed = not results.passed.any()
assert all_failed, "invalid catalog passed"

targets = {"file footers vs _metadata", "file footers vs truth"}
assert targets == set(results.target), "wrong targets"

bad_files = {
"Norder=0/Dir=0/Npix=11.extra_file.parquet",
"Norder=0/Dir=0/Npix=11.extra_rows.parquet",
"Norder=0/Dir=0/Npix=11.missing_file.parquet",
}
_result = results.loc[results.target == "file footers vs _metadata"].squeeze()
assert bad_files == set(_result.bad_files), "wrong bad_files"


@pytest.mark.parametrize("check_metadata", [(False,), (True,)])
def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
"""Schema tests should pass if all column names, dtypes, and (optionally) metadata match, else fail."""
args = VerificationArguments(
input_catalog_path=small_sky_object_catalog,
output_path=tmp_path,
output_artifact_name="small_sky_object_verification_report",
truth_schema=small_sky_object_catalog / "dataset/_common_metadata",
)
with pytest.raises(NotImplementedError, match="not yet implemented"):
runner.run(args)
verifier = runner.Verifier.from_args(args)
verifier.test_schemas(check_metadata=check_metadata)
all_passed = verifier.results_df.passed.all()
assert all_passed, "valid catalog failed"
# # [FIXME] pandas metadata is unexpectedly missing hats columns
# if not all_passed:
# _test = verifier.results_df.test == "schema consistency"
# _target = verifier.results_df.target == "constructed_truth_schema"
# assert verifier.results_df.loc[~(_test & _target)].passed.all()

args = VerificationArguments(
input_catalog_path=bad_schemas_dir,
output_path=tmp_path,
truth_schema=bad_schemas_dir / "dataset/_common_metadata.import_truth",
)
verifier = runner.Verifier.from_args(args)
verifier.test_schemas(check_metadata=check_metadata)
results = verifier.results_df
all_failed = not any(results.passed)
assert all_failed, "invalid catalog passed"

targets_failed = {"constructed_truth_schema", "_common_metadata vs truth", "file footers vs truth"}
if not check_metadata:
targets_passed = {"_metadata vs truth"}
else:
targets_passed = set()
targets_failed = targets_failed.union({"_metadata vs truth"})
assert targets_passed.union(targets_failed) == set(results.target), "wrong targets"
assert all(results.loc[results.target.isin(targets_passed)].passed), "valid targets failed"
assert not any(results.loc[results.target.isin(targets_failed)].passed), "invalid targets passed"

target = "file footers vs truth"
result = results.loc[results.target == target].squeeze()
expected_bad_files = {
"Norder=0/Dir=0/Npix=11.extra_column.parquet",
"Norder=0/Dir=0/Npix=11.missing_column.parquet",
"Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet",
}
if check_metadata:
expected_bad_files = expected_bad_files.union({"Norder=0/Dir=0/Npix=11.no_metadata.parquet"})
assert expected_bad_files == set(result.bad_files), "wrong bad_files"
49 changes: 9 additions & 40 deletions tests/hats_import/verification/test_verification_arguments.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,40 @@
"""Tests of argument validation"""

import pytest
from hats import read_hats

from hats_import.verification.arguments import VerificationArguments


def test_none():
"""No arguments provided. Should error for required args."""
with pytest.raises(ValueError):
with pytest.raises(TypeError):
VerificationArguments()


def test_empty_required(tmp_path):
"""*Most* required arguments are provided."""
## Input path is missing
with pytest.raises(ValueError, match="input_catalog_path"):
VerificationArguments(
output_path=tmp_path,
output_artifact_name="small_sky_object_verification_report",
)
with pytest.raises(TypeError, match="input_catalog_path"):
VerificationArguments(output_path=tmp_path)


def test_invalid_paths(tmp_path, small_sky_object_catalog):
"""Required arguments are provided, but paths aren't found."""
## Prove that it works with required args
VerificationArguments(
input_catalog_path=small_sky_object_catalog,
output_path=tmp_path,
output_artifact_name="small_sky_object_verification_report",
)
VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)

## Input path is invalid catalog
with pytest.raises(ValueError, match="input_catalog_path not a valid catalog"):
VerificationArguments(
input_catalog_path="path",
output_path=f"{tmp_path}/path",
output_artifact_name="small_sky_object_verification_report",
)


def test_good_paths(tmp_path, small_sky_object_catalog):
"""Required arguments are provided, and paths are found."""
tmp_path_str = str(tmp_path)
args = VerificationArguments(
input_catalog_path=small_sky_object_catalog,
output_path=tmp_path,
output_artifact_name="small_sky_object_verification_report",
)
assert args.input_catalog_path == small_sky_object_catalog
assert str(args.output_path) == tmp_path_str
assert str(args.tmp_path).startswith(tmp_path_str)
## Input path is not an existing directory
with pytest.raises(ValueError, match="input_catalog_path must be an existing directory"):
VerificationArguments(input_catalog_path="path", output_path=f"{tmp_path}/path")


@pytest.mark.timeout(5)
def test_catalog_object(tmp_path, small_sky_object_catalog):
def test_good_paths(tmp_path, small_sky_object_catalog):
"""Required arguments are provided, and paths are found.
NB: This is currently the last test in alpha-order, and may require additional
time to teardown fixtures."""
small_sky_catalog_object = read_hats(catalog_path=small_sky_object_catalog)
tmp_path_str = str(tmp_path)
args = VerificationArguments(
input_catalog=small_sky_catalog_object,
output_path=tmp_path,
output_artifact_name="small_sky_object_verification_report",
)
args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
assert args.input_catalog_path == small_sky_object_catalog
assert str(args.output_path) == tmp_path_str
assert str(args.tmp_path).startswith(tmp_path_str)

0 comments on commit 7b2fb2f

Please sign in to comment.