feat: add new "cumulus-etl export" command for bulk exporting #951
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: CI | |
on: | |
pull_request: | |
push: | |
branches: | |
- main | |
paths-ignore: | |
- 'docs/**' | |
- '*.md' | |
# The goal here is to cancel older workflows when a PR is updated (because it's pointless work) | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} | |
cancel-in-progress: true | |
jobs: | |
unittest: | |
name: unit tests | |
runs-on: ubuntu-22.04 | |
strategy: | |
matrix: | |
# Don't go crazy with the Python versions as the test suite is a beast. | |
# Just test the minimum version we support. | |
python-version: ["3.10"] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Set up Python ${{ matrix.python-version }} | |
uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: Install dependencies | |
run: | | |
python -m pip install --upgrade pip | |
pip install .[tests] | |
- name: Check out MS tool | |
uses: actions/checkout@v4 | |
with: | |
repository: microsoft/Tools-for-Health-Data-Anonymization | |
path: mstool | |
- name: Build MS tool | |
run: | | |
sudo apt-get update | |
sudo apt-get install dotnet6 | |
dotnet publish \ | |
--runtime=linux-x64 \ | |
--configuration=Release \ | |
-p:PublishSingleFile=true \ | |
--output=$HOME/.local/bin \ | |
mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool | |
- name: Test with pytest | |
run: | | |
python -m pytest --cov=cumulus_etl --cov-report=xml | |
- name: Check coverage report | |
if: github.ref != 'refs/heads/main' | |
uses: orgoro/[email protected] | |
with: | |
coverageFile: coverage.xml | |
token: ${{ secrets.GITHUB_TOKEN }} | |
thresholdAll: .93 | |
thresholdNew: 1 | |
thresholdModified: 1 | |
nlp-regression: | |
runs-on: ubuntu-latest | |
env: | |
UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }} | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Install Docker | |
uses: docker/setup-buildx-action@v3 | |
- name: Build ETL image | |
uses: docker/build-push-action@v6 | |
with: | |
load: true # just build, no push | |
tags: smartonfhir/cumulus-etl:latest | |
- name: Download NLP images | |
run: docker compose --profile covid-symptom up -d --quiet-pull | |
- name: Run NLP | |
run: | | |
export DATADIR=$(realpath tests/data/nlp-regression) | |
# Run the NLP task | |
docker compose run --rm \ | |
--volume $DATADIR:/in \ | |
cumulus-etl \ | |
/in/input \ | |
/in/run-output \ | |
/in/phi \ | |
--output-format=ndjson \ | |
--task covid_symptom__nlp_results | |
# Compare results | |
export OUTDIR=$DATADIR/run-output/covid_symptom__nlp_results | |
sudo chown -R $(id -u) $OUTDIR | |
sed -i 's/"generated_on": "[^"]*", //g' $OUTDIR/*.ndjson | |
diff -upr $DATADIR/expected-output $OUTDIR | |
echo "All Good!" | |
lint: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Install linters | |
run: | | |
python -m pip install --upgrade pip | |
pip install .[dev] | |
- name: Run ruff | |
run: ruff check --output-format=github . |