# CI workflow file, as captured from the GitHub Actions run for pull request #1053:
# "fix: allow overwriting delta lake entries with same timestamp"


# Workflow identity, triggers, and run-cancellation policy.
name: CI

on:
  # Every pull request event with the default activity types starts a run.
  pull_request:
  # Pushes only start a run on main, and are skipped when every changed file
  # is under docs/ or is a top-level Markdown file.
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'

# The goal here is to cancel older workflows when a PR is updated (because it's pointless work)
concurrency:
  # Runs are grouped per workflow and per branch: head_ref is the PR's source
  # branch, and the ref name is the fallback for non-PR events such as pushes.
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
  cancel-in-progress: true

jobs:
  # Runs the pytest suite with coverage. The Microsoft FHIR anonymizer
  # command-line tool is built first, before the tests run.
  unittest:
    name: unit tests
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # Don't go crazy with the Python versions as the test suite is a beast.
        # Just test the minimum version we support.
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats the extras brackets as a glob
          pip install ".[tests]"

      - name: Check out MS tool
        uses: actions/checkout@v4
        with:
          repository: microsoft/Tools-for-Health-Data-Anonymization
          path: mstool

      - name: Build MS tool
        run: |
          sudo apt-get update
          # -y: never wait on a confirmation prompt in a non-interactive job
          sudo apt-get install -y dotnet6
          # Publish a single-file binary into ~/.local/bin
          # (presumably on PATH for the later test step -- confirm)
          dotnet publish \
            --runtime=linux-x64 \
            --configuration=Release \
            -p:PublishSingleFile=true \
            --output="$HOME/.local/bin" \
            mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool

      - name: Test with pytest
        run: |
          python -m pytest --cov=cumulus_etl --cov-report=xml

      - name: Log missing coverage
        run: |
          coverage report -m --skip-covered

      - name: Check coverage report
        # Skipped when the run's ref is main itself.
        if: github.ref != 'refs/heads/main'
        # NOTE(review): the version tag was mangled in the captured source
        # ("orgoro/[email protected]"); v3.2 is a reconstruction -- confirm.
        uses: orgoro/coverage@v3.2
        with:
          coverageFile: coverage.xml
          token: ${{ secrets.GITHUB_TOKEN }}
          # Action inputs are strings; quoted so they are passed through as written.
          thresholdAll: "0.98"
          thresholdNew: "1"
          thresholdModified: "1"

  # Builds the ETL Docker image, runs the covid_symptom__nlp_results task on
  # tests/data/nlp-regression, and diffs the output against expected-output.
  nlp-regression:
    runs-on: ubuntu-latest
    env:
      UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}

    steps:
      - uses: actions/checkout@v4

      - name: Install Docker
        uses: docker/setup-buildx-action@v3

      - name: Build ETL image
        uses: docker/build-push-action@v6
        with:
          load: true  # just build, no push
          tags: smartonfhir/cumulus-etl:latest
          build-args: |
            LOCAL_VERSION=${{ github.sha }}

      - name: Download NLP images
        run: |
          # Just start the specific services we need, to conserve CI disk space
          docker compose up -d --quiet-pull cnlpt-negation ctakes-covid

      - name: Run NLP
        run: |
          export DATADIR="$(realpath tests/data/nlp-regression)"

          # Run the NLP task
          docker compose run --rm \
            --volume "$DATADIR:/in" \
            cumulus-etl \
            /in/input \
            /in/run-output \
            /in/phi \
            --export-group nlp-test \
            --export-timestamp 2024-08-29 \
            --output-format=ndjson \
            --task covid_symptom__nlp_results

          # Compare results
          export OUTDIR="$DATADIR/run-output/covid_symptom__nlp_results"
          # Take ownership of the output so the in-place edit below is allowed
          # (presumably the container writes it as another user -- confirm)
          sudo chown -R "$(id -u)" "$OUTDIR"
          # Drop the "generated_on" field before comparing
          # (presumably because its value changes from run to run -- confirm)
          sed -i 's/"generated_on": "[^"]*", //g' "$OUTDIR"/*.ndjson
          diff -upr "$DATADIR/expected-output" "$OUTDIR"
          echo "All Good!"

  # Installs the project's dev extras and runs ruff over the repository.
  lint:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # Pin the interpreter instead of relying on whatever the runner image
      # ships; matches the version used by the unittest job.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install linters
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats the extras brackets as a glob
          pip install ".[dev]"

      - name: Run ruff
        run: ruff check --output-format=github .