Skip to content

Commit

Permalink
Merge pull request #66 from globusmedical/fix_unit_test_squashed
Browse files Browse the repository at this point in the history
Fixes for SQ type and unit tests, squashed from PR 64
  • Loading branch information
pchoisel authored Mar 13, 2024
2 parents 3ffe350 + 097b554 commit b37a883
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 93 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ __pycache__
.vscode
build
*.egg-info
.python-version
.python-version
# Ignore Pipfile.lock, since different versions of python and OS produce different hashes
Pipfile.lock
15 changes: 15 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# pipenv is useful for developing local package in
# editable mode. See https://pypi.org/project/pipenv/

[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
dicom-anonymizer = {file = ".", editable = true}

[dev-packages]
pytest = "*"
setuptools = "*"
50 changes: 50 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// cSpell Settings
{
// Version of the setting file. Always 0.2
"version": "0.2",
// language - current active spelling language
"language": "en",
// ignorepaths - list of glob patterns for files to ignore
"ignorePaths": [
"ext",
"*.json",
"Pipfile.lock",
"**/tests"
],
// words - list of words to be always considered correct
"words": [
"anonymization",
"argparse",
"bdist",
"behaviour",
"dcmread",
"DICM",
"DICOM",
"dicomanonymizer",
"dicomfields",
"Edern",
"Fiducials",
"Fontaine",
"Haumont",
"Kitware",
"Laurenn",
"multival",
"pipenv",
"Pipfile",
"pydicom",
"pytest",
"Radiopharmaceutical",
"setuptools",
"simpledicomanonymizer",
"tqdm",
"venv",
"virtualenv",
"xgggg"
]
// flagWords - list of words to be always considered incorrect
// This is useful for offensive words and common spelling errors.
// For example "hte" should be "the"
// "flagWords": [
// "hte"
// ]
}
2 changes: 1 addition & 1 deletion dicomanonymizer/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de
Read data from input path (folder or file) and launch the anonymization.
:param input_path: Path to a folder or to a file. If set to a folder,
then cross all over subfiles and apply anonymization.
then cross all over sub-folders and apply anonymization.
:param output_path: Path to a folder or to a file.
:param anonymization_actions: List of actions that will be applied on tags.
:param deletePrivateTags: Whether to delete private tags.
Expand Down
14 changes: 11 additions & 3 deletions dicomanonymizer/simpledicomanonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def replace_element(element):
See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf
"""
if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'):
element.value = 'Anonymized'
element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters
elif element.VR == 'UI':
replace_element_UID(element)
elif element.VR in ('DS', 'IS'):
Expand All @@ -118,8 +118,16 @@ def replace_element(element):
element.value = b'Anonymized'
elif element.VR == 'SQ':
for sub_dataset in element.value:
for sub_element in sub_dataset.elements():
replace_element(sub_element)
for sub_element in sub_dataset.elements():
if isinstance(sub_element, pydicom.dataelem.RawDataElement):
# RawDataElement is a NamedTuple, so cannot set its value attribute.
# Convert it to a DataElement, replace value, and set it back.
# Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63
e2 = pydicom.dataelem.DataElement_from_raw(sub_element)
replace_element(e2)
sub_dataset.add(e2)
else:
replace_element(sub_element)
else:
raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR))

Expand Down
6 changes: 5 additions & 1 deletion examples/anonymize_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
from pydicom.data import get_testdata_file
from pydicom import dcmread


def main():
original_ds = dcmread(get_testdata_file("CT_small.dcm"))
data_ds = original_ds.copy()
anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place
anonymize_dataset(
data_ds, delete_private_tags=True
) # Anonymization is done in-place
print("Examples of original -> anonymized values:")
for tt in ["PatientName", "PatientID", "StudyDate"]:
print(f" {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'")


if __name__ == "__main__":
main()
29 changes: 23 additions & 6 deletions examples/anonymize_extra_rules.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import argparse
from dicomanonymizer import ALL_TAGS, anonymize, keep


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files')
parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files')
parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description')
parser.add_argument(
"input",
help="Path to the input dicom file or input directory which contains dicom files",
)
parser.add_argument(
"output",
help="Path to the output dicom file or output directory which will contains dicom files",
)
parser.add_argument(
"--suffix",
action="store",
help="Suffix that will be added at the end of series description",
)
args = parser.parse_args()

input_dicom_path = args.input
Expand All @@ -16,7 +27,7 @@ def main():
def setup_series_description(dataset, tag):
element = dataset.get(tag)
if element is not None:
element.value = f'{element.value}-{args.suffix}'
element.value = f"{element.value}-{args.suffix}"

# ALL_TAGS variable is defined on file dicomfields.py
# the 'keep' method is already defined into the dicom-anonymizer
Expand All @@ -28,7 +39,13 @@ def setup_series_description(dataset, tag):
extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description

# Launch the anonymization
anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False)
anonymize(
input_dicom_path,
output_dicom_path,
extra_anonymization_rules,
delete_private_tags=False,
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
137 changes: 71 additions & 66 deletions tests/test_anon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
warnings.filterwarnings("ignore")


def get_all_failed():
def get_all_failed(): # sourcery skip: inline-immediately-returned-variable
# The following files are intended to fail dcmread
# No point including them for anonymization testing
dcmread_failed = [
Expand All @@ -27,50 +27,7 @@ def get_all_failed():
"OT-PAL-8-face.dcm",
]

# TODO: Investigate why these fail replacement test of anonymization
replaced_failed = [
"693_J2KI.dcm",
"JPEG-lossy.dcm",
"JPEG2000-embedded-sequence-delimiter.dcm",
"JPEG2000.dcm",
"JPGExtended.dcm",
"reportsi.dcm",
"reportsi_with_empty_number_tags.dcm",
"SC_rgb_gdcm_KY.dcm",
"SC_rgb_jpeg_lossy_gdcm.dcm",
"693_UNCI.dcm",
"JPEG-LL.dcm",
"JPEG2000_UNC.dcm",
"MR2_J2KI.dcm",
"MR2_J2KR.dcm",
"MR2_UNCI.dcm",
"RG1_J2KI.dcm",
"RG1_J2KR.dcm",
"RG1_UNCI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KR.dcm",
"RG3_UNCI.dcm",
"SC_rgb_gdcm2k_uncompressed.dcm",
"US1_J2KI.dcm",
"US1_J2KR.dcm",
"US1_UNCI.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail deletion test of anonymization
deleted_failed = [
"color3d_jpeg_baseline.dcm",
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail emptying test of anonymization
emptied_failed = [
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]
return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
return dcmread_failed


@lru_cache(maxsize=None)
Expand All @@ -83,6 +40,7 @@ def get_passing_files():
@pytest.fixture(scope="module", params=get_passing_files())
def orig_anon_dataset(request):
orig_ds = dcmread(request.param)
orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing
anon_ds = orig_ds.copy()
anonymize_dataset(anon_ds)
return (orig_ds, anon_ds)
Expand All @@ -91,34 +49,81 @@ def orig_anon_dataset(request):
def test_deleted_tags_are_removed(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset
deleted_tags = dicomfields.X_TAGS
for tt in deleted_tags:
if len(tt) == 2 and tt in orig_ds:
assert tt not in anon_ds

for tt in deleted_tags: # sourcery skip: no-loop-in-tests
if (
len(tt) == 2 and tt in orig_ds
): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests
# TODO: Investigate why Date type are replaced instead of deleted
# See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56
if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests
assert (
tt not in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"

def test_changed_tags_are_replaced(orig_anon_dataset):
changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED")


def is_elem_replaced(orig, anon) -> bool:
if orig.VR == "SQ":
for x, y in zip(orig.value, anon.value):
for tt in changed_tags:
if tt in x and len(x[tt].value) > 0:
assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!"
assert is_elem_replaced(
x[tt], y[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced"
return True

return orig.value != anon.value if orig.value not in empty_values else True


def test_changed_tags_are_replaced(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in changed_tags:
if tt in orig_ds:
assert anon_ds[tt] != orig_ds[tt]
for tt in changed_tags: # sourcery skip: no-loop-in-tests
if tt in orig_ds: # sourcery skip: no-conditionals-in-tests
assert (
tt in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
assert is_elem_replaced(
orig_ds[tt], anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"


def test_empty_tags_are_emptied(orig_anon_dataset):
empty_values = (0, "", "00010101", "000000.00")
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS


def is_elem_empty(elem) -> bool:
if elem.VR == "SQ":
for x in elem.value:
for tt in empty_tags:
if tt in x and len(x[tt].value) > 0:
assert is_elem_empty(
x[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
return True

return elem.value in empty_values


def test_empty_tags_are_emptied(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in empty_tags:
if tt in orig_ds:
assert anon_ds[tt].value in empty_values
for tt in empty_tags: # sourcery skip: no-loop-in-tests
if (
tt in orig_ds and len(orig_ds[tt].value) > 0
): # sourcery skip: no-conditionals-in-tests
assert is_elem_empty(
anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty"
Loading

0 comments on commit b37a883

Please sign in to comment.