Skip to content

Commit

Permalink
Merge pull request #291 from Pavan-Bellam/workflow2
Browse files: browse the repository at this point in the history
resolved issue regarding commit workflow
  • Loading branch information
pradeeban authored Apr 5, 2022
2 parents 9b57cc5 + 91a05b3 commit 8ebbb50
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 17 deletions.
21 changes: 10 additions & 11 deletions modules/png-extraction/ImageExtractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def initialize_config_and_execute(config_values):

print_images = bool(configs['PrintImages'])
print_only_common_headers = bool(configs['CommonHeadersOnly'])
global public_headers_bool
public_headers_bool = bool(configs['PublicHeadersOnly'])
global SpecificHeadersOnly
PublicHeadersOnly = bool(configs['PublicHeadersOnly'])
SpecificHeadersOnly = bool(configs['SpecificHeadersOnly'])
depth = int(configs['Depth'])
processes = int(configs['UseProcesses']) # how many processes to use.
Expand Down Expand Up @@ -96,12 +94,12 @@ def initialize_config_and_execute(config_values):
logging.info("------- Values Initialization DONE -------")
final_res = execute(pickle_file, dicom_home, output_directory, print_images, print_only_common_headers, depth,
processes, flattened_to_level, email, send_email, no_splits, is16Bit, png_destination,
failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start)
failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start,SpecificHeadersOnly,PublicHeadersOnly)
return final_res


# Function for getting a list of (field, value) tuples
def get_tuples(plan, outlist = None, key = ""):
def get_tuples(plan,SpecificHeadersOnly,PublicHeadersOnly, outlist = None, key = ""):
if len(key)>0:
key = key + "_"
if not outlist:
Expand Down Expand Up @@ -135,7 +133,7 @@ def get_tuples(plan, outlist = None, key = ""):
if plan[i] not in headers:
headers.append(plan[i])
else:
if (public_headers_bool):
if (PublicHeadersOnly):
for aa in plan.dir():
headers.append(plan[aa])
else:
Expand All @@ -154,7 +152,7 @@ def get_tuples(plan, outlist = None, key = ""):
if type(value) is dicom.sequence.Sequence:
for nn, ss in enumerate(list(value)):
newkey = "_".join([key,("%d"%nn),name]) if len(key) else "_".join([("%d"%nn),name])
candidate = get_tuples(ss,outlist=None,key=newkey)
candidate = get_tuples(ss,SpecificHeadersOnly,PublicHeadersOnly,outlist=None,key=newkey)
# if extracted tuples are too big condense to a string
if len(candidate)>2000:
outlist.append((newkey,str(candidate)))
Expand All @@ -177,15 +175,15 @@ def get_tuples(plan, outlist = None, key = ""):


def extract_headers(f_list_elem):
nn,ff = f_list_elem # unpack enumerated list
nn,ff,SpecificHeadersOnly,PublicHeadersOnly = f_list_elem # unpack enumerated list
plan = dicom.dcmread(ff, force=True) # reads in dicom file
# checks if this file has an image
c=True
try:
check = plan.pixel_array # throws error if dicom file has no image
except:
c = False
kv = get_tuples(plan) # gets tuple for field,val pairs for this file. function defined above
kv = get_tuples(plan,SpecificHeadersOnly,PublicHeadersOnly) # gets tuple for field,val pairs for this file. function defined above
# dicom images should not have more than 300 dicom tags
if len(kv)>300:
logging.debug(str(len(kv)) + " dicom tags produced by " + ff)
Expand Down Expand Up @@ -354,7 +352,7 @@ def fix_mismatch(with_VRs=['PN', 'DS', 'IS', 'LO', 'OB']):

def execute(pickle_file, dicom_home, output_directory, print_images, print_only_common_headers, depth,
processes, flattened_to_level, email, send_email, no_splits, is16Bit, png_destination,
failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start):
failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start,SpecificHeadersOnly,PublicHeadersOnly):
err = None
fix_mismatch()
if processes == 0.5: # use half the cores to avoid high ram usage
Expand Down Expand Up @@ -421,7 +419,8 @@ def execute(pickle_file, dicom_home, output_directory, print_images, print_only_

with Pool(core_count) as p:
# attach the SpecificHeadersOnly and PublicHeadersOnly flags to each enumerated item sent to the workers
res = p.imap_unordered(extract_headers, enumerate(chunk))
chunks_list=[tups + (SpecificHeadersOnly,PublicHeadersOnly,) for tups in enumerate(chunk)]
res = p.imap_unordered(extract_headers, chunks_list)
for i,e in enumerate(res):
headerlist.append(e)
data = pd.DataFrame(headerlist)
Expand Down
8 changes: 6 additions & 2 deletions tests/integration/test_e2e_png_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def generate_kwargs(self, out_dir: PurePath, **kwargs):
'meta_directory': str(out_dir / 'meta') + '/',
'LOG_FILENAME': str(out_dir / 'ImageExtractor.out'),
'metadata_col_freq_threshold': 0.1,
't_start': time.time()
't_start': time.time(),
'SpecificHeadersOnly': False,
'PublicHeadersOnly' : True
}
kwargs_dict.update(**kwargs)
return kwargs_dict
Expand Down Expand Up @@ -142,7 +144,9 @@ def generate_config(self, **kwargs):
"FlattenedToLevel": "patient",
"is16Bit": True,
"SendEmail": False,
"YourEmail": "[email protected]"
"YourEmail": "[email protected]",
'SpecificHeadersOnly' : False,
'PublicHeadersOnly': True
}
config.update(**kwargs)
return config
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_png_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ class TestExtractHeaders:
Test ImageExtractor.extract_headers
"""
valid_test_dcm_file = 0, str(
pytest.data_dir / 'png-extraction' / 'input' / 'test-img.dcm')
pytest.data_dir / 'png-extraction' / 'input' / 'test-img.dcm'), False , True
invalid_test_dcm_file = 0, str(
pytest.data_dir / 'png-extraction' / 'input' / 'no-img.dcm')
pytest.data_dir / 'png-extraction' / 'input' / 'no-img.dcm'), False, True

def test_no_image(self):
"""
Expand Down Expand Up @@ -125,7 +125,7 @@ def test_correct_output(self):
Verifies first key
"""
first_key = self.test_valid_plan.dir()[0]
tuple_list = ImageExtractor.get_tuples(self.test_valid_plan)
tuple_list = ImageExtractor.get_tuples(self.test_valid_plan,False,True)
assert tuple_list[0][0] == first_key

# TODO hasattr error
Expand All @@ -146,7 +146,7 @@ def setup_method(self):
Test Setup
"""
header_list = [ImageExtractor.extract_headers(
(0, self.test_dcm_file))]
(0, self.test_dcm_file,False,True))]
self.file_data = pd.DataFrame(header_list)
self.index = 0
self.invalid_file_data = pd.DataFrame([
Expand Down

0 comments on commit 8ebbb50

Please sign in to comment.