diff --git a/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py b/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py index 4964cea2fb..a8ecf05e0b 100644 --- a/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py +++ b/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py @@ -23,6 +23,7 @@ MAX_CUSTOM_LEN = 100 IMAGE_DETAILS = ImageColumns.IMAGE_DETAILS.value + def extract_features(image_dataset: pd.DataFrame, target_column: str, image_mode: str = None, @@ -91,7 +92,12 @@ def extract_features(image_dataset: pd.DataFrame, return results, feature_names -def process_data(data, tag, feature_names, feature_metadata, row_feature_values, blacklisted_tags): +def process_data(data, + tag, + feature_names, + feature_metadata, + row_feature_values, + blacklisted_tags): if isinstance(data, bytes): data = data.decode(errors='replace') if len(data) > MAX_CUSTOM_LEN: @@ -110,7 +116,12 @@ def process_data(data, tag, feature_names, feature_metadata, row_feature_values, 'in the feature names. Ignoring tag ' 'from extracted metadata.') -def append_exif_features(image, row_feature_values, feature_names, blacklisted_tags, feature_metadata): + +def append_exif_features(image, + row_feature_values, + feature_names, + blacklisted_tags, + feature_metadata): if isinstance(image, str): image_pointer_path = get_image_pointer_from_path(image) with Image.open(image_pointer_path) as im: @@ -119,9 +130,14 @@ def append_exif_features(image, row_feature_values, feature_names, blacklisted_t if tag_id in IFD_CODE_LOOKUP: ifd_data = exifdata.get_ifd(tag_id) for nested_tag_id, data in ifd_data.items(): - tag = ExifTags.GPSTAGS.get(nested_tag_id, None) or ExifTags.TAGS.get(nested_tag_id, None) or nested_tag_id - process_data(data, tag, feature_names, feature_metadata, row_feature_values, blacklisted_tags) + tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \ + or ExifTags.TAGS.get(nested_tag_id, None) \ + or nested_tag_id + process_data(data, tag, feature_names, + feature_metadata, row_feature_values, + blacklisted_tags) else: tag = str(TAGS.get(tag_id, tag_id)) data = exifdata.get(tag_id) - process_data(data, tag, feature_names, feature_metadata, row_feature_values, blacklisted_tags) + process_data(data, tag, feature_names, feature_metadata, + row_feature_values, blacklisted_tags) diff --git a/responsibleai_vision/responsibleai_vision/utils/image_reader.py b/responsibleai_vision/responsibleai_vision/utils/image_reader.py index 898034d754..4cfac5c81b 100644 --- a/responsibleai_vision/responsibleai_vision/utils/image_reader.py +++ b/responsibleai_vision/responsibleai_vision/utils/image_reader.py @@ -22,6 +22,7 @@ IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD} + def _get_retry_session(url): domain = urlparse(url.lower()).netloc if domain in _requests_sessions: @@ -94,7 +95,10 @@ def get_all_exif_feature_names(image_dataset): ifd_data = exifdata.get_ifd(tag_id) for nested_tag_id in ifd_data: - nested_tag = ExifTags.GPSTAGS.get(nested_tag_id, None) or ExifTags.TAGS.get(nested_tag_id, None) or nested_tag_id + nested_tag = ExifTags.GPSTAGS.get(nested_tag_id, + None) \ + or ExifTags.TAGS.get(nested_tag_id, None) \ + or nested_tag_id exif_feature_names.add(nested_tag) else: # get the tag name, instead of human unreadable tag id