Skip to content

Commit

Permalink
Merge pull request #314 from smart-on-fhir/mikix/safer-file-size
Browse files Browse the repository at this point in the history
fix: be more graceful when MS tool moves files underneath us
  • Loading branch information
mikix authored May 20, 2024
2 parents a322b54 + 8c340b6 commit ceabc53
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ repos:
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.11
language_version: python3.12
13 changes: 12 additions & 1 deletion cumulus_etl/deid/mstool.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@ def _compare_file_sizes(target: dict[str, int], current: dict[str, int]) -> floa
return total_current / total_expected


def _get_file_size_safe(path: str) -> int:
    """
    Returns the size of the given file in bytes, or 0 if the file does not exist.

    :param path: path of the file to measure
    :returns: the file size in bytes, or 0 when the file is missing at the moment of the query
    """
    try:
        return os.path.getsize(path)
    except FileNotFoundError:
        # The MS Tool moves temporary files around as it completes each file,
        # so we guard against an unlucky race condition of a file being moved
        # before we can query its size. (Total size will be wrong for a moment,
        # but it will correct itself in a second.)
        return 0


def _count_file_sizes(pattern: str) -> dict[str, int]:
    """
    Returns all files that match the given pattern and their sizes.

    :param pattern: glob pattern selecting the files to measure
    :returns: a dictionary of file base name -> size in bytes
    """
    # Sizes go through _get_file_size_safe (not os.path.getsize directly) so that a file
    # which disappears between the glob and the size query counts as 0 instead of raising.
    return {
        os.path.basename(filename): _get_file_size_safe(filename)
        for filename in glob.glob(pattern)
    }

0 comments on commit ceabc53

Please sign in to comment.