From 8c340b6ec48dd895ceeeccf57303a118715a030a Mon Sep 17 00:00:00 2001
From: Michael Terry
Date: Mon, 20 May 2024 12:21:52 -0400
Subject: [PATCH] fix: be more graceful when MS tool moves files underneath us

It moves temporary files into their final place, and that might happen
as we are trying to get the file size - so handle that exception.
---
 .pre-commit-config.yaml    |  2 +-
 cumulus_etl/deid/mstool.py | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0bbaa623..0a8c1006 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,4 +9,4 @@ repos:
         # supported by your project here, or alternatively use
         # pre-commit's default_language_version, see
         # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.11
+        language_version: python3.12
diff --git a/cumulus_etl/deid/mstool.py b/cumulus_etl/deid/mstool.py
index 5d9be24a..a6d34fdf 100644
--- a/cumulus_etl/deid/mstool.py
+++ b/cumulus_etl/deid/mstool.py
@@ -80,6 +80,17 @@ def _compare_file_sizes(target: dict[str, int], current: dict[str, int]) -> floa
     return total_current / total_expected
 
 
+def _get_file_size_safe(path: str) -> int:
+    try:
+        return os.path.getsize(path)
+    except FileNotFoundError:
+        # The MS Tool moves temporary files around as it completes each file,
+        # so we guard against an unlucky race condition of a file being moved
+        # before we can query its size. (Total size will be wrong for a moment,
+        # but it will correct itself in a second.)
+        return 0
+
+
 def _count_file_sizes(pattern: str) -> dict[str, int]:
     """Returns all files that match the given pattern and their sizes"""
-    return {os.path.basename(filename): os.path.getsize(filename) for filename in glob.glob(pattern)}
+    return {os.path.basename(filename): _get_file_size_safe(filename) for filename in glob.glob(pattern)}