From aa3ea4e27712ed9dfad32da539a0adc755de66ba Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 6 Jan 2025 12:04:16 -0800 Subject: [PATCH 1/2] Add unit test to replicate missing file size --- tests/test_wfutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_wfutils.py b/tests/test_wfutils.py index c0b9038b..fb0481ad 100644 --- a/tests/test_wfutils.py +++ b/tests/test_wfutils.py @@ -320,7 +320,7 @@ def test_workflow_execution_record_from_workflow_job(site_config, fixtures_dir, assert wfe.ended_at_time -def test_make_data_objects_substutes_workflow_id(site_config, fixtures_dir, tmp_path): +def test_make_data_objects_includes_workflow_execution_id_and_file_size(site_config, fixtures_dir, tmp_path): job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) job = WorkflowJob(site_config, workflow_state, job_metadata) @@ -329,7 +329,7 @@ def test_make_data_objects_substutes_workflow_id(site_config, fixtures_dir, tmp_ for data_object in data_objects: assert isinstance(data_object, DataObject) assert job.workflow_execution_id in data_object.description - + assert data_object.file_size_bytes From 0a9743bc28143faee7678b66f2fa1aacdee9915d Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Mon, 6 Jan 2025 12:04:45 -0800 Subject: [PATCH 2/2] Update make_data_objects method to include file size --- nmdc_automation/workflow_automation/wfutils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nmdc_automation/workflow_automation/wfutils.py b/nmdc_automation/workflow_automation/wfutils.py index 056f455f..dbc2d132 100755 --- a/nmdc_automation/workflow_automation/wfutils.py +++ b/nmdc_automation/workflow_automation/wfutils.py @@ -527,6 +527,7 @@ def make_data_objects(self, output_dir: Union[str, Path] = None) -> List[DataObj md5_sum = _md5(output_file_path) + file_size_bytes = output_file_path.stat().st_size file_url = 
f"{self.url_root}/{self.was_informed_by}/{self.workflow_execution_id}/{output_file_path.name}" # copy the file to the output directory if provided @@ -542,6 +543,7 @@ def make_data_objects(self, output_dir: Union[str, Path] = None) -> List[DataObj data_object = DataObject( id=output_spec["id"], name=output_file_path.name, type="nmdc:DataObject", url=file_url, data_object_type=output_spec["data_object_type"], md5_checksum=md5_sum, + file_size_bytes=file_size_bytes, description=output_spec["description"].replace('{id}', self.workflow_execution_id), was_generated_by=self.workflow_execution_id, )