From d1b4bab2f7f38cf6c303416abe461c71b33acd1c Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:00:49 -0400 Subject: [PATCH 1/3] updated --- src/common/constants.py | 1 + src/process_manifest.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/common/constants.py b/src/common/constants.py index c13100a..467c50a 100644 --- a/src/common/constants.py +++ b/src/common/constants.py @@ -13,6 +13,7 @@ FILE_ID_FIELD = "id-field" FILE_ID_DEFAULT = "fileID" OMIT_DCF_PREFIX = "omit-DCF-prefix" +DCF_PREFIX = "dg.4DFC/" MD5_DEFAULT = "md5sum" #match data model md5 name TOKEN = "token" API_URL = "api-url" diff --git a/src/process_manifest.py b/src/process_manifest.py index 06eedf2..cf6f8cf 100644 --- a/src/process_manifest.py +++ b/src/process_manifest.py @@ -1,6 +1,6 @@ import csv, os, io -from common.constants import FILE_ID_DEFAULT, FILE_NAME_FIELD, BATCH_BUCKET, S3_BUCKET, FILE_PREFIX, BATCH_ID, BATCH, BATCH_CREATED,\ - FILE_ID_FIELD, UPLOAD_TYPE, FILE_NAME_DEFAULT, FILE_PATH, FILE_SIZE_DEFAULT, BATCH_STATUS, PRE_MANIFEST +from common.constants import FILE_ID_DEFAULT, FILE_NAME_FIELD, BATCH_BUCKET, S3_BUCKET, FILE_PREFIX, BATCH_ID, DCF_PREFIX, BATCH_CREATED,\ + FILE_ID_FIELD, UPLOAD_TYPE, FILE_NAME_DEFAULT, FILE_PATH, FILE_SIZE_DEFAULT, BATCH_STATUS, PRE_MANIFEST, OMIT_DCF_PREFIX from common.graphql_client import APIInvoker from copier import Copier @@ -37,7 +37,7 @@ def process_manifest_file(configs, has_file_id, file_infos, manifest_rows, manif manifest_file_info = None try: if not has_file_id: - result = add_file_id(file_id_name, file_name_name, final_manifest_path , file_infos, manifest_rows, manifest_columns) + result = add_file_id(file_id_name, file_name_name, final_manifest_path , file_infos, manifest_rows, manifest_columns, configs.get(OMIT_DCF_PREFIX)) if not result: print(f"Failed to add file id to the pre-manifest, {final_manifest_path }.") return False @@ -79,15 +79,15 @@ def process_manifest_file(configs, has_file_id, file_infos, manifest_rows, manif return True # This method will create a new manifest file with the file id column added to the pre-manifest. -def add_file_id(file_id_name, file_name_name, final_manifest_path, file_infos, manifest_rows, manifest_columns): +def add_file_id(file_id_name, file_name_name, final_manifest_path, file_infos, manifest_rows, manifest_columns, omit_prefix): output = [] for file in file_infos: row = [row for row in manifest_rows if row[file_name_name] == file["fileName"]][0] + file[FILE_ID_DEFAULT] = file[FILE_ID_DEFAULT] if omit_prefix != True else file[FILE_ID_DEFAULT].replace(DCF_PREFIX, "") row[file_id_name] = file[FILE_ID_DEFAULT] output.append(row.values()) with open(final_manifest_path, 'w', newline='') as f: writer = csv.writer(f, delimiter='\t') writer.writerow(manifest_columns) writer.writerows(output) - return True \ No newline at end of file From 13c357b22743f59cbba91576a0155208bffe0d13 Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:14:29 -0400 Subject: [PATCH 2/3] update --- src/process_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/process_manifest.py b/src/process_manifest.py index cf6f8cf..9817945 100644 --- a/src/process_manifest.py +++ b/src/process_manifest.py @@ -83,7 +83,7 @@ def add_file_id(file_id_name, file_name_name, final_manifest_path, file_infos, m output = [] for file in file_infos: row = [row for row in manifest_rows if row[file_name_name] == file["fileName"]][0] - file[FILE_ID_DEFAULT] = file[FILE_ID_DEFAULT] if omit_prefix != True else file[FILE_ID_DEFAULT].replace(DCF_PREFIX, "") + file[FILE_ID_DEFAULT] = file[FILE_ID_DEFAULT] if omit_prefix == False else file[FILE_ID_DEFAULT].replace(DCF_PREFIX, "") row[file_id_name] = file[FILE_ID_DEFAULT] output.append(row.values()) with open(final_manifest_path, 'w', newline='') as f: From 9f322ad5dfdd85778de47f9462a03e62b459fa1e Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:52:55 -0400 Subject: [PATCH 3/3] update --- src/copier.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/copier.py b/src/copier.py index 7c4b2e4..f417d59 100644 --- a/src/copier.py +++ b/src/copier.py @@ -99,7 +99,8 @@ def copy_file(self, file_info, overwrite, dryrun): self.files_exist_at_dest += 1 file_info[SKIPPED] = True return succeed - + else: + file_info[SKIPPED] = False #self.log.info(f'Copying from {org_url} to s3://{self.bucket_name}/{key.strip("/")} ...') self.log.info(f'Copying from {org_url} to destination folder in S3 bucket ...') dest_size = self._upload_obj(org_url, key, org_size)