Skip to content

Commit

Permalink
enh: direct stdout and stderr to devnull, fix sync dry run logic
Browse files (browse the repository at this point in the history)
  • Loading branch information
vkt1414 committed May 3, 2024
1 parent 401c841 commit 4d599ca
Showing 1 changed file with 38 additions and 8 deletions.
46 changes: 38 additions & 8 deletions idc_index/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -470,14 +470,22 @@ def get_viewer_URL(
return viewer_url

def _validate_update_manifest_and_get_download_size(
self, manifestFile, downloadDir, validate_manifest
self,
manifestFile,
downloadDir,
validate_manifest,
show_progress_bar,
use_s5cmd_sync_dry_run,
) -> tuple[float, str, Path]:
"""
Validates the manifest file by checking the URLs in the manifest
Args:
manifestFile (str): The path to the manifest file.
downloadDir (str): The path to the download directory.
validate_manifest (bool, optional): If True, validates the manifest for any errors. Defaults to True.
show_progress_bar (bool, optional): If True, tracks the progress of download
use_s5cmd_sync_dry_run (bool, optional): If True, improves the accuracy of progress bar in unusual circumstances
Returns:
total_size (float): The total size of all series in the manifest file.
endpoint_to_use (str): The endpoint URL to use (either AWS or GCP).
Expand Down Expand Up @@ -590,7 +598,16 @@ def _validate_update_manifest_and_get_download_size(

# Write a temporary manifest file
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_manifest_file:
merged_df["s5cmd_cmd"] = "cp " + merged_df["s3_url"] + " " + downloadDir
if (
show_progress_bar
and use_s5cmd_sync_dry_run
and len(os.listdir(downloadDir)) != 0
):
merged_df["s5cmd_cmd"] = (
"sync " + merged_df["s3_url"] + " " + downloadDir
)
else:
merged_df["s5cmd_cmd"] = "cp " + merged_df["s3_url"] + " " + downloadDir
merged_df["s5cmd_cmd"].to_csv(temp_manifest_file, header=False, index=False)
print("Parsing the manifest is finished. Download will begin soon")
return total_size, endpoint_to_use, Path(temp_manifest_file.name)
Expand Down Expand Up @@ -764,8 +781,8 @@ def _s5cmd_run(
logger.info(f"use_s5cmd_sync_dry_run: {use_s5cmd_sync_dry_run}")

if quiet:
stdout = subprocess.PIPE
stderr = subprocess.STDOUT
stdout = subprocess.DEVNULL
stderr = subprocess.DEVNULL
else:
stdout = None
stderr = None
Expand Down Expand Up @@ -924,7 +941,11 @@ def download_from_manifest(
endpoint_to_use,
temp_manifest_file,
) = self._validate_update_manifest_and_get_download_size(
manifestFile, downloadDir, validate_manifest
manifestFile,
downloadDir,
validate_manifest,
show_progress_bar,
use_s5cmd_sync_dry_run,
)

total_size_rounded = round(total_size, 2)
Expand Down Expand Up @@ -1028,9 +1049,18 @@ def download_from_selection(
# Download the files
# make temporary file to store the list of files to download
with tempfile.NamedTemporaryFile(mode="w", delete=False) as manifest_file:
result_df["s5cmd_cmd"] = (
"cp " + result_df["series_aws_url"] + " " + downloadDir
)
if (
show_progress_bar
and use_s5cmd_sync_dry_run
and len(os.listdir(downloadDir)) != 0
):
result_df["s5cmd_cmd"] = (
"sync " + result_df["series_aws_url"] + " " + downloadDir
)
else:
result_df["s5cmd_cmd"] = (
"cp " + result_df["series_aws_url"] + " " + downloadDir
)
result_df["s5cmd_cmd"].to_csv(manifest_file, header=False, index=False)
logger.info(
"""
Expand Down

0 comments on commit 4d599ca

Please sign in to comment.