From 6eff86478de6e16ecbe33497312ee9ee957772c7 Mon Sep 17 00:00:00 2001
From: Brian Healy <42810347+bfhealy@users.noreply.github.com>
Date: Wed, 3 Jan 2024 11:56:04 -0600
Subject: [PATCH] Refactor analyze_logs to use new runtime line (#526)

---
 tools/analyze_logs.py | 102 ++++++++++++++++++++++--------------------
 1 file changed, 54 insertions(+), 48 deletions(-)

diff --git a/tools/analyze_logs.py b/tools/analyze_logs.py
index c4cbe26..4d308c6 100755
--- a/tools/analyze_logs.py
+++ b/tools/analyze_logs.py
@@ -2,10 +2,11 @@
 import pathlib
 import argparse
 import pandas as pd
-import warnings
 from datetime import timedelta
 import json
 import matplotlib.pyplot as plt
+import os
+from datetime import datetime

 BASE_DIR = pathlib.Path(__file__).parent.parent.absolute()
 plt.rcParams["font.size"] = 16
@@ -18,18 +19,13 @@ def get_parser():
     parser.add_argument(
         "--logs-path",
         type=str,
+        default="generated_features_new/logs",
         help="path (from base_dir) to slurm logs",
     )
-    parser.add_argument(
-        "--job-ids-prefix",
-        type=str,
-        default="job_ids",
-        help="path (from base_dir) + prefix of file containing slurm job ids",
-    )
     parser.add_argument(
         "--logs-name-pattern",
         type=str,
-        default="",
+        default="generate_features_new",
         help="common naming convention for slurm logs (e.g. generate_features)",
     )
     parser.add_argument(
@@ -51,58 +47,66 @@ def get_parser():
         help="name of histogram plot (saved in base_dir)",
     )
     parser.add_argument(
-        "--workflow",
+        "--start-date",
         type=str,
-        default="feature_generation",
-        help="name of workflow",
+        default="2023-12-27",
+        help="Earliest date to include in log search [YYYY-MM-DD]",
     )

     return parser


 def main(
-    logs_path,
-    job_ids_prefix="job_ids",
-    logs_name_pattern="",
+    logs_path="generated_features_new/logs",
+    logs_name_pattern="generate_features_new",
     logs_suffix="out",
     output_prefix="runtime_output",
     plot_name="quad_runtime_hist",
-    workflow="feature_generation",
+    start_date="2023-12-27",
 ):
-    job_ids = pd.read_table(BASE_DIR / f"{job_ids_prefix}_{workflow}.txt", header=None)
     logs_path = BASE_DIR / logs_path
+    log_files = logs_path.glob(f"{logs_name_pattern}_[0-9]*_[0-9]*.{logs_suffix}")
+    log_files = [x for x in log_files]

-    results_dct = {}
-    for id_runtime_pair in job_ids.values:
-        job_id, runtime = id_runtime_pair[0].split(',')
-
-        log_path = logs_path.glob(f"{logs_name_pattern}_{job_id}_*.{logs_suffix}")
-        log_path = [x for x in log_path]
+    if len(log_files) == 0:
+        raise ValueError("Could not find any log files.")

-        if len(log_path) > 0:
-            log_output = pd.read_table(log_path[0], header=None)
+    start_date = datetime.strptime(start_date, '%Y-%m-%d')

-            n_sources_start = int(log_output.iloc[2].values[0].split()[1])
-            n_sources_end = int(log_output.iloc[-1].values[0].split()[3])
-
-            runtime_split = runtime.split("-")
-            if len(runtime_split) == 1:
-                runtime_days = 0
-                runtime_hms = runtime_split[0].split(":")
-            else:
-                runtime_days = int(runtime_split[0])
-                runtime_hms = runtime_split[1].split(":")
-
-            runtime_hours = int(runtime_hms[0])
-            runtime_minutes = int(runtime_hms[1])
-            runtime_seconds = int(runtime_hms[2])
+    results_dct = {}
+    log_count = 0
+    done_count = 0
+    for log_file in log_files:
+
+        mod_time = os.path.getmtime(log_file)
+        mod_datetime = datetime.utcfromtimestamp(mod_time)
+
+        if mod_datetime > start_date:
+            log_count += 1
+            job_id = str(log_file).split("_")[-2]
+
+            try:
+                log_output = pd.read_table(log_file, header=None)
+            except pd.errors.EmptyDataError:
+                # Some logs may be empty if the instance just began
+                continue
+
+            try:
+                n_sources_start = int(log_output.iloc[2].values[0].split()[1])
+            except IndexError:
+                # Some logs may not yet have initial results if instance just began
+                continue
+
+            try:
+                n_sources_end = int(log_output.iloc[-2].values[0].split()[3])
+                runtime = float(log_output.iloc[-1].values[0].split()[3])
+            except IndexError:
+                # Some logs may not yet have final results if the instance is still running
+                continue

             delta = timedelta(
-                days=runtime_days,
-                hours=runtime_hours,
-                minutes=runtime_minutes,
-                seconds=runtime_seconds,
+                seconds=runtime,
             )
             total_seconds = delta.total_seconds()

@@ -113,9 +117,9 @@ def main(
                 "seconds_per_source_start": total_seconds / n_sources_start,
             }

-        else:
-            warnings.warn(f"Could not find log for job ID {job_id}")
+            done_count += 1

+    print(f"Found {log_count} logs modified after {start_date}.")
     # make histogram
     sec_per_lc_start = [x['seconds_per_source_start'] for x in results_dct.values()]

@@ -123,12 +127,14 @@ def main(
     plt.hist(sec_per_lc_start)
     plt.xlabel("Quadrant runtime [sec per lightcurve]")
     plt.ylabel("Count")
-    fig.savefig(BASE_DIR / f"{plot_name}_{workflow}.pdf", bbox_inches='tight')
-    print(f"Saved plot to {BASE_DIR}/{plot_name}_{workflow}.pdf")
+    fig.savefig(BASE_DIR / f"{plot_name}_{logs_name_pattern}.pdf", bbox_inches='tight')
+    print(f"Saved plot to {BASE_DIR}/{plot_name}_{logs_name_pattern}.pdf")

-    with open(BASE_DIR / f"{output_prefix}_{workflow}.json", "w") as f:
+    with open(BASE_DIR / f"{output_prefix}_{logs_name_pattern}.json", "w") as f:
         json.dump(results_dct, f)
-    print(f"Wrote results to {BASE_DIR}/{output_prefix}_{workflow}.json")
+    print(
+        f"Wrote results for {done_count} completed jobs to {BASE_DIR}/{output_prefix}_{logs_name_pattern}.json"
+    )


 if __name__ == "__main__":
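Note (not part of the patch): with the defaults introduced above, a typical invocation of the
refactored script from the repository root could look like the command below. This is a sketch
that assumes the if __name__ == "__main__": block (not shown in this hunk) simply forwards the
parsed arguments to main(); the values shown are just the defaults from the diff.

    python tools/analyze_logs.py \
        --logs-path generated_features_new/logs \
        --logs-name-pattern generate_features_new \
        --start-date 2023-12-27

Only logs modified after --start-date are counted, and the per-quadrant runtime is now read from
the final line of each log rather than from a job_ids file.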