mlederbauer · mlederbauer · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/scripts/analysis/accuracy_table.py b/scripts/analysis/accuracy_table.py
@@ -0,0 +1,46 @@
+import pandas as pd
+
+
+def create_accuracy_table(one_target_path):
+    """
+    Build, print and return the random-forest accuracy table.
+
+    Reads the CSV at ``one_target_path``. For each of "metal", "E_ligand"
+    and "X3_ligand" it takes the first row with model "random_forest" and
+    dataset_fraction 1.0, once with ``nmr_only`` False ("With Ligands") and
+    once with it True ("Without Ligands"). A cell holds "mean ± half-width"
+    in percent, half-width = (accuracy_hb - accuracy_lb) / 2, else "N/A".
+    """
+    one_target_df = pd.read_csv(one_target_path)
+    is_rf = one_target_df["model"] == "random_forest"
+    is_full = one_target_df["dataset_fraction"] == 1.0
+    # Element-wise comparison is required: ``series is False`` tests object
+    # identity, is always False, and made every cell come out as "N/A".
+    nmr_only = one_target_df["nmr_only"].eq(True)
+    with_ligands = one_target_df["nmr_only"].eq(False)
+
+    def format_accuracy(rows):
+        """Format the first row of ``rows`` as "mean ± half-width" in %."""
+        if rows.empty:
+            return "N/A"
+        mean = rows["accuracy_mean"].iloc[0]
+        spread = rows["accuracy_hb"].iloc[0] - rows["accuracy_lb"].iloc[0]
+        return f"{mean * 100:.1f} ± {(spread / 2) * 100:.1f}"
+
+    results = []
+    for target in ["metal", "E_ligand", "X3_ligand"]:
+        selected = is_rf & is_full & (one_target_df["target"] == target)
+        with_acc = format_accuracy(one_target_df[selected & with_ligands])
+        without_acc = format_accuracy(one_target_df[selected & nmr_only])
+        results.append([target, with_acc, without_acc])
+
+    results_df = pd.DataFrame(
+        results,
+        columns=[
+            "Target",
+            "With Ligands: Accuracy / %",
+            "Without Ligands: Accuracy / %",
+        ],
+    )
+    print(results_df)
+    return results_df
diff --git a/scripts/reproduce_results.py b/scripts/reproduce_results.py
@@ -1,29 +1,14 @@
-"""Scripts for reproducing all results shown in the report."""
+"""Script for reproducing all results shown in the report."""
 
 import argparse
 import shlex
 import subprocess
 
 
-def run_script(script_name, targets, include_structural, max_evals):
+def run_command(cmd):
     """
-    Helper function to run the Python scripts via subprocess, ensuring safety by escaping inputs.
+    Helper function to run a command via subprocess.
     """
-    # Sanitize each target to prevent shell injection, even though shell=False by default
-    targets = [shlex.quote(target) for target in targets]
-    target_string = " ".join(targets)
-
-    # Safely prepare the command array
-    cmd = [
-        "python",
-        script_name,
-        "--target",
-        target_string,
-        "--include_structural",
-        str(include_structural),
-        "--max_evals",
-        str(max_evals),
-    ]
     print(
         "---------------------------------------------------------------------"
     )
@@ -32,31 +17,42 @@ def run_script(script_name, targets, include_structural, max_evals):
         "---------------------------------------------------------------------"
     )
 
-    # pylint: disable=subprocess-run-check
     subprocess.run(cmd, check=True, shell=False)  # noqa: S603
 
 
+def run_script(
+    script_name, targets=None, include_structural=None, max_evals=None
+):
+    """
+    Run ``python <script_name>`` with the options that were given.
+
+    Targets are shell-quoted and space-joined into one ``--target`` value;
+    falsy ``targets`` and options left as ``None`` are omitted.
+    """
+    command = ["python", script_name]
+    if targets:
+        command += ["--target", " ".join(map(shlex.quote, targets))]
+    optional_flags = (
+        ("--include_structural", include_structural),
+        ("--max_evals", max_evals),
+    )
+    for flag, value in optional_flags:
+        if value is not None:
+            command += [flag, str(value)]
+    run_command(command)
+
+
 def run_one_target_experiments(max_evals):
     """
     Runs the experiments for single target predictions.
     """
     targets = ["metal", "X3_ligand", "E_ligand"]
-    # Run with structural features False for all, but True for X3_ligand
     for target in targets:
-        if target == "X3_ligand":
-            include_structural = True
-            run_script(
-                "./scripts/training/one_target.py",
-                [target],
-                include_structural,
-                max_evals,
-            )
-        include_structural = False
         run_script(
-            "./scripts/training/one_target.py",
-            [target],
-            include_structural,
-            max_evals,
+            "./scripts/training/one_target.py", [target], True, max_evals
+        )
+        run_script(
+            "./scripts/training/one_target.py", [target], False, max_evals
         )
 
 
@@ -70,76 +66,38 @@ def run_multi_target_experiments(max_evals):
         ("X3_ligand", "E_ligand"),
         ("metal", "E_ligand", "X3_ligand"),
     ]
-    # Run with and without structural features for the combination of all three targets
     for targets in target_combinations:
-        if len(targets) > 2:
-            include_structural = True
-            run_script(
-                "./scripts/training/multi_targets.py",
-                targets,
-                include_structural,
-                max_evals,
-            )
-        include_structural = False
         run_script(
-            "./scripts/training/multi_targets.py",
-            targets,
-            include_structural,
-            max_evals,
+            "./scripts/training/multi_targets.py", targets, False, max_evals
         )
 
 
 def run_baselines():
-    # Run the script scripts/training/baselines.py
-    cmd = ["python", "scripts/training/baselines.py"]
-    print(
-        "---------------------------------------------------------------------"
-    )
-    print(f"Running command: {' '.join(cmd)}")
-    print(
-        "---------------------------------------------------------------------"
-    )
-
-    # pylint: disable=subprocess-run-check
-    subprocess.run(cmd, check=True, shell=False)  # noqa: S603
+    """
+    Runs the baseline experiments.
+    """
+    run_command(["python", "scripts/training/baselines.py"])
 
-    return
 
+def run_visualize_results(script_name, max_evals):
+    """
+    Runs ``script_name`` via ``run_script``, forwarding only ``max_evals``.
+    """
+    run_script(script_name, max_evals=max_evals)
 
-def run_visualize_results(script_name: str, max_evals: int):
-    cmd = [
-        "python",
-        script_name,
-        "--max_evals",
-        str(max_evals),
-        "-me",
-        str(max_evals),
-    ]
-    print(
-        "---------------------------------------------------------------------"
-    )
-    print(f"Running command: {' '.join(cmd)}")
-    print(
-        "---------------------------------------------------------------------"
-    )
 
-    # pylint: disable=subprocess-run-check
-    subprocess.run(cmd, check=True, shell=False)  # noqa: S603
+def run_dataframe_statistics():
+    """
+    Runs scripts/analysis/dataset_statistics.py via ``run_command``.
+    """
+    run_command(["python", "scripts/analysis/dataset_statistics.py"])
 
 
-def run_dataframe_statistics():
-    cmd = [
-        "python",
-        "scripts/analysis/dataset_statistics.py",
-    ]
-    print(
-        "---------------------------------------------------------------------"
-    )
-    print(f"Running command: {' '.join(cmd)}")
-    print(
-        "---------------------------------------------------------------------"
-    )
-    subprocess.run(cmd, check=True, shell=False)  # noqa: S603
+def run_accuracy_table():
+    """Runs scripts/analysis/accuracy_table.py via ``run_command``."""
+    # NOTE(review): as added in this diff, accuracy_table.py only defines
+    # create_accuracy_table() and never calls it; this likely prints nothing.
+    run_command(["python", "scripts/analysis/accuracy_table.py"])
 
 
 def main():
@@ -155,14 +113,14 @@ def main():
     )
     args = parser.parse_args()
 
-    # run baselines
     run_baselines()
     run_dataframe_statistics()
     run_one_target_experiments(args.max_evals)
     run_multi_target_experiments(args.max_evals)
     run_visualize_results(
         "scripts/analysis/visualize_results.py", max_evals=args.max_evals
     )
+    run_accuracy_table()
 
 
 if __name__ == "__main__":