Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/accuracy table #99

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions scripts/analysis/accuracy_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pandas as pd


def _format_accuracy(rows):
    """Format the first row of ``rows`` as ``"mean ± spread"`` in percent.

    The spread is half the difference between ``accuracy_hb`` and
    ``accuracy_lb``. Returns ``"N/A"`` when ``rows`` is empty.
    """
    if rows.empty:
        return "N/A"

    mean = rows["accuracy_mean"].iloc[0] * 100
    half_width = (
        (rows["accuracy_hb"].iloc[0] - rows["accuracy_lb"].iloc[0]) / 2
    ) * 100
    return f"{mean:.1f} ± {half_width:.1f}"


def create_accuracy_table(one_target_path):
    """Build, print and return the per-target accuracy summary table.

    Reads the CSV at ``one_target_path`` and, for each of the targets
    ``metal``, ``E_ligand`` and ``X3_ligand``, selects the rows where
    ``model == "random_forest"`` and ``dataset_fraction == 1.0``. Rows with
    ``nmr_only`` false feed the "With Ligands" column and rows with
    ``nmr_only`` true feed the "Without Ligands" column. Only the first
    matching row is used for each cell; a cell with no matching row is
    rendered as ``"N/A"``.

    Args:
        one_target_path: Path (or file-like object) accepted by
            ``pandas.read_csv``. The file must provide the columns
            ``target``, ``model``, ``dataset_fraction``, ``nmr_only``,
            ``accuracy_mean``, ``accuracy_lb`` and ``accuracy_hb``.

    Returns:
        A ``pandas.DataFrame`` with one row per target and the columns
        ``Target``, ``With Ligands: Accuracy / %`` and
        ``Without Ligands: Accuracy / %``.
    """
    one_target_df = pd.read_csv(one_target_path)

    targets = ["metal", "E_ligand", "X3_ligand"]

    # These parts of the filter do not depend on the target, so build them once.
    base_mask = (one_target_df["model"] == "random_forest") & (
        one_target_df["dataset_fraction"] == 1.0
    )
    # Element-wise comparison is required here: ``series is False`` is an
    # identity test on the Series object itself and always evaluates to the
    # plain Python ``False``, which would empty every selection.
    with_ligands_mask = one_target_df["nmr_only"].eq(False)
    without_ligands_mask = one_target_df["nmr_only"].eq(True)

    results = []
    for target in targets:
        target_mask = base_mask & (one_target_df["target"] == target)
        with_ligands = one_target_df[target_mask & with_ligands_mask]
        without_ligands = one_target_df[target_mask & without_ligands_mask]

        results.append(
            [
                target,
                _format_accuracy(with_ligands),
                _format_accuracy(without_ligands),
            ]
        )

    results_df = pd.DataFrame(
        results,
        columns=[
            "Target",
            "With Ligands: Accuracy / %",
            "Without Ligands: Accuracy / %",
        ],
    )
    print(results_df)
    return results_df
142 changes: 50 additions & 92 deletions scripts/reproduce_results.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
"""Scripts for reproducing all results shown in the report."""
"""Script for reproducing all results shown in the report."""

import argparse
import shlex
import subprocess


def run_script(script_name, targets, include_structural, max_evals):
def run_command(cmd):
"""
Helper function to run the Python scripts via subprocess, ensuring safety by escaping inputs.
Helper function to run a command via subprocess.
"""
# Sanitize each target to prevent shell injection, even though shell=False by default
targets = [shlex.quote(target) for target in targets]
target_string = " ".join(targets)

# Safely prepare the command array
cmd = [
"python",
script_name,
"--target",
target_string,
"--include_structural",
str(include_structural),
"--max_evals",
str(max_evals),
]
print(
"---------------------------------------------------------------------"
)
Expand All @@ -32,31 +17,42 @@ def run_script(script_name, targets, include_structural, max_evals):
"---------------------------------------------------------------------"
)

# pylint: disable=subprocess-run-check
subprocess.run(cmd, check=True, shell=False) # noqa: S603


def run_script(
    script_name, targets=None, include_structural=None, max_evals=None
):
    """
    Assemble the ``python <script_name> ...`` argument list and pass it to
    ``run_command``.

    Optional flags are appended only when a value was supplied:
    ``--target`` receives every target, each passed through ``shlex.quote``
    and joined with single spaces into ONE argument; ``--include_structural``
    and ``--max_evals`` receive ``str()`` of their value.
    """
    command = ["python", script_name]

    if targets:
        joined_targets = " ".join(shlex.quote(name) for name in targets)
        command += ["--target", joined_targets]

    # Flag order matches the order in which they are appended to the command.
    optional_flags = (
        ("--include_structural", include_structural),
        ("--max_evals", max_evals),
    )
    for flag, value in optional_flags:
        if value is not None:
            command += [flag, str(value)]

    run_command(command)


def run_one_target_experiments(max_evals):
"""
Runs the experiments for single target predictions.
"""
targets = ["metal", "X3_ligand", "E_ligand"]
# Run with structural features False for all, but True for X3_ligand
for target in targets:
if target == "X3_ligand":
include_structural = True
run_script(
"./scripts/training/one_target.py",
[target],
include_structural,
max_evals,
)
include_structural = False
run_script(
"./scripts/training/one_target.py",
[target],
include_structural,
max_evals,
"./scripts/training/one_target.py", [target], True, max_evals
)
run_script(
"./scripts/training/one_target.py", [target], False, max_evals
)


Expand All @@ -70,76 +66,38 @@ def run_multi_target_experiments(max_evals):
("X3_ligand", "E_ligand"),
("metal", "E_ligand", "X3_ligand"),
]
# Run with and without structural features for the combination of all three targets
for targets in target_combinations:
if len(targets) > 2:
include_structural = True
run_script(
"./scripts/training/multi_targets.py",
targets,
include_structural,
max_evals,
)
include_structural = False
run_script(
"./scripts/training/multi_targets.py",
targets,
include_structural,
max_evals,
"./scripts/training/multi_targets.py", targets, False, max_evals
)


def run_baselines():
# Run the script scripts/training/baselines.py
cmd = ["python", "scripts/training/baselines.py"]
print(
"---------------------------------------------------------------------"
)
print(f"Running command: {' '.join(cmd)}")
print(
"---------------------------------------------------------------------"
)

# pylint: disable=subprocess-run-check
subprocess.run(cmd, check=True, shell=False) # noqa: S603
"""
Runs the baseline experiments.
"""
run_command(["python", "scripts/training/baselines.py"])

return

def run_visualize_results(script_name, max_evals):
    """
    Launch the visualization script at ``script_name`` through
    ``run_script``, forwarding only ``max_evals`` (no targets and no
    structural-feature flag are passed).
    """
    run_script(script_name=script_name, max_evals=max_evals)

def run_visualize_results(script_name: str, max_evals: int):
cmd = [
"python",
script_name,
"--max_evals",
str(max_evals),
"-me",
str(max_evals),
]
print(
"---------------------------------------------------------------------"
)
print(f"Running command: {' '.join(cmd)}")
print(
"---------------------------------------------------------------------"
)

# pylint: disable=subprocess-run-check
subprocess.run(cmd, check=True, shell=False) # noqa: S603
def run_dataframe_statistics():
    """
    Invoke ``scripts/analysis/dataset_statistics.py`` with the ``python``
    executable via ``run_command``; no extra arguments are passed.
    """
    statistics_cmd = ["python", "scripts/analysis/dataset_statistics.py"]
    run_command(statistics_cmd)


def run_dataframe_statistics():
cmd = [
"python",
"scripts/analysis/dataset_statistics.py",
]
print(
"---------------------------------------------------------------------"
)
print(f"Running command: {' '.join(cmd)}")
print(
"---------------------------------------------------------------------"
)
subprocess.run(cmd, check=True, shell=False) # noqa: S603
def run_accuracy_table():
    """
    Invoke ``scripts/analysis/accuracy_table.py`` with the ``python``
    executable via ``run_command``; no extra arguments are passed.

    NOTE(review): the version of ``accuracy_table.py`` added alongside this
    function appears to only define ``create_accuracy_table`` and has no
    ``if __name__ == "__main__"`` entry point, so running it as a script may
    produce no table - confirm and add an entry point if needed.
    """
    accuracy_table_cmd = ["python", "scripts/analysis/accuracy_table.py"]
    run_command(accuracy_table_cmd)


def main():
Expand All @@ -155,14 +113,14 @@ def main():
)
args = parser.parse_args()

# run baselines
run_baselines()
run_dataframe_statistics()
run_one_target_experiments(args.max_evals)
run_multi_target_experiments(args.max_evals)
run_visualize_results(
"scripts/analysis/visualize_results.py", max_evals=args.max_evals
)
run_accuracy_table()


if __name__ == "__main__":
Expand Down
Loading