From f8bf3b75cc54ba9b2801237a6ac12d73469baa82 Mon Sep 17 00:00:00 2001 From: Alex Morehead Date: Tue, 17 Dec 2024 18:05:57 -0600 Subject: [PATCH] Fix method name parsing --- ...ex_method_interaction_analysis_plotting.ipynb | 16 +++++++++++----- ...astex_method_interaction_analysis_plotting.py | 16 +++++++++++----- ...en_method_interaction_analysis_plotting.ipynb | 16 +++++++++++----- ...ckgen_method_interaction_analysis_plotting.py | 16 +++++++++++----- ...rs_method_interaction_analysis_plotting.ipynb | 16 +++++++++++----- ...sters_method_interaction_analysis_plotting.py | 16 +++++++++++----- 6 files changed, 66 insertions(+), 30 deletions(-) diff --git a/notebooks/astex_method_interaction_analysis_plotting.ipynb b/notebooks/astex_method_interaction_analysis_plotting.ipynb index a22cbd1..9554719 100644 --- a/notebooks/astex_method_interaction_analysis_plotting.ipynb +++ b/notebooks/astex_method_interaction_analysis_plotting.ipynb @@ -20,6 +20,7 @@ "metadata": {}, "outputs": [], "source": [ + "import copy\n", "import os\n", "import re\n", "import shutil\n", @@ -279,14 +280,19 @@ " }\n", ")\n", "\n", - "for method in baseline_methods:\n", + "for method in copy.deepcopy(baseline_methods):\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", - " if not os.path.exists(f\"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5\"):\n", - " v1_baseline = method == \"diffdockv1\"\n", - " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + " v1_baseline = method == \"diffdockv1\"\n", + " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + "\n", + " vina_suffix = f\"_{vina_binding_site_method}\" if \"_\" in method else \"\"\n", + " method = method.split(\"_\")[0]\n", "\n", + " if not os.path.exists(\n", + " f\"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5\"\n", + " ):\n", " with open_dict(cfg):\n", " cfg.method = method\n", " cfg.repeat_index = repeat_index\n", @@ -358,7 +364,7 @@ "\n", " # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n", " with pd.HDFStore(\n", - " f\"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5\"\n", + " f\"{method}{vina_suffix}_astex_diverse_interaction_dataframes_{repeat_index}.h5\"\n", " ) as store:\n", " for i, df in enumerate(astex_protein_ligand_interaction_dfs):\n", " store.put(f\"df_{i}\", df)" diff --git a/notebooks/astex_method_interaction_analysis_plotting.py b/notebooks/astex_method_interaction_analysis_plotting.py index 4d7aeaf..1756e78 100644 --- a/notebooks/astex_method_interaction_analysis_plotting.py +++ b/notebooks/astex_method_interaction_analysis_plotting.py @@ -5,6 +5,7 @@ # #### Import packages # %% +import copy import os import re import shutil @@ -209,14 +210,19 @@ def create_temp_pdb_with_only_molecule_type_residues( } ) -for method in baseline_methods: +for method in copy.deepcopy(baseline_methods): for repeat_index in range(1, max_num_repeats_per_method + 1): method_title = method_mapping[method] - if not os.path.exists(f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5"): - v1_baseline = method == "diffdockv1" - vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + v1_baseline = method == "diffdockv1" + vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + + vina_suffix = f"_{vina_binding_site_method}" if "_" in method else "" + method = method.split("_")[0] + if not os.path.exists( + f"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5" + ): with open_dict(cfg): cfg.method = method cfg.repeat_index = repeat_index @@ -288,7 +294,7 @@ def create_temp_pdb_with_only_molecule_type_residues( # NOTE: we iteratively save the interaction dataframes to an HDF5 file with pd.HDFStore( - f"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5" + f"{method}{vina_suffix}_astex_diverse_interaction_dataframes_{repeat_index}.h5" ) as store: for i, df in enumerate(astex_protein_ligand_interaction_dfs): store.put(f"df_{i}", df) diff --git a/notebooks/dockgen_method_interaction_analysis_plotting.ipynb b/notebooks/dockgen_method_interaction_analysis_plotting.ipynb index 4efb5e1..699dbf3 100644 --- a/notebooks/dockgen_method_interaction_analysis_plotting.ipynb +++ b/notebooks/dockgen_method_interaction_analysis_plotting.ipynb @@ -20,6 +20,7 @@ "metadata": {}, "outputs": [], "source": [ + "import copy\n", "import os\n", "import re\n", "import shutil\n", @@ -292,14 +293,19 @@ " }\n", ")\n", "\n", - "for method in baseline_methods:\n", + "for method in copy.deepcopy(baseline_methods):\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", - " if not os.path.exists(f\"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5\"):\n", - " v1_baseline = method == \"diffdockv1\"\n", - " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + " v1_baseline = method == \"diffdockv1\"\n", + " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + "\n", + " vina_suffix = f\"_{vina_binding_site_method}\" if \"_\" in method else \"\"\n", + " method = method.split(\"_\")[0]\n", "\n", + " if not os.path.exists(\n", + " f\"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5\"\n", + " ):\n", " with open_dict(cfg):\n", " cfg.method = method\n", " cfg.repeat_index = repeat_index\n", @@ -371,7 +377,7 @@ "\n", " # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n", " with pd.HDFStore(\n", - " f\"{method}_dockgen_interaction_dataframes_{repeat_index}.h5\"\n", + " f\"{method}{vina_suffix}_dockgen_interaction_dataframes_{repeat_index}.h5\"\n", " ) as store:\n", " for i, df in enumerate(dockgen_protein_ligand_interaction_dfs):\n", " store.put(f\"df_{i}\", df)" diff --git a/notebooks/dockgen_method_interaction_analysis_plotting.py b/notebooks/dockgen_method_interaction_analysis_plotting.py index aed5e62..d60fdcb 100644 --- a/notebooks/dockgen_method_interaction_analysis_plotting.py +++ b/notebooks/dockgen_method_interaction_analysis_plotting.py @@ -5,6 +5,7 @@ # #### Import packages # %% +import copy import os import re import shutil @@ -222,14 +223,19 @@ def create_temp_pdb_with_only_molecule_type_residues( } ) -for method in baseline_methods: +for method in copy.deepcopy(baseline_methods): for repeat_index in range(1, max_num_repeats_per_method + 1): method_title = method_mapping[method] - if not os.path.exists(f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5"): - v1_baseline = method == "diffdockv1" - vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + v1_baseline = method == "diffdockv1" + vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + + vina_suffix = f"_{vina_binding_site_method}" if "_" in method else "" + method = method.split("_")[0] + if not os.path.exists( + f"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5" + ): with open_dict(cfg): cfg.method = method cfg.repeat_index = repeat_index @@ -301,7 +307,7 @@ def create_temp_pdb_with_only_molecule_type_residues( # NOTE: we iteratively save the interaction dataframes to an HDF5 file with pd.HDFStore( - f"{method}_dockgen_interaction_dataframes_{repeat_index}.h5" + f"{method}{vina_suffix}_dockgen_interaction_dataframes_{repeat_index}.h5" ) as store: for i, df in enumerate(dockgen_protein_ligand_interaction_dfs): store.put(f"df_{i}", df) diff --git a/notebooks/posebusters_method_interaction_analysis_plotting.ipynb b/notebooks/posebusters_method_interaction_analysis_plotting.ipynb index bab86d4..81c7538 100644 --- a/notebooks/posebusters_method_interaction_analysis_plotting.ipynb +++ b/notebooks/posebusters_method_interaction_analysis_plotting.ipynb @@ -20,6 +20,7 @@ "metadata": {}, "outputs": [], "source": [ + "import copy\n", "import os\n", "import re\n", "import shutil\n", @@ -289,14 +290,19 @@ " }\n", ")\n", "\n", - "for method in baseline_methods:\n", + "for method in copy.deepcopy(baseline_methods):\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", - " if not os.path.exists(f\"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5\"):\n", - " v1_baseline = method == \"diffdockv1\"\n", - " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + " v1_baseline = method == \"diffdockv1\"\n", + " vina_binding_site_method = method.split(\"_\")[-1] if \"_\" in method else \"p2rank\"\n", + "\n", + " vina_suffix = f\"_{vina_binding_site_method}\" if \"_\" in method else \"\"\n", + " method = method.split(\"_\")[0]\n", "\n", + " if not os.path.exists(\n", + " f\"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5\"\n", + " ):\n", " with open_dict(cfg):\n", " cfg.method = method\n", " cfg.repeat_index = repeat_index\n", @@ -370,7 +376,7 @@ "\n", " # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n", " with pd.HDFStore(\n", - " f\"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5\"\n", + " f\"{method}{vina_suffix}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5\"\n", " ) as store:\n", " for i, df in enumerate(posebusters_protein_ligand_interaction_dfs):\n", " store.put(f\"df_{i}\", df)" diff --git a/notebooks/posebusters_method_interaction_analysis_plotting.py b/notebooks/posebusters_method_interaction_analysis_plotting.py index 2c598f3..e6e5c06 100644 --- a/notebooks/posebusters_method_interaction_analysis_plotting.py +++ b/notebooks/posebusters_method_interaction_analysis_plotting.py @@ -5,6 +5,7 @@ # #### Import packages # %% +import copy import os import re import shutil @@ -219,14 +220,19 @@ def create_temp_pdb_with_only_molecule_type_residues( } ) -for method in baseline_methods: +for method in copy.deepcopy(baseline_methods): for repeat_index in range(1, max_num_repeats_per_method + 1): method_title = method_mapping[method] - if not os.path.exists(f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5"): - v1_baseline = method == "diffdockv1" - vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + v1_baseline = method == "diffdockv1" + vina_binding_site_method = method.split("_")[-1] if "_" in method else "p2rank" + + vina_suffix = f"_{vina_binding_site_method}" if "_" in method else "" + method = method.split("_")[0] + if not os.path.exists( + f"{method}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5" + ): with open_dict(cfg): cfg.method = method cfg.repeat_index = repeat_index @@ -300,7 +306,7 @@ def create_temp_pdb_with_only_molecule_type_residues( # NOTE: we iteratively save the interaction dataframes to an HDF5 file with pd.HDFStore( - f"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5" + f"{method}{vina_suffix}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5" ) as store: for i, df in enumerate(posebusters_protein_ligand_interaction_dfs): store.put(f"df_{i}", df)