Skip to content

Commit

Permalink
Update interaction plots
Browse files Browse the repository at this point in the history
  • Loading branch information
amorehead committed Dec 17, 2024
1 parent 8ab777b commit 840a7fe
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 48 deletions.
33 changes: 27 additions & 6 deletions notebooks/astex_method_interaction_analysis_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -510,16 +510,23 @@
" with pd.HDFStore(file_path) as store:\n",
" for key in store.keys():\n",
" for row_index in range(len(store[key])):\n",
" target = store[key].iloc[row_index][\"target\"]\n",
" if not isinstance(target, str):\n",
" target = target.values[0]\n",
"\n",
" try:\n",
" interactions[store[key].iloc[row_index][\"target\"].values[0]].extend(\n",
" interactions[target].extend(\n",
" [\n",
" f\"{split_string_at_numeric(row[0])[0]}:{split_string_at_numeric(row[1])[0]}:{row[2]}\"\n",
" for row in store[key].iloc[row_index].index.values[:-1]\n",
" ]\n",
" )\n",
" except Exception as e:\n",
" print(f\"Error processing {key} row {row_index} due to: {e}. Skipping...\")\n",
" print(\n",
" f\"Error processing {key} row {row_index} for target {target} due to: {e}. Skipping...\"\n",
" )\n",
" continue\n",
"\n",
" df_rows = []\n",
" for target in interactions:\n",
" target_interactions = interactions[target]\n",
Expand Down Expand Up @@ -552,7 +559,7 @@
"\n",
"assert os.path.exists(\n",
" \"astex_diverse_interaction_dataframes.h5\"\n",
"), \"No reference Astex Diverse interaction dataframe found.\"\n",
"), \"No reference interaction dataframe found.\"\n",
"reference_df = bin_interactions(\"astex_diverse_interaction_dataframes.h5\", \"Reference\")\n",
"\n",
"# combine bins from all method dataframes\n",
Expand Down Expand Up @@ -598,16 +605,30 @@
" }\n",
" )\n",
"\n",
"# plot the EMD and WM values for each method\n",
"all_emd_values = [entry[\"EMD\"] for entry in emd_values]\n",
"min_emd = np.min(all_emd_values)\n",
"max_emd = np.max(all_emd_values)\n",
"for entry in emd_values:\n",
" emd = entry[\"EMD\"]\n",
" normalized_score = 1 - (emd - min_emd) / (max_emd - min_emd)\n",
" entry[\"WM\"] = normalized_score\n",
"\n",
"# plot the EMD values for each method\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\"])\n",
"emd_values_df.to_csv(\"astex_diverse_plif_emd_values.csv\")\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\", \"WM\"])\n",
"emd_values_df.to_csv(\"astex_diverse_plif_metrics.csv\")\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"EMD\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-EMD\")\n",
"plt.savefig(\"astex_diverse_plif_emd_values.png\")\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"WM\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-WM\")\n",
"plt.savefig(\"astex_diverse_plif_wm_values.png\")\n",
"plt.show()"
]
}
Expand Down
33 changes: 27 additions & 6 deletions notebooks/astex_method_interaction_analysis_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,16 +420,23 @@ def bin_interactions(file_path, category):
with pd.HDFStore(file_path) as store:
for key in store.keys():
for row_index in range(len(store[key])):
target = store[key].iloc[row_index]["target"]
if not isinstance(target, str):
target = target.values[0]

try:
interactions[store[key].iloc[row_index]["target"].values[0]].extend(
interactions[target].extend(
[
f"{split_string_at_numeric(row[0])[0]}:{split_string_at_numeric(row[1])[0]}:{row[2]}"
for row in store[key].iloc[row_index].index.values[:-1]
]
)
except Exception as e:
print(f"Error processing {key} row {row_index} due to: {e}. Skipping...")
print(
f"Error processing {key} row {row_index} for target {target} due to: {e}. Skipping..."
)
continue

df_rows = []
for target in interactions:
target_interactions = interactions[target]
Expand Down Expand Up @@ -462,7 +469,7 @@ def histogram_to_vector(histogram, bins):

assert os.path.exists(
"astex_diverse_interaction_dataframes.h5"
), "No reference Astex Diverse interaction dataframe found."
), "No reference interaction dataframe found."
reference_df = bin_interactions("astex_diverse_interaction_dataframes.h5", "Reference")

# combine bins from all method dataframes
Expand Down Expand Up @@ -508,14 +515,28 @@ def histogram_to_vector(histogram, bins):
}
)

# compute the WM score of every entry by min-max normalizing its EMD across
# all entries and inverting it, so that the lowest EMD maps to WM = 1 and the
# highest EMD maps to WM = 0
all_emd_values = [entry["EMD"] for entry in emd_values]
min_emd = np.min(all_emd_values)
max_emd = np.max(all_emd_values)
emd_range = max_emd - min_emd
for entry in emd_values:
    emd = entry["EMD"]
    if emd_range == 0:
        # every entry has the same EMD, so the min-max normalization is
        # undefined (0 / 0); score all entries as equally good instead of
        # writing NaN into the table and the plot
        normalized_score = 1.0
    else:
        normalized_score = 1 - (emd - min_emd) / emd_range
    entry["WM"] = normalized_score

# tabulate, save, and plot the EMD and WM values for each method
emd_values_df = pd.DataFrame(emd_values, columns=["Category", "Target", "EMD"])
emd_values_df.to_csv("astex_diverse_plif_emd_values.csv")
emd_values_df = pd.DataFrame(emd_values, columns=["Category", "Target", "EMD", "WM"])
emd_values_df.to_csv("astex_diverse_plif_metrics.csv")

plt.figure(figsize=(10, 5))
sns.boxplot(data=emd_values_df, x="Category", y="EMD")
plt.xlabel("")
plt.ylabel("PLIF-EMD")
plt.savefig("astex_diverse_plif_emd_values.png")
plt.show()

plt.figure(figsize=(10, 5))
sns.boxplot(data=emd_values_df, x="Category", y="WM")
plt.xlabel("")
plt.ylabel("PLIF-WM")
plt.savefig("astex_diverse_plif_wm_values.png")
plt.show()
33 changes: 27 additions & 6 deletions notebooks/casp15_method_interaction_analysis_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -505,16 +505,23 @@
" with pd.HDFStore(file_path) as store:\n",
" for key in store.keys():\n",
" for row_index in range(len(store[key])):\n",
" target = store[key].iloc[row_index][\"target\"]\n",
" if not isinstance(target, str):\n",
" target = target.values[0]\n",
"\n",
" try:\n",
" interactions[store[key].iloc[row_index][\"target\"].values[0]].extend(\n",
" interactions[target].extend(\n",
" [\n",
" f\"{split_string_at_numeric(row[0])[0]}:{split_string_at_numeric(row[1])[0]}:{row[2]}\"\n",
" for row in store[key].iloc[row_index].index.values[:-1]\n",
" ]\n",
" )\n",
" except Exception as e:\n",
" print(f\"Error processing {key} row {row_index} due to: {e}. Skipping...\")\n",
" print(\n",
" f\"Error processing {key} row {row_index} for target {target} due to: {e}. Skipping...\"\n",
" )\n",
" continue\n",
"\n",
" df_rows = []\n",
" for target in interactions:\n",
" target_interactions = interactions[target]\n",
Expand Down Expand Up @@ -547,7 +554,7 @@
"\n",
"assert os.path.exists(\n",
" \"casp15_interaction_dataframes.h5\"\n",
"), \"No reference CASP15 interaction dataframe found.\"\n",
"), \"No reference interaction dataframe found.\"\n",
"reference_df = bin_interactions(\"casp15_interaction_dataframes.h5\", \"Reference\")\n",
"\n",
"# combine bins from all method dataframes\n",
Expand Down Expand Up @@ -593,16 +600,30 @@
" }\n",
" )\n",
"\n",
"# plot the EMD and WM values for each method\n",
"all_emd_values = [entry[\"EMD\"] for entry in emd_values]\n",
"min_emd = np.min(all_emd_values)\n",
"max_emd = np.max(all_emd_values)\n",
"for entry in emd_values:\n",
" emd = entry[\"EMD\"]\n",
" normalized_score = 1 - (emd - min_emd) / (max_emd - min_emd)\n",
" entry[\"WM\"] = normalized_score\n",
"\n",
"# plot the EMD values for each method\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\"])\n",
"emd_values_df.to_csv(\"casp15_plif_emd_values.csv\")\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\", \"WM\"])\n",
"emd_values_df.to_csv(\"casp15_plif_metrics.csv\")\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"EMD\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-EMD\")\n",
"plt.savefig(\"casp15_plif_emd_values.png\")\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"WM\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-WM\")\n",
"plt.savefig(\"casp15_plif_wm_values.png\")\n",
"plt.show()"
]
}
Expand Down
33 changes: 27 additions & 6 deletions notebooks/casp15_method_interaction_analysis_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,16 +415,23 @@ def bin_interactions(file_path, category):
with pd.HDFStore(file_path) as store:
for key in store.keys():
for row_index in range(len(store[key])):
target = store[key].iloc[row_index]["target"]
if not isinstance(target, str):
target = target.values[0]

try:
interactions[store[key].iloc[row_index]["target"].values[0]].extend(
interactions[target].extend(
[
f"{split_string_at_numeric(row[0])[0]}:{split_string_at_numeric(row[1])[0]}:{row[2]}"
for row in store[key].iloc[row_index].index.values[:-1]
]
)
except Exception as e:
print(f"Error processing {key} row {row_index} due to: {e}. Skipping...")
print(
f"Error processing {key} row {row_index} for target {target} due to: {e}. Skipping..."
)
continue

df_rows = []
for target in interactions:
target_interactions = interactions[target]
Expand Down Expand Up @@ -457,7 +464,7 @@ def histogram_to_vector(histogram, bins):

assert os.path.exists(
"casp15_interaction_dataframes.h5"
), "No reference CASP15 interaction dataframe found."
), "No reference interaction dataframe found."
reference_df = bin_interactions("casp15_interaction_dataframes.h5", "Reference")

# combine bins from all method dataframes
Expand Down Expand Up @@ -503,14 +510,28 @@ def histogram_to_vector(histogram, bins):
}
)

# compute the WM score of every entry by min-max normalizing its EMD across
# all entries and inverting it, so that the lowest EMD maps to WM = 1 and the
# highest EMD maps to WM = 0
all_emd_values = [entry["EMD"] for entry in emd_values]
min_emd = np.min(all_emd_values)
max_emd = np.max(all_emd_values)
emd_range = max_emd - min_emd
for entry in emd_values:
    emd = entry["EMD"]
    if emd_range == 0:
        # every entry has the same EMD, so the min-max normalization is
        # undefined (0 / 0); score all entries as equally good instead of
        # writing NaN into the table and the plot
        normalized_score = 1.0
    else:
        normalized_score = 1 - (emd - min_emd) / emd_range
    entry["WM"] = normalized_score

# tabulate, save, and plot the EMD and WM values for each method
emd_values_df = pd.DataFrame(emd_values, columns=["Category", "Target", "EMD"])
emd_values_df.to_csv("casp15_plif_emd_values.csv")
emd_values_df = pd.DataFrame(emd_values, columns=["Category", "Target", "EMD", "WM"])
emd_values_df.to_csv("casp15_plif_metrics.csv")

plt.figure(figsize=(10, 5))
sns.boxplot(data=emd_values_df, x="Category", y="EMD")
plt.xlabel("")
plt.ylabel("PLIF-EMD")
plt.savefig("casp15_plif_emd_values.png")
plt.show()

plt.figure(figsize=(10, 5))
sns.boxplot(data=emd_values_df, x="Category", y="WM")
plt.xlabel("")
plt.ylabel("PLIF-WM")
plt.savefig("casp15_plif_wm_values.png")
plt.show()
33 changes: 27 additions & 6 deletions notebooks/dockgen_method_interaction_analysis_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -523,16 +523,23 @@
" with pd.HDFStore(file_path) as store:\n",
" for key in store.keys():\n",
" for row_index in range(len(store[key])):\n",
" target = store[key].iloc[row_index][\"target\"]\n",
" if not isinstance(target, str):\n",
" target = target.values[0]\n",
"\n",
" try:\n",
" interactions[store[key].iloc[row_index][\"target\"].values[0]].extend(\n",
" interactions[target].extend(\n",
" [\n",
" f\"{split_string_at_numeric(row[0])[0]}:{split_string_at_numeric(row[1])[0]}:{row[2]}\"\n",
" for row in store[key].iloc[row_index].index.values[:-1]\n",
" ]\n",
" )\n",
" except Exception as e:\n",
" print(f\"Error processing {key} row {row_index} due to: {e}. Skipping...\")\n",
" print(\n",
" f\"Error processing {key} row {row_index} for target {target} due to: {e}. Skipping...\"\n",
" )\n",
" continue\n",
"\n",
" df_rows = []\n",
" for target in interactions:\n",
" target_interactions = interactions[target]\n",
Expand Down Expand Up @@ -565,7 +572,7 @@
"\n",
"assert os.path.exists(\n",
" \"dockgen_interaction_dataframes.h5\"\n",
"), \"No reference DockGen interaction dataframe found.\"\n",
"), \"No reference interaction dataframe found.\"\n",
"reference_df = bin_interactions(\"dockgen_interaction_dataframes.h5\", \"Reference\")\n",
"\n",
"# combine bins from all method dataframes\n",
Expand Down Expand Up @@ -611,16 +618,30 @@
" }\n",
" )\n",
"\n",
"# plot the EMD and WM values for each method\n",
"all_emd_values = [entry[\"EMD\"] for entry in emd_values]\n",
"min_emd = np.min(all_emd_values)\n",
"max_emd = np.max(all_emd_values)\n",
"for entry in emd_values:\n",
" emd = entry[\"EMD\"]\n",
" normalized_score = 1 - (emd - min_emd) / (max_emd - min_emd)\n",
" entry[\"WM\"] = normalized_score\n",
"\n",
"# plot the EMD values for each method\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\"])\n",
"emd_values_df.to_csv(\"dockgen_plif_emd_values.csv\")\n",
"emd_values_df = pd.DataFrame(emd_values, columns=[\"Category\", \"Target\", \"EMD\", \"WM\"])\n",
"emd_values_df.to_csv(\"dockgen_plif_metrics.csv\")\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"EMD\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-EMD\")\n",
"plt.savefig(\"dockgen_plif_emd_values.png\")\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=emd_values_df, x=\"Category\", y=\"WM\")\n",
"plt.xlabel(\"\")\n",
"plt.ylabel(\"PLIF-WM\")\n",
"plt.savefig(\"dockgen_plif_wm_values.png\")\n",
"plt.show()"
]
}
Expand Down
Loading

0 comments on commit 840a7fe

Please sign in to comment.