From a8bdbd3e58f13df74031bd37f44fcaca6612a85e Mon Sep 17 00:00:00 2001 From: Alex Morehead Date: Wed, 11 Dec 2024 20:52:11 -0600 Subject: [PATCH] Fix up DockGen scoring --- posebench/analysis/inference_analysis.py | 27 +++++++++++++----------- pyproject.toml | 1 - 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/posebench/analysis/inference_analysis.py b/posebench/analysis/inference_analysis.py index 1abf483..8d6c149 100644 --- a/posebench/analysis/inference_analysis.py +++ b/posebench/analysis/inference_analysis.py @@ -82,6 +82,8 @@ def find_most_similar_frag( continue # Generate the fingerprint for the predicted fragment + mol_pred_frag.UpdatePropertyCache() + Chem.GetSymmSSSR(mol_pred_frag) # Perceive rings for fingerprinting fp_pred = mfpgen.GetFingerprint(mol_pred_frag) # Calculate the Tanimoto similarity @@ -112,15 +114,20 @@ def df_split_mol_frags( new_rows = [] for row in mol_table.itertuples(): try: - mols_true = Chem.SDMolSupplier(str(row.mol_true), removeHs=False) - mols_pred = Chem.SDMolSupplier(str(row.mol_pred), removeHs=False) + mol_true_file_fn = ( + Chem.MolFromPDBFile if str(row.mol_true).endswith(".pdb") else Chem.MolFromMolFile + ) + + mol_true = mol_true_file_fn(str(row.mol_true), removeHs=False) + mol_pred = Chem.MolFromMolFile(str(row.mol_pred), removeHs=False) + + assert mol_true is not None, f"Failed to load the true molecule from {row.mol_true}." assert ( - len(mols_true) == 1 and len(mols_pred) == 1 - ), "Only one molecule per SDF file is supported." + mol_pred is not None + ), f"Failed to load the predicted molecule from {row.mol_pred}." - mol_true, mol_pred = mols_true[0], mols_pred[0] - mol_true_frags = Chem.GetMolFrags(mol_true, asMols=True) - mol_pred_frags = Chem.GetMolFrags(mol_pred, asMols=True) + mol_true_frags = Chem.GetMolFrags(mol_true, asMols=True, sanitizeFrags=False) + mol_pred_frags = Chem.GetMolFrags(mol_pred, asMols=True, sanitizeFrags=False) if select_most_similar_pred_frag: mol_pred_frags = [ @@ -533,11 +540,7 @@ def create_mol_table( ) ) else: - pdb_ids = ( - input_table["pdb_id"].transform(lambda x: "_".join(x.split("_")[:3])) - if cfg.method == "diffdock" - else input_table["pdb_id"] - ) + pdb_ids = input_table["pdb_id"] if cfg.method in RANKED_METHODS: mol_table["mol_pred"] = pdb_ids.apply( lambda x: ( diff --git a/pyproject.toml b/pyproject.toml index 530dcf6..85beb9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ keywords = [ 'molecular docking', 'protein-ligand docking', 'protein-ligand interaction', - 'protein-ligand binding', 'protein-ligand scoring', 'protein-ligand pose prediction', 'protein-ligand pose refinement',