Skip to content

Commit

Permalink
Finish integrating AlphaFold 3
Browse files Browse the repository at this point in the history
  • Loading branch information
amorehead committed Dec 10, 2024
1 parent 9343142 commit b0f9121
Show file tree
Hide file tree
Showing 13 changed files with 229 additions and 30 deletions.
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -861,48 +861,48 @@ python3 posebench/data/af3_output_extraction.py dataset=casp15 repeat_index=1
Relax the generated ligand structures inside of their respective protein pockets

```bash
python3 posebench/models/inference_relaxation.py method=chai-lab dataset=posebusters_benchmark remove_initial_protein_hydrogens=true repeat_index=1
python3 posebench/models/inference_relaxation.py method=alphafold3 dataset=posebusters_benchmark remove_initial_protein_hydrogens=true repeat_index=1
...
python3 posebench/models/inference_relaxation.py method=chai-lab dataset=astex_diverse remove_initial_protein_hydrogens=true repeat_index=1
python3 posebench/models/inference_relaxation.py method=alphafold3 dataset=astex_diverse remove_initial_protein_hydrogens=true repeat_index=1
...
python3 posebench/models/inference_relaxation.py method=chai-lab dataset=dockgen remove_initial_protein_hydrogens=true repeat_index=1
python3 posebench/models/inference_relaxation.py method=alphafold3 dataset=dockgen remove_initial_protein_hydrogens=true repeat_index=1
...
```

Align predicted protein-ligand structures to ground-truth complex structures

```bash
conda activate PyMOL-PoseBench
python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=posebusters_benchmark repeat_index=1
python3 posebench/analysis/complex_alignment.py method=alphafold3 dataset=posebusters_benchmark repeat_index=1
...
python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=astex_diverse repeat_index=1
python3 posebench/analysis/complex_alignment.py method=alphafold3 dataset=astex_diverse repeat_index=1
...
python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=dockgen repeat_index=1
python3 posebench/analysis/complex_alignment.py method=alphafold3 dataset=dockgen repeat_index=1
conda deactivate
...
```

Analyze inference results for each dataset

```bash
python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=posebusters_benchmark repeat_index=1
python3 posebench/analysis/inference_analysis.py method=alphafold3 dataset=posebusters_benchmark repeat_index=1
...
python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=astex_diverse repeat_index=1
python3 posebench/analysis/inference_analysis.py method=alphafold3 dataset=astex_diverse repeat_index=1
...
python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=dockgen repeat_index=1
python3 posebench/analysis/inference_analysis.py method=alphafold3 dataset=dockgen repeat_index=1
...
```

Analyze inference results for the CASP15 dataset

```bash
# first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring
python3 posebench/models/ensemble_generation.py ensemble_methods=\[chai-lab\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=5 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1
python3 posebench/models/ensemble_generation.py ensemble_methods=\[chai-lab\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=5 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1
python3 posebench/models/ensemble_generation.py ensemble_methods=\[alphafold3\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_alphafold3_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=5 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1
python3 posebench/models/ensemble_generation.py ensemble_methods=\[alphafold3\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_alphafold3_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=5 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1
# NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ...
...
# now score the CASP15-compliant submissions using the official CASP scoring pipeline
python3 posebench/analysis/inference_analysis_casp.py method=chai-lab dataset=casp15 repeat_index=1
python3 posebench/analysis/inference_analysis_casp.py method=alphafold3 dataset=casp15 repeat_index=1
...
```

Expand Down
4 changes: 2 additions & 2 deletions configs/analysis/complex_alignment.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`)
method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `p2rank`)
dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse
Expand Down
4 changes: 2 additions & 2 deletions configs/analysis/inference_analysis.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. with only the most important metrics)
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `vina`, `ensemble`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `p2rank`)
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `vina`, `ensemble`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `p2rank`)
dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
input_csv_path: ${resolve_method_input_csv_path:${method},${dataset},${pocket_only_baseline}} # the input CSV filepath with which to run inference
Expand Down
4 changes: 2 additions & 2 deletions configs/analysis/inference_analysis_casp.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. with only the most important metrics)
python_exec_path: ${oc.env:HOME}/mambaforge/envs/casp15_ligand_scoring/bin/python3 # the Python executable to use
scoring_script_path: ${oc.env:PROJECT_ROOT}/posebench/analysis/casp15_ligand_scoring/score_predictions.py # the path to the script to use for scoring CASP predictions
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `vina`, `ensemble`, `tulip`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`)
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `vina`, `ensemble`, `tulip`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `p2rank`)
dataset: casp15 # the dataset to use - NOTE: must be one of (`casp15`)
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
predictions_dir: ${oc.env:PROJECT_ROOT}/data/test_cases/${dataset}/top_${method}_ensemble_predictions_${repeat_index} # the directory containing the predictions to analyze
Expand Down
4 changes: 3 additions & 1 deletion configs/model/ensemble_generation.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# General inference arguments:
ensemble_methods: [diffdock, dynamicbind, neuralplexer, rfaa] # the methods from which to gather predictions for ensembling - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `vina`, `tulip`)
ensemble_methods: [diffdock, dynamicbind, neuralplexer, rfaa] # the methods from which to gather predictions for ensembling - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `vina`, `tulip`)
generate_vina_scripts: false # whether to generate Vina scripts using other methods' binding site predictions - NOTE: `resume` must also be `true` when this is `true`, meaning other methods' predictions must have already been generated locally
rank_single_method_intrinsically: true # whether to rank single-method predictions using either `consensus` or `vina` ranking (false) or instead using their intrinsic (explicit) rank assignment (true)
output_bash_file_dir: ensemble_generation_scripts # the directory in which to save the generated Bash scripts
Expand Down Expand Up @@ -146,6 +146,8 @@ rfaa_inference_dir_name: null # the name of the inference output directory to us
# Chai-1 inference arguments:
chai_out_path: ${oc.env:PROJECT_ROOT}/forks/chai-lab/inference/chai-lab_ensemble_outputs # the output directory to which to write the predictions
chai_skip_existing: true # whether to skip running inference if the prediction for a target already exists
# AlphaFold 3 inference arguments:
alphafold3_out_path: ${oc.env:PROJECT_ROOT}/forks/alphafold3/inference/alphafold3_ensemble_outputs # the output directory to which to write the predictions
# Vina inference arguments:
vina_binding_site_methods: [p2rank] # the methods to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `flowdock`, `p2rank`)
vina_python2_exec_path: ${oc.env:PROJECT_ROOT}/forks/Vina/ADFR/bin/python # the path to the Python 2 executable
Expand Down
4 changes: 2 additions & 2 deletions configs/model/inference_relaxation.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
method: diffdock # the method for which to relax predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `vina`, `tulip`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `p2rank`)
method: diffdock # the method for which to relax predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `vina`, `tulip`)
vina_binding_site_method: p2rank # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `flowdock`, `rfaa`, `chai-lab`, `alphafold3`, `p2rank`)
dataset: posebusters_benchmark # the dataset for which to relax predictions - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
num_processes: 1 # the number of parallel processes to use for relaxation
Expand Down
6 changes: 6 additions & 0 deletions docs/source/configs/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ Chai-1 output extraction
:language: yaml
:caption: :file:`data/chai_output_extraction.yaml`

AlphaFold 3 output extraction
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: ../../../configs/data/alphafold3_output_extraction.yaml
:language: yaml
:caption: :file:`data/alphafold3_output_extraction.yaml`

TULIP output extraction
^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: ../../../configs/data/tulip_output_extraction.yaml
Expand Down
14 changes: 11 additions & 3 deletions posebench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"flowdock": "FlowDock",
"rfaa": "RoseTTAFold-All-Atom",
"chai-lab": "chai-lab",
"alphafold3": "AlphaFold 3",
"vina": "Vina",
"tulip": "TULIP",
"p2rank": "P2Rank",
Expand Down Expand Up @@ -100,7 +101,7 @@ def resolve_method_protein_dir(
"results",
f"{dataset}{pocket_only_suffix}",
)
elif method in ["neuralplexer", "flowdock", "rfaa", "chai-lab"]:
elif method in ["neuralplexer", "flowdock", "rfaa", "chai-lab", "alphafold3"]:
return os.path.join(
"forks",
METHOD_TITLE_MAPPING.get(method, method),
Expand Down Expand Up @@ -143,10 +144,13 @@ def resolve_method_ligand_dir(
"flowdock",
"rfaa",
"chai-lab",
"alphafold3",
"tulip",
]:
output_suffix = (
"s" if method in ["neuralplexer", "flowdock", "rfaa", "chai-lab", "tulip"] else ""
"s"
if method in ["neuralplexer", "flowdock", "rfaa", "chai-lab", "alphafold3", "tulip"]
else ""
)
return os.path.join(
"forks",
Expand Down Expand Up @@ -208,10 +212,13 @@ def resolve_method_output_dir(
"flowdock",
"rfaa",
"chai-lab",
"alphafold3",
"tulip",
]:
output_suffix = (
"s" if method in ["neuralplexer", "flowdock", "rfaa", "chai-lab", "tulip"] else ""
"s"
if method in ["neuralplexer", "flowdock", "rfaa", "chai-lab", "alphafold3", "tulip"]
else ""
)
return os.path.join(
"forks",
Expand Down Expand Up @@ -260,6 +267,7 @@ def resolve_method_input_csv_path(method: str, dataset: str, pocket_only_baselin
"flowdock",
"rfaa",
"chai-lab",
"alphafold3",
"vina",
"tulip",
]:
Expand Down
25 changes: 24 additions & 1 deletion posebench/analysis/complex_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ def main(cfg: DictConfig):
"flowdock",
"rfaa",
"chai-lab",
"alphafold3",
]:
output_dir = Path(str(output_dir).replace("_relaxed", ""))

Expand Down Expand Up @@ -350,6 +351,18 @@ def main(cfg: DictConfig):
and "_LIG_" not in file.stem
]
)
elif cfg.method == "alphafold3":
output_ligand_files = list(output_dir.rglob(f"*_model_ligand{config}.sdf"))
output_ligand_files = sorted(
[
file
for file in output_ligand_files
if config == "_relaxed"
or (config == "" and "_relaxed" not in file.stem)
and "_aligned" not in file.stem
and "_LIG_" not in file.stem
]
)
else:
raise ValueError(f"Invalid method: {cfg.method}")

Expand Down Expand Up @@ -395,6 +408,16 @@ def main(cfg: DictConfig):
and "_aligned" not in file.stem
]
)
elif cfg.method == "alphafold3":
output_protein_files = list(output_dir.rglob("*_model_protein.pdb"))
output_protein_files = sorted(
[
file
for file in output_protein_files
if (config == "_relaxed" or (config == "" and "_relaxed" not in file.stem))
and "_aligned" not in file.stem
]
)
else:
raise ValueError(f"Invalid method: {cfg.method}")

Expand All @@ -410,7 +433,7 @@ def main(cfg: DictConfig):
)
]
)
elif cfg.method in ["rfaa", "chai-lab"]:
elif cfg.method == "chai-lab":
output_protein_files = sorted(
[
item
Expand Down
Loading

0 comments on commit b0f9121

Please sign in to comment.