BioinfoMachineLearning
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 9 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 6 additions & 11 deletions
diff --git a/citation.bib b/citation.bib
Lines changed: 2 additions & 1 deletion
diff --git a/configs/analysis/inference_analysis.yaml b/configs/analysis/inference_analysis.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/configs/model/diffdock_inference.yaml b/configs/model/diffdock_inference.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/configs/model/dynamicbind_inference.yaml b/configs/model/dynamicbind_inference.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/configs/model/fabind_inference.yaml b/configs/model/fabind_inference.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/configs/model/inference_relaxation.yaml b/configs/model/inference_relaxation.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/configs/model/neuralplexer_inference.yaml b/configs/model/neuralplexer_inference.yaml
Lines changed: 1 addition & 0 deletions
@@ -173,6 +173,7 @@ configs/local/default.yaml
 /forks/FABind/FABind/
 /forks/NeuralPLexer/NeuralPLexer/
 /forks/NeuralPLexer/**/neuralplexermodels*
+/forks/P2Rank/
 /forks/*/inference*/
 /forks/RoseTTAFold-All-Atom/blast-2.2.26
 /forks/RoseTTAFold-All-Atom/rf2aa/config/inference/*_rfaa_inference.yaml
 
@@ -1,3 +1,12 @@
+### 0.2.0 - 07/04/2024
+
+- Added P2Rank as a new binding site prediction method available for use with AutoDock Vina
+- Added OpenJDK to the `PoseBench` Conda environment to enable P2Rank inference
+- Added a script to benchmark the required compute resources for each baseline method
+- Updated citation
+- Corrected directory navigation instructions (i.e., `cd` references) in `README.md` to reflect the directory structure of each Zenodo archive file
+- Corrected Biopython, NumPy, and ProDy versions in the DiffDock Conda environment to avoid GCC compilation errors
+
 ### 0.1.0 - 06/08/2024
 
 - First public release
@@ -85,6 +85,10 @@ cd forks/RoseTTAFold-All-Atom/rf2aa/SE3Transformer/ && pip3 install --no-cache-d
 # - AutoDock Vina Tools environment (~1 GB)
 mamba env create -f environments/adfr_environment.yaml --prefix forks/Vina/ADFR/
 conda activate forks/Vina/ADFR/  # NOTE: one still needs to use `conda` to (de)activate environments
+# - P2Rank (~0.5 GB)
+wget -P forks/P2Rank/ https://github.com/rdk/p2rank/releases/download/2.4.2/p2rank_2.4.2.tar.gz
+tar -xzf forks/P2Rank/p2rank_2.4.2.tar.gz -C forks/P2Rank/
+rm forks/P2Rank/p2rank_2.4.2.tar.gz
 ```
 
 Download checkpoints (~8.25 GB total)
@@ -132,7 +136,6 @@ of how to extend `PoseBench`, as outlined below.
 
 ```bash
 # fetch, extract, and clean up preprocessed Astex Diverse, PoseBusters Benchmark, DockGen, and CASP15 data (~3 GB) #
-cd data/
 wget https://zenodo.org/records/11477766/files/astex_diverse_set.tar.gz
 wget https://zenodo.org/records/11477766/files/posebusters_benchmark_set.tar.gz
 wget https://zenodo.org/records/11477766/files/dockgen_set.tar.gz
@@ -145,50 +148,41 @@ rm astex_diverse_set.tar.gz
 rm posebusters_benchmark_set.tar.gz
 rm dockgen_set.tar.gz
 rm casp15_set.tar.gz
-cd ../
 ```
 
 ### Downloading benchmark method predictions
 
 ```bash
 # fetch, extract, and clean up benchmark method predictions to reproduce paper results (~19 GB) #
 # DiffDock predictions and results
-cd forks/DiffDock/
 wget https://zenodo.org/records/11477766/files/diffdock_benchmark_method_predictions.tar.gz
 tar -xzf diffdock_benchmark_method_predictions.tar.gz
 rm diffdock_benchmark_method_predictions.tar.gz
 # FABind predictions and results
-cd forks/FABind/
 wget https://zenodo.org/records/11477766/files/fabind_benchmark_method_predictions.tar.gz
 tar -xzf fabind_benchmark_method_predictions.tar.gz
 rm fabind_benchmark_method_predictions.tar.gz
 # DynamicBind predictions and results
-cd forks/DynamicBind/
 wget https://zenodo.org/records/11477766/files/dynamicbind_benchmark_method_predictions.tar.gz
 tar -xzf dynamicbind_benchmark_method_predictions.tar.gz
 rm dynamicbind_benchmark_method_predictions.tar.gz
 # NeuralPLexer predictions and results
-cd forks/NeuralPLexer/
 wget https://zenodo.org/records/11477766/files/neuralplexer_benchmark_method_predictions.tar.gz
 tar -xzf neuralplexer_benchmark_method_predictions.tar.gz
 rm neuralplexer_benchmark_method_predictions.tar.gz
 # RoseTTAFold-All-Atom predictions and results
-cd forks/RoseTTAFold-All-Atom/
 wget https://zenodo.org/records/11477766/files/rfaa_benchmark_method_predictions.tar.gz
 tar -xzf rfaa_benchmark_method_predictions.tar.gz
 rm rfaa_benchmark_method_predictions.tar.gz
 # TULIP predictions and results
-cd forks/TULIP/
 wget https://zenodo.org/records/11477766/files/tulip_benchmark_method_predictions.tar.gz
 tar -xzf tulip_benchmark_method_predictions.tar.gz
 rm tulip_benchmark_method_predictions.tar.gz
 # AutoDock Vina predictions and results
-cd forks/Vina/
 wget https://zenodo.org/records/11477766/files/vina_benchmark_method_predictions.tar.gz
 tar -xzf vina_benchmark_method_predictions.tar.gz
 rm vina_benchmark_method_predictions.tar.gz
 # Astex Diverse, PoseBusters Benchmark (w/ pocket-only results), DockGen, and CASP15 consensus ensemble predictions and results
-cd data/test_cases/
 wget https://zenodo.org/records/11477766/files/astex_diverse_ensemble_benchmark_method_predictions.tar.gz
 wget https://zenodo.org/records/11477766/files/posebusters_benchmark_ensemble_benchmark_method_predictions.tar.gz
 wget https://zenodo.org/records/11477766/files/dockgen_ensemble_benchmark_method_predictions.tar.gz
@@ -892,7 +886,8 @@ If you use the code or benchmark method predictions associated with this reposit
   title={Deep Learning for Protein-Ligand Docking: Are We There Yet?},
   author={Morehead, Alex and Giri, Nabin and Liu, Jian and Cheng, Jianlin},
   booktitle={ICML AI4Science Workshop},
-  year={2024}
+  year={2024},
+  note={selected as a spotlight presentation},
 }
 ```
 
 
@@ -2,5 +2,6 @@ @inproceedings{morehead2024posebench
   title={Deep Learning for Protein-Ligand Docking: Are We There Yet?},
   author={Morehead, Alex and Giri, Nabin and Liu, Jian and Cheng, Jianlin},
   booktitle={ICML AI4Science Workshop},
-  year={2024}
+  year={2024},
+  note={selected as a spotlight presentation},
 }
@@ -1,6 +1,6 @@
 full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. with only the most important metrics)
 method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`)
-vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`)
+vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`)
 dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
 ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
 input_csv_path: ${resolve_method_input_csv_path:${method},${dataset}} # the input CSV filepath with which to run inference
 
@@ -14,3 +14,4 @@ actual_steps: 19 # the actual number of inference steps to run (i.e., after how
 no_final_step_noise: true # whether to disable the final inference step's noise from being added
 repeat_index: 1 # the repeat index to use for inference
 skip_existing: true # whether to skip inference for existing output directories
+max_num_inputs: null # if provided, the maximum number of inputs (i.e., a subset of the dataset) over which to run inference
@@ -13,3 +13,4 @@ num_workers: 1 # the number of workers to use for native relaxation during infer
 skip_existing: true # whether to skip existing predictions
 repeat_index: 1 # the repeat index to use for inference
 pocket_only_baseline: false # whether to run the pocket-only baseline
+max_num_inputs: null # if provided, the maximum number of inputs (i.e., a subset of the dataset) over which to run inference
@@ -11,3 +11,4 @@ ckpt_path: ${oc.env:PROJECT_ROOT}/forks/FABind/ckpt/best_model.bin # the checkpo
 output_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_output_${repeat_index} # the output directory to which to save the inference results
 repeat_index: 1 # the repeat index to use for inference
 pocket_only_baseline: false # whether to run the pocket-only baseline
+max_num_inputs: null # if provided, the maximum number of inputs (i.e., a subset of the dataset) over which to run inference
@@ -1,5 +1,5 @@
 method: diffdock # the method for which to relax predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `tulip`)
-vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`)
+vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`)
 dataset: posebusters_benchmark # the dataset for which to relax predictions - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
 ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
 num_processes: 1 # the number of parallel processes to use for relaxation
 
@@ -26,3 +26,4 @@ rank_outputs_by_confidence: true # whether to rank the output conformations, by
 plddt_ranking_type: ligand # the type of plDDT ranking to apply to generated samples - NOTE: must be one of (`protein`, `ligand`, `protein_ligand`)
 csv_path: null # the CSV filepath from which to parse benchmarking input data
 repeat_index: 1 # the repeat index to use for inference
+max_num_inputs: null # if provided, the maximum number of inputs (i.e., a subset of the dataset) over which to run inference
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -2,5 +2,6 @@ @inproceedings{morehead2024posebench` |
| `2` | `2` | `title={Deep Learning for Protein-Ligand Docking: Are We There Yet?},` |
| `3` | `3` | `author={Morehead, Alex and Giri, Nabin and Liu, Jian and Cheng, Jianlin},` |
| `4` | `4` | `booktitle={ICML AI4Science Workshop},` |
| `5` |  | `- year={2024}` |
|  | `5` | `+ year={2024},` |
|  | `6` | `+ note={selected as a spotlight presentation},` |
| `6` | `7` | `}` |