Skip to content

Commit 4ac71fd

Browse files
authored
Merge pull request #7 from BioinfoMachineLearning/af
v0.5.0 - Add results with AlphaFold 3 predicted structures and for Chai-1
2 parents 7b98b21 + e8db61e commit 4ac71fd

File tree

311 files changed

+42472
-3096
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

311 files changed

+42472
-3096
lines changed

.github/workflows/code-quality-main.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ jobs:
2121
python-version: "3.10"
2222

2323
- name: Run pre-commits
24-
uses: pre-commit/action@v2.0.3
24+
uses: pre-commit/action@v3.0.1

.github/workflows/code-quality-pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,6 @@ jobs:
3333
run: echo '${{ steps.file_changes.outputs.files}}'
3434

3535
- name: Run pre-commits
36-
uses: pre-commit/action@v2.0.3
36+
uses: pre-commit/action@v3.0.1
3737
with:
3838
extra_args: --files ${{ steps.file_changes.outputs.files}}

.github/workflows/release-drafter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,6 @@ jobs:
2222

2323
steps:
2424
# Drafts your next Release notes as Pull Requests are merged into "master"
25-
- uses: release-drafter/release-drafter@v5
25+
- uses: release-drafter/release-drafter@v6
2626
env:
2727
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ configs/local/default.yaml
163163

164164
# Forks
165165
/workdir/
166+
/forks/chai-lab/chai-lab/
167+
/forks/chai-lab/prediction_inputs/
168+
/forks/chai-lab/prediction_outputs/
166169
/forks/DiffDock1.0/
167170
/forks/DiffDock/DiffDock/
168171
/forks/DynamicBind/*.npy
@@ -189,4 +192,4 @@ configs/local/default.yaml
189192
/forks/RoseTTAFold-All-Atom/psipred/
190193
/forks/TULIP/outputs/
191194
/forks/Vina/ADFR/
192-
scripts/inference/
195+
scripts/*inference*/

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
### 0.5.0 - 09/30/2024
2+
3+
- Added results with AlphaFold 3 predicted structures (now the default)
4+
- Added results for the new Chai-1 model from Chai Discovery
5+
- Added a new inference sweep pipeline for HPC clusters to allow users to quickly run an exhaustive sweep of all baseline methods, datasets, and tasks (e.g., using generated batch scripts and a SLURM scheduler)
6+
- Updated Zenodo links to point to the latest version of the project's Zenodo record, which now includes the above-mentioned AlphaFold 3 predicted structures and baseline method results using them
7+
- Updated documentation project-wide according to the additions listed above
8+
- Fixed some CI testing issues
9+
110
### 0.4.0 - 08/12/2024
211

312
- Renamed `src` root directory to `posebench` to support `pip` packaging

README.md

Lines changed: 205 additions & 97 deletions
Large diffs are not rendered by default.
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`)
2-
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`)
1+
method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`)
2+
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`)
33
dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
44
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
55
input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse
6-
output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index}} # the output directory to which to save the relaxed predictions
6+
output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the output directory to which to save the relaxed predictions
77
rank_to_align: 1 # the pose rank to align
8-
aligned_filename_postfix: "_aligned" # the postfix to append to each aligned complex filename
8+
aligned_filename_suffix: "_aligned" # the suffix to append to each aligned complex filename
99
force_process: false # whether to force processing of all complexes, even if they have already been processed
1010
repeat_index: 1 # the repeat index which was used for inference
1111
pocket_only_baseline: false # whether to prepare the pocket-only baseline
12+
v1_baseline: false # whether to prepare the v1 baseline
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. with only the most important metrics)
2-
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`)
3-
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`)
2+
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`)
3+
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `p2rank`)
44
dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
55
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
6-
input_csv_path: ${resolve_method_input_csv_path:${method},${dataset}} # the input CSV filepath with which to run inference
6+
input_csv_path: ${resolve_method_input_csv_path:${method},${dataset},${pocket_only_baseline}} # the input CSV filepath with which to run inference
77
input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse
88
posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts
9-
dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test_rmsd_filtered.txt # the path to the (ESMFold RMSD-filtered) DockGen test set IDs file
10-
output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index}} # the output directory to which to save the relaxed predictions
9+
dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test_rmsd_filtered.txt # the path to the (predicted RMSD-filtered) DockGen test set IDs file
10+
output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the output directory to which to save the relaxed predictions
1111
repeat_index: 1 # the repeat index which was used for inference
1212
pocket_only_baseline: false # whether to analyze the pocket-only baseline
13+
v1_baseline: false # whether to analyze the v1 baseline
1314
relax_protein: false # whether to relax the protein - NOTE: protein relaxation currently yields unpredictable protein-ligand separation in some cases
Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. with only the most important metrics)
22
python_exec_path: ${oc.env:HOME}/mambaforge/envs/casp15_ligand_scoring/bin/python3 # the Python executable to use
33
scoring_script_path: ${oc.env:PROJECT_ROOT}/posebench/analysis/casp15_ligand_scoring/score_predictions.py # the path to the script to use for scoring CASP predictions
4-
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`, `tulip`)
5-
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`)
4+
method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`, `tulip`)
5+
vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`)
66
dataset: casp15 # the dataset to use - NOTE: must be `casp15`
77
ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`)
88
predictions_dir: ${oc.env:PROJECT_ROOT}/data/test_cases/${dataset}/top_${method}_ensemble_predictions_${repeat_index} # the directory containing the predictions to analyze
@@ -12,4 +12,6 @@ fault_tolerant: true # whether to continue processing targets if an error occurs
1212
skip_existing: true # whether to skip processing targets for which output already exists
1313
score_relaxed_structures: true # whether to score relaxed structures in addition to the original (unrelaxed) structures
1414
repeat_index: 1 # the run index to use for scoring predictions
15-
no_pretraining: false # whether to score a model without pretraining
15+
no_ilcl: false # whether to score a model trained without an inter-ligand clash loss (ILCL) - NOTE: only applicable to the `neuralplexer` method
16+
relax_protein: false # whether to relax the protein - NOTE: protein relaxation currently yields unpredictable protein-ligand separation in some cases
17+
v1_baseline: false # whether to score the v1 baseline predictions
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`)
2+
input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse
3+
output_scripts_path: ${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_inputs/${dataset} # the directory in which to save the generated prediction input files
4+
posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts
5+
dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file
6+
protein_filepath: null # the path to the protein structure file to use
7+
ligand_smiles: null # the ligand SMILES string for which to predict the binding pose
8+
input_id: null # the input ID to use for inference
9+
pocket_only_baseline: false # whether to prepare the pocket-only baseline

0 commit comments

Comments
 (0)