Skip to content

Commit 8121755

Browse files
committed
Add full dataset interaction notebook materials
1 parent fabac54 commit 8121755

File tree

4 files changed

+74
-6
lines changed

4 files changed

+74
-6
lines changed
-601 KB
Binary file not shown.
636 KB
Loading

notebooks/astex_posebusters_casp15_interaction_analysis_plotting.ipynb renamed to notebooks/dataset_interaction_analysis_plotting.ipynb

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"## Dataset Analysis Plotting"
7+
"## Dataset Interaction Analysis Plotting"
88
]
99
},
1010
{
@@ -67,19 +67,23 @@
6767
"source": [
6868
"ad_set_dir = os.path.join(\"..\", \"data\", \"astex_diverse_set\")\n",
6969
"pb_set_dir = os.path.join(\"..\", \"data\", \"posebusters_benchmark_set\")\n",
70+
"dg_set_dir = os.path.join(\"..\", \"data\", \"dockgen_set\")\n",
7071
"casp15_set_dir = os.path.join(\n",
7172
" \"..\",\n",
7273
" \"data\",\n",
73-
" \"casp15_set\",\n",
74+
" \"casp15_set_public\",\n",
7475
" \"targets\",\n",
75-
")\n",
76+
") # NOTE: change to `casp15_set` directory as needed\n",
7677
"assert os.path.exists(\n",
7778
" ad_set_dir\n",
7879
"), \"Please download the Astex Diverse set from `https://zenodo.org/records/11199233` before proceeding.\"\n",
7980
"assert os.path.exists(\n",
8081
" pb_set_dir\n",
8182
"), \"Please download the PoseBusters Benchmark set from `https://zenodo.org/records/11199233` before proceeding.\"\n",
8283
"assert os.path.exists(\n",
84+
" dg_set_dir\n",
85+
"), \"Please download the DockGen set from `https://zenodo.org/records/11199233` before proceeding.\"\n",
86+
"assert os.path.exists(\n",
8387
" casp15_set_dir\n",
8488
"), \"Please download the (public) CASP15 set from `https://zenodo.org/records/11199233` before proceeding.\"\n",
8589
"\n",
@@ -248,6 +252,68 @@
248252
" store.put(f\"df_{i}\", df)"
249253
]
250254
},
255+
{
256+
"cell_type": "markdown",
257+
"metadata": {},
258+
"source": [
259+
"##### Analyze `DockGen` set interactions"
260+
]
261+
},
262+
{
263+
"cell_type": "code",
264+
"execution_count": null,
265+
"metadata": {},
266+
"outputs": [],
267+
"source": [
268+
"if not os.path.exists(\"dockgen_interaction_dataframes.h5\"):\n",
269+
" dockgen_test_ids_filepath = os.path.join(\n",
270+
" \"..\", \"data\", \"dockgen_set\", \"split_test.txt\"\n",
271+
" ) # NOTE: change as needed\n",
272+
" assert os.path.exists(\n",
273+
" dockgen_test_ids_filepath\n",
274+
" ), f\"Invalid test IDs filepath for DockGen: {os.path.exists(dockgen_test_ids_filepath)}.\"\n",
275+
" with open(dockgen_test_ids_filepath) as f:\n",
276+
" pdb_ids = {line.replace(\" \", \"-\") for line in f.read().splitlines()}\n",
277+
" dg_protein_ligand_filepath_pairs = []\n",
278+
" for item in os.listdir(dg_set_dir):\n",
279+
" if item not in pdb_ids:\n",
280+
" continue\n",
281+
" item_path = os.path.join(dg_set_dir, item)\n",
282+
" if os.path.isdir(item_path):\n",
283+
" protein_filepath = os.path.join(item_path, f\"{item}_protein_processed.pdb\")\n",
284+
" ligand_filepath = os.path.join(item_path, f\"{item}_ligand.pdb\")\n",
285+
" if os.path.exists(protein_filepath) and os.path.exists(ligand_filepath):\n",
286+
" dg_protein_ligand_filepath_pairs.append((protein_filepath, ligand_filepath))\n",
287+
"\n",
288+
" pc = (\n",
289+
" PoseCheck()\n",
290+
" ) # NOTE: despite what `PoseCheck` might say, `reduce` should be available in the `PoseBench` environment\n",
291+
" dg_protein_ligand_interaction_dfs = []\n",
292+
" for protein_filepath, ligand_filepath in tqdm(\n",
293+
" dg_protein_ligand_filepath_pairs, desc=\"Processing DockGen set\"\n",
294+
" ):\n",
295+
" try:\n",
296+
" temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n",
297+
" protein_filepath, molecule_type=\"protein\"\n",
298+
" )\n",
299+
" ligand_mol = Chem.MolFromPDBFile(ligand_filepath)\n",
300+
" if ligand_mol is None:\n",
301+
" ligand_mol = Chem.MolFromPDFile(ligand_filepath, sanitize=False)\n",
302+
" pc.load_protein_from_pdb(temp_protein_filepath)\n",
303+
" pc.load_ligands_from_mols([ligand_mol])\n",
304+
" dg_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
305+
" except Exception as e:\n",
306+
" print(\n",
307+
" f\"Error processing Dockgen filepath pari {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
308+
" )\n",
309+
" continue\n",
310+
"\n",
311+
" # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n",
312+
" with pd.HDFStore(\"dockgen_interaction_dataframes.h5\") as store:\n",
313+
" for i, df in enumerate(dg_protein_ligand_interaction_dfs):\n",
314+
" store.put(f\"df_{i}\", df)"
315+
]
316+
},
251317
{
252318
"cell_type": "markdown",
253319
"metadata": {},
@@ -293,8 +359,7 @@
293359
" pc.load_ligands_from_mols(\n",
294360
" Chem.GetMolFrags(ligand_mol, asMols=True, sanitizeFrags=False)\n",
295361
" )\n",
296-
" interactions = pc.calculate_interactions()\n",
297-
" casp15_protein_ligand_interaction_dfs.append(interactions)\n",
362+
" casp15_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
298363
" except Exception as e:\n",
299364
" print(\n",
300365
" f\"Error processing CASP15 target {protein_ligand_complex_filepath} due to: {e}. Skipping...\"\n",
@@ -366,6 +431,9 @@
366431
" process_dataset(\"posebusters_benchmark_interaction_dataframes.h5\", \"PoseBusters Benchmark\")\n",
367432
" )\n",
368433
"\n",
434+
"if os.path.exists(\"dockgen_interaction_dataframes.h5\"):\n",
435+
" dfs.append(process_dataset(\"dockgen_interaction_dataframes.h5\", \"DockGen\"))\n",
436+
"\n",
369437
"if os.path.exists(\"casp15_interaction_dataframes.h5\"):\n",
370438
" dfs.append(process_dataset(\"casp15_interaction_dataframes.h5\", \"CASP15\"))\n",
371439
"\n",
@@ -416,7 +484,7 @@
416484
" ax.grid(True)\n",
417485
"\n",
418486
"plt.tight_layout()\n",
419-
"plt.savefig(\"astex_posebusters_casp15_interaction_analysis.png\", dpi=300)\n",
487+
"plt.savefig(\"dataset_interaction_analysis.png\", dpi=300)\n",
420488
"plt.show()"
421489
]
422490
}
1.92 MB
Binary file not shown.

0 commit comments

Comments
 (0)