diff --git a/cwl_adapters/clean_smina_pdb.cwl b/cwl_adapters/clean_smina_pdb.cwl new file mode 100644 index 0000000..29592a0 --- /dev/null +++ b/cwl_adapters/clean_smina_pdb.cwl @@ -0,0 +1,48 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 + +class: CommandLineTool + +label: Clean smina pdb file (clean the pdb and rename the resname to LIG) + +doc: | + Clean smina pdb file (clean the pdb and rename the resname to LIG) + +baseCommand: ["python", "/clean_smina_pdb.py"] + +hints: + DockerRequirement: + dockerPull: cyangnyu/clean_smina_pdb + +requirements: + InlineJavascriptRequirement: {} + +inputs: + input_pdb: + label: Input pdb file + type: File + format: + - edam:format_1476 + inputBinding: + prefix: --input_pdb + + output_pdb: + label: Output pdb file + type: string? + format: + - edam:format_1476 + inputBinding: + prefix: --output_pdb + +outputs: + output_pdb: + type: File + format: edam:format_1476 + outputBinding: + glob: $(inputs.output_pdb) + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl \ No newline at end of file diff --git a/cwl_adapters/onionnet-feature.cwl b/cwl_adapters/onionnet-feature.cwl new file mode 100644 index 0000000..145ff2a --- /dev/null +++ b/cwl_adapters/onionnet-feature.cwl @@ -0,0 +1,56 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool + +label: OnionNet (version1) for feature generation of docking poses + +baseCommand: ["python", "/onionnet/generate_features.py"] + +hints: + DockerRequirement: + dockerPull: cyangnyu/onionnet + +requirements: + InlineJavascriptRequirement: {} + +inputs: + complex_path_file: + label: path file of protein-ligand complexes (structures in pdb format) + type: File? + format: + - edam:format_1476 + inputBinding: + prefix: -inp + + num_of_cpus: + label: number of CPUs to use. + type: int? 
+ format: + - edam:format_2330 + inputBinding: + prefix: -nt + default: 1 + + output_feature_file: + label: the output file name containing the features. + type: string? + format: + - edam:format_3752 + inputBinding: + prefix: -out + default: "output.csv" + +outputs: + output_feature_file: + type: File + format: edam:format_3752 + outputBinding: + glob: $(inputs.output_feature_file) + +$namespaces: + edam: https://edamontology.org/ + cwltool: http://commonwl.org/cwltool# + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl diff --git a/cwl_adapters/onionnet-score.cwl b/cwl_adapters/onionnet-score.cwl new file mode 100644 index 0000000..b2b3086 --- /dev/null +++ b/cwl_adapters/onionnet-score.cwl @@ -0,0 +1,90 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool + +label: OnionNet (version1) for rescoring of docking poses + +baseCommand: ["python", "/onionnet/predict.py"] + +hints: + DockerRequirement: + dockerPull: cyangnyu/onionnet + +requirements: + InlineJavascriptRequirement: {} + +inputs: + input_feature_file: + label: feature csv file for protein-ligand complexes + type: File? + format: + - edam:format_3752 + inputBinding: + prefix: -fn + + scaler: + label: the standard scaler file. + type: string? + format: + - edam:format_2330 + inputBinding: + prefix: -scaler + default: "/onionnet/models/StandardScaler.model" + + weights: + label: the trained DNN model file. + type: string? + format: + - edam:format_2330 + inputBinding: + prefix: -weights + default: "/onionnet/models/CNN_final_model_weights.h5" + + output_score_file: + label: the predicted pKa values file + type: string? + format: + - edam:format_3752 + inputBinding: + prefix: -out + default: "predicted_pKa.csv" + + onionnet_score: + type: string? 
+ +outputs: + output_score_file: + type: File + outputBinding: + glob: $(inputs.output_score_file) + format: edam:format_3752 + + onionnet_score: + label: Estimated Free Energy of Binding (onionnet score) + doc: |- + Estimated Free Energy of Binding + type: float + outputBinding: + glob: $(inputs.output_score_file) + loadContents: true + outputEval: | + ${ + const lines = self[0].contents.split("\n"); + // The correct line should be of the form + // ,pKa_predicted + // /var/lib/cwl/stg19c300d1-f7fd-4a38-80d2-0f5615e3eb8f/complex_pdbs.pdb,7.441 + const bfe_line = lines[1]; + // refactor can be used to convert pKa to binding free energy, based on deltaG = -RT*lnK + const refactor = -0.73349; + const docking_score_string = bfe_line.split(",").filter(function(s) {return !isNaN(parseFloat(s))})[0]; + const onionnet_score = parseFloat(docking_score_string)/refactor; + return onionnet_score + } + +$namespaces: + edam: https://edamontology.org/ + cwltool: http://commonwl.org/cwltool# + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl diff --git a/cwl_adapters/smina_docking.cwl b/cwl_adapters/smina_docking.cwl index 12e28d2..005b242 100644 --- a/cwl_adapters/smina_docking.cwl +++ b/cwl_adapters/smina_docking.cwl @@ -33,7 +33,6 @@ inputs: - edam:format_3815 - edam:format_3816 inputBinding: - position: 1 prefix: -r ligand_file: @@ -50,7 +49,6 @@ inputs: - edam:format_3815 - edam:format_3816 inputBinding: - position: 2 prefix: -l ligand_box: @@ -67,14 +65,24 @@ inputs: - edam:format_3815 - edam:format_3816 inputBinding: - position: 3 prefix: --autobox_ligand + local_only: + label: try local minimization only rather than docking + type: boolean? + inputBinding: + prefix: --local_only + + score_only: + label: Do not do any conformational search; simply rescore. + type: boolean? 
+ inputBinding: + prefix: --score_only + scoring: label: scoring function option, default is vina, options can be (vina, vinardo, or a customized scoring function) type: string? inputBinding: - position: 4 prefix: --scoring default: "vina" @@ -83,7 +91,6 @@ inputs: type: string? format: edam:format_1476 inputBinding: - position: 5 prefix: -o default: "docked.pdb" diff --git a/examples/rescoring/docking_rescoring_onionnet_workflow.yml b/examples/rescoring/docking_rescoring_onionnet_workflow.yml new file mode 100644 index 0000000..568eb7f --- /dev/null +++ b/examples/rescoring/docking_rescoring_onionnet_workflow.yml @@ -0,0 +1,100 @@ +## Protein-ligand docking and docking poses re-ranking +## +## input: pdb structures from PDBbind refined dataset +## output: +## 1. docking poses +## 2. scoring file (vina score, sfct correction, combined_score for re-ranking docking poses) + +steps: +# +- extract_pdbbind_refined: + in: + # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html + # "The query() method uses a slightly modified Python syntax by default. + # For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or. + # This is syntactically valid Python, however the semantics are different." 
+ query: '(Kd_Ki == "Kd") and (value < 0.000002)' + # to obtain a broader experimental dGs + max_row: 1 + convert_Kd_dG: 'True' + output_txt_path: '&binding_data.txt' + output_pdb_paths: '&pdbbind_pdbs' + output_sdf_paths: '&pdbbind_sdfs' + experimental_dGs: '&exp_dGs' + +- fix_side_chain: + scatter: [input_pdb_path] + in: + input_pdb_path: '*pdbbind_pdbs' + output_pdb_path: '&pdbbind_pdbs.pdb' + +- minimize_ligand_only.yml: + scatter: [sdf_path] + in: + sdf_path: '*pdbbind_sdfs' + +- smina_docking: + scatter: [receptor_file, ligand_file, ligand_box] + scatterMethod: dotproduct + in: + receptor_file: '*pdbbind_pdbs.pdb' + ligand_file: '*ligand_min.mol2' + ligand_box: '*ligand_min.mol2' + scoring: 'vina' + local_only: True + output_dock_file: '&ligand_opt.pdb' + output_path: output + +- clean_smina_pdb: + scatter: [input_pdb] + in: + input_pdb: '*ligand_opt.pdb' + output_pdb: '&ligand_opt_clean.pdb' + +- cat_pdb: + scatter: [input_structure1, input_structure2] + scatterMethod: dotproduct + in: + input_structure1: '*pdbbind_pdbs.pdb' + input_structure2: '*ligand_opt_clean.pdb' + output_structure_path: '&complex_pdbs.pdb' + +- onionnet-feature: + scatter: [complex_path_file] + in: + complex_path_file: '*complex_pdbs.pdb' + output_feature_file: '&output_features.csv' + +- onionnet-score: + scatter: [input_feature_file] + in: + input_feature_file: '*output_features.csv' + output_score_file: '&predicted_pKa.csv' + onionnet_score: '&onionnet_score' + +- scatter_plot: + in: + xs: '*exp_dGs' + ys: '*onionnet_score' + +wic: + graphviz: + label: Protein-ligand docking (Smina) and docking poses re-ranking (OnionNet-sfct) + steps: + (1, extract_pdbbind_refined): + wic: + graphviz: + label: extract protein-ligand structure (protein.pdb and ligand.sdf) from pdbbind_refined dataset + (2, fix_side_chain): + wic: + graphviz: + label: fix_side_chain of protein structure. 
+ (3, minimize_ligand_only.yml): + wic: + inlineable: False + graphviz: + label: minimize (obminimize) ligand structure. + (4, smina_docking): + wic: + graphviz: + label: Smina docking (flexible ligand - rigid protein docking) \ No newline at end of file diff --git a/examples/scripts/Dockerfile_clean_smina_pdb b/examples/scripts/Dockerfile_clean_smina_pdb new file mode 100644 index 0000000..de81026 --- /dev/null +++ b/examples/scripts/Dockerfile_clean_smina_pdb @@ -0,0 +1,6 @@ +FROM python + +RUN apt-get update && apt-get install -y wget +RUN apt-get clean + +COPY clean_smina_pdb.py / diff --git a/examples/scripts/Dockerfile_onionnet b/examples/scripts/Dockerfile_onionnet new file mode 100644 index 0000000..2836c7d --- /dev/null +++ b/examples/scripts/Dockerfile_onionnet @@ -0,0 +1,40 @@ +FROM condaforge/mambaforge +# NOT mambaforge-pypy3 (pandas & rdkit & mdtraj are incompatible with pypy) + +# Install requirements +RUN apt-get update && apt-get install -y wget git + +# Create environment +# Since python 3.10 is already installed in the base image condaforge/mambaforge, +# if the python version is not specified explicitly, it conflicts with openbabel <3.0: +#0 23.40 Pinned packages: +#0 23.40 - python 3.10.* +#0 23.40 The following packages are incompatible +#0 23.40 └─ openbabel <3.0 is installable with the potential options +#0 23.40 ├─ openbabel 2.4.1 would require +#0 23.40 │ └─ python >=2.7,<2.8.0a0 , which can be installed; +#0 23.40 ├─ openbabel 2.4.1 would require +#0 23.40 │ └─ python >=3.6,<3.7.0a0 , which can be installed; +#0 23.40 └─ openbabel 2.4.1 would require +#0 23.40 └─ python >=3.7,<3.8.0a0 , which can be installed. 
+# So, explicitly downgrade to python=3.7.* +RUN mamba install -c conda-forge "python=3.7.*" "openbabel<3.0" numpy pandas mdtraj biopandas tensorflow -y +# /opt/conda/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15: +# FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. +RUN pip install -U "scikit-learn<0.23" rdkit-pypi + +# cleanup +RUN apt-get clean +RUN mamba clean --all --yes +RUN pip cache purge + +# Install onionnet +RUN git clone https://github.com/cyangNYU/onionnet.git +WORKDIR /onionnet + +# Download models +## The default model of onionnet-v1 in the github repo is not correct; the actual size is around 600 MB. +## The authors provided a google drive link to download it, +## but their command wget "https://drive.google.com/uc?export=download&id=1cwJN44TgaVBWYEEb_SGU5JBJp6WbFdM1" -O "CNN_final_model_weights.h5" is not working. +RUN cd models && rm CNN_final_model_weights.h5 && wget https://huggingface.co/cyangNYU/onionnet-v1/resolve/main/CNN_final_model_weights.h5 +ADD Dockerfile_onionnet . diff --git a/examples/scripts/Dockerfile_onionnet-sfct b/examples/scripts/Dockerfile_onionnet-sfct index 2cf192a..b0815ed 100644 --- a/examples/scripts/Dockerfile_onionnet-sfct +++ b/examples/scripts/Dockerfile_onionnet-sfct @@ -31,6 +31,7 @@ RUN git clone https://github.com/cyangNYU/OnionNet-SFCT.git WORKDIR /OnionNet-SFCT # Download models +## This model weights file was originally extracted from the docker image (https://hub.docker.com/r/hotwa/onionnet_sfct). RUN mkdir -p data && cd data && wget https://huggingface.co/cyangNYU/OnionNet-SFCT-final-model/resolve/main/sfct_std_final.model ADD Dockerfile_onionnet-sfct . 
"""Clean a smina pdb file and rename the ligand residue to LIG.

Repository path of this script: examples/scripts/clean_smina_pdb.py

Only coordinate records (ATOM, HETATM, TER) and connectivity records
(CONECT) are copied to the output; every other record is dropped.
"""
import argparse

# Records whose columns 18-20 (1-based) hold the residue name.
# https://www.biostat.jhsph.edu/~iruczins/teaching/260.655/links/pdbformat.pdf
_RESNAME_RECORDS = ('ATOM', 'HETATM', 'TER')
# Connectivity records are spelled CONECT in the PDB format.  Their columns
# 18-20 are part of an atom serial number, so they are copied verbatim.
_VERBATIM_RECORDS = ('CONECT',)
_KEPT_RECORDS = _RESNAME_RECORDS + _VERBATIM_RECORDS


def clean_smina_pdb(input_pdb: str, output_pdb: str) -> None:
    """Clean smina pdb file (clean the pdb and rename the resname to LIG)

    Args:
        input_pdb (str): input pdb file
        output_pdb (str): output pdb file
    """
    with open(input_pdb, mode='r', encoding='utf-8') as f1, \
            open(output_pdb, mode='w', encoding='utf-8') as f2:
        # Iterate the handle directly instead of loading every line at once.
        for line in f1:
            if not line.startswith(_KEPT_RECORDS):
                continue
            # The residue name is in columns 18-20; with 0-based indexing
            # that is line[17:20].  This script writes LIG there because
            # onionnet-v1 identifies the ligand by that residue name.
            # Lines too short to carry a residue name (e.g. a bare "TER")
            # are written unchanged.
            if line.startswith(_RESNAME_RECORDS) and len(line) >= 21:
                f2.write(line[:17] + 'LIG' + line[20:])
            else:
                f2.write(line)


def main() -> None:
    """Parse the command line and run the cleaning step."""
    # Parsing happens here rather than at import time, so importing this
    # module never consumes (or fails on) the host process's argv.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_pdb', type=str, required=True)
    parser.add_argument('--output_pdb', type=str, required=True)
    args = parser.parse_args()
    clean_smina_pdb(args.input_pdb, args.output_pdb)


if __name__ == '__main__':
    main()