Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions cwl_adapters/clean_smina_pdb.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Clean smina pdb file (clean the pdb and rename the resname to LIG)

doc: |
  Clean smina pdb file (clean the pdb and rename the resname to LIG)

# Runs the script that the Docker image places at the filesystem root.
baseCommand: ["python", "/clean_smina_pdb.py"]

hints:
  DockerRequirement:
    dockerPull: cyangnyu/clean_smina_pdb

requirements:
  InlineJavascriptRequirement: {}

inputs:
  # PDB file to be cleaned; passed to the script as --input_pdb.
  input_pdb:
    label: Input pdb file
    type: File
    format:
      - edam:format_1476
    inputBinding:
      prefix: --input_pdb

  # Name of the file to write; passed to the script as --output_pdb.
  # The default keeps the output glob below resolvable when the caller does
  # not name the file (an optional input without a default evaluates to null).
  # NOTE(review): the default file name is arbitrary - confirm it suits the
  # script and downstream steps.
  output_pdb:
    label: Output pdb file
    type: string?
    format:
      - edam:format_1476
    inputBinding:
      prefix: --output_pdb
    default: "output.pdb"

outputs:
  # The file collected is whatever name was given in the output_pdb input.
  output_pdb:
    type: File
    format: edam:format_1476
    outputBinding:
      glob: $(inputs.output_pdb)

$namespaces:
  edam: https://edamontology.org/

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
56 changes: 56 additions & 0 deletions cwl_adapters/onionnet-feature.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env cwl-runner

cwlVersion: v1.0
class: CommandLineTool

label: OnionNet (version1) for feature generation of docking poses

# Runs the feature generation script inside the onionnet image.
baseCommand: ["python", "/onionnet/generate_features.py"]

hints:
  DockerRequirement:
    dockerPull: cyangnyu/onionnet

requirements:
  InlineJavascriptRequirement: {}

inputs:
  # Passed to the script as -inp.
  complex_path_file:
    label: path file of protein-ligand complexes (structures in pdb format)
    type: File?
    format:
      - edam:format_1476
    inputBinding:
      prefix: -inp

  # Passed to the script as -nt.
  # No `format` here: in CWL, `format` is only valid on File-typed parameters,
  # and this one is an integer.
  num_of_cpus:
    label: number of CPUs to use.
    type: int?
    inputBinding:
      prefix: -nt
    default: 1

  # Name of the file to write; passed to the script as -out.
  # NOTE(review): `format` on this string-typed file name is kept as found;
  # presumably workflow tooling relies on it - confirm.
  output_feature_file:
    label: the output file name containing the features.
    type: string?
    format:
      - edam:format_3752
    inputBinding:
      prefix: -out
    default: "output.csv"

outputs:
  # The file collected is whatever name was given in output_feature_file.
  output_feature_file:
    type: File
    format: edam:format_3752
    outputBinding:
      glob: $(inputs.output_feature_file)

$namespaces:
  edam: https://edamontology.org/
  cwltool: http://commonwl.org/cwltool#

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
90 changes: 90 additions & 0 deletions cwl_adapters/onionnet-score.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env cwl-runner

cwlVersion: v1.0
class: CommandLineTool

label: OnionNet (version1) for rescoring of docking poses

# Runs the prediction script inside the onionnet image.
baseCommand: ["python", "/onionnet/predict.py"]

hints:
  DockerRequirement:
    dockerPull: cyangnyu/onionnet

requirements:
  InlineJavascriptRequirement: {}

inputs:
  # Passed to the script as -fn.
  input_feature_file:
    label: feature csv file for protein-ligand complexes
    type: File?
    format:
      - edam:format_3752
    inputBinding:
      prefix: -fn

  # Path string (not a staged File); passed to the script as -scaler.
  scaler:
    label: the standard scaler file.
    type: string?
    format:
      - edam:format_2330
    inputBinding:
      prefix: -scaler
    default: "/onionnet/models/StandardScaler.model"

  # Path string (not a staged File); passed to the script as -weights.
  weights:
    label: the trained DNN model file.
    type: string?
    format:
      - edam:format_2330
    inputBinding:
      prefix: -weights
    default: "/onionnet/models/CNN_final_model_weights.h5"

  # Name of the file to write; passed to the script as -out.
  output_score_file:
    label: the predicted pKa values file
    type: string?
    format:
      - edam:format_3752
    inputBinding:
      prefix: -out
    default: "predicted_pKa.csv"

  # Has no inputBinding, so it never reaches the command line.
  # NOTE(review): presumably a placeholder so workflow tooling can name the
  # onionnet_score output - confirm.
  onionnet_score:
    type: string?

outputs:
  # The csv file written by the script.
  output_score_file:
    type: File
    outputBinding:
      glob: $(inputs.output_score_file)
    format: edam:format_3752

  # The first predicted pKa in the csv file, converted to a free energy.
  onionnet_score:
    label: Estimated Free Energy of Binding (onionnet score)
    doc: |-
      Estimated Free Energy of Binding
    type: float
    outputBinding:
      glob: $(inputs.output_score_file)
      loadContents: true
      # Written with `var` only: CWL expressions are specified as ECMAScript 5.1.
      outputEval: |
        ${
          // The file should be of the form
          // ,pKa_predicted
          // /var/lib/cwl/stg19c300d1-f7fd-4a38-80d2-0f5615e3eb8f/complex_pdbs.pdb,7.441
          var lines = self[0].contents.split("\n");
          if (lines.length < 2 || lines[1].trim() === "") {
            throw new Error("onionnet_score: no data row found after the header line");
          }
          // The pKa is the last column. Picking the first numeric-looking
          // field instead would misread a path that starts with a digit.
          var fields = lines[1].split(",");
          var pKa = parseFloat(fields[fields.length - 1]);
          if (isNaN(pKa)) {
            throw new Error("onionnet_score: cannot parse a pKa from line: " + lines[1]);
          }
          // refactor converts pKa to binding free energy, based on deltaG = -RT*lnK.
          // NOTE(review): -0.73349 is approximately -1/(RT ln 10) in mol/kcal
          // near 298 K - confirm the intended temperature and units.
          var refactor = -0.73349;
          return pKa / refactor;
        }

$namespaces:
  edam: https://edamontology.org/
  cwltool: http://commonwl.org/cwltool#

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
17 changes: 12 additions & 5 deletions cwl_adapters/smina_docking.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 1
prefix: -r

ligand_file:
Expand All @@ -50,7 +49,6 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 2
prefix: -l

ligand_box:
Expand All @@ -67,14 +65,24 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 3
prefix: --autobox_ligand

local_only:
label: try local minimization only rather than docking
type: boolean?
inputBinding:
prefix: --local_only

score_only:
label: Do not do any conformational search; simply rescore.
type: boolean?
inputBinding:
prefix: --score_only

scoring:
label: scoring function option, default is vina, options can be (vina, vinardo, or a customized scoring function)
type: string?
inputBinding:
position: 4
prefix: --scoring
default: "vina"

Expand All @@ -83,7 +91,6 @@ inputs:
type: string?
format: edam:format_1476
inputBinding:
position: 5
prefix: -o
default: "docked.pdb"

Expand Down
100 changes: 100 additions & 0 deletions examples/rescoring/docking_rescoring_onionnet_workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
## Protein-ligand docking and rescoring of the docking poses with OnionNet
##
## input: pdb structures from PDBbind refined dataset
## output:
## 1. docking poses
## 2. scoring file (OnionNet predicted pKa); the derived onionnet_score is
##    plotted against the experimental dG
##
## Quoted values starting with '&' define a named output of a step and values
## starting with '*' refer back to one; they are quoted so that YAML does not
## treat them as its own anchors and aliases.

steps:
  - extract_pdbbind_refined:
      in:
        # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
        # "The query() method uses a slightly modified Python syntax by default.
        # For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or.
        # This is syntactically valid Python, however the semantics are different."
        query: '(Kd_Ki == "Kd") and (value < 0.000002)'
        # to obtain a broader range of experimental dGs
        max_row: 1
        convert_Kd_dG: 'True'
        output_txt_path: '&binding_data.txt'
        output_pdb_paths: '&pdbbind_pdbs'
        output_sdf_paths: '&pdbbind_sdfs'
        experimental_dGs: '&exp_dGs'

  - fix_side_chain:
      scatter: [input_pdb_path]
      in:
        input_pdb_path: '*pdbbind_pdbs'
        output_pdb_path: '&pdbbind_pdbs.pdb'

  - minimize_ligand_only.yml:
      scatter: [sdf_path]
      in:
        sdf_path: '*pdbbind_sdfs'

  # local_only requests local minimization rather than a full docking search.
  - smina_docking:
      scatter: [receptor_file, ligand_file, ligand_box]
      scatterMethod: dotproduct
      in:
        receptor_file: '*pdbbind_pdbs.pdb'
        ligand_file: '*ligand_min.mol2'
        ligand_box: '*ligand_min.mol2'
        scoring: 'vina'
        local_only: true
        output_dock_file: '&ligand_opt.pdb'
        output_path: output

  - clean_smina_pdb:
      scatter: [input_pdb]
      in:
        input_pdb: '*ligand_opt.pdb'
        output_pdb: '&ligand_opt_clean.pdb'

  # Pairs each receptor with its cleaned ligand pose to build the complex.
  - cat_pdb:
      scatter: [input_structure1, input_structure2]
      scatterMethod: dotproduct
      in:
        input_structure1: '*pdbbind_pdbs.pdb'
        input_structure2: '*ligand_opt_clean.pdb'
        output_structure_path: '&complex_pdbs.pdb'

  - onionnet-feature:
      scatter: [complex_path_file]
      in:
        complex_path_file: '*complex_pdbs.pdb'
        output_feature_file: '&output_features.csv'

  - onionnet-score:
      scatter: [input_feature_file]
      in:
        input_feature_file: '*output_features.csv'
        output_score_file: '&predicted_pKa.csv'
        onionnet_score: '&onionnet_score'

  # Experimental dG on the x axis against the OnionNet score on the y axis.
  - scatter_plot:
      in:
        xs: '*exp_dGs'
        ys: '*onionnet_score'

wic:
  graphviz:
    label: Protein-ligand docking (Smina) and docking poses re-ranking (OnionNet)
  steps:
    (1, extract_pdbbind_refined):
      wic:
        graphviz:
          label: extract protein-ligand structure (protein.pdb and ligand.sdf) from pdbbind_refined dataset
    (2, fix_side_chain):
      wic:
        graphviz:
          label: fix_side_chain of protein structure.
    (3, minimize_ligand_only.yml):
      wic:
        inlineable: false
        graphviz:
          label: minimize (obminimize) ligand structure.
    (4, smina_docking):
      wic:
        graphviz:
          label: Smina docking (flexible ligand - rigid protein docking)
6 changes: 6 additions & 0 deletions examples/scripts/Dockerfile_clean_smina_pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Image that carries clean_smina_pdb.py at the filesystem root.
FROM python

# Install wget and remove the apt caches in the same RUN instruction.
# A cleanup in a later RUN only adds a layer; it cannot shrink the layer that
# already stored the package lists and downloaded archives.
RUN apt-get update \
    && apt-get install -y wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY clean_smina_pdb.py /
40 changes: 40 additions & 0 deletions examples/scripts/Dockerfile_onionnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
FROM condaforge/mambaforge
# NOT mambaforge-pypy3 (pandas & rdkit & mdtraj are incompatible with pypy)

# Install requirements.
# The apt caches are removed in the same RUN instruction: a cleanup in a later
# layer cannot shrink the layer that already stored them.
RUN apt-get update \
    && apt-get install -y wget git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create environment
# Python 3.10 is already installed in the base image condaforge/mambaforge, so
# if the python version is not specified, it conflicts with openbabel <3.0:
#0 23.40 Pinned packages:
#0 23.40 - python 3.10.*
#0 23.40 The following packages are incompatible
#0 23.40 └─ openbabel <3.0 is installable with the potential options
#0 23.40 ├─ openbabel 2.4.1 would require
#0 23.40 │ └─ python >=2.7,<2.8.0a0 , which can be installed;
#0 23.40 ├─ openbabel 2.4.1 would require
#0 23.40 │ └─ python >=3.6,<3.7.0a0 , which can be installed;
#0 23.40 └─ openbabel 2.4.1 would require
#0 23.40 └─ python >=3.7,<3.8.0a0 , which can be installed.
# So, explicitly downgrade to python=3.7.*
# The package cache is cleaned in the same layer as the install.
RUN mamba install -c conda-forge "python=3.7.*" "openbabel<3.0" numpy pandas mdtraj biopandas tensorflow -y \
    && mamba clean --all --yes
# /opt/conda/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15:
# FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23.
# --no-cache-dir keeps the pip download cache out of the layer, so no separate
# cache purge step is needed.
RUN pip install --no-cache-dir -U "scikit-learn<0.23" rdkit-pypi

# Install onionnet
RUN git clone https://github.com/cyangNYU/onionnet.git
WORKDIR /onionnet

# Download models
## The default model of onionnet-v1 in the github repo is not correct; the actual size is around 600 MB.
## The authors provided a google drive link to download it,
## but their command wget "https://drive.google.com/uc?export=download&id=1cwJN44TgaVBWYEEb_SGU5JBJp6WbFdM1" -O "CNN_final_model_weights.h5" is not working.
# The repo copy is deleted first, so wget saves the download under the original name.
RUN cd models \
    && rm -f CNN_final_model_weights.h5 \
    && wget https://huggingface.co/cyangNYU/onionnet-v1/resolve/main/CNN_final_model_weights.h5
# COPY is preferred over ADD for a plain local file copy.
COPY Dockerfile_onionnet .
Loading