Skip to content

Commit

Permalink
Merge pull request #66 from haddocking/ScanNet-implementation
Browse files Browse the repository at this point in the history
ScanNet implementation
  • Loading branch information
AldovdN authored Jul 27, 2022
2 parents 56635ea + f5fe3dd commit 0618184
Show file tree
Hide file tree
Showing 11 changed files with 1,898 additions and 8 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ Please also refer to the original publication:
| [ISPRED4](https://ispred4.biocomp.unibo.it/ispred/default/index) | 🟢 | ✔️ |
| [SPPIDER](https://sppider.cchmc.org) | 🟢 | ✔️ |
| [meta-PPISP](https://pipe.rcc.fsu.edu/meta-ppisp.html) | 🟢 | ✔️ |
| [PredUs2](http://honig.c2b2.columbia.edu/predus) | 🟠 | ✔️ |
| [PredUs2](http://honig.c2b2.columbia.edu/predus) | 🟠 | |
| [Cons-PPISP](https://pipe.rcc.fsu.edu/ppisp.html) | 🟢 | ✔️ |
| [PredictProtein](https://predictprotein.org) | 🟢 | ✔️ |
| [PSIVER](https://mizuguchilab.org/PSIVER/) | 🟢 | ✔️ |
| [CSM-Potential](http://biosig.unimelb.edu.au/csm_potential/) | 🟢 | ✔️ |
| [ScanNet](http://bioinfo3d.cs.tau.ac.il/ScanNet/index_real.html) | 🟢 | ✔️ |

## Installation

Expand Down
3 changes: 2 additions & 1 deletion etc/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"predus2",
"predictprotein",
"psiver",
"csm_potential"
"csm_potential",
"scannet"
]
}
1 change: 1 addition & 0 deletions src/cport/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def main(pdb_file, chain_id, pdb_id, pred, fasta_file):
"cons_ppisp",
"predictprotein",
"csm_potential",
"scannet",
]

threads = {}
Expand Down
24 changes: 24 additions & 0 deletions src/cport/modules/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cport.modules.predictprotein_api import Predictprotein
from cport.modules.predus2 import Predus2
from cport.modules.psiver import Psiver
from cport.modules.scannet import ScanNet
from cport.modules.scriber import Scriber
from cport.modules.sppider import Sppider
from cport.modules.whiscy import Whiscy
Expand Down Expand Up @@ -247,6 +248,28 @@ def run_csm_potential(pdb_file, chain_id):
return predictions


def run_scannet(pdb_file, chain_id):
    """
    Run the ScanNet predictor.

    Parameters
    ----------
    pdb_file : str
        Path to PDB file.
    chain_id : str
        Chain identifier.

    Returns
    -------
    predictions : dict
        Dictionary containing the predictions.
    """
    predictor = ScanNet(pdb_file, chain_id)
    predictions = predictor.run()
    log.info(predictions)
    return predictions


def run_placeholder(fasta_str):
"""
Run the PLACEHOLDER predictor.
Expand All @@ -272,6 +295,7 @@ def run_placeholder(fasta_str):
"sppider": run_sppider,
"whiscy": run_whiscy,
"csm_potential": run_csm_potential,
"scannet": run_scannet,
}

FASTA_PREDICTORS = {"placeholder": run_placeholder}
Expand Down
181 changes: 181 additions & 0 deletions src/cport/modules/scannet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""ScanNet module."""
import io
import logging
import re
import sys
import time

import mechanicalsoup as ms
from Bio import PDB

from cport.url import SCANNET_URL

log = logging.getLogger("cportlog")
result_url = "http://bioinfo3d.cs.tau.ac.il/ScanNet/results/0407500892.html"

# Total wait (seconds) = WAIT_INTERVAL * NUM_RETRIES
WAIT_INTERVAL = 30 # seconds
NUM_RETRIES = 36


class ScanNet:
    """Wrapper around the ScanNet web server for interface prediction.

    Submits a PDB file to the server, polls the processing page until the
    results appear, and parses the per-residue scores out of the returned
    PDB (scores are stored in the b-factor column).
    """

    def __init__(self, pdb_file, chain_id):
        """
        Initialize the class.

        Parameters
        ----------
        pdb_file : str
            Path to PDB file.
        chain_id : str
            Chain identifier.
        """
        self.pdb_file = pdb_file
        self.chain_id = chain_id
        # Polling configuration; total wait = WAIT_INTERVAL * NUM_RETRIES.
        self.wait = WAIT_INTERVAL
        self.tries = NUM_RETRIES

    def submit(self):
        """
        Make a submission to the ScanNet server.

        Returns
        -------
        processing_url : str
            The url to the processing page.
        """
        browser = ms.StatefulBrowser()
        # verify=False disables TLS certificate verification for this request.
        browser.open(SCANNET_URL, verify=False)

        # Fill in the first form on the page with the job parameters.
        input_form = browser.select_form(nr=0)
        input_form.set(name="PDBfile", value=self.pdb_file)
        input_form.set(name="email", value="[email protected]")
        input_form.set(name="chain", value=self.chain_id)
        browser.submit_selected()

        # NOTE(review): index 7 is a magic position of the results link on
        # the response page — brittle if the server's page layout changes.
        browser.follow_link(browser.links()[7])
        processing_url = browser.get_url()
        log.debug(f"The url being looked at: {processing_url}")

        return processing_url

    def retrieve_prediction_link(self, url=None, page_text=None):
        """
        Poll the processing page until the result page is ready.

        Parameters
        ----------
        url : str
            The url of the processing/results page.
        page_text : str
            The text of the page to parse - used for testing.

        Returns
        -------
        url : str
            The url to the prediction page.
        """
        browser = ms.StatefulBrowser()

        if page_text:
            # this is used in the testing
            browser.open_fake_page(page_text=page_text)
            url = page_text
        else:
            browser.open(url, verify=False)

        completed = False
        while not completed:
            # The finished page embeds the result PDB in a JavaScript
            # variable; its presence signals that the job is done.
            match = re.search(r"stringContainingTheWholePdbFile", str(browser.page))
            if match:
                completed = True
            else:
                # still running, wait a bit before re-polling
                log.debug(f"Waiting for ScanNet to finish... {self.tries}")
                time.sleep(self.wait)
                browser.refresh()
                self.tries -= 1

                if self.tries == 0:
                    # retries exhausted — assume the server is unresponsive.
                    # NOTE(review): sys.exit() with no argument exits with
                    # status 0 (success); confirm this is intended.
                    log.error(f"ScanNet server is not responding, url was {url}")
                    sys.exit()

        return url

    def parse_prediction(self, url=None, test_file=None):
        """
        Extract the active and passive residue predictions from the results.

        The results page embeds a PDB file whose b-factor column holds the
        per-residue prediction score.

        Parameters
        ----------
        url : str
            The url to the results page.
        test_file : str
            Local PDB file to parse instead of fetching the url - used
            for testing.

        Returns
        -------
        prediction_dict : dict
            The dictionary containing the parsed prediction results with
            "active" ([residue_number, score] pairs) and "passive"
            (residue numbers) sites.
        """
        parser = PDB.PDBParser()
        if not test_file:
            browser = ms.StatefulBrowser()

            # NOTE(review): unlike the other requests in this class, this
            # open() does not pass verify=False — confirm consistency.
            browser.open(url)
            # page contains PDB file as a string with results in b_factor column
            pdb_string = re.findall(
                r"stringContainingTheWholePdbFile = (.*?);",
                str(browser.page),
                re.DOTALL,
            )[0]

            structure = parser.get_structure("pdb", io.StringIO(pdb_string))

        else:
            structure = parser.get_structure("pdb", test_file)

        model = structure[0]
        chain = model[self.chain_id]

        prediction_dict = {"active": [], "passive": []}

        # NOTE(review): classification happens per atom, so each residue is
        # appended once per atom; presumably all atoms of a residue carry
        # the same score — confirm, otherwise residues are duplicated.
        for res in chain:
            for atom in res:
                b_fact = atom.get_bfactor()

                # arbitrary threshold: scores >= 0.5 are labelled active
                if b_fact >= 0.5:
                    prediction_dict["active"].append([res.id[1], b_fact])
                else:
                    prediction_dict["passive"].append(res.id[1])

        return prediction_dict

    def run(self):
        """
        Execute the ScanNet prediction workflow: submit, poll, parse.

        Returns
        -------
        prediction_dict : dict
            A dictionary containing the raw prediction.
        """
        log.info("Running ScanNet")
        log.info(f"Will try {self.tries} times waiting {self.wait}s between tries")

        submitted_url = self.submit()
        prediction_url = self.retrieve_prediction_link(url=submitted_url)
        prediction_dict = self.parse_prediction(url=prediction_url)

        return prediction_dict
4 changes: 3 additions & 1 deletion src/cport/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"psiver",
"scriber",
"csm_potential",
"scannet",
]

pdb_predictors = [
Expand All @@ -34,6 +35,7 @@
"predus2",
"sppider",
"csm_potential",
"scannet",
]


Expand Down Expand Up @@ -231,7 +233,7 @@ def get_residue_range(result_dic):
active_reslist += [x for x in result_dic[pred]["active"]]

reslist = passive_reslist + active_reslist
absolute_range = list(range(min(reslist), max(reslist)))
absolute_range = list(range(min(reslist), max(reslist) + 1))
return absolute_range


Expand Down
1 change: 1 addition & 0 deletions src/cport/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
PREDICTPROTEIN_API = "https://predictprotein.org/api/ppc_fetch"
PSIVER_URL = "https://mizuguchilab.org/PSIVER/"
CSM_POTENTIAL_URL = "http://biosig.unimelb.edu.au/csm_potential/api/predict"
SCANNET_URL = "http://bioinfo3d.cs.tau.ac.il/ScanNet/index_real.html"

PDB_URL = "https://files.rcsb.org/download/"
PDB_FASTA_URL = "https://www.rcsb.org/fasta/entry/"
Loading

0 comments on commit 0618184

Please sign in to comment.