Skip to content

Commit

Permalink
Merge pull request #66 from haddocking/ScanNet-implementation
Browse files Browse the repository at this point in the history
ScanNet implementation
  • Loading branch information
AldovdN authored Jul 27, 2022
2 parents 56635ea + f5fe3dd commit 0618184
Show file tree
Hide file tree
Showing 11 changed files with 1,898 additions and 8 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ Please also refer to the original publication:
| [ISPRED4](https://ispred4.biocomp.unibo.it/ispred/default/index) | 🟢 | ✔️ |
| [SPPIDER](https://sppider.cchmc.org) | 🟢 | ✔️ |
| [meta-PPISP](https://pipe.rcc.fsu.edu/meta-ppisp.html) | 🟢 | ✔️ |
| [PredUs2](http://honig.c2b2.columbia.edu/predus) | 🟠 | ✔️ |
| [PredUs2](http://honig.c2b2.columbia.edu/predus) | 🟠 | |
| [Cons-PPISP](https://pipe.rcc.fsu.edu/ppisp.html) | 🟢 | ✔️ |
| [PredictProtein](https://predictprotein.org) | 🟢 | ✔️ |
| [PSIVER](https://mizuguchilab.org/PSIVER/) | 🟢 | ✔️ |
| [CSM-Potential](http://biosig.unimelb.edu.au/csm_potential/) | 🟢 | ✔️ |
| [ScanNet](http://bioinfo3d.cs.tau.ac.il/ScanNet/index_real.html) | 🟢 | ✔️ |

## Installation

Expand Down
3 changes: 2 additions & 1 deletion etc/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"predus2",
"predictprotein",
"psiver",
"csm_potential"
"csm_potential",
"scannet"
]
}
1 change: 1 addition & 0 deletions src/cport/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def main(pdb_file, chain_id, pdb_id, pred, fasta_file):
"cons_ppisp",
"predictprotein",
"csm_potential",
"scannet",
]

threads = {}
Expand Down
24 changes: 24 additions & 0 deletions src/cport/modules/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cport.modules.predictprotein_api import Predictprotein
from cport.modules.predus2 import Predus2
from cport.modules.psiver import Psiver
from cport.modules.scannet import ScanNet
from cport.modules.scriber import Scriber
from cport.modules.sppider import Sppider
from cport.modules.whiscy import Whiscy
Expand Down Expand Up @@ -247,6 +248,28 @@ def run_csm_potential(pdb_file, chain_id):
return predictions


def run_scannet(pdb_file, chain_id):
    """
    Run the ScanNet predictor.

    Parameters
    ----------
    pdb_file : str
        Path to PDB file.
    chain_id : str
        Chain identifier.

    Returns
    -------
    predictions : dict
        Dictionary containing the predictions.
    """
    predictor = ScanNet(pdb_file, chain_id)
    predictions = predictor.run()
    log.info(predictions)
    return predictions


def run_placeholder(fasta_str):
"""
Run the PLACEHOLDER predictor.
Expand All @@ -272,6 +295,7 @@ def run_placeholder(fasta_str):
"sppider": run_sppider,
"whiscy": run_whiscy,
"csm_potential": run_csm_potential,
"scannet": run_scannet,
}

FASTA_PREDICTORS = {"placeholder": run_placeholder}
Expand Down
181 changes: 181 additions & 0 deletions src/cport/modules/scannet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""ScanNet module."""
import io
import logging
import re
import sys
import time

import mechanicalsoup as ms
from Bio import PDB

from cport.url import SCANNET_URL

log = logging.getLogger("cportlog")
result_url = "http://bioinfo3d.cs.tau.ac.il/ScanNet/results/0407500892.html"

# Total wait (seconds) = WAIT_INTERVAL * NUM_RETRIES
WAIT_INTERVAL = 30 # seconds
NUM_RETRIES = 36


class ScanNet:
    """Wrapper around the ScanNet web server for interface prediction.

    Submits a PDB file to the server, polls the processing page until the
    results appear, and parses the per-residue scores out of the returned
    PDB (scores are stored in the b-factor column).
    """

    def __init__(self, pdb_file, chain_id):
        """
        Initialize the class.

        Parameters
        ----------
        pdb_file : str
            Path to PDB file.
        chain_id : str
            Chain identifier.
        """
        self.pdb_file = pdb_file
        self.chain_id = chain_id
        # Polling configuration; total wait = WAIT_INTERVAL * NUM_RETRIES.
        self.wait = WAIT_INTERVAL
        self.tries = NUM_RETRIES

    def submit(self):
        """
        Make a submission to the ScanNet server.

        Returns
        -------
        processing_url : str
            The url to the processing page.
        """
        browser = ms.StatefulBrowser()
        # verify=False disables TLS certificate verification for this request.
        browser.open(SCANNET_URL, verify=False)

        # Fill in the first form on the page with the job parameters.
        input_form = browser.select_form(nr=0)
        input_form.set(name="PDBfile", value=self.pdb_file)
        input_form.set(name="email", value="[email protected]")
        input_form.set(name="chain", value=self.chain_id)
        browser.submit_selected()

        # NOTE(review): index 7 is a magic position of the results link on
        # the response page — brittle if the server's page layout changes.
        browser.follow_link(browser.links()[7])
        processing_url = browser.get_url()
        log.debug(f"The url being looked at: {processing_url}")

        return processing_url

    def retrieve_prediction_link(self, url=None, page_text=None):
        """
        Poll the processing page until the result page is ready.

        Parameters
        ----------
        url : str
            The url of the processing/results page.
        page_text : str
            The text of the page to parse - used for testing.

        Returns
        -------
        url : str
            The url to the prediction page.
        """
        browser = ms.StatefulBrowser()

        if page_text:
            # this is used in the testing
            browser.open_fake_page(page_text=page_text)
            url = page_text
        else:
            browser.open(url, verify=False)

        completed = False
        while not completed:
            # The finished page embeds the result PDB in a JavaScript
            # variable; its presence signals that the job is done.
            match = re.search(r"stringContainingTheWholePdbFile", str(browser.page))
            if match:
                completed = True
            else:
                # still running, wait a bit before re-polling
                log.debug(f"Waiting for ScanNet to finish... {self.tries}")
                time.sleep(self.wait)
                browser.refresh()
                self.tries -= 1

                if self.tries == 0:
                    # retries exhausted — assume the server is unresponsive.
                    # NOTE(review): sys.exit() with no argument exits with
                    # status 0 (success); confirm this is intended.
                    log.error(f"ScanNet server is not responding, url was {url}")
                    sys.exit()

        return url

    def parse_prediction(self, url=None, test_file=None):
        """
        Extract the active and passive residue predictions from the results.

        The results page embeds a PDB file whose b-factor column holds the
        per-residue prediction score.

        Parameters
        ----------
        url : str
            The url to the results page.
        test_file : str
            Local PDB file to parse instead of fetching the url - used
            for testing.

        Returns
        -------
        prediction_dict : dict
            The dictionary containing the parsed prediction results with
            "active" ([residue_number, score] pairs) and "passive"
            (residue numbers) sites.
        """
        parser = PDB.PDBParser()
        if not test_file:
            browser = ms.StatefulBrowser()

            # NOTE(review): unlike the other requests in this class, this
            # open() does not pass verify=False — confirm consistency.
            browser.open(url)
            # page contains PDB file as a string with results in b_factor column
            pdb_string = re.findall(
                r"stringContainingTheWholePdbFile = (.*?);",
                str(browser.page),
                re.DOTALL,
            )[0]

            structure = parser.get_structure("pdb", io.StringIO(pdb_string))

        else:
            structure = parser.get_structure("pdb", test_file)

        model = structure[0]
        chain = model[self.chain_id]

        prediction_dict = {"active": [], "passive": []}

        # NOTE(review): classification happens per atom, so each residue is
        # appended once per atom; presumably all atoms of a residue carry
        # the same score — confirm, otherwise residues are duplicated.
        for res in chain:
            for atom in res:
                b_fact = atom.get_bfactor()

                # arbitrary threshold: scores >= 0.5 are labelled active
                if b_fact >= 0.5:
                    prediction_dict["active"].append([res.id[1], b_fact])
                else:
                    prediction_dict["passive"].append(res.id[1])

        return prediction_dict

    def run(self):
        """
        Execute the ScanNet prediction workflow: submit, poll, parse.

        Returns
        -------
        prediction_dict : dict
            A dictionary containing the raw prediction.
        """
        log.info("Running ScanNet")
        log.info(f"Will try {self.tries} times waiting {self.wait}s between tries")

        submitted_url = self.submit()
        prediction_url = self.retrieve_prediction_link(url=submitted_url)
        prediction_dict = self.parse_prediction(url=prediction_url)

        return prediction_dict
4 changes: 3 additions & 1 deletion src/cport/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"psiver",
"scriber",
"csm_potential",
"scannet",
]

pdb_predictors = [
Expand All @@ -34,6 +35,7 @@
"predus2",
"sppider",
"csm_potential",
"scannet",
]


Expand Down Expand Up @@ -231,7 +233,7 @@ def get_residue_range(result_dic):
active_reslist += [x for x in result_dic[pred]["active"]]

reslist = passive_reslist + active_reslist
absolute_range = list(range(min(reslist), max(reslist)))
absolute_range = list(range(min(reslist), max(reslist) + 1))
return absolute_range


Expand Down
1 change: 1 addition & 0 deletions src/cport/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
PREDICTPROTEIN_API = "https://predictprotein.org/api/ppc_fetch"
PSIVER_URL = "https://mizuguchilab.org/PSIVER/"
CSM_POTENTIAL_URL = "http://biosig.unimelb.edu.au/csm_potential/api/predict"
SCANNET_URL = "http://bioinfo3d.cs.tau.ac.il/ScanNet/index_real.html"

PDB_URL = "https://files.rcsb.org/download/"
PDB_FASTA_URL = "https://www.rcsb.org/fasta/entry/"
Loading

0 comments on commit 0618184

Please sign in to comment.