From 92730230e2ae7159925f88cc2f6e43bd0162ec91 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:33:16 +0800 Subject: [PATCH 1/4] selenium update --- mdgo/forcefield/pubchem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mdgo/forcefield/pubchem.py b/mdgo/forcefield/pubchem.py index e04b28fb..648e84a8 100644 --- a/mdgo/forcefield/pubchem.py +++ b/mdgo/forcefield/pubchem.py @@ -68,6 +68,7 @@ def __init__( "profile.managed_default_content_settings.images": 2, } self.options = webdriver.ChromeOptions() + self.server = webdriver.ChromeService(chromedriver_dir) self.options.add_argument( 'user-agent="Mozilla/5.0 ' "(Macintosh; Intel Mac OS X 10_14_6) " @@ -79,7 +80,7 @@ def __init__( self.options.add_argument("--headless") self.options.add_experimental_option("prefs", self.preferences) self.options.add_experimental_option("excludeSwitches", ["enable-automation"]) - self.web = webdriver.Chrome(chromedriver_dir, options=self.options) + self.web = webdriver.Chrome(options=self.options, service=self.server) self.wait = WebDriverWait(self.web, 10) self.web.get("https://pubchem.ncbi.nlm.nih.gov/") time.sleep(1) From bcddad522570214cc8a529a28073c25bdacbe2fe Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:33:40 +0800 Subject: [PATCH 2/4] xpath update --- mdgo/forcefield/pubchem.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mdgo/forcefield/pubchem.py b/mdgo/forcefield/pubchem.py index 648e84a8..03c5e8eb 100644 --- a/mdgo/forcefield/pubchem.py +++ b/mdgo/forcefield/pubchem.py @@ -146,6 +146,7 @@ def _obtain_entry_web(self, search_text: str, name: str, output_format: str) -> url = "https://pubchem.ncbi.nlm.nih.gov/#query=" + query self.web.get(url) time.sleep(1) + loaded_element_path = '//*[@id="main-results"]/div[1]/div/ul' best_xpath = '//*[@id="featured-results"]/div/div[2]' "/div/div[1]/div[2]/div[1]/a/span/span" relevant_xpath = ( '//*[@id="collection-results-container"]' @@ -158,8 +159,8 @@ def _obtain_entry_web(self, search_text: str, name: str, output_format: str) -> match = self.web.find_element(By.XPATH, relevant_xpath) match.click() # density_locator = '//*[@id="Density"]/div[2]/div[1]/p' - cid_locator = '//*[@id="main-content"]/div/div/div[1]/' "div[3]/div/table/tbody/tr[1]/td" - smiles_locator = '//*[@id="Canonical-SMILES"]/div[2]/div[1]/p' + cid_locator = '//*[@id="Title-and-Summary"]/div/div/div/div[1]/div[2]' + smiles_locator = '//*[@id="Canonical-SMILES"]/div[2]/div[1]' self.wait.until(EC.presence_of_element_located((By.XPATH, cid_locator))) cid = self.web.find_element(By.XPATH, cid_locator).text smiles = self.web.find_element(By.XPATH, smiles_locator).text From f172fa4214cdf88e2dbce0beccbfd31a50f05b5f Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:34:09 +0800 Subject: [PATCH 3/4] page load fix --- mdgo/forcefield/pubchem.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mdgo/forcefield/pubchem.py b/mdgo/forcefield/pubchem.py index 03c5e8eb..e488aa53 100644 --- a/mdgo/forcefield/pubchem.py +++ b/mdgo/forcefield/pubchem.py @@ -145,8 +145,8 @@ def _obtain_entry_web(self, search_text: str, name: str, output_format: str) -> query = quote(search_text) url = "https://pubchem.ncbi.nlm.nih.gov/#query=" + query self.web.get(url) - time.sleep(1) loaded_element_path = '//*[@id="main-results"]/div[1]/div/ul' + self.wait.until(EC.presence_of_element_located((By.XPATH, loaded_element_path))) best_xpath = '//*[@id="featured-results"]/div/div[2]' "/div/div[1]/div[2]/div[1]/a/span/span" relevant_xpath = ( '//*[@id="collection-results-container"]' @@ -157,7 +157,9 @@ def _obtain_entry_web(self, search_text: str, name: str, output_format: str) -> match = self.web.find_element(By.XPATH, best_xpath) else: match = self.web.find_element(By.XPATH, relevant_xpath) + self.wait.until(EC.element_to_be_clickable(match)) match.click() + time.sleep(1) # density_locator = '//*[@id="Density"]/div[2]/div[1]/p' cid_locator = '//*[@id="Title-and-Summary"]/div/div/div/div[1]/div[2]' smiles_locator = '//*[@id="Canonical-SMILES"]/div[2]/div[1]' From 4ab4d4d00e4985bcfa2ecb9d4aeeee27f6cdde29 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:39:59 +0800 Subject: [PATCH 4/4] linting --- mdgo/util/packmol.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mdgo/util/packmol.py b/mdgo/util/packmol.py index 97d79d37..da5cabb1 100644 --- a/mdgo/util/packmol.py +++ b/mdgo/util/packmol.py @@ -17,9 +17,8 @@ import subprocess from pathlib import Path from typing import Dict, List, Optional, Union - -from pymatgen.core import Molecule from shutil import which +from pymatgen.core import Molecule # from pymatgen.io.core import InputFile, InputSet, InputGenerator