diff --git a/pyproject.toml b/pyproject.toml
index e5e352f..583358f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "webtranspose"
-version = "0.3.1"
+version = "0.3.2"
 description = "Reliable APIs for the website data"
 authors = ["Mike Gee "]
diff --git a/src/webtranspose/openai.py b/src/webtranspose/openai.py
index 82fe676..49ceab9 100644
--- a/src/webtranspose/openai.py
+++ b/src/webtranspose/openai.py
@@ -18,7 +18,7 @@ def __init__(
             chunk_size (int, optional): The size of each chunk of text to process. Defaults to 2500.
             overlap_size (int, optional): The size of the overlap between chunks. Defaults to 100.
         """
-        self.api_key = os.environ["OPENAI_API_KEY"]
+        self.api_key = os.environ.get("OPENAI_API_KEY")
         self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
         self.chunk_size = chunk_size
         self.overlap_size = overlap_size
diff --git a/src/webtranspose/scrape.py b/src/webtranspose/scrape.py
index 86479ba..efec5c4 100644
--- a/src/webtranspose/scrape.py
+++ b/src/webtranspose/scrape.py
@@ -21,6 +21,7 @@ def __init__(
         verbose: bool = False,
         scraper: OpenAIScraper = None,
         api_key: str = None,
+        proxy: str = None,
         _created: bool = False,
     ):
         """
@@ -34,6 +35,7 @@
             verbose (bool, optional): Whether to print verbose output. Defaults to False.
             scraper (OpenAIScraper, optional): The scraper object. Defaults to None.
             api_key (str, optional): The API key. Defaults to None.
+            proxy (str, optional): The proxy to route requests through. Defaults to None.
             _created (bool, optional): Whether the scraper has been created. Defaults to False.
         """
         self.api_key = api_key
@@ -48,6 +50,7 @@
         self.scraper = scraper
         self.render_js = render_js
         self.scraper_id = scraper_id
+        self.proxy = proxy
         if self.scraper is None:
             self.scraper = OpenAIScraper()
         if self.scraper_id is None:
@@ -107,6 +110,7 @@ def create_scraper_api(self):
             "name": self.name,
             "schema": self.schema,
             "render_js": self.render_js,
+            "proxy": self.proxy,
         }
         out_json = run_webt_api(
             create_json,
@@ -156,6 +160,7 @@ def scrape(self, url=None, html=None, timeout=30):
             "scraper_id": self.scraper_id,
             "url": url,
             "html": html,
+            "proxy": self.proxy,
         }
         out_json = run_webt_api(
             scrape_json,
@@ -178,6 +183,7 @@ def status(self):
                 "verbose": self.verbose,
                 "render_js": self.render_js,
                 "schema": self.schema,
+                "proxy": self.proxy,
             }
         else:
             get_json = {
@@ -195,6 +201,7 @@
                 "verbose": self.verbose,
                 "render_js": scraper["render_js"],
                 "schema": scraper["schema"],
+                "proxy": scraper["proxy"],
             }
@@ -231,6 +238,7 @@ def get_scraper(scraper_id, api_key: str = None):
         schema=scraper["schema"],
        render_js=scraper["render_js"],
         api_key=api_key,
+        proxy=scraper["proxy"],
        _created=True,
     )
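
Note on the openai.py change: os.environ.get returns None when the variable is unset instead of raising KeyError, so OpenAIScraper construction no longer fails eagerly; a missing key now surfaces later, when the key is actually used. A minimal sketch of the difference (illustrative only, not part of the patch; the guard shown is hypothetical):

import os

# Before: KeyError at construction time if the variable is unset.
# api_key = os.environ["OPENAI_API_KEY"]

# After: returns None when unset; the object is created and any
# failure is deferred until the key is first used.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None:
    # Hypothetical guard; the patch itself adds no such check.
    raise RuntimeError("OPENAI_API_KEY is not set")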
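
The rest of the patch threads a new proxy setting through Scraper.__init__, the create_scraper_api and scrape payloads, and the status/get_scraper round-trips, and bumps the package version to 0.3.2. A hedged usage sketch, assuming the constructor accepts name and schema keywords (the instance clearly carries both attributes, but the full signature is only partially visible in this diff; all values below are placeholders):

from webtranspose.scrape import Scraper

# Placeholder values; the proxy URL format your provider expects may differ.
scraper = Scraper(
    name="product-page",
    schema={"title": "string", "price": "string"},
    render_js=True,
    api_key="YOUR_WEBT_API_KEY",
    proxy="http://user:pass@proxyhost:8080",  # assumed proxy URL format
)
out = scraper.scrape(url="https://example.com/item/123")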