diff --git a/ftest/playwright_request.py b/ftest/playwright_request.py index b2f7503ae8..3bff07829f 100644 --- a/ftest/playwright_request.py +++ b/ftest/playwright_request.py @@ -1,9 +1,7 @@ from minet.browser import ThreadsafeBrowser with ThreadsafeBrowser(headless=False, adblock=True) as browser: - response = browser.request( - "https://github.com/emsojemgrjgr", raise_on_statuses=(404,) - ) + response = browser.request("https://lemonde.fr", raise_on_statuses=(404,)) print(response) print(response.stack) diff --git a/ftest/pyppeteer.py b/ftest/pyppeteer.py deleted file mode 100644 index 705c9aef96..0000000000 --- a/ftest/pyppeteer.py +++ /dev/null @@ -1,121 +0,0 @@ -import asyncio -import threading -from pyppeteer import launch, connect -from quenouille import imap_unordered -from functools import partial - -# TODO: add async delete to drop connection -# TODO: try to give loop to connection? asyncio.ensure_future -CONTEXT_POOL = [] - - -class ThreadContext(object): - def __init__(self, endpoint): - self.loop = asyncio.new_event_loop() - self.endpoint = endpoint - - self.browser = self.run_until_complete( - connect(browserWSEndpoint=self.endpoint, loop=self.loop) - ) - - CONTEXT_POOL.append(self) - - def run_until_complete(self, task): - return self.loop.run_until_complete(task) - - -local_data = threading.local() - -URLS = [ - "https://www.lemonde.fr/", - "https://www.lefigaro.fr/", - "https://www.liberation.fr/", -] - - -async def boot(): - browser = await launch(handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False) - - endpoint = browser.wsEndpoint - - return browser, endpoint - - -LOOP = asyncio.get_event_loop() -MASTER, ENDPOINT = LOOP.run_until_complete(boot()) - - -# TODO: use incognito browser context? -async def work(url): - browser = await connect(browserWSEndpoint=ENDPOINT) - page = await browser.newPage() - await page.goto(url) - - title = await page.evaluate( - """ - () => { - return document.title; - } - """ - ) - - await browser.disconnect() - - return title - - -async def work_with_connection(browser, url): - page = await browser.newPage() - await page.goto(url) - - title = await page.evaluate( - """ - () => { - return document.title; - } - """ - ) - - await page.close() - - return title - - -def threaded_work(url): - if not hasattr(local_data, "context"): - context = ThreadContext(ENDPOINT) - local_data.context = context - - context = local_data.context - - return context.run_until_complete(work(url)) - - -def dummy_work(url): - return url - - -for title in imap_unordered(URLS, threaded_work, 3): - print(title) - - -def cleanup(): - global local_data - global CONTEXT_POOL - - print("cleanup") - for context in CONTEXT_POOL: - context.run_until_complete(context.browser.disconnect()) - context.loop.close() - del context - - del local_data - del CONTEXT_POOL - - LOOP.run_until_complete(MASTER.close()) - - -cleanup() - - -# TODO: thread local + page closing + browser closing diff --git a/minet/browser/threadsafe_browser.py b/minet/browser/threadsafe_browser.py index 7b2d259471..745de3a1ef 100644 --- a/minet/browser/threadsafe_browser.py +++ b/minet/browser/threadsafe_browser.py @@ -8,7 +8,7 @@ Literal, Container, ) -from minet.types import Concatenate, ParamSpec +from minet.types import Concatenate, ParamSpec, AnyTimeout import os import asyncio @@ -30,7 +30,7 @@ BrowserYetUnimplementedError, InvalidStatusError, ) -from minet.web import Response +from minet.web import Response, coerce_timeout_to_milliseconds from minet.browser.plawright_shim import install_browser from minet.browser.utils import ( get_browsers_path, @@ -260,10 +260,16 @@ async def __request( context: BrowserContext, url: str, raise_on_statuses: Optional[Container[int]] = None, + timeout: Optional[AnyTimeout] = None, ) -> Response: async with await context.new_page() as page: try: - emulated_response = await page.goto(url) + actual_timeout = None + + if timeout is not None: + actual_timeout = coerce_timeout_to_milliseconds(timeout) + + emulated_response = await page.goto(url, timeout=actual_timeout) except (PlaywrightError, PlaywrightTimeoutError) as e: error = convert_playwright_error(e) @@ -328,8 +334,11 @@ async def __request( return response def request( - self, url: str, raise_on_statuses: Optional[Container[int]] = None + self, + url: str, + raise_on_statuses: Optional[Container[int]] = None, + timeout: Optional[AnyTimeout] = None, ) -> Response: return self.run_in_default_context( - self.__request, url, raise_on_statuses=raise_on_statuses + self.__request, url, raise_on_statuses=raise_on_statuses, timeout=timeout ) diff --git a/minet/web.py b/minet/web.py index 62514cc4d1..0fe4cd7568 100644 --- a/minet/web.py +++ b/minet/web.py @@ -168,7 +168,7 @@ def create_pool_manager( DEFAULT_POOL_MANAGER = create_pool_manager() -def timeout_to_final_time(timeout: AnyTimeout) -> float: +def coerce_timeout_to_seconds(timeout: AnyTimeout) -> float: seconds: float if isinstance(timeout, urllib3.Timeout): @@ -179,6 +179,16 @@ def timeout_to_final_time(timeout: AnyTimeout) -> float: else: seconds = timeout + return seconds + + +def coerce_timeout_to_milliseconds(timeout: AnyTimeout) -> float: + return coerce_timeout_to_seconds(timeout) * 1000 + + +def timeout_to_final_time(timeout: AnyTimeout) -> float: + seconds = coerce_timeout_to_seconds(timeout) + # Some epsilon so sockets can timeout themselves properly seconds += 0.01