diff --git a/README.md b/README.md
index 5dd1ac4..b90a6a9 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,7 @@ This sample app provides a set of implemented `Computer` examples, but feel free
 | `LocalPlaywright` | local-playwright | `browser` | Local browser window | [Playwright SDK](https://playwright.dev/) |
 | `Docker` | docker | `linux` | Docker container environment | [Docker](https://docs.docker.com/engine/install/) running |
 | `Browserbase` | browserbase | `browser` | Remote browser environment | [Browserbase](https://www.browserbase.com/) API key in `.env` |
+| `Notte Browser` | notte-browser | `browser` | Remote browser environment | [Notte](https://www.notte.cc/) API key in `.env` |
 | `ScrapybaraBrowser` | scrapybara-browser | `browser` | Remote browser environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
 | `ScrapybaraUbuntu` | scrapybara-ubuntu | `linux` | Remote Ubuntu desktop environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
 
diff --git a/computers/config.py b/computers/config.py
index 699f1a8..3c2e749 100644
--- a/computers/config.py
+++ b/computers/config.py
@@ -5,6 +5,7 @@
     "local-playwright": LocalPlaywrightBrowser,
     "docker": DockerComputer,
     "browserbase": BrowserbaseBrowser,
+    "notte-browser": NotteBrowser,
     "scrapybara-browser": ScrapybaraBrowser,
     "scrapybara-ubuntu": ScrapybaraUbuntu,
 }
diff --git a/computers/contrib/__init__.py b/computers/contrib/__init__.py
index e69de29..9207148 100644
--- a/computers/contrib/__init__.py
+++ b/computers/contrib/__init__.py
@@ -0,0 +1 @@
+from .notte import NotteBrowser
\ No newline at end of file
diff --git a/computers/contrib/notte.py b/computers/contrib/notte.py
new file mode 100644
index 0000000..d587af0
--- /dev/null
+++ b/computers/contrib/notte.py
@@ -0,0 +1,165 @@
+import os
+from typing import final
+
+from dotenv import load_dotenv
+from notte_sdk import NotteClient
+from playwright.sync_api import Browser, Page, Error as PlaywrightError
+from typing_extensions import override
+
+from computers.shared.base_playwright import BasePlaywrightComputer
+
+_ = load_dotenv()
+
+
+@final
+class NotteBrowser(BasePlaywrightComputer):
+    """
+    Notte is a headless browser platform that offers a remote browser API. You can use it to control thousands of browsers from anywhere.
+    You can find more information about Notte at https://www.notte.cc/computer-use or view our OpenAI CUA Quickstart at https://docs.notte.cc/integrations/openai-cua/introduction.
+
+    IMPORTANT: This Notte computer requires the use of the `goto` tool defined in playwright_with_custom_functions.py.
+    Make sure to include this tool in your configuration when using the Notte computer.
+    """
+
+    def __init__(
+        self,
+        width: int = 1024,
+        height: int = 768,
+        proxy: bool = False,
+    ) -> None:
+        """
+        Initialize the Notte browser computer.
+
+        The API key is read from the `NOTTE_API_KEY` environment variable. The
+        session object is created here and started in `_get_browser_and_page`.
+
+        Args:
+            width (int): The width of the browser viewport. Default is 1024.
+            height (int): The height of the browser viewport. Default is 768.
+            proxy (bool): Whether to use a proxy for the session. Default is False.
+        """
+        super().__init__()
+        self.notte = NotteClient(api_key=os.getenv("NOTTE_API_KEY"))
+        self.session = self.notte.Session(
+            viewport_width=width,
+            viewport_height=height,
+            proxies=proxy,
+        )
+        self.width = width
+        self.height = height
+
+    @override
+    def get_dimensions(self) -> tuple[int, int]:
+        """Return the viewport size as `(width, height)`."""
+        return (self.width, self.height)
+
+    @override
+    def _get_browser_and_page(self) -> tuple[Browser, Page]:
+        """
+        Start the Notte session and connect to it over CDP.
+
+        Returns:
+            tuple[Browser, Page]: The connected browser and the page to drive.
+
+        Raises:
+            Exception: Any error raised while connecting is re-raised after the
+                Notte session has been stopped, so it is not left running.
+        """
+        self.session.start()
+        try:
+            # Connect to the remote session
+            browser = self._playwright.chromium.connect_over_cdp(
+                endpoint_url=self.session.cdp_url(),
+                timeout=60000,
+            )
+            context = browser.contexts[0]
+
+            # Follow tabs opened later so the active page tracks them
+            context.on("page", self._handle_new_page)
+
+            # Reuse the session's first tab, or open one if there is none
+            page = context.pages[0] if context.pages else context.new_page()
+            page.on("close", self._handle_page_close)
+
+            page.goto("https://bing.com")
+        except Exception:
+            # Setup failed after the session was started: stop it here so the
+            # remote browser does not keep running, then propagate the error.
+            self.session.stop()
+            raise
+
+        return browser, page
+
+    def _handle_new_page(self, page: Page) -> None:
+        """Make a newly opened page the active one and watch for its closure."""
+        print("New page created")
+        self._page = page
+        page.on("close", self._handle_page_close)
+
+    def _handle_page_close(self, page: Page) -> None:
+        """Switch to the last remaining page when the active page closes."""
+        print("Page closed")
+        if self._page != page:
+            return
+
+        # The browser may be unset or have no contexts left (e.g. during
+        # teardown), so avoid indexing blindly.
+        contexts = self._browser.contexts if self._browser else []
+        remaining = contexts[0].pages if contexts else []
+        if remaining:
+            self._page = remaining[-1]
+        else:
+            print("Warning: All pages have been closed.")
+            self._page = None
+
+    @override
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Clean up resources when exiting the context manager.
+
+        Args:
+            exc_type: The type of the exception that caused the context to be exited.
+            exc_val: The exception instance that caused the context to be exited.
+            exc_tb: A traceback object encapsulating the call stack at the point where the exception occurred.
+        """
+        try:
+            super().__exit__(exc_type, exc_val, exc_tb)
+        finally:
+            # Stop the remote session even if closing the browser raised.
+            if self.session:
+                print(
+                    "Session completed. Check our docs to learn more about session replays: https://docs.notte.cc"
+                )
+                self.session.stop()
+
+    @override
+    def screenshot(self) -> str:
+        """
+        Capture a screenshot of the current viewport using CDP.
+
+        Returns:
+            str: A base64 encoded string of the screenshot.
+
+        Raises:
+            ValueError: If there is no active page.
+        """
+        if self._page is None:
+            raise ValueError("No page to screenshot")
+        try:
+            # Get CDP session from the page
+            cdp_session = self._page.context.new_cdp_session(self._page)
+            try:
+                # Capture screenshot using CDP
+                result = cdp_session.send(
+                    "Page.captureScreenshot",
+                    {"format": "png", "fromSurface": True},
+                )
+            finally:
+                # A new CDP session is opened per call; detach it so sessions
+                # do not pile up on the page.
+                cdp_session.detach()
+
+            return result["data"]
+        except PlaywrightError as error:
+            print(f"CDP screenshot failed, falling back to standard screenshot: {error}")
+            return super().screenshot()
diff --git a/computers/shared/base_playwright.py b/computers/shared/base_playwright.py
index 0c38e24..f16c5e0 100644
--- a/computers/shared/base_playwright.py
+++ b/computers/shared/base_playwright.py
@@ -45,10 +45,10 @@ class BasePlaywrightComputer:
     - We also have extra browser actions: `goto(url)` and `back()`.
     """
 
-    def get_environment(self):
+    def get_environment(self) -> str:
         return "browser"
 
-    def get_dimensions(self):
+    def get_dimensions(self) -> tuple[int, int]:
         return (1024, 768)
 
     def __init__(self):
diff --git a/requirements.txt b/requirements.txt
index 13769fb..b2467be 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,3 +21,4 @@ scrapybara>=2.3.6
 sniffio==1.3.1
 typing_extensions==4.12.2
 urllib3==2.3.0
+notte-sdk==1.4.4
\ No newline at end of file