diff --git a/testzeus_hercules/core/agents/browser_nav_agent.py b/testzeus_hercules/core/agents/browser_nav_agent.py index ef79463..38ca6a4 100644 --- a/testzeus_hercules/core/agents/browser_nav_agent.py +++ b/testzeus_hercules/core/agents/browser_nav_agent.py @@ -56,12 +56,20 @@ def __init__(self, model_config_list, llm_config_params: dict[str, Any], system_ system_message = "\n".join(system_prompt) else: system_message = system_prompt - logger.info(f"Using custom system prompt for BrowserNavAgent: {system_message}") - - system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}" + logger.info( + f"Using custom system prompt for BrowserNavAgent: {system_message}" + ) + + system_message = ( + system_message + + "\n" + + f"Today's date is {datetime.now().strftime('%d %B %Y')}" + ) if user_ltm: # add the user LTM to the system prompt if it exists user_ltm = "\n" + user_ltm - system_message = Template(system_message).substitute(basic_user_information=user_ltm) + system_message = Template(system_message).substitute( + basic_user_information=user_ltm + ) logger.info(f"Browser nav agent using model: {model_config_list[0]['model']}") self.agent = autogen.ConversableAgent( name="browser_navigation_agent", @@ -71,7 +79,7 @@ def __init__(self, model_config_list, llm_config_params: dict[str, Any], system_ **llm_config_params, # unpack all the name value pairs in llm_config_params as is }, ) - # add_text_compressor(self.agent) + add_text_compressor(self.agent) self.__register_tools() def __get_ltm(self) -> str | None: @@ -94,25 +102,37 @@ def __register_tools(self) -> None: # self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click) # self.browser_nav_executor.register_for_execution()(enter_text_and_click) - self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type) + self.agent.register_for_llm( + description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"] + )(get_dom_with_content_type) self.browser_nav_executor.register_for_execution()(get_dom_with_content_type) - self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT"])(click_element) + self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT"])( + click_element + ) self.browser_nav_executor.register_for_execution()(click_element) self.agent.register_for_llm(description=LLM_PROMPTS["GET_URL_PROMPT"])(geturl) self.browser_nav_executor.register_for_execution()(geturl) - self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text) + self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])( + bulk_enter_text + ) self.browser_nav_executor.register_for_execution()(bulk_enter_text) - self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext) + self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])( + entertext + ) self.browser_nav_executor.register_for_execution()(entertext) - self.agent.register_for_llm(description=LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"])(press_key_combination) + self.agent.register_for_llm( + description=LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"] + )(press_key_combination) self.browser_nav_executor.register_for_execution()(press_key_combination) - self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf) + self.agent.register_for_llm( + description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"] + )(extract_text_from_pdf) self.browser_nav_executor.register_for_execution()(extract_text_from_pdf) self.agent.register_for_llm(description=LLM_PROMPTS["HOVER_PROMPT"])(hover) @@ -162,7 +182,9 @@ def __load_additional_tools(self) -> None: elif tool_path.endswith(".py") and os.path.isfile(tool_path): # If the path is a specific .py file, load it directly - module_name = os.path.basename(tool_path)[:-3] # Strip .py extension + module_name = os.path.basename(tool_path)[ + :-3 + ] # Strip .py extension directory_path = os.path.dirname(tool_path).replace("/", ".") module_path = f"{directory_path}.{module_name}" importlib.import_module(module_path) diff --git a/testzeus_hercules/core/memory/prompt_compressor.py b/testzeus_hercules/core/memory/prompt_compressor.py index 159675d..b5bd18b 100644 --- a/testzeus_hercules/core/memory/prompt_compressor.py +++ b/testzeus_hercules/core/memory/prompt_compressor.py @@ -4,8 +4,8 @@ from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor from testzeus_hercules.utils.logger import logger -TEXT_COMPRESSOR_LLM = LLMLingua() -TEXT_COMPRESSOR = TextMessageCompressor(text_compressor=TEXT_COMPRESSOR_LLM) +# TEXT_COMPRESSOR_LLM = LLMLingua() +# TEXT_COMPRESSOR = TextMessageCompressor(text_compressor=TEXT_COMPRESSOR_LLM) def add_text_compressor(agent: ConversableAgent) -> None: @@ -14,6 +14,8 @@ def add_text_compressor(agent: ConversableAgent) -> None: Args: agent (ConversableAgent): The agent that needs text compression in prompts """ + return + # removed the text compressor as its making the prompt lossy and causing lots of halucination. context_handling = transform_messages.TransformMessages( transforms=[TEXT_COMPRESSOR] ) diff --git a/testzeus_hercules/core/tools/enter_text_using_selector.py b/testzeus_hercules/core/tools/enter_text_using_selector.py index c4468ce..7d08f15 100644 --- a/testzeus_hercules/core/tools/enter_text_using_selector.py +++ b/testzeus_hercules/core/tools/enter_text_using_selector.py @@ -125,7 +125,9 @@ async def custom_fill_element(page: Page, selector: str, text_to_enter: str): ) logger.debug(f"custom_fill_element result: {result}") except Exception as e: - logger.error(f"Error in custom_fill_element, Selector: {selector}, Text: {text_to_enter}. Error: {str(e)}") + logger.error( + f"Error in custom_fill_element, Selector: {selector}, Text: {text_to_enter}. Error: {str(e)}" + ) raise @@ -249,18 +251,24 @@ def detect_dom_changes(changes: str): # type: ignore ) result = await do_entertext(page, query_selector, text_to_enter) - await asyncio.sleep(0.1) # sleep for 100ms to allow the mutation observer to detect changes + await asyncio.sleep( + 0.1 + ) # sleep for 100ms to allow the mutation observer to detect changes unsubscribe(detect_dom_changes) await browser_manager.take_screenshots(f"{function_name}_end", page) - await browser_manager.notify_user(result["summary_message"], message_type=MessageType.ACTION) + await browser_manager.notify_user( + result["summary_message"], message_type=MessageType.ACTION + ) if dom_changes_detected: return f"{result['detailed_message']}.\n As a consequence of this action, new elements have appeared in view: {dom_changes_detected}. This means that the action of entering text {text_to_enter} is not yet executed and needs further interaction. Get all_fields DOM to complete the interaction." return result["detailed_message"] -async def do_entertext(page: Page, selector: str, text_to_enter: str, use_keyboard_fill: bool = True): +async def do_entertext( + page: Page, selector: str, text_to_enter: str, use_keyboard_fill: bool = True +): """ Performs the text entry operation on a DOM or Shadow DOM element. @@ -359,18 +367,20 @@ async def find_element_in_shadow_dom(page: Page, selector: str): if use_keyboard_fill: await elem.focus() - await asyncio.sleep(0.1) + await asyncio.sleep(0.05) await press_key_combination("Control+A") - await asyncio.sleep(0.1) - await press_key_combination("Backspace") - await asyncio.sleep(0.1) + await asyncio.sleep(0.05) + await press_key_combination("Delete") + await asyncio.sleep(0.05) logger.debug(f"Focused element with selector {selector} to enter text") await page.keyboard.type(text_to_enter, delay=1) else: await custom_fill_element(page, selector, text_to_enter) await elem.focus() - logger.info(f'Success. Text "{text_to_enter}" set successfully in the element with selector {selector}') + logger.info( + f'Success. Text "{text_to_enter}" set successfully in the element with selector {selector}' + ) success_msg = f'Success. Text "{text_to_enter}" set successfully in the element with selector {selector}' return { "summary_message": success_msg, @@ -385,46 +395,44 @@ async def find_element_in_shadow_dom(page: Page, selector: str): async def bulk_enter_text( entries: Annotated[ - List[dict[str, str]], - "List of objects, each containing 'query_selector' and 'text'.", + List[EnterTextEntry], + "List of EnterTextEntry objects. An object containing 'query_selector' (DOM selector query using mmid attribute e.g. [mmid='114']) and 'text' (text to enter on the element).", ] # noqa: UP006 ) -> Annotated[ - List[dict[str, str]], - "List of dictionaries, each containing 'query_selector' and the result of the operation.", + List[str], + "List of results from the entertext operation for each entry.", ]: # noqa: UP006 """ Enters text into multiple DOM elements using a bulk operation. This function enters text into multiple DOM elements using a bulk operation. - It takes a list of dictionaries, where each dictionary contains a 'query_selector' and 'text' pair. + It takes a list of EnterTextEntry objects, where each contains 'query_selector' and 'text' attributes. The function internally calls the 'entertext' function to perform the text entry operation for each entry. Args: - entries: List of objects, each containing 'query_selector' and 'text'. + entries: List of EnterTextEntry objects. Returns: - List of dictionaries, each containing 'query_selector' and the result of the operation. + List of results from the entertext operation for each entry. Example: entries = [ - {"query_selector": "#username", "text": "test_user"}, - {"query_selector": "#password", "text": "test_password"} + EnterTextEntry(query_selector="#username", text="test_user"), + EnterTextEntry(query_selector="#password", text="test_password") ] results = await bulk_enter_text(entries) Note: - - Each entry in the 'entries' list should be a dictionary with 'query_selector' and 'text' keys. - - The result is a list of dictionaries, where each dictionary contains the 'query_selector' and the result of the operation. + - Each entry in the 'entries' list should be an instance of EnterTextEntry. + - The result is a list of strings returned by the 'entertext' function for each entry. """ add_event(EventType.INTERACTION, EventData(detail="bulk_enter_text")) - results: List[dict[str, str]] = [] # noqa: UP006 + results: List[str] = [] # noqa: UP006 logger.info("Executing bulk Enter Text Command") for entry in entries: - query_selector = entry["query_selector"] - text_to_enter = entry["text"] - logger.info(f"Entering text: {text_to_enter} in element with selector: {query_selector}") - result = await entertext(EnterTextEntry(query_selector=query_selector, text=text_to_enter)) - - results.append({"query_selector": query_selector, "result": result}) - + logger.info( + f"Entering text: {entry['text']} in element with selector: {entry['query_selector']}" + ) + result = await entertext(entry) + results.append(result) return results diff --git a/testzeus_hercules/utils/logger.py b/testzeus_hercules/utils/logger.py index b9a680a..74243d1 100644 --- a/testzeus_hercules/utils/logger.py +++ b/testzeus_hercules/utils/logger.py @@ -37,7 +37,7 @@ def configure_logger(level: str = "INFO") -> None: http_loggers = ["openai", "autogen"] for http_logger in http_loggers: lib_logger = logging.getLogger(http_logger) - lib_logger.setLevel(logging.DEBUG) + lib_logger.setLevel(logging.INFO) lib_logger.handlers = [] # Clear any existing handlers lib_logger.addHandler(handler) # Add the same handler