Skip to content

Commit

Permalink
solving div type buttons issue based on our observations with users
Browse files Browse the repository at this point in the history
  • Loading branch information
shriyanshagnihotri committed Dec 12, 2024
1 parent 854b479 commit 54d1745
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 32 deletions.
7 changes: 2 additions & 5 deletions testzeus_hercules/core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,11 +446,8 @@
"ENTER_TEXT_PROMPT": """Single enter given text in the DOM element matching the given mmid attribute value. This will only enter the text and not press enter or anything else.
Returns Success if text entry was successful or appropriate error message if text could not be entered.""",
"CLICK_BY_TEXT_PROMPT": """Executes a click action on the element matching the text. If multiple text matches are found, it will click on all of them. Use this as last resort when all else fails.""",
"BULK_ENTER_TEXT_PROMPT": """Bulk enter text in multiple DOM fields. To be used when there are multiple fields to be filled on the same page.
Enters text in the DOM elements matching the given mmid attribute value.
The input will receive a list of objects containing the DOM query selector and the text to enter.
This will only enter the text and not press enter or anything else.
Returns each selector and the result for attempting to enter text.""",
"BULK_ENTER_TEXT_PROMPT": """Multiple enter given texts in the DOM element matching the given mmid attribute value. This will only enter the text and not press enter or anything else.
Returns Success if text entry was successful or appropriate error message if text could not be entered.""",
"PRESS_KEY_COMBINATION_PROMPT": """Presses the given key on the current web page.
This is useful for pressing the enter button to submit a search query, PageDown to scroll, ArrowDown to change selection in a focussed list etc.""",
"ADD_TO_MEMORY_PROMPT": """"Save any information that you may need later in this term memory. This could be useful for saving things to do, saving information for personalisation, or even saving information you may need in future for efficiency purposes E.g. Remember to call John at 5pm, This user likes Tesla company and considered buying shares, The user enrollment form is available in <url> etc.""",
Expand Down
151 changes: 124 additions & 27 deletions testzeus_hercules/utils/get_detailed_accessibility_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,42 @@ async def __inject_attributes(page: Page) -> None:
});
return idCounter;
};
// Decide whether a DOM element should be treated as interactive.
// Checks run in order and stop at the first match: native interactive tag,
// interactive ARIA role, inline event-handler attribute, then the
// 'text_box' CSS class. Returns a boolean.
function isInteractiveElement(element) {
  // Tags that are interactive by their HTML semantics.
  const nativeTags = ['button', 'a', 'input', 'select', 'textarea'];
  const tagName = element.tagName.toLowerCase();
  if (nativeTags.includes(tagName)) {
    return true;
  }

  // Non-semantic elements (e.g. a div) that declare an interactive ARIA role.
  const roleSelector = '[role="button"], [role="link"], [role="textbox"], [role="checkbox"], [role="menuitem"], [role="option"]';
  if (element.matches(roleSelector)) {
    return true;
  }

  // An inline handler attribute suggests the element reacts to user input.
  const handlerAttributes = [
    'onclick', 'oninput', 'onchange', 'onkeydown', 'onkeyup', 'onfocus', 'onblur'
  ];
  if (handlerAttributes.some((name) => element.hasAttribute(name))) {
    return true;
  }

  // Last check: elements carrying the 'text_box' class count as interactive.
  // classList is guarded because it may be missing on the element.
  return Boolean(element.classList && element.classList.contains('text_box'));
}
// Helper function to determine if an element is interactive
const isInteractiveElement = (element) => {
const interactiveTags = ['button', 'a', 'input', 'select', 'textarea'];
return interactiveTags.includes(element.tagName.toLowerCase()) || element.hasAttribute('tabindex');
};
// const isInteractiveElement = (element) => {
// const interactiveTags = ['button', 'a', 'input', 'select', 'textarea'];
// return interactiveTags.includes(element.tagName.toLowerCase()) || element.hasAttribute('tabindex');
// };
// Start processing the DOM
const allElements = document.querySelectorAll('*');
Expand All @@ -93,7 +123,9 @@ async def __inject_attributes(page: Page) -> None:
logger.debug(f"Added MMID into {last_mmid} elements")


async def __fetch_dom_info(page: Page, accessibility_tree: dict[str, Any], only_input_fields: bool) -> dict[str, Any]:
async def __fetch_dom_info(
page: Page, accessibility_tree: dict[str, Any], only_input_fields: bool
) -> dict[str, Any]:
"""
Iterates over the accessibility tree, fetching additional information from the DOM based on 'mmid',
and constructs a new JSON structure with detailed information.
Expand Down Expand Up @@ -121,7 +153,9 @@ async def __fetch_dom_info(page: Page, accessibility_tree: dict[str, Any], only_
"aria-controls",
"aria-describedby",
]
backup_attributes = [] # if the attributes are not found, then try to get these attributes
backup_attributes = (
[]
) # if the attributes are not found, then try to get these attributes
tags_to_ignore = [
"head",
"style",
Expand Down Expand Up @@ -362,7 +396,9 @@ async def process_node(node: dict[str, Any]) -> None:
)

if "keyshortcuts" in node:
del node["keyshortcuts"] # remove keyshortcuts since it is not needed
del node[
"keyshortcuts"
] # remove keyshortcuts since it is not needed

node["mmid"] = mmid

Expand All @@ -371,23 +407,46 @@ async def process_node(node: dict[str, Any]) -> None:
node.update(element_attributes)

# check if 'name' and 'mmid' are the same
if node.get("name") == node.get("mmid") and node.get("role") != "textbox":
if (
node.get("name") == node.get("mmid")
and node.get("role") != "textbox"
):
del node["name"] # Remove 'name' from the node

if (
"name" in node
and "description" in node
and (node["name"] == node["description"] or node["name"] == node["description"].replace("\n", " ") or node["description"].replace("\n", "") in node["name"])
and (
node["name"] == node["description"]
or node["name"] == node["description"].replace("\n", " ")
or node["description"].replace("\n", "") in node["name"]
)
):
del node["description"] # if the name is same as description, then remove the description to avoid duplication
del node[
"description"
] # if the name is same as description, then remove the description to avoid duplication

if "name" in node and "aria-label" in node and node["aria-label"] in node["name"]:
del node["aria-label"] # if the name is same as the aria-label, then remove the aria-label to avoid duplication
if (
"name" in node
and "aria-label" in node
and node["aria-label"] in node["name"]
):
del node[
"aria-label"
] # if the name is same as the aria-label, then remove the aria-label to avoid duplication

if "name" in node and "text" in node and node["name"] == node["text"]:
del node["text"] # if the name is same as the text, then remove the text to avoid duplication
if (
"name" in node
and "text" in node
and node["name"] == node["text"]
):
del node[
"text"
] # if the name is same as the text, then remove the text to avoid duplication

if node.get("tag") == "select": # children are not needed for select menus since "options" attriburte is already added
if (
node.get("tag") == "select"
): # children are not needed for select menus since "options" attriburte is already added
node.pop("children", None)
node.pop("role", None)
node.pop("description", None)
Expand All @@ -397,7 +456,11 @@ async def process_node(node: dict[str, Any]) -> None:
del node["role"]

# avoid duplicate aria-label
if node.get("aria-label") and node.get("placeholder") and node.get("aria-label") == node.get("placeholder"):
if (
node.get("aria-label")
and node.get("placeholder")
and node.get("aria-label") == node.get("placeholder")
):
del node["aria-label"]

if node.get("role") == "link":
Expand Down Expand Up @@ -478,7 +541,9 @@ async def process_node(node: dict[str, Any]) -> None:
if attribute_to_delete in node:
node.pop(attribute_to_delete, None)
else:
logger.debug(f"No element found with mmid: {mmid}, deleting node: {node}")
logger.debug(
f"No element found with mmid: {mmid}, deleting node: {node}"
)
node["marked_for_deletion_by_mm"] = True

# Process each node in the tree starting from the root
Expand Down Expand Up @@ -543,7 +608,9 @@ async def __cleanup_dom(page: Page) -> None:
logger.debug("DOM cleanup complete")


def __prune_tree(node: dict[str, Any], only_input_fields: bool) -> dict[str, Any] | None:
def __prune_tree(
node: dict[str, Any], only_input_fields: bool
) -> dict[str, Any] | None:
"""
Recursively prunes a tree starting from `node`, based on pruning conditions and handling of 'unraveling'.
Expand Down Expand Up @@ -584,8 +651,14 @@ def __prune_tree(node: dict[str, Any], only_input_fields: bool) -> dict[str, Any
if "marked_for_unravel_children" in child:
# Replace the current child with its children
if "children" in child:
node["children"] = node["children"][:i] + child["children"] + node["children"][i + 1 :]
i += len(child["children"]) - 1 # Adjust the index for the new children
node["children"] = (
node["children"][:i]
+ child["children"]
+ node["children"][i + 1 :]
)
i += (
len(child["children"]) - 1
) # Adjust the index for the new children
else:
# If the node marked for unraveling has no children, remove it
node["children"].pop(i)
Expand Down Expand Up @@ -622,10 +695,21 @@ def __should_prune_node(node: dict[str, Any], only_input_fields: bool) -> bool:
bool: True if the node should be pruned, False otherwise.
"""
# If the request is for only input fields and this is not an input field, then mark the node for pruning
if node.get("role") != "WebArea" and only_input_fields and not (node.get("tag") in ("input", "button", "textarea") or node.get("role") == "button"):
if (
node.get("role") != "WebArea"
and only_input_fields
and not (
node.get("tag") in ("input", "button", "textarea")
or node.get("role") == "button"
)
):
return True

if node.get("role") == "generic" and "children" not in node and not ("name" in node and node.get("name")): # The presence of 'children' is checked after potentially deleting it above
if (
node.get("role") == "generic"
and "children" not in node
and not ("name" in node and node.get("name"))
): # The presence of 'children' is checked after potentially deleting it above
return True

if node.get("role") in ["separator", "LineBreak"]:
Expand All @@ -641,7 +725,12 @@ def __should_prune_node(node: dict[str, Any], only_input_fields: bool) -> bool:
processed_name = ""

# check if the node only have name and role, then delete that node
if len(node) == 2 and "name" in node and "role" in node and not (node.get("role") == "text" and processed_name != ""):
if (
len(node) == 2
and "name" in node
and "role" in node
and not (node.get("role") == "text" and processed_name != "")
):
return True
return False

Expand All @@ -657,7 +746,9 @@ async def get_node_dom_element(page: Page, mmid: str) -> Any:
)


async def get_element_attributes(page: Page, mmid: str, attributes: list[str]) -> dict[str, Any]:
async def get_element_attributes(
page: Page, mmid: str, attributes: list[str]
) -> dict[str, Any]:
return await page.evaluate(
"""
(inputParams) => {
Expand Down Expand Up @@ -744,7 +835,9 @@ async def get_dom_with_accessibility_info() -> Annotated[
return await do_get_accessibility_info(page)


async def do_get_accessibility_info(page: Page, only_input_fields: bool = False) -> dict[str, Any] | None:
async def do_get_accessibility_info(
page: Page, only_input_fields: bool = False
) -> dict[str, Any] | None:
"""
Retrieves the accessibility information of a web page and saves it as JSON files.
Expand Down Expand Up @@ -977,12 +1070,16 @@ async def do_get_accessibility_info(page: Page, only_input_fields: bool = False)

await __cleanup_dom(page)
try:
enhanced_tree = await __fetch_dom_info(page, accessibility_tree, only_input_fields)
enhanced_tree = await __fetch_dom_info(
page, accessibility_tree, only_input_fields
)

logger.debug("Enhanced Accessibility Tree ready")

with open(
os.path.join(get_source_log_folder_path(), "json_accessibility_dom_enriched.json"),
os.path.join(
get_source_log_folder_path(), "json_accessibility_dom_enriched.json"
),
"w",
encoding="utf-8",
) as f:
Expand Down

0 comments on commit 54d1745

Please sign in to comment.