Skip to content

Commit 647c2d1

Browse files
committed
tag span refactor; readme update; version bump
1 parent 73242a2 commit 647c2d1

File tree

3 files changed

+23
-15
lines changed

3 files changed

+23
-15
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ if __name__ == '__main__':
8484
asyncio.run(main())
8585
```
8686

87+
Keep in mind that Tarsier tags each type of element differently to help your LLM identify which actions can be performed on it. Specifically:
88+
- `[#ID]`: text-insertable fields (e.g. `textarea`, or `input` with a textual type)
89+
- `[@ID]`: hyperlinks (`<a>` tags)
90+
- `[$ID]`: other interactable elements (e.g. `button`, `select`)
91+
- `[ID]`: plain text (if you pass `tag_text_elements=True`)
92+
8793
## Local Development
8894

8995
### Setup

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "tarsier"
3-
version = "0.4.5"
3+
version = "0.5.0"
44
description = "Vision utilities for web interaction agents"
55
authors = ["Rohan Pandey", "Adam Watkins", "Asim Shrestha"]
66
readme = "README.md"

tarsier/tag_utils.ts

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,27 @@ function getElementXPath(element: HTMLElement | null) {
110110
return iframe_str + "//" + path_parts.join("/");
111111
}
112112

113-
function create_tagged_span(idStr: string) {
113+
/**
 * Builds the inline `<span>` label that marks a tagged element with its numeric ID.
 *
 * The label text encodes what kind of element is being tagged:
 *   - `[#N]` when both isInteractable(el) and isTextInsertable(el) are true
 *   - `[@N]` when el is interactable and its tag name is `a`
 *   - `[$N]` for any other interactable element
 *   - `[N]`  when isInteractable(el) is false
 *
 * The span is only created here; inserting it into the document is left to the caller.
 *
 * @param idNum - numeric ID to embed in the label
 * @param el - element being tagged; only used to choose the label prefix
 * @returns the newly created, not-yet-attached span element
 */
function create_tagged_span(idNum: number, el: HTMLElement) {
114+
let idStr: string;
115+
if (isInteractable(el)) {
116+
if (isTextInsertable(el))
117+
idStr = `[#${idNum}]`;
118+
// Both operands are strings, so the loose `==` behaves like `===` here.
else if (el.tagName.toLowerCase() == 'a')
119+
idStr = `[@${idNum}]`;
120+
else
121+
idStr = `[$${idNum}]`;
122+
} else {
123+
idStr = `[${idNum}]`;
124+
}
125+
114126
let idSpan = document.createElement("span");
115127
// NOTE(review): every span produced by this function gets the same id value, so the
// id is not unique once more than one span is created — presumably it serves as a
// marker for locating these spans later; confirm against the code that reads it.
idSpan.id = "__tarsier_id";
116128
// Start from inherited styles, then override only what makes the label stand out.
idSpan.style.all = "inherit";
117129
idSpan.style.display = "inline";
118130
idSpan.style.color = "white";
119131
idSpan.style.backgroundColor = "red";
120132
idSpan.textContent = idStr;
133+
121134
return idSpan;
122135
}
123136

@@ -186,18 +199,7 @@ window.tagifyWebpage = (tagLeafTexts = false) => {
186199
continue;
187200
}
188201

189-
let idStr: string;
190-
if (isInteractable(el)) {
191-
if (isTextInsertable(el))
192-
idStr = `[#${idNum}]`;
193-
else if (el.tagName.toLowerCase() == 'a')
194-
idStr = `[@${idNum}]`;
195-
else
196-
idStr = `[$${idNum}]`;
197-
} else {
198-
idStr = `[${idNum}]`;
199-
}
200-
let idSpan = create_tagged_span(idStr);
202+
let idSpan = create_tagged_span(idNum, el);
201203

202204
if (isInteractable(el)) {
203205
if (isTextInsertable(el) && el.parentElement) {
@@ -210,7 +212,7 @@ window.tagifyWebpage = (tagLeafTexts = false) => {
210212
for (let child of Array.from(el.childNodes)) {
211213
if (child.nodeType === Node.TEXT_NODE && /\S/.test(child.textContent || "")) {
212214
// This is a text node with non-whitespace text
213-
let idSpan = create_tagged_span(idStr);
215+
let idSpan = create_tagged_span(idNum, el);
214216
el.insertBefore(idSpan, child);
215217
idNum++;
216218
}

0 commit comments

Comments
 (0)