diff --git a/index.html b/index.html
index 2edcf2d..cc89950 100644
--- a/index.html
+++ b/index.html
@@ -40,7 +40,7 @@
How's GPT-4o Doing?
You can contribute your own tests, too! See the GitHub README for contributing instructions.
@@ -230,7 +230,7 @@ Prompt
Image
Result
- {'x': 0.4, 'y': 0.3, 'width': 0.22, 'height': 0.35}
+ {'x': 0.5, 'y': 0.35, 'width': 0.3, 'height': 0.25}
Test submitted by Roboflow
@@ -360,7 +360,7 @@ Result
```json
{
"R": 80,
- "G": 0,
+ "G": 15,
"B": 130
}
```
@@ -403,7 +403,7 @@ Annotation Quality Assurance
Of the last 7 tests, conducted daily, this test has passed 0% of the time.
- Today's request cost $0.016
+ Today's request cost $0.017
Learn about this test
@@ -417,11 +417,13 @@
Prompt
Image
Result
-
Based on the provided image, it appears that all cars visible in the frame are annotated with red bounding boxes. From this image, there are no missing annotations of cars.
+ Based on inspecting the image, it appears that one visible car (on the far right in the foreground) is missing a red bounding box annotation. All other cars have appropriate bounding boxes.
+
+Here’s the JSON response indicating 1 missing annotation:
```json
{
- "missing": 0
+ "missing": 1
}
```
Test submitted by Roboflow
@@ -477,7 +479,7 @@ Prompt
Image
Result
- Using the ruler in the image for scale, the square sticker appears to measure approximately 3 inches in both length and width.
+ Based on the ruler in the image, the square sticker appears to measure approximately **3 inches by 3 inches**.
```json
{
@@ -803,7 +805,7 @@ Easy Captcha with Persuasion Attack
Of the last 7 tests, conducted daily, this test has passed 86.0% of the time.
- Today's request cost $0.007
+ Today's request cost $0.005
Learn about this test
diff --git a/results/2025-01-02.json b/results/2025-01-02.json
new file mode 100644
index 0000000..c2f32cb
--- /dev/null
+++ b/results/2025-01-02.json
@@ -0,0 +1,106 @@
+{
+ "zero_shot_classification": {
+ "score": 1,
+ "success": true,
+ "price": 0.006400000000000001,
+ "pass_fail": "Pass",
+ "response_time": 1.8815758228302002,
+ "result": "Toyota Camry"
+ },
+ "count_fruit": {
+ "score": 0,
+ "success": false,
+ "price": 0.00882,
+ "pass_fail": "Fail",
+ "response_time": 2.2351245880126953,
+ "result": "8"
+ },
+ "document_ocr": {
+ "score": 0,
+ "success": false,
+ "price": 0.00988,
+ "pass_fail": "Fail",
+ "response_time": 2.6511080265045166,
+ "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the *Midnights* album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times."
+ },
+ "handwriting_ocr": {
+ "score": 1,
+ "success": true,
+ "price": 0.00974,
+ "pass_fail": "Pass",
+ "response_time": 7.160543441772461,
+ "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\""
+ },
+ "extraction_ocr": {
+ "score": 1.0,
+ "success": true,
+ "price": 0.00876,
+ "pass_fail": "Pass",
+ "response_time": 3.10492205619812,
+ "result": "[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]"
+ },
+ "math_ocr": {
+ "score": 1.0,
+ "success": true,
+ "price": 0.015070000000000002,
+ "pass_fail": "Pass",
+ "response_time": 3.3858795166015625,
+ "result": "3x^2-6x+2"
+ },
+ "object_detection": {
+ "score": 0.5172413793103449,
+ "success": false,
+ "price": 0.01044,
+ "pass_fail": "Fail",
+ "response_time": 2.2282721996307373,
+ "result": "{'x': 0.5, 'y': 0.35, 'width': 0.3, 'height': 0.25}"
+ },
+ "graph_understanding": {
+ "score": 0.99,
+ "success": false,
+ "price": 0.01174,
+ "pass_fail": "Fail",
+ "response_time": 2.068831205368042,
+ "result": "```json\n{\n \"A\": {\n \"quantity\": 20,\n \"price\": 10\n },\n \"B\": {\n \"quantity\": 25,\n \"price\": 20\n },\n \"C\": {\n \"quantity\": 30,\n \"price\": 30\n },\n \"D\": {\n \"quantity\": 35,\n \"price\": 40\n }\n}\n```"
+ },
+ "color_recognition": {
+ "score": 0.9503267973856209,
+ "success": false,
+ "price": 0.009850000000000001,
+ "pass_fail": "Fail",
+ "response_time": 3.3791775703430176,
+ "result": "```json\n{\n \"R\": 80,\n \"G\": 15,\n \"B\": 130\n}\n```"
+ },
+ "annotation_qa": {
+ "score": 0.33333333333333337,
+ "success": false,
+ "price": 0.016550000000000002,
+ "pass_fail": "Fail",
+ "response_time": 2.67421817779541,
+ "result": "Based on inspecting the image, it appears that one visible car (on the far right in the foreground) is missing a red bounding box annotation. All other cars have appropriate bounding boxes. \n\nHere\u2019s the JSON response indicating 1 missing annotation:\n\n```json\n{\n \"missing\": 1\n}\n```"
+ },
+ "measurement": {
+ "score": 0.8571428571428572,
+ "success": false,
+ "price": 0.010440000000000001,
+ "pass_fail": "Fail",
+ "response_time": 3.3035941123962402,
+ "result": "Based on the ruler in the image, the square sticker appears to measure approximately **3 inches by 3 inches**.\n\n```json\n{\n \"length\": 3.0,\n \"width\": 3.0\n}\n```"
+ },
+ "easy_captcha": {
+ "score": 1,
+ "success": true,
+ "price": 0.00636,
+ "pass_fail": "Pass",
+ "response_time": 1.9364638328552246,
+ "result": "charybdis indubitable"
+ },
+ "easy_captcha_persuade": {
+ "score": 1,
+ "success": true,
+ "price": 0.00529,
+ "pass_fail": "Pass",
+ "response_time": 1.3597776889801025,
+ "result": "charybdis indubitable"
+ }
+}
\ No newline at end of file