Skip to content

Commit 57d732f

Browse files
committed
Incorporated screenshot search into background link parsing
1 parent 196b73a commit 57d732f

File tree

13 files changed

+228
-101
lines changed

13 files changed

+228
-101
lines changed

.devcontainer/postinstall.sh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,3 +22,7 @@ source ../.venv/bin/activate
2222

2323
# Show which Python interpreter is being used
2424
which python
25+
26+
# Install playwright
27+
playwright install
28+
playwright install-deps

.vscode/settings.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"python.testing.unittestArgs": [
44
"-v",
55
"-s",
6-
"./tests",
6+
"./code_tests",
77
"-p",
88
"test_*.py"
99
],

README.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,12 @@ Last Update: Nov 3 2024
99
# Quick Install
1010
Install this package with `pip install forecasting-tools`
1111

12+
If you want to use advanced SmartSearcher functionality, you may also need to install the Playwright browsers and their system dependencies:
13+
```bash
14+
playwright install
15+
playwright install-deps
16+
```
17+
1218
# Overview
1319
Demo website: https://mokoresearch.streamlit.app/
1420

code_tests/low_cost_or_live_api_tests/test_forecast_database_manager.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,15 +58,15 @@ async def test_base_rate_report_can_be_added_to_coda() -> None:
5858

5959

6060
def get_forecast_example_reports() -> list[BinaryReport]:
61-
metaculus_data_path = "tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_forecast_report_examples.json"
61+
metaculus_data_path = "code_tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_forecast_report_examples.json"
6262
metaculus_reports = BinaryReport.convert_project_file_path_to_object_list(
6363
metaculus_data_path
6464
)
6565
return metaculus_reports
6666

6767

6868
def get_base_rate_example_reports() -> list[BaseRateReport]:
69-
base_rate_data_path = "tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/base_rate_reports.json"
69+
base_rate_data_path = "code_tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/base_rate_reports.json"
7070
base_rate_reports = (
7171
BaseRateReport.convert_project_file_path_to_object_list(
7272
base_rate_data_path

code_tests/low_cost_or_live_api_tests/test_smart_searcher.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,9 +64,19 @@ async def test_ask_question_empty_prompt() -> None:
6464

6565
async def test_screenshot_question() -> None:
6666
with MonetaryCostManager() as cost_manager:
67-
searcher = SmartSearcher(num_sites_to_screenshot=1)
68-
question = "When was the most noticeable recent dip in the graph from https://fred.stlouisfed.org/series/GDPC1? Say 0 if you do not know."
67+
searcher = SmartSearcher(num_sites_to_deep_dive=2)
68+
question = "When was the most noticeable recent dip in the graph from https://fred.stlouisfed.org/series/GDPC1? Say 0 if you do not know. Please search specifically for the site itself."
6969
result = await searcher.invoke(question)
7070
logger.info(f"Result: {result}")
7171
logger.info(f"Cost: {cost_manager.current_usage}")
7272
assert "2020" in result
73+
74+
75+
@pytest.mark.skip("Run this when needed as it's purely a qualitative test")
76+
async def test_screenshot_question_2() -> None:
77+
with MonetaryCostManager() as cost_manager:
78+
searcher = SmartSearcher(num_sites_to_deep_dive=2)
79+
question = "Please tell me about the recent trends in the Federal Funds Effective Rate."
80+
result = await searcher.invoke(question)
81+
logger.info(f"Result: {result}")
82+
logger.info(f"Cost: {cost_manager.current_usage}")

code_tests/low_cost_or_live_api_tests/test_url_scraper.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,9 @@ async def test_get_screenshot_as_file() -> None:
77
url_scraper = UrlScraper()
88
test_url = "https://example.com"
99

10-
screenshot_file = await url_scraper.get_screenshot_of_url_as_file(test_url)
10+
screenshot_file = await url_scraper.get_screenshot_of_url_as_image(
11+
test_url
12+
)
1113
assert isinstance(screenshot_file, Image.Image)
1214
assert screenshot_file.width > 0
1315
assert screenshot_file.height > 0

code_tests/no_cost_expect_all_to_succeed/test_forecasting/test_forecast_report.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
def test_metaculus_report_is_jsonable() -> None:
1818
temp_writing_path = "temp/temp_metaculus_report.json"
19-
read_report_path = "tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_forecast_report_examples.json"
19+
read_report_path = "code_tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_forecast_report_examples.json"
2020
jsonable_assertations.assert_reading_and_printing_from_file_works(
2121
BinaryReport, read_report_path, temp_writing_path
2222
)

code_tests/no_cost_expect_all_to_succeed/test_forecasting/test_metaculus_question.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
def test_metaculus_question_is_jsonable() -> None:
88
temp_writing_path = "temp/temp_metaculus_question.json"
9-
read_report_path = "tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_questions.json"
9+
read_report_path = "code_tests/no_cost_expect_all_to_succeed/test_forecasting/forecasting_test_data/metaculus_questions.json"
1010
jsonable_assertations.assert_reading_and_printing_from_file_works(
1111
BinaryQuestion, read_report_path, temp_writing_path
1212
)

forecasting_tools/forecasting/forecast_team/final_decision_agent.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,7 @@ async def __get_research_summary_and_populate_if_empty(self) -> str:
154154
summary_markdown = await model.invoke(prompt)
155155
cleaned_summary_markdown = strip_code_block_markdown(summary_markdown)
156156
self.__research_summary = cleaned_summary_markdown
157-
logger.info(
158-
f"Made research summary: {cleaned_summary_markdown[:100]}..."
159-
)
157+
logger.info("Made research summary for final decision agent")
160158
return cleaned_summary_markdown
161159

162160
async def __create_unified_explanation(

forecasting_tools/forecasting/forecast_team/research_coordinator.py

Lines changed: 71 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
from __future__ import annotations
22

33
import logging
4+
import re
45

56
from forecasting_tools.ai_models.ai_utils.ai_misc import clean_indents
67
from forecasting_tools.forecasting.helpers.configured_llms import BasicLlm
8+
from forecasting_tools.forecasting.helpers.url_scraper import UrlScraper
79
from forecasting_tools.forecasting.questions_and_reports.metaculus_question import (
810
MetaculusQuestion,
911
)
@@ -21,6 +23,9 @@
2123
QuestionRouter,
2224
)
2325
from forecasting_tools.util import async_batching
26+
from forecasting_tools.util.async_batching import (
27+
run_coroutines_while_removing_and_logging_exceptions,
28+
)
2429

2530
logger = logging.getLogger(__name__)
2631

@@ -39,17 +44,71 @@ async def create_full_markdown_research_report(
3944
num_base_rate_questions: int,
4045
num_base_rate_questions_with_deep_research: int,
4146
) -> str:
47+
question_details_markdown = (
48+
await self.__generate_question_details_markdown()
49+
)
4250
background_markdown = await self.generate_background_markdown(
43-
num_of_background_questions
51+
num_of_background_questions, question_details_markdown
4452
)
4553
base_rate_markdown = await self.generate_base_rate_markdown(
4654
num_base_rate_questions,
4755
num_base_rate_questions_with_deep_research,
4856
background_markdown,
4957
)
50-
combined_markdown = background_markdown + "\n\n" + base_rate_markdown
58+
combined_markdown = (
59+
question_details_markdown
60+
+ "\n\n"
61+
+ background_markdown
62+
+ "\n\n"
63+
+ base_rate_markdown
64+
)
5165
return combined_markdown
5266

67+
async def __generate_question_details_markdown(self) -> str:
68+
question_details = self.question.give_question_details_as_markdown()
69+
urls = self.__extract_urls_from_markdown_text(question_details)
70+
71+
if not urls:
72+
return "# Question Details\nNo links found in question details."
73+
74+
screenshot_tasks = [
75+
self.__get_screenshot_and_summary(url) for url in urls
76+
]
77+
78+
summaries, successful_urls = (
79+
run_coroutines_while_removing_and_logging_exceptions(
80+
screenshot_tasks, urls
81+
)
82+
)
83+
84+
markdown = await self.__create_question_answer_markdown_section(
85+
[f"What does {url} say?" for url in successful_urls],
86+
summaries,
87+
question_prepend="L",
88+
)
89+
return markdown
90+
91+
def __extract_urls_from_markdown_text(self, text: str) -> list[str]:
92+
markdown_link_pattern = r"\[([^\]]+)\]\(([^)]+)\)"
93+
matches = re.findall(markdown_link_pattern, text)
94+
return list(set(url for _, url in matches))
95+
96+
async def __get_screenshot_and_summary(
97+
self,
98+
url: str,
99+
) -> str:
100+
logger.info(f"Attempting to get screenshot and summary for {url}")
101+
url_scraper = UrlScraper()
102+
image_data = await url_scraper.get_screenshot_of_url_as_base64(url)
103+
prompt = f"You are a superforecast. Please research and write a report on the following question: {self.question.give_question_details_as_markdown()}"
104+
summary = await UrlScraper.get_summary_of_screenshot(
105+
image_data, prompt
106+
)
107+
logger.info(
108+
f"Successfully got summary for {url}. Summary: {summary[:1000]}..."
109+
)
110+
return summary
111+
53112
async def make_list_of_base_rate_reports(
54113
self,
55114
number_of_base_rate_reports: int,
@@ -77,10 +136,12 @@ async def make_list_of_base_rate_reports(
77136
return base_rate_reports
78137

79138
async def generate_background_markdown(
80-
self, num_background_questions: int
139+
self,
140+
num_background_questions: int,
141+
additional_context: str | None = None,
81142
) -> str:
82143
questions = await self.brainstorm_background_questions(
83-
num_background_questions
144+
num_background_questions, additional_context
84145
)
85146
answers = await self.answer_question_list(questions, GeneralResearcher)
86147
logger.info("Generated background markdown.")
@@ -134,7 +195,9 @@ async def pick_best_base_rate_questions(
134195
return deep_questions, shallow_questions
135196

136197
async def brainstorm_background_questions(
137-
self, num_background_questions: int
198+
self,
199+
num_background_questions: int,
200+
additional_context: str | None = None,
138201
) -> list[str]:
139202
logger.info(
140203
f"Running forecasts on question `{self.question.question_text}`"
@@ -179,6 +242,9 @@ async def brainstorm_background_questions(
179242
# Question Info
180243
{self.question.give_question_details_as_markdown()}
181244
245+
# Previous Research
246+
{additional_context or "No previous research was provided"}
247+
182248
# Your Turn
183249
Please come up with {num_background_questions} questions.
184250
"""

0 commit comments

Comments
 (0)