Skip to content

Commit 060c60b

Browse files
authored
Merge pull request #9 from CodexVeritas/develop
Added benchmarks for Claude Sonnet 3.5
2 parents 4522528 + cf62f02 commit 060c60b

File tree

6 files changed

+738268
-6
lines changed

6 files changed

+738268
-6
lines changed

forecasting_tools/ai_models/claude35sonnet.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77

88
class Claude35Sonnet(AnthropicTextToTextModel):
99
# See the rate limits on the Anthropic account dashboard for the most up-to-date limits
10+
# Latest as of Nov 6 2024 is claude-3-5-sonnet-20241022
11+
# Latest in general is claude-3-5-sonnet-latest
12+
# See models here https://docs.anthropic.com/en/docs/about-claude/models
1013
MODEL_NAME: Final[str] = "claude-3-5-sonnet-20240620"
11-
REQUESTS_PER_PERIOD_LIMIT: Final[int] = 50
14+
REQUESTS_PER_PERIOD_LIMIT: Final[int] = 1_750
1215
REQUEST_PERIOD_IN_SECONDS: Final[int] = 60
1316
TIMEOUT_TIME: Final[int] = 40
14-
TOKENS_PER_PERIOD_LIMIT: Final[int] = 40000
17+
TOKENS_PER_PERIOD_LIMIT: Final[int] = 140_000
1518
TOKEN_PERIOD_IN_SECONDS: Final[int] = 60

forecasting_tools/ai_models/exa_searcher.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ class SearchInput(BaseModel, Jsonable):
6262
description="The query to search within each document using semantic similarity"
6363
)
6464
include_domains: list[str] = Field(
65-
description="List of domains to require in the search results for example: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter."
65+
description="List of domains to require in the search results for example: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter. This will constrain search to ONLY results from these domains."
6666
)
6767
exclude_domains: list[str] = Field(
68-
description="List of domains to exclude from the search results: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter."
68+
description="List of domains to exclude from the search results: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter. This will constrain search to exclude results from these domains."
6969
)
7070
include_text: str | None = Field(
7171
description="A 1-5 word phrase that must be exactly present in the text of the search results"

forecasting_tools/forecasting/llms/smart_searcher.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ async def __come_up_with_search_queries(
9494
{self.llm.get_schema_format_instructions_for_pydantic_type(SearchInput)}
9595
9696
Make sure to return a list of the search inputs as a list of JSON objects in this schema.
97+
Do not give the json in separate chunks. It needs to be in one combined list.
9798
"""
9899
)
99100
search_terms = await self.llm.invoke_and_return_verified_type(

front_end/app_pages/benchmark_page.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,17 @@ class BenchmarkPage(AppPage):
1616
URL_PATH: str = "/benchmark"
1717
BENCHMARK_FILE_SELECTBOX_KEY: str = "benchmark_file_selectbox"
1818
BENCHMARK_FILES_TO_SHOW: dict[str, str] = {
19-
"GPT-4O for research and GPT-O1 for reasoning": "2024-11-06_00-05-28__q4_initial_bot__score_0.0079__git_b666874.json",
19+
"GPT-4O for research and GPT-O1 for final decision": "2024-11-06_00-05-28__q4_initial_bot__score_0.0079__git_b666874.json",
20+
"Claude 3.5 Sonnet for all tasks": "2024-11-06_19-32-35__q4_initial_bot_anthropic__score_0.024__git_a7572c1.json",
21+
# "Claude 3.5 Sonnet Incomplete (5 questions)": "2024-11-06_11-05-17__q4_initial_bot_with_anthropic__score_0.0092.json",
2022
# "Research Format Update": "2024-08-30_17-22-42__research_format_update__score_0.0802.json",
2123
# "Original Bot": "2024-08-30_16-46-19__original_bot__score_0.0657.json",
2224
}
2325
BENCHMARK_FOLDER: str = "front_end/benchmarks"
2426

2527
@classmethod
2628
async def _async_main(cls) -> None:
27-
st.title("Benchmarks")
29+
st.title("📈 Benchmarking Forecast Bot")
2830
st.write("")
2931
selected_file = st.selectbox(
3032
"Select a benchmark file:",

0 commit comments

Comments
 (0)