Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions forecasting_tools/ai_models/claude35sonnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

class Claude35Sonnet(AnthropicTextToTextModel):
# See Anthropic Limit on the account dashboard for most up-to-date limit
# Latest as of Nov 6 2024 is claude-3-5-sonnet-20241022
# Latest in general is claude-3-5-sonnet-latest
# See models here https://docs.anthropic.com/en/docs/about-claude/models
MODEL_NAME: Final[str] = "claude-3-5-sonnet-20240620"
REQUESTS_PER_PERIOD_LIMIT: Final[int] = 50
REQUESTS_PER_PERIOD_LIMIT: Final[int] = 1_750
REQUEST_PERIOD_IN_SECONDS: Final[int] = 60
TIMEOUT_TIME: Final[int] = 40
TOKENS_PER_PERIOD_LIMIT: Final[int] = 40000
TOKENS_PER_PERIOD_LIMIT: Final[int] = 140_000
TOKEN_PERIOD_IN_SECONDS: Final[int] = 60
4 changes: 2 additions & 2 deletions forecasting_tools/ai_models/exa_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ class SearchInput(BaseModel, Jsonable):
description="The query to search within each document using semantic similarity"
)
include_domains: list[str] = Field(
description="List of domains to require in the search results for example: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter."
description="List of domains to require in the search results for example: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter. This will constrain search to ONLY results from these domains."
)
exclude_domains: list[str] = Field(
description="List of domains to exclude from the search results: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter."
description="List of domains to exclude from the search results: ['youtube.com', 'en.wikipedia.org']. An empty list means no filter. This will constrain search to exclude results from these domains."
)
include_text: str | None = Field(
description="A 1-5 word phrase that must be exactly present in the text of the search results"
Expand Down
1 change: 1 addition & 0 deletions forecasting_tools/forecasting/llms/smart_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ async def __come_up_with_search_queries(
{self.llm.get_schema_format_instructions_for_pydantic_type(SearchInput)}

Make sure to return a list of the search inputs as a list of JSON objects in this schema.
Do not give the json in separate chunks. It needs to be in one combined list.
"""
)
search_terms = await self.llm.invoke_and_return_verified_type(
Expand Down
6 changes: 4 additions & 2 deletions front_end/app_pages/benchmark_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@ class BenchmarkPage(AppPage):
URL_PATH: str = "/benchmark"
BENCHMARK_FILE_SELECTBOX_KEY: str = "benchmark_file_selectbox"
BENCHMARK_FILES_TO_SHOW: dict[str, str] = {
"GPT-4O for research and GPT-O1 for reasoning": "2024-11-06_00-05-28__q4_initial_bot__score_0.0079__git_b666874.json",
"GPT-4O for research and GPT-O1 for final decision": "2024-11-06_00-05-28__q4_initial_bot__score_0.0079__git_b666874.json",
"Claude 3.5 Sonnet for all tasks": "2024-11-06_19-32-35__q4_initial_bot_anthropic__score_0.024__git_a7572c1.json",
# "Claude 3.5 Sonnet Incomplete (5 questions)": "2024-11-06_11-05-17__q4_initial_bot_with_anthropic__score_0.0092.json",
# "Research Format Update": "2024-08-30_17-22-42__research_format_update__score_0.0802.json",
# "Original Bot": "2024-08-30_16-46-19__original_bot__score_0.0657.json",
}
BENCHMARK_FOLDER: str = "front_end/benchmarks"

@classmethod
async def _async_main(cls) -> None:
st.title("Benchmarks")
st.title("📈 Benchmarking Forecast Bot")
st.write("")
selected_file = st.selectbox(
"Select a benchmark file:",
Expand Down
Loading
Loading