Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
8ce4621
Reorganized file structure and added UrlScraper screenshot capability
CodexVeritas Nov 9, 2024
196b73a
Added screenshotting to the smartsearcher
CodexVeritas Nov 9, 2024
57d732f
Incorporated screenshot search into background link parsing
CodexVeritas Nov 9, 2024
0cf2264
Updated key factors deduplication, and renovated the MetaculusAPI
CodexVeritas Nov 11, 2024
4247052
Added question id field to question object
CodexVeritas Nov 12, 2024
1b67ed5
Added benchmark for screenshot feature, and a few touch ups
CodexVeritas Nov 13, 2024
ed2053f
Changed the smart searcher back to pre-screenshot config
CodexVeritas Nov 14, 2024
1775850
Added screenshot-with-smart-searcher reverted benchmark
CodexVeritas Nov 15, 2024
9b81110
Added jupyter notebook support
CodexVeritas Nov 15, 2024
1da3210
Added dspy to poetry
CodexVeritas Nov 16, 2024
ff9357c
Created 'simulate-a-tournament' notebook
CodexVeritas Nov 17, 2024
24a73f7
Tested more scoring methods with the tournament simulation notebook
CodexVeritas Nov 18, 2024
5508518
Added benchmark files for gpt4o and o1-preview
CodexVeritas Dec 11, 2024
9201130
Added benchmark files for o1-preview that I missed
CodexVeritas Dec 11, 2024
89d7780
Removed URL Scraper and summary that was added to research_coordinator
CodexVeritas Dec 11, 2024
9a2de50
Replaced spaces with %20 in smart searcher
CodexVeritas Dec 11, 2024
1a8040a
Removed the 'exclude text' from exa searcher
CodexVeritas Dec 11, 2024
763301b
Renamed unit test folder
CodexVeritas Dec 14, 2024
d9e8a00
Changed deviation score calculation to use expected log score
CodexVeritas Dec 14, 2024
6d2da30
Renamed tournament forecast script
CodexVeritas Dec 14, 2024
70bfd38
Got initial interface for forecast bot finished and team_manager repl…
CodexVeritas Dec 14, 2024
525c63d
Got first draft of default bot, and numeric distribution code written
CodexVeritas Dec 15, 2024
ff95342
Removed forecastTeam, FinalDecisionAgent, and TeamManager. Imports an…
CodexVeritas Dec 15, 2024
ee41126
All tests passing except for running and publishing the new numeric an…
CodexVeritas Dec 15, 2024
faf0387
Got mc/binary/numeric prediction and posting working for template bot
CodexVeritas Dec 16, 2024
2bae01c
Got the publish to tournament script working
CodexVeritas Dec 16, 2024
2d1ec91
Refactored create_unified_explanation
CodexVeritas Dec 18, 2024
d0a2d8a
Did a number of refactors, added github action, started documentation
CodexVeritas Dec 18, 2024
f83a93a
Finished adding new tests for reports and bots
CodexVeritas Dec 18, 2024
33a097c
Finished adding new tests for reports and bots
CodexVeritas Dec 18, 2024
558f4e4
All tests are passing
CodexVeritas Dec 18, 2024
91ac581
Updated the Readme, Yaml Scripts, and tournament script
CodexVeritas Dec 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .devcontainer/postinstall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ source ../.venv/bin/activate

# Show which Python interpreter is being used
which python

# Install playwright
# playwright install
# playwright install-deps
8 changes: 3 additions & 5 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
PYTHONPATH=.

# Currently not being used as models, but might be in the future
PERPLEXITY_API_KEY=
OPENAI_API_KEY=
EXA_API_KEY=

# Fill this in if using the Metaculus API
METACULUS_TOKEN=

# Right now only used for free semantic similarity calculation, but defaults to OpenAI if not filled in
# Right now only used for free semantic similarity calculation in Deduplicator, but defaults to OpenAI if not filled in
HUGGINGFACE_API_KEY=

# Currently not being used as models, but might be in the future
PERPLEXITY_API_KEY=
ANTHROPIC_API_KEY=

# Only needed in Streamlit Cloud in order to save responses to a database and track usage
CODA_API_KEY=

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ temp
.personal/
.sandbox/
sandbox.py
sandbox.ipynb


.DS_Store
Expand Down
6 changes: 3 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
"python.testing.unittestArgs": [
"-v",
"-s",
"./tests",
"./code_tests",
"-p",
"*_test.py"
"test_*.py"
],
"python.testing.pytestEnabled": true,
"python.testing.unittestEnabled": false,
"python.testing.pytestArgs": [
"tests"
"code_tests"
],
"python.analysis.typeCheckingMode": "basic",
"files.trimTrailingWhitespace": true,
Expand Down
269 changes: 185 additions & 84 deletions README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import asyncio
from datetime import datetime

from forecasting_tools.forecasting.sub_question_responders.base_rate_researcher import (
from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
BaseRateReport,
BaseRateResearcher,
DenominatorOption,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
MonetaryCostManager,
)
from forecasting_tools.forecasting.metaculus_api import MetaculusApi
from forecasting_tools.forecasting.sub_question_responders.key_factors_researcher import (
from forecasting_tools.forecasting.helpers.metaculus_api import MetaculusApi
from forecasting_tools.forecasting.sub_question_researchers.key_factors_researcher import (
KeyFactorsResearcher,
ScoredKeyFactor,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
MonetaryCostManager,
)
from forecasting_tools.forecasting.sub_question_responders.niche_list_researcher import (
from forecasting_tools.forecasting.sub_question_researchers.niche_list_researcher import (
FactCheckedItem,
NicheListResearcher,
)
Expand Down Expand Up @@ -92,6 +92,10 @@ def test_large_lists_fail(things_to_generate: str) -> None:
"2010 Moldovan constitutional referendum", # See above
"2019 Moldovan referendum", # See above
"2024 Moldovan European Union membership referendum", # See above
# Should I add this to the lists? -> "2014 Gagauzia Referendum on Customs Union",
# https://www.rferl.org/a/moldova-gagauz-referendum-counting/25251251.html
# Moldova said it was illegal, but it passed with 98% https://balkaninsight.com/2024/01/31/moldova-condemns-separatist-march-in-restive-gagauzia-region/
# See Perplexity's attempt: https://www.perplexity.ai/search/list-for-me-every-moldovan-ref-qxcLgs2WQbSpW8HDIayNEg
],
),
(
Expand Down Expand Up @@ -163,6 +167,7 @@ def test_large_lists_fail(things_to_generate: str) -> None:
"Typhoon Touch Technologies (touch screen)",
"Nokia v. Apple (wireless, iPhone)",
"Ericsson vs. Apple",
# Consider if Perplexity got any that I missed (see second message) https://www.perplexity.ai/search/countries-that-have-successful-b2nwlS8DSeqIjBFsKm94og
],
), # See above
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@

from forecasting_tools.ai_models.ai_utils.ai_misc import clean_indents
from forecasting_tools.ai_models.gpt4o import Gpt4o
from forecasting_tools.forecasting.sub_question_responders.base_rate_researcher import (
from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
BaseRateResearcher,
)
from forecasting_tools.forecasting.sub_question_responders.general_researcher import (
from forecasting_tools.forecasting.sub_question_researchers.general_researcher import (
GeneralResearcher,
)
from forecasting_tools.forecasting.sub_question_responders.question_responder import (
from forecasting_tools.forecasting.sub_question_researchers.question_responder import (
QuestionResponder,
)
from forecasting_tools.forecasting.sub_question_responders.question_router import (
from forecasting_tools.forecasting.sub_question_researchers.question_router import (
QuestionRouter,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging

from forecasting_tools.forecasting.llms.smart_searcher import SmartSearcher
from tests.utilities_for_tests.coroutine_testing import (
from code_tests.utilities_for_tests.coroutine_testing import (
assert_coroutines_run_under_x_times_duration_of_benchmark,
)
from forecasting_tools.forecasting.helpers.smart_searcher import SmartSearcher

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,17 @@

import pytest

from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)
from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
from code_tests.utilities_for_tests import coroutine_testing
from forecasting_tools.ai_models.basic_model_interfaces.ai_model import AiModel
from forecasting_tools.ai_models.claude35sonnet import Claude35Sonnet
from forecasting_tools.ai_models.exa_searcher import ExaSearcher
from forecasting_tools.ai_models.gpto1 import GptO1
from forecasting_tools.ai_models.metaculus4o import Gpt4oMetaculusProxy
from forecasting_tools.ai_models.perplexity import Perplexity
from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)
from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
ModelsToTest,
)
from tests.utilities_for_tests import coroutine_testing

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

import pytest

from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)
from forecasting_tools.ai_models.exa_searcher import (
ExaHighlightQuote,
ExaSearcher,
ExaSource,
SearchInput,
)
from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)


async def test_invoke_for_highlights_in_relevance_order(mocker: Mock) -> None:
Expand Down Expand Up @@ -96,14 +96,12 @@ async def test_filtered_invoke() -> None:
model = ExaSearcher(
num_results=num_results, include_highlights=False, include_text=True
)
exclude_domains = ["alliance.health"]
search = SearchInput(
web_search_query="coronavirus",
highlight_query=None,
include_domains=[],
exclude_domains=exclude_domains,
exclude_domains=["alliance.health"],
include_text="pregnancy",
exclude_text="symptoms",
start_published_date=datetime(2022, 11, 1),
end_published_date=datetime(2022, 11, 30),
)
Expand All @@ -117,13 +115,11 @@ async def test_filtered_invoke() -> None:
assert source.published_date <= search.end_published_date
assert source.published_date >= search.start_published_date
assert search.include_text is not None
assert search.exclude_text is not None
assert search.include_text in source.text
assert search.exclude_text not in source.text
assert source.url is not None
assert all(
exclude_domain not in source.url
for exclude_domain in exclude_domains
for exclude_domain in search.exclude_domains
)
assert len(source.highlights) == 0
assert len(source.highlight_scores) == 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

import pytest

from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)
from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
from forecasting_tools.ai_models.ai_utils.response_types import (
TextTokenCostResponse,
)
Expand All @@ -22,12 +26,6 @@
MonetaryCostManager,
)
from forecasting_tools.util import async_batching
from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
AiModelMockManager,
)
from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
ModelsToTest,
)

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
from forecasting_tools.ai_models.ai_utils.response_types import (
TextTokenCostResponse,
)
Expand All @@ -16,9 +17,6 @@
from forecasting_tools.ai_models.model_archetypes.traditional_online_llm import (
TraditionalOnlineLlm,
)
from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
ModelsToTest,
)


@pytest.mark.parametrize("subclass", ModelsToTest.TOKENS_INCUR_COST_LIST)
Expand Down
Loading
Loading