CodexVeritas · CodexVeritas · Dec 18, 2024 · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024
diff --git a/.devcontainer/postinstall.sh b/.devcontainer/postinstall.sh
@@ -22,3 +22,7 @@ source ../.venv/bin/activate
 
 # Show which Python interpreter is being used
 which python
+
+# Install playwright (currently disabled; uncomment the commands below to enable)
+# playwright install
+# playwright install-deps
diff --git a/.env.template b/.env.template
@@ -1,18 +1,16 @@
 PYTHONPATH=.
 
+# Not currently used as a model, but might be in the future
+PERPLEXITY_API_KEY=
 OPENAI_API_KEY=
 EXA_API_KEY=
 
 # Fill this in if using the Metaculus API
 METACULUS_TOKEN=
 
-# Right now only used for free semantic similarity calculation, but defaults to OpenAI if not filled in
+# Currently used only for the free semantic similarity calculation in Deduplicator; defaults to OpenAI if left blank
 HUGGINGFACE_API_KEY=
 
-# Currently not being used as models, but might be in the future
-PERPLEXITY_API_KEY=
-ANTHROPIC_API_KEY=
-
 # Only needed in Streamlit Cloud in order to save responses to a database and track usage
 CODA_API_KEY=
 

diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ temp
 .personal/
 .sandbox/
 sandbox.py
+sandbox.ipynb
 
 
 .DS_Store

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -3,14 +3,14 @@
   "python.testing.unittestArgs": [
     "-v",
     "-s",
-    "./tests",
+    "./code_tests",
     "-p",
-    "*_test.py"
+    "test_*.py"
   ],
   "python.testing.pytestEnabled": true,
   "python.testing.unittestEnabled": false,
   "python.testing.pytestArgs": [
-    "tests"
+    "code_tests"
   ],
   "python.analysis.typeCheckingMode": "basic",
   "files.trimTrailingWhitespace": true,

diff --git a/README.md b/README.md
diff --git a/...dividually/test_ex_base_rate_responder.py → ...dividually/test_ex_base_rate_responder.py b/...dividually/test_ex_base_rate_responder.py → ...dividually/test_ex_base_rate_responder.py
@@ -1,7 +1,7 @@
 import asyncio
 from datetime import datetime
 
-from forecasting_tools.forecasting.sub_question_responders.base_rate_researcher import (
+from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
     BaseRateReport,
     BaseRateResearcher,
     DenominatorOption,

diff --git a/...tests_individually/test_ex_key_factors.py → ...tests_individually/test_ex_key_factors.py b/...tests_individually/test_ex_key_factors.py → ...tests_individually/test_ex_key_factors.py
@@ -5,8 +5,8 @@
 from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
     MonetaryCostManager,
 )
-from forecasting_tools.forecasting.metaculus_api import MetaculusApi
-from forecasting_tools.forecasting.sub_question_responders.key_factors_researcher import (
+from forecasting_tools.forecasting.helpers.metaculus_api import MetaculusApi
+from forecasting_tools.forecasting.sub_question_researchers.key_factors_researcher import (
     KeyFactorsResearcher,
     ScoredKeyFactor,
 )

diff --git a/...vidually/test_ex_niche_list_researcher.py → ...vidually/test_ex_niche_list_researcher.py b/...vidually/test_ex_niche_list_researcher.py → ...vidually/test_ex_niche_list_researcher.py
@@ -8,7 +8,7 @@
 from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
     MonetaryCostManager,
 )
-from forecasting_tools.forecasting.sub_question_responders.niche_list_researcher import (
+from forecasting_tools.forecasting.sub_question_researchers.niche_list_researcher import (
     FactCheckedItem,
     NicheListResearcher,
 )
@@ -92,6 +92,10 @@ def test_large_lists_fail(things_to_generate: str) -> None:
                 "2010 Moldovan constitutional referendum",  # See above
                 "2019 Moldovan referendum",  # See above
                 "2024 Moldovan European Union membership referendum",  # See above
+                # Should I add this to the lists? -> "2014 Gagauzia Referendum on Customs Union",
+                # https://www.rferl.org/a/moldova-gagauz-referendum-counting/25251251.html
+                # Moldova said it was illegal, but it passed with 98% https://balkaninsight.com/2024/01/31/moldova-condemns-separatist-march-in-restive-gagauzia-region/
+                # See Perplexity's attempt: https://www.perplexity.ai/search/list-for-me-every-moldovan-ref-qxcLgs2WQbSpW8HDIayNEg
             ],
         ),
         (
@@ -163,6 +167,7 @@ def test_large_lists_fail(things_to_generate: str) -> None:
                 "Typhoon Touch Technologies (touch screen)",
                 "Nokia v. Apple (wireless, iPhone)",
                 "Ericsson vs. Apple",
+                # Consider if Perplexity got any that I missed (see second message) https://www.perplexity.ai/search/countries-that-have-successful-b2nwlS8DSeqIjBFsKm94og
             ],
         ),  # See above
         (

diff --git a/...dividually/test_ex_question_responders.py → ...dividually/test_ex_question_responders.py b/...dividually/test_ex_question_responders.py → ...dividually/test_ex_question_responders.py
@@ -5,16 +5,16 @@
 
 from forecasting_tools.ai_models.ai_utils.ai_misc import clean_indents
 from forecasting_tools.ai_models.gpt4o import Gpt4o
-from forecasting_tools.forecasting.sub_question_responders.base_rate_researcher import (
+from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
     BaseRateResearcher,
 )
-from forecasting_tools.forecasting.sub_question_responders.general_researcher import (
+from forecasting_tools.forecasting.sub_question_researchers.general_researcher import (
     GeneralResearcher,
 )
-from forecasting_tools.forecasting.sub_question_responders.question_responder import (
+from forecasting_tools.forecasting.sub_question_researchers.question_responder import (
     QuestionResponder,
 )
-from forecasting_tools.forecasting.sub_question_responders.question_router import (
+from forecasting_tools.forecasting.sub_question_researchers.question_router import (
     QuestionRouter,
 )
 

diff --git a/...ts_individually/test_ex_smart_searcher.py → ...ts_individually/test_ex_smart_searcher.py b/...ts_individually/test_ex_smart_searcher.py → ...ts_individually/test_ex_smart_searcher.py
@@ -1,9 +1,9 @@
 import logging
 
-from forecasting_tools.forecasting.llms.smart_searcher import SmartSearcher
-from tests.utilities_for_tests.coroutine_testing import (
+from code_tests.utilities_for_tests.coroutine_testing import (
     assert_coroutines_run_under_x_times_duration_of_benchmark,
 )
+from forecasting_tools.forecasting.helpers.smart_searcher import SmartSearcher
 
 logger = logging.getLogger(__name__)
 

diff --git a/...live_api_tests/test_ai_model_interface.py → ...test_ai_models/test_ai_model_interface.py b/...live_api_tests/test_ai_model_interface.py → ...test_ai_models/test_ai_model_interface.py
@@ -5,19 +5,17 @@
 
 import pytest
 
+from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
+    AiModelMockManager,
+)
+from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
+from code_tests.utilities_for_tests import coroutine_testing
 from forecasting_tools.ai_models.basic_model_interfaces.ai_model import AiModel
 from forecasting_tools.ai_models.claude35sonnet import Claude35Sonnet
 from forecasting_tools.ai_models.exa_searcher import ExaSearcher
 from forecasting_tools.ai_models.gpto1 import GptO1
 from forecasting_tools.ai_models.metaculus4o import Gpt4oMetaculusProxy
 from forecasting_tools.ai_models.perplexity import Perplexity
-from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
-    AiModelMockManager,
-)
-from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
-    ModelsToTest,
-)
-from tests.utilities_for_tests import coroutine_testing
 
 logger = logging.getLogger(__name__)
 

diff --git a/..._cost_or_live_api_tests/test_exa_model.py → ...pi_tests/test_ai_models/test_exa_model.py b/..._cost_or_live_api_tests/test_exa_model.py → ...pi_tests/test_ai_models/test_exa_model.py
@@ -3,15 +3,15 @@
 
 import pytest
 
+from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
+    AiModelMockManager,
+)
 from forecasting_tools.ai_models.exa_searcher import (
     ExaHighlightQuote,
     ExaSearcher,
     ExaSource,
     SearchInput,
 )
-from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
-    AiModelMockManager,
-)
 
 
 async def test_invoke_for_highlights_in_relevance_order(mocker: Mock) -> None:
@@ -96,14 +96,12 @@ async def test_filtered_invoke() -> None:
     model = ExaSearcher(
         num_results=num_results, include_highlights=False, include_text=True
     )
-    exclude_domains = ["alliance.health"]
     search = SearchInput(
         web_search_query="coronavirus",
         highlight_query=None,
         include_domains=[],
-        exclude_domains=exclude_domains,
+        exclude_domains=["alliance.health"],
         include_text="pregnancy",
-        exclude_text="symptoms",
         start_published_date=datetime(2022, 11, 1),
         end_published_date=datetime(2022, 11, 30),
     )
@@ -117,13 +115,11 @@ async def test_filtered_invoke() -> None:
         assert source.published_date <= search.end_published_date
         assert source.published_date >= search.start_published_date
         assert search.include_text is not None
-        assert search.exclude_text is not None
         assert search.include_text in source.text
-        assert search.exclude_text not in source.text
         assert source.url is not None
         assert all(
             exclude_domain not in source.url
-            for exclude_domain in exclude_domains
+            for exclude_domain in search.exclude_domains
         )
         assert len(source.highlights) == 0
         assert len(source.highlight_scores) == 0

diff --git a/...e_api_tests/test_models_incurring_cost.py → ...t_ai_models/test_models_incurring_cost.py b/...e_api_tests/test_models_incurring_cost.py → ...t_ai_models/test_models_incurring_cost.py
@@ -5,6 +5,10 @@
 
 import pytest
 
+from code_tests.unit_tests.test_ai_models.ai_mock_manager import (
+    AiModelMockManager,
+)
+from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
 from forecasting_tools.ai_models.ai_utils.response_types import (
     TextTokenCostResponse,
 )
@@ -22,12 +26,6 @@
     MonetaryCostManager,
 )
 from forecasting_tools.util import async_batching
-from tests.no_cost_expect_all_to_succeed.test_ai_models.ai_mock_manager import (
-    AiModelMockManager,
-)
-from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
-    ModelsToTest,
-)
 
 logger = logging.getLogger(__name__)
 

diff --git a/..._tests/test_models_tracking_token_cost.py → ...models/test_models_tracking_token_cost.py b/..._tests/test_models_tracking_token_cost.py → ...models/test_models_tracking_token_cost.py
@@ -2,6 +2,7 @@
 
 import pytest
 
+from code_tests.unit_tests.test_ai_models.models_to_test import ModelsToTest
 from forecasting_tools.ai_models.ai_utils.response_types import (
     TextTokenCostResponse,
 )
@@ -16,9 +17,6 @@
 from forecasting_tools.ai_models.model_archetypes.traditional_online_llm import (
     TraditionalOnlineLlm,
 )
-from tests.no_cost_expect_all_to_succeed.test_ai_models.models_to_test import (
-    ModelsToTest,
-)
 
 
 @pytest.mark.parametrize("subclass", ModelsToTest.TOKENS_INCUR_COST_LIST)