Added account for cost info #360

Merged
merged 5 commits into from
Sep 11, 2024
Merged
Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -75,7 +75,7 @@ repos:
         args: [--pretty, --ignore-missing-imports]
         additional_dependencies:
           - aiohttp
-          - fhaviary[llm]>=0.5  # Match pyproject.toml
+          - fhaviary[llm]>=0.6  # Match pyproject.toml
           - ldp>=0.4  # Match pyproject.toml
           - html2text
           - httpx
13 changes: 12 additions & 1 deletion paperqa/agents/env.py
@@ -9,7 +9,7 @@
 from paperqa.docs import Docs
 from paperqa.llms import EmbeddingModel, LiteLLMModel
 from paperqa.settings import Settings
-from paperqa.types import Answer
+from paperqa.types import Answer, LLMResult
 from paperqa.utils import get_year

 from .models import QueryRequest
@@ -151,6 +151,17 @@ def export_frame(self) -> Frame:
     async def step(
         self, action: ToolRequestMessage
     ) -> tuple[list[Message], float, bool, bool]:
+
+        # add usage for action if it has usage
+        info = action.info
+        if info and "usage" in info and "model" in info:
+            r = LLMResult(
+                model=info["model"],
+                prompt_count=info["usage"][0],
+                completion_count=info["usage"][1],
+            )
+            self.state.answer.add_tokens(r)
+
         # If the action has empty tool_calls, the agent can later take that into account
         msgs = cast(
             list[Message],
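For orientation, here is what add_tokens is doing with that LLMResult: a minimal sketch of per-model token bookkeeping, assuming token_counts maps a model name to a [prompt_tokens, completion_tokens] pair (the shape the updated tests assert against). This illustrates the accounting pattern, not paperqa's exact implementation.

from pydantic import BaseModel, Field


class LLMResult(BaseModel):
    model: str
    prompt_count: int = 0
    completion_count: int = 0


class Answer(BaseModel):
    token_counts: dict[str, list[int]] = Field(default_factory=dict)

    def add_tokens(self, result: LLMResult) -> None:
        # Accumulate [prompt, completion] counts keyed by model name
        if result.model not in self.token_counts:
            self.token_counts[result.model] = [
                result.prompt_count,
                result.completion_count,
            ]
        else:
            self.token_counts[result.model][0] += result.prompt_count
            self.token_counts[result.model][1] += result.completion_count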
5 changes: 3 additions & 2 deletions paperqa/agents/main.py
@@ -109,7 +109,8 @@ def to_aviary_tool_selector(
         )
     ):
         return ToolSelector(
-            model=query.settings.agent.agent_llm,
+            model_name=query.settings.agent.agent_llm,
+            acompletion=query.settings.get_agent_llm().router.acompletion,
             **(query.settings.agent.agent_config or {}),
         )
     return None
@@ -220,7 +221,7 @@ async def run_agent(
         f"Finished agent {agent_type!r} run with question {query.query!r} and status"
         f" {agent_status}."
     )
-    return AnswerResponse(answer=answer, usage=answer.token_counts, status=agent_status)
+    return AnswerResponse(answer=answer, status=agent_status)


 async def run_fake_agent(
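One way to read the ToolSelector change: instead of letting the selector construct its own LLM client from a model name, the PR injects the already-configured router's acompletion coroutine, so every agent completion flows through the same router that records usage. A minimal sketch of that dependency-injection pattern with toy names (this is not the fhaviary API, just the shape of the idea):

import asyncio
from typing import Any, Awaitable, Callable

AcompletionFn = Callable[..., Awaitable[Any]]


class ToySelector:
    """Selects tools by delegating completions to an injected coroutine."""

    def __init__(self, model_name: str, acompletion: AcompletionFn):
        self.model_name = model_name
        self.acompletion = acompletion  # shared client, so usage is tracked centrally

    async def select(self, messages: list[dict]) -> Any:
        # All traffic goes through the caller-provided completion function
        return await self.acompletion(model=self.model_name, messages=messages)


async def fake_acompletion(**kwargs: Any) -> dict:
    # Stand-in for router.acompletion; a real router would record token usage here
    return {"choices": [], "usage": {"prompt_tokens": 0, "completion_tokens": 0}}


selector = ToySelector("gpt-4o-2024-08-06", fake_acompletion)
asyncio.run(selector.select([{"role": "user", "content": "hi"}]))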
1 change: 0 additions & 1 deletion paperqa/agents/models.py
@@ -85,7 +85,6 @@ def set_docs_name(self, docs_name: str) -> None:

 class AnswerResponse(BaseModel):
     answer: Answer
-    usage: dict[str, list[int]]
     bibtex: dict[str, str] | None = None
     status: AgentStatus
     timing_info: dict[str, dict[str, float]] | None = None
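With the usage field dropped from AnswerResponse, per-model token counts are read from the Answer itself (the same dict[str, list[int]] of [prompt, completion] pairs that add_tokens maintains, per the tests below). A caller-side sketch, where response is a completed AnswerResponse and the model key is illustrative:

# Before this PR, usage came from the response wrapper:
#   prompt, completion = response.usage["gpt-4o-2024-08-06"]
# Now the Answer carries its own accounting:
prompt, completion = response.answer.token_counts["gpt-4o-2024-08-06"]
print(f"prompt={prompt}, completion={completion}, cost=${response.answer.cost:.4f}")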
15 changes: 14 additions & 1 deletion paperqa/settings.py
@@ -239,13 +239,19 @@ class AgentSettings(BaseModel):
         default="gpt-4o-2024-08-06",
         description="Model to use for agent",
     )
+
+    agent_llm_config: dict | None = Field(
+        default=None,
+        description="Optional kwargs for LLM constructor",
+    )
+
     agent_type: str = Field(
         default="fake",
         description="Type of agent to use",
     )
     agent_config: dict[str, Any] | None = Field(
         default=None,
-        description="Optional keyword argument configuration for the agent.",
+        description="Optional kwarg for AGENT constructor",
     )

     agent_system_prompt: str | None = Field(

Review thread on agent_llm_config:

Collaborator: A few lines below this there is an agent_config that is basically kwargs for the Agent/ToolSelector. Do you mind adjusting their names and/or descriptions so it's clear what the difference between agent_llm_config and agent_config is? They're similar enough in naming right now that I think we should fix this.

Collaborator (author): 👍 Tried to make the descriptions clearer about the distinction.

Collaborator: Thanks for your efforts! Being honest, it's still not quite intuitive enough for my tastes, but for now it's good. I think we can make this clearer by moving to_aviary_tool_selector and to_ldp_agent to be methods of AgentSettings, since all their information is derived from there.

@@ -500,6 +506,13 @@ def get_summary_llm(self) -> LiteLLMModel:
             or self._default_litellm_router_settings(self.summary_llm),
         )

+    def get_agent_llm(self) -> LiteLLMModel:
+        return LiteLLMModel(
+            name=self.agent.agent_llm,
+            config=self.agent.agent_llm_config
+            or self._default_litellm_router_settings(self.agent.agent_llm),
+        )
+
     def get_embedding_model(self) -> EmbeddingModel:
         return embedding_model_factory(self.embedding, **(self.embedding_config or {}))
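To make the two knobs concrete: agent_llm_config is passed to the LiteLLMModel (router) construction in get_agent_llm(), while agent_config is splatted into the agent/ToolSelector constructor in to_aviary_tool_selector above. A hedged usage sketch; the model_list shape follows litellm's Router config, and the timeout kwarg is hypothetical:

from paperqa.settings import Settings

settings = Settings()

# Kwargs for constructing the agent's LLM (a litellm Router under the hood)
settings.agent.agent_llm_config = {
    "model_list": [
        {
            "model_name": settings.agent.agent_llm,
            "litellm_params": {"model": settings.agent.agent_llm, "temperature": 0.1},
        }
    ]
}

# Kwargs for constructing the agent/ToolSelector itself, not its LLM
settings.agent.agent_config = {"timeout": 300.0}  # hypothetical selector kwarg

agent_model = settings.get_agent_llm()  # falls back to router defaults if config is None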
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -20,7 +20,7 @@ dependencies = [
     "PyCryptodome",
     "aiohttp",  # TODO: remove in favor of httpx
     "anyio",
-    "fhaviary[llm]>=0.5",  # For ToolSelector
+    "fhaviary[llm]>=0.6",  # For info on Message
     "html2text",  # TODO: evaluate moving to an opt-in dependency
     "httpx",
     "litellm",
24 changes: 20 additions & 4 deletions tests/test_agents.py
@@ -75,16 +75,32 @@ async def test_agent_types(
 ) -> None:
     question = "How can you use XAI for chemical property prediction?"

+    # make sure agent_llm is different from default, so we can correctly track tokens
+    # for agent
+    agent_test_settings.agent.agent_llm = "gpt-4o-2024-08-06"
+    agent_test_settings.llm = "gpt-4o-mini"
+    agent_test_settings.summary_llm = "gpt-4o-mini"
     agent_test_settings.agent.agent_prompt += (
         "\n\n Call each tool once in appropriate order and "
         " accept the answer for now, as we're in debug mode."
     )
     request = QueryRequest(query=question, settings=agent_test_settings)
     response = await agent_query(request, agent_type=agent_type)
     assert response.answer.answer, "Answer not generated"
     assert response.answer.answer != "I cannot answer", "Answer not generated"
     assert response.answer.context, "No contexts were found"
     assert response.answer.question == question
     agent_llm = request.settings.agent.agent_llm
-    assert response.usage[agent_llm][0] > 5000, "Expected many prompt tokens"
-    assert response.usage[agent_llm][1] > 250, "Expected many completion tokens"
-    assert response.answer.cost > 0, "Expected nonzero cost"
+    # TODO: once LDP can track tokens, we can remove this check
+    if agent_type not in {"fake", SimpleAgent}:
+        print(response.answer.token_counts)
+        assert (
+            response.answer.token_counts[agent_llm][0] > 1000
+        ), "Expected many prompt tokens"
+        assert (
+            response.answer.token_counts[agent_llm][1] > 50
+        ), "Expected many completion tokens"
+    assert response.answer.cost > 0, "Expected nonzero cost"


@pytest.mark.asyncio
@@ -356,7 +372,7 @@ def test_answers_are_striped() -> None:
             )
         ],
     )
-    response = AnswerResponse(answer=answer, usage={}, bibtex={}, status="success")
+    response = AnswerResponse(answer=answer, bibtex={}, status="success")

     assert response.answer.contexts[0].text.embedding is None
     assert response.answer.contexts[0].text.text == ""  # type: ignore[unreachable,unused-ignore]