Added account for cost info #360

Merged 5 commits on Sep 11, 2024
Changes from 1 commit
21 changes: 11 additions & 10 deletions paperqa/agents/env.py
@@ -151,21 +151,22 @@ def export_frame(self) -> Frame:
    async def step(
        self, action: ToolRequestMessage
    ) -> tuple[list[Message], float, bool, bool]:

        # add usage for actions that have usage
Collaborator commented:

Suggested change:
-        # add usage for actions that have usage
+        # add usage if the action has usage

There's just one action.

        info = action.info
        if info and "usage" in info and "model" in info:
            r = LLMResult(
                model=info["model"],
                prompt_count=info["usage"][0],
                completion_count=info["usage"][1],
            )
            self.state.answer.add_tokens(r)

        # If the action has empty tool_calls, the agent can later take that into account
        msgs = cast(
            list[Message],
            await self.exec_tool_calls(action, state=self.state, handle_tool_exc=True),
        )
        # add usage for any messages that have it
        for msg in msgs:
            info = msg.info
            if info and "usage" in info and "model" in info:
                r = LLMResult(
                    model=info["model"],
                    prompt_count=info["usage"][0],
                    completion_count=info["usage"][1],
                )
                self.state.answer.add_tokens(r)
        return (
            msgs,
            0,  # Reward is computed in post-processing, use 0 as a placeholder
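For readers skimming the diff above: each usage-bearing action or tool message is converted into an LLMResult and folded into the answer's running totals via add_tokens. The snippet below is a minimal sketch of that accounting pattern, assuming an Answer-like object that stores token_counts as {model_name: [prompt_tokens, completion_tokens]}; it is an illustration, not the paperqa implementation.

from dataclasses import dataclass, field

@dataclass
class LLMResultSketch:
    model: str
    prompt_count: int = 0
    completion_count: int = 0

@dataclass
class AnswerSketch:
    token_counts: dict[str, list[int]] = field(default_factory=dict)

    def add_tokens(self, result: LLMResultSketch) -> None:
        # Accumulate prompt/completion tokens per model name.
        counts = self.token_counts.setdefault(result.model, [0, 0])
        counts[0] += result.prompt_count
        counts[1] += result.completion_count

answer = AnswerSketch()
answer.add_tokens(LLMResultSketch("gpt-4o-2024-08-06", prompt_count=1200, completion_count=80))
print(answer.token_counts)  # {'gpt-4o-2024-08-06': [1200, 80]}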
5 changes: 3 additions & 2 deletions paperqa/agents/main.py
@@ -109,7 +109,8 @@ def to_aviary_tool_selector(
        )
    ):
        return ToolSelector(
-            model=query.settings.agent.agent_llm,
+            model_name=query.settings.agent.agent_llm,
+            acompletion=query.settings.get_agent_llm().router.acompletion,
            **(query.settings.agent.agent_config or {}),
        )
    return None
@@ -220,7 +221,7 @@ async def run_agent(
f"Finished agent {agent_type!r} run with question {query.query!r} and status"
f" {agent_status}."
)
return AnswerResponse(answer=answer, usage=answer.token_counts, status=agent_status)
return AnswerResponse(answer=answer, status=agent_status)


async def run_fake_agent(
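The main.py change above wires the ToolSelector to the acompletion coroutine of the router built by get_agent_llm(), so the agent's LLM calls go through the same LiteLLM configuration as the rest of the settings. The sketch below illustrates that dependency-injection pattern with stand-ins; ToolSelectorSketch and fake_acompletion are hypothetical and not the aviary ToolSelector API.

import asyncio
from typing import Any, Awaitable, Callable

class ToolSelectorSketch:
    """Stand-in showing how an injected completion coroutine is used."""

    def __init__(self, model_name: str, acompletion: Callable[..., Awaitable[dict[str, Any]]]):
        self.model_name = model_name
        self.acompletion = acompletion

    async def select(self, messages: list[dict[str, str]]) -> dict[str, Any]:
        # All LLM traffic goes through the injected coroutine, so the caller
        # controls routing, retries, and usage tracking in one place.
        return await self.acompletion(model=self.model_name, messages=messages)

async def fake_acompletion(**kwargs: Any) -> dict[str, Any]:
    # Placeholder for a LiteLLM router's acompletion method.
    return {"choices": [], "usage": {"prompt_tokens": 0, "completion_tokens": 0}}

selector = ToolSelectorSketch("gpt-4o-2024-08-06", fake_acompletion)
print(asyncio.run(selector.select([{"role": "user", "content": "hi"}])))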
1 change: 0 additions & 1 deletion paperqa/agents/models.py
@@ -85,7 +85,6 @@ def set_docs_name(self, docs_name: str) -> None:

class AnswerResponse(BaseModel):
    answer: Answer
-    usage: dict[str, list[int]]
    bibtex: dict[str, str] | None = None
    status: AgentStatus
    timing_info: dict[str, dict[str, float]] | None = None
13 changes: 13 additions & 0 deletions paperqa/settings.py
@@ -239,6 +239,12 @@ class AgentSettings(BaseModel):
default="gpt-4o-2024-08-06",
description="Model to use for agent",
)

agent_llm_config: dict | None = Field(
Collaborator commented:

A few lines below this there is an agent_config that is basically kwargs for the Agent/ToolSelector.

Do you mind adjusting their names and/or descriptions so it's clear what is the difference between agent_llm_config and agent_config? They're similar enough in naming right now that I think we should fix it.

Collaborator (author) replied:

👍 Tried to make the descriptions clearer in their distinction.

Collaborator replied:

Thanks for your efforts! Being honest, it's still not quite intuitive enough for my tastes, but for now it's good.

I think we can make this clearer by moving to_aviary_tool_selector and to_ldp_agent to be methods of AgentSettings, since all their information is derived from there.

        default=None,
        description="Extra kwargs to pass to agent LLM model",
    )

    agent_type: str = Field(
        default="fake",
        description="Type of agent to use",
@@ -500,6 +506,13 @@ def get_summary_llm(self) -> LiteLLMModel:
            or self._default_litellm_router_settings(self.summary_llm),
        )

    def get_agent_llm(self) -> LiteLLMModel:
        return LiteLLMModel(
            name=self.agent.agent_llm,
            config=self.agent.agent_llm_config
            or self._default_litellm_router_settings(self.agent.agent_llm),
        )

    def get_embedding_model(self) -> EmbeddingModel:
        return embedding_model_factory(self.embedding, **(self.embedding_config or {}))

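As a usage sketch of the new field and accessor: the agent LLM can now be configured separately from the document and summary LLMs, with agent_llm_config feeding the LiteLLM router. The sketch assumes Settings and AgentSettings accept keyword construction like any pydantic model; the config keys shown (a LiteLLM-style model_list) are illustrative assumptions, not required by this PR.

from paperqa.settings import AgentSettings, Settings

settings = Settings(
    agent=AgentSettings(
        agent_llm="gpt-4o-2024-08-06",
        # Hypothetical router config; the exact shape depends on your LiteLLM setup.
        agent_llm_config={
            "model_list": [
                {
                    "model_name": "gpt-4o-2024-08-06",
                    "litellm_params": {"model": "gpt-4o-2024-08-06"},
                }
            ]
        },
    )
)

agent_model = settings.get_agent_llm()  # LiteLLMModel built from the config above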
24 changes: 20 additions & 4 deletions tests/test_agents.py
@@ -75,16 +75,32 @@ async def test_agent_types(
) -> None:
    question = "How can you use XAI for chemical property prediction?"

    # make sure agent_llm is different from default, so we can correctly track tokens
    # for agent
    agent_test_settings.agent.agent_llm = "gpt-4o-2024-08-06"
    agent_test_settings.llm = "gpt-4o-mini"
    agent_test_settings.summary_llm = "gpt-4o-mini"
    agent_test_settings.agent.agent_prompt += (
        "\n\n Call each tool once in appropriate order and "
        " accept the answer for now, as we're in debug mode."
    )
    request = QueryRequest(query=question, settings=agent_test_settings)
    response = await agent_query(request, agent_type=agent_type)
    assert response.answer.answer, "Answer not generated"
    assert response.answer.answer != "I cannot answer", "Answer not generated"
    assert response.answer.context, "No contexts were found"
    assert response.answer.question == question
    agent_llm = request.settings.agent.agent_llm
-    assert response.usage[agent_llm][0] > 5000, "Expected many prompt tokens"
-    assert response.usage[agent_llm][1] > 250, "Expected many completion tokens"
-    assert response.answer.cost > 0, "Expected nonzero cost"
+    # TODO: once LDP can track tokens, we can remove this check
+    if agent_type not in {"fake", SimpleAgent}:
+        print(response.answer.token_counts)
+        assert (
+            response.answer.token_counts[agent_llm][0] > 1000
+        ), "Expected many prompt tokens"
+        assert (
+            response.answer.token_counts[agent_llm][1] > 50
+        ), "Expected many completion tokens"
+    assert response.answer.cost > 0, "Expected nonzero cost"
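The updated assertions read per-model counts from response.answer.token_counts rather than the removed response.usage. A small illustration of the expected shape follows; the numbers are made up.

token_counts = {
    "gpt-4o-2024-08-06": [1523, 87],  # agent LLM: [prompt_tokens, completion_tokens]
    "gpt-4o-mini": [6120, 410],       # doc/summary LLM, kept distinct from the agent LLM
}
assert token_counts["gpt-4o-2024-08-06"][0] > 1000, "Expected many prompt tokens"
assert token_counts["gpt-4o-2024-08-06"][1] > 50, "Expected many completion tokens"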


@pytest.mark.asyncio
@@ -356,7 +372,7 @@ def test_answers_are_striped() -> None:
            )
        ],
    )
-    response = AnswerResponse(answer=answer, usage={}, bibtex={}, status="success")
+    response = AnswerResponse(answer=answer, bibtex={}, status="success")

    assert response.answer.contexts[0].text.embedding is None
    assert response.answer.contexts[0].text.text == ""  # type: ignore[unreachable,unused-ignore]