From e673c43dba5bbc8762b38816f2065f50906ee75b Mon Sep 17 00:00:00 2001 From: Michael Skarlinski Date: Wed, 11 Sep 2024 08:01:00 -0700 Subject: [PATCH 1/2] add high-quality settings --- paperqa/configs/high_quality.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 paperqa/configs/high_quality.json diff --git a/paperqa/configs/high_quality.json b/paperqa/configs/high_quality.json new file mode 100644 index 00000000..0ffa8162 --- /dev/null +++ b/paperqa/configs/high_quality.json @@ -0,0 +1,15 @@ +{ + "answer": { + "evidence_k": 20, + "answer_max_sources": 5, + "max_concurrent_requests": 10 + }, + "parsing": { + "use_doc_details": true, + "chunk_size": 7000, + "overlap": 250 + }, + "prompts": { + "use_json": true + } +} From 618005c178162cb93ca16c723e738ea1ba5439d4 Mon Sep 17 00:00:00 2001 From: Michael Skarlinski Date: Wed, 11 Sep 2024 08:52:49 -0700 Subject: [PATCH 2/2] add explanation table for settings, limit search to twice having the same query, add agentic default to high_quality --- README.md | 15 +++++++++++++++ paperqa/agents/tools.py | 7 +++---- paperqa/configs/high_quality.json | 3 +++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 323a742e..cadf68bf 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,21 @@ or I can use the normal ask pqa -i nanomaterials ask 'Are there nm scale features in thermoelectric materials?' ``` +Both the CLI and module have pre-configured settings based on prior performance and our publications; they can be invoked as follows: + +``` +pqa --settings high_quality ask 'Are there nm scale features in thermoelectric materials?' 
+``` + +Here is a table of some useful settings: + +| Setting Name | Description | +| ------------ | ---------------------------------------------------------------------------------------------------------------------------- | +| high_quality | Highly performant, relatively expensive (due to having `evidence_k` = 20) query using a `ToolSelector` agent. | +| fast | Setting to get answers cheaply and quickly. | +| wikicrow | Setting to emulate the Wikipedia article writing used in our WikiCrow publication. | +| contracrow | Setting to find contradictions in papers; your query should be a claim that needs to be flagged as a contradiction (or not). | + ### Module Usage PaperQA's full workflow can be accessed via Python directly: diff --git a/paperqa/agents/tools.py b/paperqa/agents/tools.py index ef457ee3..cc366955 100644 --- a/paperqa/agents/tools.py +++ b/paperqa/agents/tools.py @@ -70,7 +70,7 @@ async def paper_search( """ Search for papers to increase the paper count. - Repeat previous calls with the same query and years to continue a search. + Repeat previous calls with the same query and years to continue a search. Only repeat a maximum of twice. This tool can be called concurrently. This tool introduces novel papers, so invoke this tool when just beginning or when unsatisfied with the current evidence. @@ -210,9 +210,7 @@ class GenerateAnswer(NamedTool): def did_not_fail_to_answer(cls, message: str) -> bool: return not message.startswith(cls.FAILED_TO_ANSWER) - async def gen_answer( - self, question: str, state: EnvironmentState # noqa: ARG002 - ) -> str: + async def gen_answer(self, question: str, state: EnvironmentState) -> str: """ Ask a model to propose an answer using current evidence. @@ -224,6 +222,7 @@ async def gen_answer( question: Question to be answered. state: Current state. """ + logger.info(f"Generating answer for '{question}'.") # TODO: Should we allow the agent to change the question? 
# self.answer.question = query state.answer = await state.docs.aquery( diff --git a/paperqa/configs/high_quality.json b/paperqa/configs/high_quality.json index 0ffa8162..a4160a8d 100644 --- a/paperqa/configs/high_quality.json +++ b/paperqa/configs/high_quality.json @@ -11,5 +11,8 @@ }, "prompts": { "use_json": true + }, + "agent": { + "agent_type": "ToolSelector" } }