README.md (7 changes: 3 additions & 4 deletions)
@@ -927,15 +927,14 @@ will return much faster than the first query and we'll be certain the authors ma
 | `parsing.chunking_algorithm` | `ChunkingOptions.SIMPLE_OVERLAP` | Algorithm for chunking. |
 | `parsing.doc_filters` | `None` | Optional filters for allowed documents. |
 | `parsing.use_human_readable_clinical_trials` | `False` | Parse clinical trial JSONs into readable text. |
-| `prompt.summary` | `summary_prompt` | Template for summarizing text, must contain variables matching `summary_prompt`. |
+| `prompt.summary` | `summary_prompt` | User prompt template(s) to use when generating contextual summaries. |
 | `prompt.qa` | `qa_prompt` | Template for QA, must contain variables matching `qa_prompt`. |
 | `prompt.select` | `select_paper_prompt` | Template for selecting papers, must contain variables matching `select_paper_prompt`. |
 | `prompt.pre` | `None` | Optional pre-prompt templated with just the original question to append information before a qa prompt. |
 | `prompt.post` | `None` | Optional post-processing prompt that can access PQASession fields. |
-| `prompt.system` | `default_system_prompt` | System prompt for the model. |
+| `prompt.system` | `default_system_prompt` | System prompt to use when generating contextual summaries and answers. |
 | `prompt.use_json` | `True` | Whether to use JSON formatting. |
-| `prompt.summary_json` | `summary_json_prompt` | JSON-specific summary prompt. |
-| `prompt.summary_json_system` | `summary_json_system_prompt` | System prompt for JSON summaries. |
+| `prompt.summary_json` | `summary_json_prompt` | JSON-specific user prompt template(s) to use when generating contextual summaries. |
 | `prompt.context_outer` | `CONTEXT_OUTER_PROMPT` | Prompt for how to format all contexts in generate answer. |
 | `prompt.context_inner` | `CONTEXT_INNER_PROMPT` | Prompt for how to format a single context in generate answer. Must contain 'name' and 'text' variables. |
 | `agent.agent_llm` | `"gpt-4o-2024-11-20"` | Model to use for agent making tool selections. |
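As a usage note for the table above: a minimal sketch of configuring the now list-valued `prompt.summary_json` from Python. The exact `Settings(prompts=...)` kwargs are an assumption based on the tutorial diffs below, not part of this PR:

```python
from paperqa import Settings
from paperqa.prompts import (
    include_text_prompt_template,
    summary_json_system_prompt,
)

# summary_json accepts a single template string or a list of user prompt
# templates; with a list, every template but the last becomes its own user
# message, and the last one carries the excerpt text (and any media).
settings = Settings(
    prompts={
        "use_json": True,
        "summary_json": [summary_json_system_prompt, include_text_prompt_template],
    }
)
```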
docs/tutorials/settings_tutorial.ipynb (7 changes: 4 additions & 3 deletions)
@@ -129,10 +129,10 @@
" default_system_prompt,\n",
" env_reset_prompt,\n",
" env_system_prompt,\n",
" include_text_prompt_template,\n",
" qa_prompt,\n",
" select_paper_prompt,\n",
" structured_citation_prompt,\n",
" summary_json_prompt,\n",
" summary_json_system_prompt,\n",
" summary_prompt,\n",
")\n",
@@ -201,8 +201,9 @@
" post=None,\n",
" system=default_system_prompt,\n",
" use_json=True,\n",
" summary_json=summary_json_prompt,\n",
" summary_json_system=summary_json_system_prompt,\n",
" summary_json=[\n",
" summary_json_system_prompt, include_text_prompt_template\n",
" ],\n",
" context_outer=CONTEXT_OUTER_PROMPT,\n",
" context_inner=CONTEXT_INNER_PROMPT,\n",
" ),\n",
docs/tutorials/settings_tutorial.md (5 changes: 2 additions & 3 deletions)
@@ -102,10 +102,10 @@ from paperqa.prompts import (
     default_system_prompt,
     env_reset_prompt,
     env_system_prompt,
+    include_text_prompt_template,
     qa_prompt,
     select_paper_prompt,
     structured_citation_prompt,
-    summary_json_prompt,
     summary_json_system_prompt,
     summary_prompt,
 )
@@ -174,8 +174,7 @@ settings = Settings(
         post=None,
         system=default_system_prompt,
         use_json=True,
-        summary_json=summary_json_prompt,
-        summary_json_system=summary_json_system_prompt,
+        summary_json=[summary_json_system_prompt, include_text_prompt_template],
         context_outer=CONTEXT_OUTER_PROMPT,
         context_inner=CONTEXT_INNER_PROMPT,
     ),
src/paperqa/configs/contracrow.json (6 changes: 4 additions & 2 deletions)
@@ -36,8 +36,10 @@
"post": null,
"system": "Answer in a direct and concise tone. Your audience is an expert, so be highly specific. If there are ambiguous terms or acronyms, first define them.",
"use_json": true,
"summary_json": "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\n",
"summary_json_system": "Provide a summary of the relevant information that could help determine if a claim is contradicted or supported by this excerpt. The excerpt may be irrelevant. Do not directly answer if it is contradicted - only summarize relevant information. Respond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\"\n}}\n\nwhere `summary` is relevant information from excerpt ({summary_length}) and `relevance_score` is the relevance of `summary` to support or contradict the claim (integer out of 10). If any string entry in the JSON has newlines, be sure to escape them. "
"summary_json": [
"Provide a summary of the relevant information that could help determine if a claim is contradicted or supported by this excerpt. The excerpt may be irrelevant. Do not directly answer if it is contradicted - only summarize relevant information. Respond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\"\n}}\n\nwhere `summary` is relevant information from excerpt ({summary_length}) and `relevance_score` is the relevance of `summary` to support or contradict the claim (integer out of 10). If any string entry in the JSON has newlines, be sure to escape them.",
"Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}"
]
},
"agent": {
"agent_llm": "gpt-4o-2024-08-06",
src/paperqa/configs/wikicrow.json (6 changes: 4 additions & 2 deletions)
@@ -36,8 +36,10 @@
"post": null,
"system": "Answer in a direct and concise tone.",
"use_json": true,
"summary_json": "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\n",
"summary_json_system": "Provide a summary of the relevant information that could help answer the question based on the excerpt. The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. \n\nRespond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\",\n \"gene_name: \"...\"\n}}\n\nwhere `summary` is relevant information from text - {summary_length}, \n`gene_name` is the gene discussed in the excerpt (may be different than query), and `relevance_score` is the relevance of `summary` to answer the question (integer out of 10)"
"summary_json": [
"Provide a summary of the relevant information that could help answer the question based on the excerpt. The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. \n\nRespond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\",\n \"gene_name: \"...\"\n}}\n\nwhere `summary` is relevant information from text - {summary_length}, \n`gene_name` is the gene discussed in the excerpt (may be different than query), and `relevance_score` is the relevance of `summary` to answer the question (integer out of 10)",
"Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}"
]
},
"agent": {
"agent_llm": "gpt-4-turbo-2024-04-09",
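Both bundled configs now put the instruction prompt first and the excerpt template last. A quick sanity check of the deserialized shape; that `Settings.from_name` resolves bundled config names is an assumption here:

```python
from paperqa import Settings

# Load the bundled config and confirm summary_json deserialized as a
# two-element list: instructions first, excerpt template last.
settings = Settings.from_name("wikicrow")
assert isinstance(settings.prompts.summary_json, list)
instructions, excerpt_template = settings.prompts.summary_json
assert "{citation}" in excerpt_template and "{question}" in excerpt_template
```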
src/paperqa/core.py (31 changes: 23 additions & 8 deletions)
@@ -136,7 +136,7 @@ async def _map_fxn_summary(  # noqa: PLR0912
     text: Text,
     question: str,
     summary_llm_model: LLMModel | None,
-    prompt_templates: tuple[str, str] | None,
+    prompt_templates: tuple[str | list[str], str] | None,
     extra_prompt_data: dict[str, str] | None = None,
     parser: Callable[[str], dict[str, Any]] | None = None,
     callbacks: Sequence[Callable[[str], None]] | None = None,
@@ -154,8 +154,9 @@
         text: The text to parse.
         question: The question to use for summarization.
         summary_llm_model: The LLM model to use for generating summaries.
-        prompt_templates: Optional two-elements tuple containing templates for the user and system prompts.
-            prompt_templates = (user_prompt_template, system_prompt_template)
+        prompt_templates: Optional two-tuple containing
+            the user prompt template(s) and a system prompt.
+            prompt_templates = (user_prompt_template(s), system_prompt_template)
         extra_prompt_data: Optional extra data to pass to the prompt template.
         parser: Optional parser function to parse LLM output into structured data.
             Should return dict with at least 'summary' field.
@@ -202,13 +203,27 @@
                 else cleaned_text
             ),
         } | (extra_prompt_data or {})
-        message_prompt, system_prompt = (pt.format(**data) for pt in prompt_templates)
+        user_msg_prompts: list[str] = (
+            [prompt_templates[0].format(**data)]
+            if isinstance(prompt_templates[0], str)
+            else [pt.format(**data) for pt in prompt_templates[0]]
+        )
+        system_msg = Message(role="system", content=prompt_templates[1])
+        prepend_msgs = (
+            [
+                system_msg,
+                *(Message(content=m) for m in user_msg_prompts[:-1]),
+            ]
+            if len(user_msg_prompts) > 1
+            else [system_msg]
+        )
+        msg_with_media_prompt = user_msg_prompts[-1]
         try:
             llm_result = await summary_llm_model.call_single(
                 messages=[
-                    Message(role="system", content=system_prompt),
+                    *prepend_msgs,
                     Message.create_message(
-                        text=message_prompt,
+                        text=msg_with_media_prompt,
                         images=(
                             [i.to_image_url() for i in text.media]
                             if text.media
@@ -231,8 +246,8 @@
             )
             llm_result = await summary_llm_model.call_single(
                 messages=[
-                    Message(role="system", content=system_prompt),
-                    Message(content=message_prompt),
+                    *prepend_msgs,
+                    Message(content=msg_with_media_prompt),
                     *append_msgs,
                 ],
                 callbacks=callbacks,
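To make the new assembly logic concrete, here is a self-contained sketch of how a `prompt_templates` tuple expands into chat messages. `Msg` is a stand-in for paperqa's `Message`, and the template strings are hypothetical:

```python
from dataclasses import dataclass


@dataclass
class Msg:  # stand-in for paperqa's Message, for illustration only
    role: str
    content: str


def expand(prompt_templates: tuple[str | list[str], str], **data: str) -> list[Msg]:
    user_part, system_prompt = prompt_templates
    prompts = (
        [user_part.format(**data)]
        if isinstance(user_part, str)
        else [t.format(**data) for t in user_part]
    )
    msgs = [Msg("system", system_prompt)]
    msgs += [Msg("user", p) for p in prompts[:-1]]  # e.g. JSON instructions
    msgs.append(Msg("user", prompts[-1]))  # excerpt prompt; media attaches here
    return msgs


messages = expand(
    (["Reply in JSON ({summary_length}).", "Excerpt from {citation}: {text}"], "Be concise."),
    summary_length="about 100 words",
    citation="pqac-0f650d59",
    text="...",
)
# -> system, then user("Reply in JSON (about 100 words)."), then the excerpt message
```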
src/paperqa/docs.py (21 changes: 8 additions & 13 deletions)
@@ -643,18 +643,14 @@ async def aget_evidence(
             else matches
         )

-        prompt_templates = None
         if not answer_config.evidence_skip_summary:
-            if prompt_config.use_json:
-                prompt_templates = (
-                    prompt_config.summary_json,
-                    prompt_config.summary_json_system,
-                )
-            else:
-                prompt_templates = (
-                    prompt_config.summary,
-                    prompt_config.system,
-                )
+            prompt_templates: tuple[str | list[str], str] | None = (
+                prompt_config.summary_json
+                if prompt_config.use_json
+                else prompt_config.summary
+            ), prompt_config.system
+        else:
+            prompt_templates = None

         with set_llm_session_ids(session.id):
             results = await gather_with_concurrency(
@@ -666,8 +662,7 @@
                     summary_llm_model=summary_llm_model,
                     prompt_templates=prompt_templates,
                     extra_prompt_data={
-                        "summary_length": answer_config.evidence_summary_length,
-                        "citation": f"{m.name}: {m.doc.formatted_citation}",
+                        "summary_length": answer_config.evidence_summary_length
                     },
                     parser=llm_parse_json if prompt_config.use_json else None,
                     callbacks=callbacks,
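One subtlety in the new assignment above: the trailing `, prompt_config.system` packs the parenthesized conditional and the system prompt into a two-tuple. A tiny illustration with hypothetical values:

```python
use_json = True
summary_json = ["json instructions", "excerpt template"]  # hypothetical values
summary = "plain summary template"
system = "system prompt"

# The parenthesized conditional is element 0; system is element 1.
prompt_templates = (summary_json if use_json else summary), system
assert prompt_templates == (["json instructions", "excerpt template"], "system prompt")
```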
src/paperqa/prompts.py (18 changes: 9 additions & 9 deletions)
@@ -1,10 +1,15 @@
 from datetime import datetime

+include_text_prompt_template = (
+    "Excerpt from {citation}\n\n------------\n\n{text}\n\n------------"
+    "\n\nQuestion: {question}"
+)
+
 summary_prompt = (
-    "Summarize the excerpt below to help answer a question.\n\nExcerpt from"
-    " {citation}\n\n------------\n\n{text}\n\n------------"
-    "\n\nQuestion: {question}\n\nDo not directly"
-    " answer the question, instead summarize to give evidence to help answer the"
+    "Summarize the excerpt below to help answer a question."
+    f"\n\n{include_text_prompt_template}"
+    "\n\nDo not directly answer the question,"
+    " instead summarize to give evidence to help answer the"
     " question. Stay detailed; report specific numbers, equations, or direct quotes"
     ' (marked with quotation marks). Reply "Not applicable" if the excerpt is'
     " irrelevant. At the end of your response, provide an integer score from 1-10 on a"
@@ -18,11 +23,6 @@
"\n\n------------\n\n{tables}"
)

summary_json_prompt = (
"Excerpt from {citation}\n\n------------\n\n{text}\n\n------------"
"\n\nQuestion: {question}\n\n"
)

# The below "cannot answer" sentinel phrase should:
# 1. Lead to complete tool being called with has_successful_answer=False
# 2. Can be used for unit testing
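Because `summary_prompt` now embeds `include_text_prompt_template` through an f-string evaluated at import time, the shared `{citation}`/`{text}`/`{question}` placeholders stay literal in both prompts and format identically. A quick check (the fill values are made up):

```python
from paperqa.prompts import include_text_prompt_template, summary_prompt

# The f-string composition happens once at import, so the shared template is a
# literal substring of summary_prompt, braces and all.
assert include_text_prompt_template in summary_prompt
filled = include_text_prompt_template.format(
    citation="Doe 2023, J. Example",
    text="An example excerpt.",
    question="What was measured?",
)
print(filled)
```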
src/paperqa/settings.py (31 changes: 23 additions & 8 deletions)
@@ -58,10 +58,10 @@
     default_system_prompt,
     env_reset_prompt,
     env_system_prompt,
+    include_text_prompt_template,
     qa_prompt,
     select_paper_prompt,
     structured_citation_prompt,
-    summary_json_prompt,
     summary_json_system_prompt,
     summary_prompt,
 )
@@ -371,7 +371,13 @@ class PromptSettings(BaseModel):
     # SEE: https://nwtc.libguides.com/citations/MLA#s-lg-box-707489
     EXAMPLE_CITATION: ClassVar[str] = "(pqac-0f650d59)"

-    summary: str = summary_prompt
+    summary: str | list[str] = Field(
+        default=summary_prompt,
+        description=(
+            "User prompt template(s) to use when generating contextual summaries."
+            " Must contain variables matching the default argument `summary_prompt`."
+        ),
+    )
     qa: str = qa_prompt
     answer_iteration_prompt: str | None = Field(
         default=answer_iteration_prompt_template,
@@ -392,13 +398,22 @@
         ),
     )
     post: str | None = None
-    system: str = default_system_prompt
+    system: str = Field(
+        default=default_system_prompt,
+        description="System prompt to use when generating contextual summaries and answers.",
+    )
     use_json: bool = True
-    # Not thrilled about this model,
-    # but need to split out the system/summary
-    # to get JSON
-    summary_json: str = summary_json_prompt
-    summary_json_system: str = summary_json_system_prompt
+    summary_json: str | list[str] = Field(
+        default_factory=lambda: [
+            summary_json_system_prompt,
+            include_text_prompt_template,
+        ],
+        description=(
+            "JSON-specific user prompt template(s) to use"
+            " when generating contextual summaries."
+            " Must contain variables matching the default argument `summary_prompt`."
+        ),
+    )
     context_outer: str = Field(
         default=CONTEXT_OUTER_PROMPT,
         description="Prompt for how to format all contexts in generate answer.",
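Finally, a sketch of overriding the new union-typed field directly. Whether extra validation runs on these templates is an assumption; keeping the same variables as the defaults (`{citation}`, `{text}`, `{question}`, `{summary_length}`) should stay on the safe side:

```python
from paperqa.settings import PromptSettings

# Custom JSON-summary prompts: instruction template first, excerpt template last.
prompts = PromptSettings(
    summary_json=[
        "Reply in JSON with `summary` and `relevance_score` ({summary_length}).",
        "Excerpt from {citation}\n\n{text}\n\nQuestion: {question}",
    ]
)
assert isinstance(prompts.summary_json, list)
```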