feat(max): deep research template system #39104
ee/hogai/assistant/assistant.py

```diff
@@ -9,6 +9,8 @@
 from ee.hogai.assistant.deep_research_assistant import DeepResearchAssistant
 from ee.hogai.assistant.insights_assistant import InsightsAssistant
 from ee.hogai.assistant.main_assistant import MainAssistant
+from ee.hogai.graph.deep_research.types import DeepResearchState, PartialDeepResearchState
+from ee.hogai.utils.types import AssistantState, PartialAssistantState
 from ee.hogai.utils.types.base import AssistantMode
 from ee.hogai.utils.types.composed import AssistantMaxGraphState, AssistantMaxPartialGraphState
 from ee.models import Conversation
```
```diff
@@ -30,23 +32,59 @@ def create(
         trace_id: Optional[str | UUID] = None,
         initial_state: Optional[AssistantMaxGraphState | AssistantMaxPartialGraphState] = None,
         billing_context: Optional[MaxBillingContext] = None,
+        deep_research_template: Optional[dict[str, Any]] = None,
     ) -> BaseAssistant:
-        assistant_class: type[BaseAssistant]
         if mode == AssistantMode.ASSISTANT:
-            assistant_class = MainAssistant
+            assistant_initial_state: Optional[AssistantState | PartialAssistantState] = None
+            if initial_state is not None:
+                if isinstance(initial_state, (AssistantState | PartialAssistantState)):
+                    assistant_initial_state = initial_state
+            return MainAssistant(
+                team,
+                conversation,
+                new_message=new_message,
+                user=user,
+                session_id=session_id,
+                contextual_tools=contextual_tools,
+                is_new_conversation=is_new_conversation,
+                trace_id=trace_id,
+                billing_context=billing_context,
+                initial_state=assistant_initial_state,
+            )
         elif mode == AssistantMode.INSIGHTS_TOOL:
-            assistant_class = InsightsAssistant
+            assistant_initial_state = None
+            if initial_state is not None:
+                if isinstance(initial_state, (AssistantState | PartialAssistantState)):
+                    assistant_initial_state = initial_state
```
> Review comment on lines +55 to +58 (style): This code duplicates the type checking logic from the ASSISTANT mode. Consider extracting this pattern into a helper method to avoid repetition.
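A minimal sketch of the extraction that comment suggests; the helper name `_narrow_initial_state` is hypothetical, not part of the PR, and the narrowing mirrors the duplicated blocks:

```python
from typing import Optional


def _narrow_initial_state(
    initial_state: Optional[object],
    expected: tuple[type, ...],
) -> Optional[object]:
    """Return initial_state only when it is an instance of one of the expected state classes."""
    if initial_state is not None and isinstance(initial_state, expected):
        return initial_state
    return None
```

Each branch of `create()` would then collapse to a single call, e.g. `assistant_initial_state = _narrow_initial_state(initial_state, (AssistantState, PartialAssistantState))`.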
```diff
+            return InsightsAssistant(
+                team,
+                conversation,
+                new_message=new_message,
+                user=user,
+                session_id=session_id,
+                contextual_tools=contextual_tools,
+                is_new_conversation=is_new_conversation,
+                trace_id=trace_id,
+                billing_context=billing_context,
+                initial_state=assistant_initial_state,
+            )
         elif mode == AssistantMode.DEEP_RESEARCH:
-            assistant_class = DeepResearchAssistant
-            return assistant_class(
-                team,
-                conversation,
-                new_message=new_message,
-                user=user,
-                session_id=session_id,
-                contextual_tools=contextual_tools,
-                is_new_conversation=is_new_conversation,
-                trace_id=trace_id,
-                billing_context=billing_context,
-                initial_state=initial_state,  # type: ignore
-            )
+            deep_research_initial_state: Optional[DeepResearchState | PartialDeepResearchState] = None
+            if initial_state is not None:
+                if isinstance(initial_state, (DeepResearchState | PartialDeepResearchState)):
+                    deep_research_initial_state = initial_state
+            return DeepResearchAssistant(
+                team,
+                conversation,
+                new_message=new_message,
+                user=user,
+                session_id=session_id,
+                contextual_tools=contextual_tools,
+                is_new_conversation=is_new_conversation,
+                trace_id=trace_id,
+                billing_context=billing_context,
+                initial_state=deep_research_initial_state,
+                deep_research_template=deep_research_template,
```

> Review comment on `deep_research_template=deep_research_template`: Radical proposal: what if, instead of adding this separate field, you added the notebook as UI context to the message, so that we slowly move towards supporting notebooks as context too?

```diff
+            )
         else:
             raise ValueError(f"Unknown assistant mode: {mode}")
```
ee/hogai/assistant/deep_research_assistant.py

```diff
@@ -41,6 +41,7 @@ def __init__(
         trace_id: Optional[str | UUID] = None,
         billing_context: Optional[MaxBillingContext] = None,
         initial_state: Optional[DeepResearchState | PartialDeepResearchState] = None,
+        deep_research_template: Optional[dict[str, Any]] = None,
     ):
         super().__init__(
             team,
@@ -57,6 +58,7 @@
             trace_id=trace_id,
             billing_context=billing_context,
             initial_state=initial_state,
+            deep_research_template=deep_research_template,
         )

     @property
```
```diff
@@ -110,15 +112,35 @@ def _should_persist_commentary_message(self, node_name: MaxNodeName) -> bool:
         return False

     def get_initial_state(self) -> DeepResearchState:
-        if self._latest_message:
-            return DeepResearchState(
-                messages=[self._latest_message],
-                start_id=self._latest_message.id,
-                graph_status=None,
-                notebook_short_id=None,
-            )
-        else:
-            return DeepResearchState(messages=[])
+        # Inject a default human message when a template is selected without user input,
+        # and stream it immediately by setting _latest_message.
+        message_for_state = self._latest_message
+        if not self._latest_message and self._deep_research_template:
+            from uuid import uuid4
```

> Review comment: import at the top
```diff
+
+            title = None
+            if isinstance(self._deep_research_template, dict):
+                title = self._deep_research_template.get("notebook_title")
+
+            content = f"Load template: {title}" if title else "Load template"
```

> Review comment: Why do we need this message?

```diff
+
+            message_for_state = HumanMessage(content=content, id=str(uuid4()))
+            self._latest_message = message_for_state
+
+        base_state = DeepResearchState(
+            messages=[message_for_state] if message_for_state else [],
+            start_id=message_for_state.id if message_for_state else None,
+            graph_status=None,
+            notebook_short_id=None,
```

> Review comment: notebook_short_id should be removed, it's not on […]

```diff
+        )
+
+        if self._deep_research_template:
+            if isinstance(self._deep_research_template, dict):
+                notebook_short_id = self._deep_research_template.get("notebook_short_id")
+                if notebook_short_id:
+                    base_state.template_notebook_short_id = notebook_short_id
+                    base_state.skip_onboarding = True
+
+        return base_state

     def get_resumed_state(self) -> PartialDeepResearchState:
         if not self._latest_message:
```
ee/hogai/graph/deep_research/notebook/nodes.py

```diff
@@ -1,7 +1,19 @@
+from uuid import uuid4
+
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnableConfig
+from posthoganalytics import capture_exception
+
+from posthog.schema import (
+    DeepResearchNotebook,
+    DeepResearchType,
+    HumanMessage,
+    NotebookUpdateMessage,
+    ProsemirrorJSONContent,
+)
-from posthog.schema import DeepResearchNotebook, DeepResearchType, HumanMessage
+from posthog.models.notebook.notebook import Notebook
+from posthog.sync import database_sync_to_async

 from ee.hogai.graph.deep_research.base.nodes import DeepResearchNode
 from ee.hogai.graph.deep_research.notebook.prompts import DEEP_RESEARCH_NOTEBOOK_PLANNING_PROMPT
```
```diff
@@ -15,18 +27,47 @@ def node_name(self) -> MaxNodeName:
         return DeepResearchNodeName.NOTEBOOK_PLANNING

     async def arun(self, state: DeepResearchState, config: RunnableConfig) -> PartialDeepResearchState:
+        # Load template
+        template_markdown = await self._retrieve_template_markdown(state)
         # We use instructions with the OpenAI Responses API
         instructions = DEEP_RESEARCH_NOTEBOOK_PLANNING_PROMPT.format(
             core_memory=await self._aget_core_memory(),
         )

+        # Get last message if available, otherwise use empty string for template-only mode
         if not state.messages:
             raise IndexError("No messages in state")

         last_message = state.messages[-1]
         if not isinstance(last_message, HumanMessage):
             raise ValueError("Last message is not a human message.")

+        human_content = last_message.content if last_message else ""
```
> Review comment on line 45 (logic): Redundant condition: `last_message` is guaranteed to exist due to the check on line 42. Suggested change: `human_content = last_message.content`.
```diff
+
+        # If a template was provided, emit a synthetic "loaded notebook" message once
+        pre_messages: list = []
+        if template_markdown and not state.has_emitted_template_loaded:
```

> Review comment: Is all of this needed to create custom templates? It seems extremely convoluted. This is the flow I would expect: […] In both cases it seems like you would need an additional node that only does this, and this node should then be relegated to just the planning. This would also help us with separation of concerns when we want to move to the Google DR flow.

```diff
+            serializer = self._get_notebook_serializer()
+            json_content = serializer.from_markdown_to_json(template_markdown)
+            loaded_message = NotebookUpdateMessage(
+                id=str(uuid4()),
+                notebook_id=str(state.template_notebook_short_id or ""),
+                content=ProsemirrorJSONContent.model_validate(json_content.model_dump(exclude_none=True)),
```

> Review comment: Weird spacing

```diff
+                notebook_type="deep_research",
```

> Review comment: I think this should be either […]

```diff
+                event="loaded",
+            )
+            await self._write_message(loaded_message)
+            pre_messages.append(loaded_message)
```
```diff
+
+        # If template exists, use it (with or without additional human content)
+        # If no template, use human content (which should exist in this case)
+        if template_markdown:
+            human_message = f"{template_markdown}\n\n{human_content}" if human_content else template_markdown
+        else:
+            human_message = human_content
+
         prompt = ChatPromptTemplate.from_messages(
             [
-                ("human", last_message.content),
+                ("human", human_message),
             ]
         )
```
```diff
@@ -49,8 +90,46 @@ async def arun(self, state: DeepResearchState, config: RunnableConfig) -> PartialDeepResearchState:
             notebook_update_message.current_run_notebooks = current_run_notebooks

         return PartialDeepResearchState(
-            messages=[notebook_update_message],
+            messages=[*pre_messages, notebook_update_message],
```

> Review comment: This "loaded" message seems to be something we send to trigger a frontend interaction, so it shouldn't really be in the state.

```diff
             previous_response_id=None,  # we reset the previous response id because we're starting a new conversation after the onboarding
             conversation_notebooks=[notebook_info],
             current_run_notebooks=current_run_notebooks,
+            has_emitted_template_loaded=True if pre_messages else state.has_emitted_template_loaded,
         )
```
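If the "loaded" event is purely a frontend trigger, a minimal sketch of that suggestion would stream it without persisting it. This reuses the PR's own names (`loaded_message`, `notebook_update_message`, `_write_message`); whether the frontend can live without the event being replayed from state is an assumption here.

```python
emitted = template_markdown and not state.has_emitted_template_loaded
if emitted:
    # Stream the UI-only "loaded" event to the client without persisting it.
    await self._write_message(loaded_message)

return PartialDeepResearchState(
    messages=[notebook_update_message],  # graph state stays free of UI-only messages
    previous_response_id=None,
    conversation_notebooks=[notebook_info],
    current_run_notebooks=current_run_notebooks,
    has_emitted_template_loaded=bool(emitted) or state.has_emitted_template_loaded,
)
```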
```diff
+
+    @database_sync_to_async
+    def get_notebook(self, state: DeepResearchState) -> Notebook:
+        return Notebook.objects.filter(
+            team=self._team, short_id=str(state.template_notebook_short_id), deleted=False
```

> Review comment: It would be better to check the state for this id, before doing the query, and then returning None in case […]

```diff
+        ).first()
```
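One way to apply that suggestion (a sketch, not the PR's code): check the state before touching the database, and loosen the return annotation, since `.first()` can already return `None`.

```python
@database_sync_to_async
def get_notebook(self, state: DeepResearchState) -> Notebook | None:
    # Short-circuit before querying when no template notebook was requested.
    if not state.template_notebook_short_id:
        return None
    return Notebook.objects.filter(
        team=self._team,
        short_id=str(state.template_notebook_short_id),
        deleted=False,
    ).first()
```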
```diff
+
+    async def _retrieve_template_markdown(self, state: DeepResearchState) -> str | None:
+        if not (
+            state.template_notebook_short_id and not state.template_markdown and not state.has_emitted_template_loaded
```

> Review comment: why?

> Review comment: Also […]

```diff
+        ):
```

> Review comment on lines +107 to +109 (style): Complex nested boolean logic makes this condition hard to understand. Consider extracting it to a helper method with a descriptive name like `should_retrieve_template_from_db`.

```diff
+            return state.template_markdown
```
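A sketch of the extraction the bot suggests; the helper name comes from the review comment, and the body is the PR's condition without the outer negation.

```python
def _should_retrieve_template_from_db(self, state: DeepResearchState) -> bool:
    # Fetch from the database only when a template notebook was requested,
    # no markdown has been cached yet, and the loaded event hasn't been emitted.
    return bool(
        state.template_notebook_short_id
        and not state.template_markdown
        and not state.has_emitted_template_loaded
    )
```

`_retrieve_template_markdown` would then open with `if not self._should_retrieve_template_from_db(state): return state.template_markdown`.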
```diff
+
+        try:
+            notebook = await self.get_notebook(state)
+
+            if not notebook:
+                return state.template_markdown
+
+            text_content = getattr(notebook, "text_content", None)
+            if text_content:
+                return text_content
+
+            content = getattr(notebook, "content", None)
+            if content:
+                try:
+                    nb_json = ProsemirrorJSONContent.model_validate(notebook.content)
+                    from ee.hogai.notebook.notebook_serializer import NotebookSerializer
```

> Review comment: import at the top if possible

```diff
+
+                    return NotebookSerializer().from_json_to_markdown(nb_json)
+                except Exception:
+                    return state.template_markdown
+
+            return state.template_markdown
+        except Exception as e:
+            capture_exception(e)
+            return state.template_markdown
```
ee/hogai/graph/deep_research/onboarding/nodes.py

```diff
@@ -20,6 +20,12 @@ def node_name(self) -> MaxNodeName:
         return DeepResearchNodeName.NOTEBOOK_PLANNING
```
> Review comment on line 20 (logic): The node_name returns NOTEBOOK_PLANNING but the class is DeepResearchOnboardingNode; this seems inconsistent and could be confusing.
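A sketch of the straightforward fix, assuming `DeepResearchNodeName` has (or gains) an `ONBOARDING` member; that enum member is an assumption, not something the diff confirms.

```python
class DeepResearchOnboardingNode(DeepResearchNode):
    @property
    def node_name(self) -> MaxNodeName:
        # Assumed: DeepResearchNodeName defines an ONBOARDING member.
        return DeepResearchNodeName.ONBOARDING
```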
```diff

     def should_run_onboarding_at_start(self, state: DeepResearchState) -> Literal["onboarding", "planning", "continue"]:
+        # Skipping onboarding when provided a template
+        if state.skip_onboarding:
```

> Review comment: Why do we need […]

```diff
+            if state.current_run_notebooks:
+                return "continue"
+            return "planning"
+
         if not state.messages:
             return "onboarding"
```
> Review comment: We're sending the whole notebook here, it's overkill.