-
Notifications
You must be signed in to change notification settings - Fork 1.4k
feat: knowledge base for long-term memory (#1099) #1115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 38 commits
2c18d7c
d07d875
f45ca7c
ece99c9
852ab43
f678309
3b9c0e4
3d10e6d
47f4f10
fbd480b
b7af46a
7216ce8
4e2dcb2
c363187
3d2f4b6
d328676
b2fd959
70cd6df
b4bfc4e
e195c47
dd804f2
606533f
06d19b1
b4fba05
1b0f125
e134793
52d06b2
3086710
a275992
ba023f5
8d8b573
3564835
60367cb
b2d810a
3bd0880
0971ae6
4cbdaf2
7e7991e
516caa2
d000b44
dafac62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
|
|
||
| """ | ||
| Long-term memory via markdown files (issue #1099). | ||
|
|
||
| Memory is architecture-level: .eigent/memory.md is the index; the agent | ||
| reads/writes .eigent/*.md via file operations. This toolkit exposes no tools; | ||
| it stays selectable so chat_service can detect "knowledge_base_toolkit" in | ||
| data.tools and inject MEMORY_ARCHITECTURE_PROMPT + get_index_for_prompt() | ||
| into the system prompt. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| import os | ||
| from typing import Final | ||
|
|
||
| from camel.toolkits.base import BaseToolkit | ||
| from camel.toolkits.function_tool import FunctionTool | ||
|
|
||
| from app.agent.toolkit.abstract_toolkit import AbstractToolkit | ||
| from app.component.environment import env | ||
|
|
||
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# Default base directory; it is user-expanded and handed to env() as the
# default for "file_save_path" when the caller supplies no working directory.
_DEFAULT_WORKING_DIR: Final[str] = "~/.eigent"
|
|
||
|
|
||
def _resolve_working_directory(working_directory: str | None) -> str:
    """Pick the toolkit's working directory and try to make sure it exists.

    Args:
        working_directory: Caller-supplied directory. ``None`` or a blank
            value means "use the configured default".

    Returns:
        The user-expanded directory path. The path is returned even when the
        directory could not be created; that failure is only logged.
    """
    candidate = "" if working_directory is None else str(working_directory).strip()
    if not candidate:
        # Nothing usable from the caller: ask the environment, defaulting to
        # the expanded ~/.eigent location.
        fallback = os.path.expanduser(_DEFAULT_WORKING_DIR)
        candidate = str(env("file_save_path", fallback)).strip()

    resolved = os.path.expanduser(candidate)
    try:
        os.makedirs(resolved, exist_ok=True)
    except OSError as exc:
        # Best effort only: a missing directory must not block construction.
        logger.warning("Could not create working directory %s: %s", resolved, exc)
    return resolved
|
|
||
|
|
||
class KnowledgeBaseToolkit(BaseToolkit, AbstractToolkit):
    """
    Marker toolkit for project long-term memory.

    It deliberately exposes no callable tools: the agent reads and writes
    .eigent/*.md with its ordinary file/terminal tools. Selecting this toolkit
    is what lets chat_service add the memory architecture text and the memory
    index to the system prompt.
    """

    def __init__(
        self,
        api_task_id: str,
        working_directory: str | None = None,
        agent_name: str | None = None,
        timeout: float | None = None,
    ) -> None:
        """Validate the task id and record the toolkit's context.

        Args:
            api_task_id: Identifier of the owning task; must be non-blank.
            working_directory: Base directory; blank/None falls back to the
                module's default resolution.
            agent_name: Name of the owning agent; blank/None becomes "agent".
            timeout: Forwarded unchanged to the base toolkit initializer.

        Raises:
            ValueError: If ``api_task_id`` is empty or whitespace-only.
        """
        task_id = (api_task_id or "").strip()
        if not task_id:
            raise ValueError("api_task_id cannot be empty")

        super().__init__(timeout=timeout)

        cleaned_name = (agent_name or "").strip()
        self.api_task_id = task_id
        self.working_directory = _resolve_working_directory(working_directory)
        self.agent_name = cleaned_name if cleaned_name else "agent"

        log_context = {
            "api_task_id": self.api_task_id,
            "working_directory": self.working_directory,
            "agent_name": self.agent_name,
        }
        logger.debug("KnowledgeBaseToolkit initialized", extra=log_context)

    def get_tools(self) -> list[FunctionTool]:
        """Return an empty list: this toolkit contributes no tools."""
        return []
|
|
||
|
|
||
def get_tools(
    api_task_id: str,
    working_directory: str | None = None,
    agent_name: str | None = None,
) -> list[FunctionTool]:
    """Build a KnowledgeBaseToolkit and return the tools it exposes.

    The toolkit's constructor still runs (validating ``api_task_id`` and
    resolving the working directory); the returned list is whatever the
    toolkit's own ``get_tools`` yields.
    """
    toolkit = KnowledgeBaseToolkit(
        api_task_id=api_task_id,
        working_directory=working_directory,
        agent_name=agent_name,
    )
    return toolkit.get_tools()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,6 +58,10 @@ | |
| ) | ||
| from app.utils.event_loop_utils import set_main_event_loop | ||
| from app.utils.file_utils import get_working_directory | ||
| from app.utils.memory_file import ( | ||
| MEMORY_ARCHITECTURE_PROMPT, | ||
| get_index_for_prompt, | ||
| ) | ||
| from app.utils.server.sync_step import sync_step | ||
| from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback | ||
| from app.utils.workforce import Workforce | ||
|
|
@@ -235,7 +239,8 @@ def check_conversation_history_length( | |
|
|
||
|
|
||
| def build_conversation_context( | ||
| task_lock: TaskLock, header: str = "=== CONVERSATION HISTORY ===" | ||
| task_lock: TaskLock, | ||
| header: str = "=== CONVERSATION HISTORY ===", | ||
| ) -> str: | ||
| """Build conversation context from task_lock history | ||
| with files listed only once at the end. | ||
|
|
@@ -245,14 +250,13 @@ def build_conversation_context( | |
| header: Header text for the context section | ||
|
|
||
| Returns: | ||
| Formatted context string with task history | ||
| and files listed once at the end | ||
| Formatted context string with task history and files listed once at the end | ||
| """ | ||
| context = "" | ||
| working_directories = set() # Collect all unique working directories | ||
|
|
||
| if task_lock.conversation_history: | ||
| context = f"{header}\n" | ||
| context += f"{header}\n" | ||
|
|
||
| for entry in task_lock.conversation_history: | ||
| if entry["role"] == "task_result": | ||
|
|
@@ -553,7 +557,8 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): | |
| "without workforce" | ||
| ) | ||
| conv_ctx = build_conversation_context( | ||
| task_lock, header="=== Previous Conversation ===" | ||
| task_lock, | ||
| header="=== Previous Conversation ===", | ||
| ) | ||
bitloi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| simple_answer_prompt = ( | ||
| f"{conv_ctx}" | ||
|
|
@@ -1967,7 +1972,8 @@ async def question_confirm( | |
| context_prompt = "" | ||
| if task_lock: | ||
| context_prompt = build_conversation_context( | ||
| task_lock, header="=== Previous Conversation ===" | ||
| task_lock, | ||
| header="=== Previous Conversation ===", | ||
| ) | ||
|
|
||
| full_prompt = f"""{context_prompt}User Query: {prompt} | ||
|
|
@@ -2435,6 +2441,12 @@ async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat): | |
| the current date. | ||
| """ | ||
|
|
||
| if "knowledge_base_toolkit" in data.tools: | ||
| enhanced_description += MEMORY_ARCHITECTURE_PROMPT | ||
| memory_index = get_index_for_prompt(working_directory) | ||
| if memory_index: | ||
| enhanced_description += "\n" + memory_index | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This memory_index changes over time, right? I am not sure whether it is appropriate to add it to the system prompt. |
||
|
|
||
| # Pass per-agent custom model config if available | ||
| custom_model_config = getattr(data, "custom_model_config", None) | ||
| return agent_model( | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe rename this to long_term_memory. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
|
|
||
| """ | ||
| Markdown-based long-term memory for agents (issue #1099). | ||
|
|
||
| memory.md in the project's .eigent/ directory acts as an index: only a short | ||
| prefix (e.g. first 200 lines) is passed into the system prompt. Topic-specific | ||
| memories live in other .md files under .eigent/; the agent reads and writes | ||
| them on demand via file operations (no dedicated remember/read tools). | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| from pathlib import Path | ||
| from typing import Final | ||
|
|
||
# Logger with a fixed name (not derived from __name__).
logger = logging.getLogger("memory_file")

# On-disk layout: <working_directory>/.eigent/memory.md
_EIGENT_DIR: Final[str] = ".eigent"
_MEMORY_FILENAME: Final[str] = "memory.md"

# Index size limits, counted in lines of memory.md: the default prefix length
# and the hard cap applied to any caller-requested length.
_DEFAULT_INDEX_LINES: Final[int] = 200
_MAX_INDEX_LINES: Final[int] = 2000

# Text placed before the index, and appended after it when it was truncated.
_INDEX_HEADER: Final[str] = "=== Project memory index (.eigent/memory.md) ===\n"
_CONTINUATION_NOTE: Final[str] = (
    "\n\n"
    "...(further memory in .eigent/; read files as needed)"
    "\n"
)
|
|
||
|
|
||
class MemoryFileError(Exception):
    """Root of the exception hierarchy for memory-file handling."""
|
|
||
|
|
||
class MemoryReadError(MemoryFileError):
    """Signals that the memory file could not be read."""
|
|
||
|
|
||
def _validate_working_directory(working_directory: str) -> Path:
    """Turn ``working_directory`` into an absolute path to an existing directory.

    Args:
        working_directory: Directory path; ``~`` is expanded and the result
            is resolved.

    Returns:
        The resolved ``Path``.

    Raises:
        ValueError: If the value is empty/blank, does not exist, or exists
            but is not a directory.
    """
    if not (working_directory and working_directory.strip()):
        raise ValueError("working_directory cannot be empty")

    candidate = Path(working_directory).expanduser().resolve()
    if not candidate.exists():
        problem = "does not exist"
    elif not candidate.is_dir():
        problem = "is not a directory"
    else:
        return candidate
    raise ValueError(f"working_directory {problem}: {candidate}")
|
|
||
|
|
||
def get_memory_file_path(working_directory: str) -> Path:
    """Locate .eigent/memory.md under ``working_directory``.

    The .eigent/ directory is created if it is missing; the memory file
    itself is not created.

    Raises:
        ValueError: If ``working_directory`` fails validation.
    """
    memory_dir = _validate_working_directory(working_directory).joinpath(_EIGENT_DIR)
    memory_dir.mkdir(parents=True, exist_ok=True)
    return memory_dir.joinpath(_MEMORY_FILENAME)
|
|
||
|
|
||
def read_memory(working_directory: str) -> str | None:
    """Read the full content of the memory file, or None if missing/invalid.

    This function never raises for filesystem or decoding problems; every
    failure is logged and reported as ``None`` so prompt construction can
    continue without memory.

    Args:
        working_directory: Project directory that contains (or will contain)
            the .eigent/ folder.

    Returns:
        The file's text, or ``None`` when the directory is invalid, the
        .eigent/ folder cannot be prepared, the file is absent, unreadable,
        not valid UTF-8, or contains only whitespace.
    """
    try:
        memory_path = get_memory_file_path(working_directory)
    except ValueError as e:
        logger.warning("Invalid working directory: %s", e)
        return None
    except OSError as e:
        # get_memory_file_path creates .eigent/ as a side effect; that mkdir
        # can fail (read-only location, or ".eigent" exists as a file).
        logger.warning(
            "Could not prepare memory directory under %s: %s",
            working_directory,
            e,
        )
        return None

    try:
        content = memory_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # No memory recorded yet: an expected state, not worth logging.
        return None
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # listed explicitly to honor the "None if invalid" contract.
        logger.error("Failed to read memory file %s: %s", memory_path, e)
        return None

    return content if content.strip() else None
|
|
||
|
|
||
# Prompt fragment describing the .eigent/ memory layout to the agent.
# chat_service appends it to the agent description when
# "knowledge_base_toolkit" is among the selected tools. The text is consumed
# verbatim at runtime, so edit wording deliberately.
MEMORY_ARCHITECTURE_PROMPT: Final[str] = """
Project long-term memory lives under .eigent/ in the project directory.
- .eigent/memory.md is the index: it lists or summarizes memory topics (e.g. user_preferences.md, decisions.md).
- You can read any .eigent/*.md file when you need topic-specific information.
- To remember something: create or edit markdown files under .eigent/ (e.g. append to an existing topic file or create one). Use normal file operations (read/write/append) or shell commands; no dedicated memory tool is required.
"""
|
|
||
|
|
||
def get_index_for_prompt(
    working_directory: str,
    max_lines: int = _DEFAULT_INDEX_LINES,
) -> str | None:
    """
    Build the memory-index section that goes into the system prompt.

    Only the leading lines of memory.md are included; anything beyond the
    limit is replaced by a short note telling the agent to read the files
    itself.

    Args:
        working_directory: Project directory holding .eigent/memory.md.
        max_lines: Requested number of lines; capped at the module maximum.

    Returns:
        Header + index text + trailing newline, or None when the directory
        is blank, ``max_lines`` is not positive, or there is no memory text.
    """
    has_directory = bool(working_directory and working_directory.strip())
    if not has_directory or max_lines <= 0:
        return None
    line_cap = min(max_lines, _MAX_INDEX_LINES)

    memory_text = read_memory(working_directory)
    if not memory_text:
        return None

    all_lines = memory_text.splitlines()
    if len(all_lines) <= line_cap:
        # Short enough: pass the file through untouched.
        body = memory_text
    else:
        body = "\n".join(all_lines[:line_cap]) + _CONTINUATION_NOTE

    return f"{_INDEX_HEADER}{body}\n"
Uh oh!
There was an error while loading. Please reload this page.