-
Notifications
You must be signed in to change notification settings - Fork 1.4k
feat: knowledge base for long-term memory (#1099) #1115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2c18d7c
d07d875
f45ca7c
ece99c9
852ab43
f678309
3b9c0e4
3d10e6d
47f4f10
fbd480b
b7af46a
7216ce8
4e2dcb2
c363187
3d2f4b6
d328676
b2fd959
70cd6df
b4bfc4e
e195c47
dd804f2
606533f
06d19b1
b4fba05
1b0f125
e134793
52d06b2
3086710
a275992
ba023f5
8d8b573
3564835
60367cb
b2d810a
3bd0880
0971ae6
4cbdaf2
7e7991e
516caa2
d000b44
dafac62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,6 +58,10 @@ | |
| ) | ||
| from app.utils.event_loop_utils import set_main_event_loop | ||
| from app.utils.file_utils import get_working_directory | ||
| from app.utils.memory_file import ( | ||
| MEMORY_ARCHITECTURE_PROMPT, | ||
| get_index_for_prompt, | ||
| ) | ||
| from app.utils.server.sync_step import sync_step | ||
| from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback | ||
| from app.utils.workforce import Workforce | ||
|
|
@@ -235,7 +239,8 @@ def check_conversation_history_length( | |
|
|
||
|
|
||
| def build_conversation_context( | ||
| task_lock: TaskLock, header: str = "=== CONVERSATION HISTORY ===" | ||
| task_lock: TaskLock, | ||
| header: str = "=== CONVERSATION HISTORY ===", | ||
| ) -> str: | ||
| """Build conversation context from task_lock history | ||
| with files listed only once at the end. | ||
|
|
@@ -245,14 +250,13 @@ def build_conversation_context( | |
| header: Header text for the context section | ||
|
|
||
| Returns: | ||
| Formatted context string with task history | ||
| and files listed once at the end | ||
| Formatted context string with task history and files listed once at the end | ||
| """ | ||
| context = "" | ||
| working_directories = set() # Collect all unique working directories | ||
|
|
||
| if task_lock.conversation_history: | ||
| context = f"{header}\n" | ||
| context += f"{header}\n" | ||
|
|
||
| for entry in task_lock.conversation_history: | ||
| if entry["role"] == "task_result": | ||
|
|
@@ -1242,8 +1246,7 @@ async def run_decomposition(): | |
| "workforce" | ||
| ) | ||
| conv_ctx = build_conversation_context( | ||
| task_lock, | ||
| header="=== Previous Conversation ===", | ||
| task_lock, header="=== Previous Conversation ===" | ||
| ) | ||
| simple_answer_prompt = ( | ||
| f"{conv_ctx}" | ||
|
|
@@ -2435,6 +2438,12 @@ async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat): | |
| the current date. | ||
| """ | ||
|
|
||
| if getattr(data, "use_project_memory", False): | ||
| enhanced_description += MEMORY_ARCHITECTURE_PROMPT | ||
| memory_index = get_index_for_prompt(working_directory) | ||
| if memory_index: | ||
| enhanced_description += "\n" + memory_index | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This memory_index changes over time, right? I am not sure whether it is appropriate to add it to the system prompt. |
||
|
|
||
| # Pass per-agent custom model config if available | ||
| custom_model_config = getattr(data, "custom_model_config", None) | ||
| return agent_model( | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe rename this to long_term_memory. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
|
|
||
| """ | ||
| Markdown-based long-term memory for agents (issue #1099). | ||
|
|
||
| memory.md in the project's .eigent/ directory acts as an index: only a short | ||
| prefix (e.g. first 200 lines) is passed into the system prompt. Topic-specific | ||
| memories live in other .md files under .eigent/; the agent reads and writes | ||
| them on demand via file operations (no dedicated remember/read tools). | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| from pathlib import Path | ||
| from typing import Final | ||
|
|
||
| logger = logging.getLogger("memory_file") | ||
|
|
||
# File name of the memory index inside the .eigent directory.
_MEMORY_FILENAME: Final[str] = "memory.md"
# Name of the per-project directory that holds memory.md.
_EIGENT_DIR: Final[str] = ".eigent"
# Default value of get_index_for_prompt's max_lines parameter.
_DEFAULT_INDEX_LINES: Final[int] = 200
# Upper bound applied to any caller-supplied max_lines.
_MAX_INDEX_LINES: Final[int] = 2000

# Appended to a truncated index so the reader knows more memory exists on disk.
_CONTINUATION_NOTE: Final[str] = "\n\n...(further memory in .eigent/; read files as needed)\n"
# Heading placed before the index text returned for prompt injection.
_INDEX_HEADER: Final[str] = "=== Project memory index (.eigent/memory.md) ===\n"
|
|
||
|
|
||
class MemoryFileError(Exception):
    """Root of the exception hierarchy for memory-file operations."""
|
|
||
|
|
||
class MemoryReadError(MemoryFileError):
    """Signals that the memory file could not be read."""
|
|
||
|
|
||
def _validate_working_directory(working_directory: str) -> Path:
    """Resolve *working_directory* to an absolute, existing directory path.

    Args:
        working_directory: Path to the project directory. A leading ``~`` is
            expanded to the corresponding home directory.

    Returns:
        The expanded and resolved directory path.

    Raises:
        ValueError: If the value is empty or blank, cannot be expanded or
            resolved, does not exist, or is not a directory.
    """
    if not working_directory or not working_directory.strip():
        raise ValueError("working_directory cannot be empty")
    try:
        path = Path(working_directory).expanduser().resolve()
    except (RuntimeError, OSError) as e:
        # expanduser() raises RuntimeError when a "~user" home cannot be
        # determined, and resolve() can fail on the filesystem. Report both
        # as ValueError so every validation failure has a single type.
        raise ValueError(
            f"working_directory cannot be resolved: {working_directory}"
        ) from e
    if not path.exists():
        raise ValueError(f"working_directory does not exist: {path}")
    if not path.is_dir():
        raise ValueError(f"working_directory is not a directory: {path}")
    return path
|
|
||
|
|
||
def get_memory_file_path(working_directory: str) -> Path:
    """Locate the project's memory index file, ``.eigent/memory.md``.

    The working directory is validated first, and the ``.eigent`` directory
    is created when it is not already present. The memory file itself is
    not created.

    Args:
        working_directory: Path to the project directory.

    Returns:
        Path of ``memory.md`` inside the project's ``.eigent`` directory.
    """
    memory_dir = _validate_working_directory(working_directory).joinpath(
        _EIGENT_DIR
    )
    memory_dir.mkdir(parents=True, exist_ok=True)
    return memory_dir.joinpath(_MEMORY_FILENAME)
|
|
||
|
|
||
def read_memory(working_directory: str) -> str | None:
    """Read the full content of the project's memory file.

    This function is best-effort: every failure is logged and reported as
    ``None`` rather than raised.

    Args:
        working_directory: Path to the project directory.

    Returns:
        The file content, or ``None`` when the working directory is invalid,
        the file is missing, unreadable, not valid UTF-8, or contains only
        whitespace.
    """
    try:
        memory_path = get_memory_file_path(working_directory)
    except ValueError as e:
        logger.warning("Invalid working directory: %s", e)
        return None
    except OSError as e:
        # get_memory_file_path creates the .eigent directory, which can fail
        # (for example on a read-only project directory).
        logger.error(
            "Failed to prepare memory directory under %s: %s",
            working_directory,
            e,
        )
        return None

    try:
        content = memory_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # No memory has been written yet; this is the normal empty state.
        return None
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError, so it must be named
        # explicitly to keep a badly encoded file from raising.
        logger.error("Failed to read memory file %s: %s", memory_path, e)
        return None

    return content if content.strip() else None
|
|
||
|
|
||
# Prompt text describing the .eigent/ memory layout to the agent: memory.md
# is the index, and topic files are read or written with ordinary file
# operations.
MEMORY_ARCHITECTURE_PROMPT: Final[str] = """
Project long-term memory lives under .eigent/ in the project directory.
- .eigent/memory.md is the index: it lists or summarizes memory topics (e.g. user_preferences.md, decisions.md).
- You can read any .eigent/*.md file when you need topic-specific information.
- To remember something: create or edit markdown files under .eigent/ (e.g. append to an existing topic file or create one). Use normal file operations (read/write/append) or shell commands; no dedicated memory tool is required.
"""
|
|
||
|
|
||
def get_index_for_prompt(
    working_directory: str,
    max_lines: int = _DEFAULT_INDEX_LINES,
) -> str | None:
    """Build the memory-index snippet intended for the system prompt.

    Only the leading part of memory.md is returned; topic-specific files are
    meant to be read by the agent through file operations.

    Args:
        working_directory: Path to the project directory.
        max_lines: Maximum number of memory.md lines to include. Values
            above ``_MAX_INDEX_LINES`` are reduced to that limit.

    Returns:
        The index header followed by the (possibly truncated) memory text,
        or ``None`` when the directory is blank, ``max_lines`` is not
        positive, or no memory content is available.
    """
    has_directory = bool(working_directory) and bool(working_directory.strip())
    if not has_directory or max_lines <= 0:
        return None

    line_cap = min(max_lines, _MAX_INDEX_LINES)

    memory_text = read_memory(working_directory)
    if not memory_text:
        return None

    all_lines = memory_text.splitlines()
    if len(all_lines) <= line_cap:
        # Short enough: pass the file through untouched.
        body = memory_text
    else:
        # Keep the leading lines and point the agent at the rest on disk.
        body = "\n".join(all_lines[:line_cap]) + _CONTINUATION_NOTE

    return f"{_INDEX_HEADER}{body}\n"
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Move this file to backend/tests/app/utils/memory_file.py. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= | ||
|
|
||
| """Unit tests for the markdown-based memory file (issue #1099).""" | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| import pytest | ||
|
|
||
| from app.utils import memory_file as mf | ||
|
|
||
|
|
||
def _write_memory(working_dir: str, content: str) -> None:
    """Test helper: store *content* as the project's .eigent/memory.md."""
    mf.get_memory_file_path(working_dir).write_text(content, encoding="utf-8")
|
|
||
|
|
||
@pytest.mark.unit
class TestMemoryFile:
    """Tests for memory_file's read_memory, get_index_for_prompt and path helper."""

    def test_read_nonexistent_memory(self, tmp_path: Path) -> None:
        """Reading memory from a directory without memory.md returns None."""
        content = mf.read_memory(str(tmp_path))
        assert content is None

    def test_read_blank_memory(self, tmp_path: Path) -> None:
        """A memory.md containing only whitespace is treated as no memory."""
        working_dir = str(tmp_path)
        _write_memory(working_dir, "  \n\n\t\n")
        assert mf.read_memory(working_dir) is None
        assert mf.get_index_for_prompt(working_dir) is None

    def test_read_memory(self, tmp_path: Path) -> None:
        """Read returns content written to memory.md."""
        working_dir = str(tmp_path)
        text = "# Project Memory\n\nLong-term memory.\n\nUser prefers dark mode."
        _write_memory(working_dir, text)
        assert mf.read_memory(working_dir) == text

    def test_read_multiple_sections(self, tmp_path: Path) -> None:
        """Read returns full file content."""
        working_dir = str(tmp_path)
        text = (
            "# Project Memory\n\nFirst entry.\n\n"
            "## Section 2\n\nSecond entry.\n\nThird entry."
        )
        _write_memory(working_dir, text)
        assert mf.read_memory(working_dir) == text

    def test_get_index_for_prompt(self, tmp_path: Path) -> None:
        """get_index_for_prompt returns header plus memory.md content."""
        working_dir = str(tmp_path)
        _write_memory(working_dir, "# Project Memory\n\nUser prefers Python 3.10.")

        ctx = mf.get_index_for_prompt(working_dir)
        assert ctx is not None
        # Both markers must be present; an `or` here would pass vacuously.
        assert "memory index" in ctx.lower()
        assert "memory.md" in ctx
        assert "Python 3.10" in ctx
        # A file under the line limit must not carry the truncation note.
        assert "further memory" not in ctx

    def test_get_index_for_prompt_empty(self, tmp_path: Path) -> None:
        """get_index_for_prompt returns None for empty/nonexistent memory."""
        working_dir = str(tmp_path)
        ctx = mf.get_index_for_prompt(working_dir)
        assert ctx is None

    def test_get_index_for_prompt_non_positive_max_lines(
        self, tmp_path: Path
    ) -> None:
        """A zero or negative max_lines yields None even when memory exists."""
        working_dir = str(tmp_path)
        _write_memory(working_dir, "# Project Memory\n\nSomething.")
        assert mf.get_index_for_prompt(working_dir, max_lines=0) is None
        assert mf.get_index_for_prompt(working_dir, max_lines=-5) is None

    def test_get_index_for_prompt_max_lines(self, tmp_path: Path) -> None:
        """get_index_for_prompt limits to first max_lines and adds note."""
        working_dir = str(tmp_path)
        lines = ["# Project Memory", ""] + [f"Line entry {i}." for i in range(300)]
        _write_memory(working_dir, "\n".join(lines))

        ctx = mf.get_index_for_prompt(working_dir, max_lines=50)
        assert ctx is not None
        # The header always mentions ".eigent", so only the note text proves
        # that truncation happened.
        assert "further memory" in ctx
        # 50 kept lines = title + blank + entries 0..47.
        assert "Line entry 47." in ctx
        assert "Line entry 48." not in ctx
        assert len(ctx.splitlines()) <= 55

    def test_memory_file_path(self, tmp_path: Path) -> None:
        """Memory file path is .eigent/memory.md under working dir."""
        working_dir = str(tmp_path)
        memory_path = mf.get_memory_file_path(working_dir)
        assert memory_path == tmp_path.resolve() / ".eigent" / "memory.md"
        assert memory_path.parent.is_dir()

    def test_invalid_working_directory(self) -> None:
        """Invalid working directory returns None for read."""
        content = mf.read_memory("/nonexistent/path/that/does/not/exist")
        assert content is None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: