-
Notifications
You must be signed in to change notification settings - Fork 1.5k
fix(semantic): add budget guard to overview generation with batched summarization #683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
bb1fd3a
c2be248
15caec4
43fa1d0
f2acd87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -354,7 +354,12 @@ async def _process_memory_directory(self, msg: SemanticMsg) -> None: | |
| file_summaries.append({"name": file_name, "summary": ""}) | ||
|
|
||
| overview = await self._generate_overview(dir_uri, file_summaries, []) | ||
| semantic = get_openviking_config().semantic | ||
| if len(overview) > semantic.overview_max_chars: | ||
| overview = overview[: semantic.overview_max_chars] | ||
| abstract = self._extract_abstract_from_overview(overview) | ||
| if len(abstract) > semantic.abstract_max_chars: | ||
| abstract = abstract[: semantic.abstract_max_chars - 3] + "..." | ||
|
|
||
| try: | ||
| await viking_fs.write_file(f"{dir_uri}/.overview.md", overview, ctx=ctx) | ||
|
|
@@ -577,8 +582,8 @@ async def _generate_text_summary( | |
| logger.warning(f"Failed to decode file as UTF-8, skipping: {file_path}") | ||
| return {"name": file_name, "summary": ""} | ||
|
|
||
| # Limit content length (about 10000 tokens) | ||
| max_chars = 30000 | ||
| # Limit content length | ||
| max_chars = get_openviking_config().semantic.max_file_content_chars | ||
| if len(content) > max_chars: | ||
| content = content[:max_chars] + "\n...(truncated)" | ||
|
|
||
|
|
@@ -747,6 +752,11 @@ async def _generate_overview( | |
| ) -> str: | ||
| """Generate directory's .overview.md (L1). | ||
|
|
||
| For small directories, generates a single overview from all file summaries. | ||
| For large directories that would exceed the prompt budget, splits file | ||
| summaries into batches, generates a partial overview per batch, then | ||
| merges the partials into a final overview. | ||
|
|
||
| Args: | ||
| dir_uri: Directory URI | ||
| file_summaries: File summary list | ||
|
|
@@ -755,9 +765,10 @@ async def _generate_overview( | |
| Returns: | ||
| Overview content | ||
| """ | ||
| import re | ||
|
|
||
| vlm = get_openviking_config().vlm | ||
| config = get_openviking_config() | ||
| vlm = config.vlm | ||
| semantic = config.semantic | ||
|
|
||
| if not vlm.is_available(): | ||
| logger.warning("VLM not available, using default overview") | ||
|
|
@@ -778,7 +789,42 @@ async def _generate_overview( | |
| else "None" | ||
| ) | ||
|
|
||
| # Generate overview | ||
| # Budget guard: check if prompt would be oversized | ||
| estimated_size = len(file_summaries_str) + len(children_abstracts_str) | ||
| if ( | ||
| estimated_size > semantic.max_overview_prompt_chars | ||
| and len(file_summaries) > semantic.overview_batch_size | ||
|
||
| ): | ||
| logger.info( | ||
| f"Overview prompt for {dir_uri} exceeds budget " | ||
| f"({estimated_size} chars, {len(file_summaries)} files). " | ||
| f"Splitting into batches of {semantic.overview_batch_size}." | ||
| ) | ||
| overview = await self._batched_generate_overview( | ||
| dir_uri, file_summaries, children_abstracts, file_index_map | ||
| ) | ||
| else: | ||
| overview = await self._single_generate_overview( | ||
| dir_uri, | ||
| file_summaries_str, | ||
| children_abstracts_str, | ||
| file_index_map, | ||
| ) | ||
|
|
||
| return overview | ||
|
|
||
| async def _single_generate_overview( | ||
| self, | ||
| dir_uri: str, | ||
| file_summaries_str: str, | ||
| children_abstracts_str: str, | ||
| file_index_map: Dict[int, str], | ||
| ) -> str: | ||
| """Generate overview from a single prompt (small directories).""" | ||
| import re | ||
|
|
||
| vlm = get_openviking_config().vlm | ||
|
|
||
| try: | ||
| prompt = render_prompt( | ||
| "semantic.overview_generation", | ||
|
|
@@ -801,9 +847,105 @@ def replace_index(match): | |
| return overview.strip() | ||
|
|
||
| except Exception as e: | ||
| logger.error(f"Failed to generate overview for {dir_uri}: {e}", exc_info=True) | ||
| logger.error( | ||
| f"Failed to generate overview for {dir_uri}: {e}", | ||
| exc_info=True, | ||
| ) | ||
| return f"# {dir_uri.split('/')[-1]}\n\nDirectory overview" | ||
|
|
||
| async def _batched_generate_overview( | ||
| self, | ||
| dir_uri: str, | ||
| file_summaries: List[Dict[str, str]], | ||
| children_abstracts: List[Dict[str, str]], | ||
| file_index_map: Dict[int, str], | ||
| ) -> str: | ||
| """Generate overview by batching file summaries and merging. | ||
|
|
||
| Splits file summaries into batches, generates a partial overview per | ||
| batch, then merges all partials into a final overview. | ||
| """ | ||
| import re | ||
|
|
||
| vlm = get_openviking_config().vlm | ||
| semantic = get_openviking_config().semantic | ||
| batch_size = semantic.overview_batch_size | ||
| dir_name = dir_uri.split("/")[-1] | ||
|
|
||
| # Split file summaries into batches | ||
| batches = [ | ||
| file_summaries[i : i + batch_size] for i in range(0, len(file_summaries), batch_size) | ||
| ] | ||
| logger.info(f"Generating overview for {dir_uri} in {len(batches)} batches") | ||
|
|
||
| # Generate partial overview per batch | ||
| partial_overviews = [] | ||
| for batch_idx, batch in enumerate(batches): | ||
| batch_lines = [] | ||
| for idx, item in enumerate(batch, 1): | ||
| batch_lines.append(f"[{idx}] {item['name']}: {item['summary']}") | ||
|
||
| batch_str = "\n".join(batch_lines) | ||
|
|
||
| # Only include children abstracts in the first batch | ||
| children_str = "None" | ||
| if batch_idx == 0 and children_abstracts: | ||
| children_str = "\n".join( | ||
| f"- {item['name']}/: {item['abstract']}" for item in children_abstracts | ||
| ) | ||
|
|
||
| try: | ||
| prompt = render_prompt( | ||
| "semantic.overview_generation", | ||
| { | ||
| "dir_name": dir_name, | ||
| "file_summaries": batch_str, | ||
| "children_abstracts": children_str, | ||
| }, | ||
| ) | ||
| partial = await vlm.get_completion_async(prompt) | ||
| partial_overviews.append(partial.strip()) | ||
| except Exception as e: | ||
| logger.warning( | ||
| f"Failed to generate partial overview batch " | ||
| f"{batch_idx + 1}/{len(batches)} for {dir_uri}: {e}" | ||
| ) | ||
|
|
||
| if not partial_overviews: | ||
| return f"# {dir_name}\n\nDirectory overview" | ||
|
|
||
| # If only one batch succeeded, use it directly | ||
| if len(partial_overviews) == 1: | ||
| overview = partial_overviews[0] | ||
| else: | ||
| # Merge partials into a final overview | ||
| combined = "\n\n---\n\n".join(partial_overviews) | ||
| try: | ||
| prompt = render_prompt( | ||
| "semantic.overview_generation", | ||
| { | ||
| "dir_name": dir_name, | ||
| "file_summaries": combined, | ||
| "children_abstracts": "None", | ||
|
||
| }, | ||
| ) | ||
| overview = await vlm.get_completion_async(prompt) | ||
| overview = overview.strip() | ||
| except Exception as e: | ||
| logger.error( | ||
| f"Failed to merge partial overviews for {dir_uri}: {e}", | ||
| exc_info=True, | ||
| ) | ||
| overview = partial_overviews[0] | ||
|
|
||
| # Post-process: replace [number] with actual file name | ||
| def replace_index(match): | ||
| idx = int(match.group(1)) | ||
| return file_index_map.get(idx, match.group(0)) | ||
|
|
||
| overview = re.sub(r"\[(\d+)\]", replace_index, overview) | ||
|
|
||
| return overview | ||
|
|
||
| async def _vectorize_directory( | ||
| self, | ||
| uri: str, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| # Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """Tests for SemanticConfig and overview budget estimation.""" | ||
|
|
||
| from openviking_cli.utils.config.parser_config import SemanticConfig | ||
|
|
||
|
|
||
| def test_semantic_config_defaults(): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Suggestion] (non-blocking) The tests here thoroughly validate the configuration defaults and the batch-splitting logic. Consider adding at least one test with a mocked VLM that verifies:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Acknowledged — will add behavioral tests with mocked VLM in a follow-up if needed. The current tests validate the config, budget logic, and batch splitting which are the core pure-function components. |
||
| """Test default values match previously hardcoded constants.""" | ||
| config = SemanticConfig() | ||
| assert config.max_file_content_chars == 30000 | ||
| assert config.max_overview_prompt_chars == 60000 | ||
| assert config.overview_batch_size == 50 | ||
| assert config.abstract_max_chars == 256 | ||
| assert config.overview_max_chars == 4000 | ||
|
|
||
|
|
||
| def test_semantic_config_custom_values(): | ||
| """Test custom values override defaults.""" | ||
| config = SemanticConfig( | ||
| max_overview_prompt_chars=100000, | ||
| overview_batch_size=100, | ||
| ) | ||
| assert config.max_overview_prompt_chars == 100000 | ||
| assert config.overview_batch_size == 100 | ||
| # Unchanged defaults | ||
| assert config.max_file_content_chars == 30000 | ||
| assert config.abstract_max_chars == 256 | ||
|
|
||
|
|
||
| def test_budget_under_limit_no_batching(): | ||
| """Small directories should not trigger batching.""" | ||
| config = SemanticConfig() | ||
| # 10 file summaries, each ~100 chars = ~1000 chars total | ||
| summaries = [{"name": f"file_{i}.py", "summary": "x" * 100} for i in range(10)] | ||
| total = sum(len(f"[{i}] {s['name']}: {s['summary']}") for i, s in enumerate(summaries, 1)) | ||
| assert total < config.max_overview_prompt_chars | ||
| assert len(summaries) <= config.overview_batch_size | ||
|
|
||
|
|
||
| def test_budget_over_limit_triggers_batching(): | ||
| """Large directories should exceed budget and require batching.""" | ||
| config = SemanticConfig() | ||
| # 200 file summaries, each ~500 chars = ~100000+ chars total | ||
| summaries = [{"name": f"file_{i}.py", "summary": "x" * 500} for i in range(200)] | ||
| total = sum(len(f"[{i}] {s['name']}: {s['summary']}") for i, s in enumerate(summaries, 1)) | ||
| assert total > config.max_overview_prompt_chars | ||
| assert len(summaries) > config.overview_batch_size | ||
|
|
||
|
|
||
| def test_abstract_truncation(): | ||
| """Test abstract is truncated to abstract_max_chars.""" | ||
| config = SemanticConfig(abstract_max_chars=100) | ||
| abstract = "x" * 200 | ||
| if len(abstract) > config.abstract_max_chars: | ||
| abstract = abstract[: config.abstract_max_chars - 3] + "..." | ||
| assert len(abstract) == 100 | ||
| assert abstract.endswith("...") | ||
|
|
||
|
|
||
| def test_overview_truncation(): | ||
| """Test overview is truncated to overview_max_chars.""" | ||
| config = SemanticConfig(overview_max_chars=500) | ||
| overview = "x" * 1000 | ||
| if len(overview) > config.overview_max_chars: | ||
| overview = overview[: config.overview_max_chars] | ||
| assert len(overview) == 500 | ||
|
|
||
|
|
||
| def test_batch_splitting(): | ||
| """Test batch splitting logic produces correct batch count.""" | ||
| config = SemanticConfig(overview_batch_size=50) | ||
| summaries = [{"name": f"f{i}.py", "summary": "s"} for i in range(120)] | ||
| batches = [ | ||
| summaries[i : i + config.overview_batch_size] | ||
| for i in range(0, len(summaries), config.overview_batch_size) | ||
| ] | ||
| assert len(batches) == 3 # 50 + 50 + 20 | ||
| assert len(batches[0]) == 50 | ||
| assert len(batches[1]) == 50 | ||
| assert len(batches[2]) == 20 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Design] (non-blocking) This truncation block is duplicated nearly identically in
`semantic_processor.py:_process_memory_directory` (lines ~357-362). Consider extracting a shared helper like `_enforce_size_limits(overview, abstract, config) -> (overview, abstract)` to avoid divergence when limits or truncation logic change. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Extracted
`_enforce_size_limits(overview, abstract)` on `SemanticProcessor`. Both call sites (`semantic_processor.py` and `semantic_dag.py`) now use it.