|
| 1 | +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. |
| 2 | +# SPDX-License-Identifier: AGPL-3.0 |
| 3 | +"""Compress service for batch memory abstract reduction.""" |
| 4 | + |
| 5 | +from typing import Any, Dict, List |
| 6 | + |
| 7 | +from openviking.server.identity import RequestContext |
| 8 | +from openviking.storage.viking_fs import get_viking_fs |
| 9 | +from openviking_cli.utils.logger import get_logger |
| 10 | + |
| 11 | +logger = get_logger(__name__) |
| 12 | + |
| 13 | + |
class CompressService:
    """Scan a directory and re-summarize memories with abstracts exceeding a target length."""

    def __init__(self, max_abstract_length: int = 128):
        # Upper bound (in characters) for an abstract, INCLUDING the "..."
        # ellipsis appended on truncation.
        self.max_abstract_length = max_abstract_length

    def _truncate_abstract(self, abstract: str) -> str:
        """Shorten ``abstract`` to at most ``self.max_abstract_length`` characters.

        Cuts at the last word boundary that fits where possible, then appends
        "..." to signal removed text. Unlike a naive
        ``abstract[:max] + "..."``, the ellipsis is budgeted for, so the
        result never exceeds the configured limit.
        """
        ellipsis = "..."
        # Reserve room for the ellipsis so the final string stays within budget.
        budget = max(self.max_abstract_length - len(ellipsis), 0)
        head = abstract[:budget]
        # Drop a trailing partial word when there is a space to cut at.
        if " " in head:
            head = head.rsplit(" ", 1)[0]
        return head + ellipsis

    async def compress_directory(
        self,
        uri: str,
        ctx: "RequestContext",
        dry_run: bool = False,
    ) -> Dict[str, Any]:
        """Scan directory for memories with verbose abstracts and truncate them.

        Args:
            uri: Directory URI to scan; only ``.md`` entries are considered.
            ctx: Request context forwarded to the storage layer.
            dry_run: When True, report what would change without writing.

        Returns:
            Stats dict: ``status``, ``files_scanned``, ``files_compressed``,
            ``chars_saved``, ``dry_run``, and up to 20 ``verbose_files``
            entries describing the offending abstracts.
        """
        viking_fs = get_viking_fs()
        if not viking_fs:
            return {"status": "error", "message": "VikingFS not available"}

        try:
            entries = await viking_fs.list_directory(uri, ctx=ctx)
        except Exception as e:
            logger.error("Failed to list directory %s: %s", uri, e)
            return {"status": "error", "message": str(e)}

        files_scanned = 0
        files_compressed = 0
        chars_saved = 0
        verbose_files: List[Dict[str, Any]] = []

        for entry in entries:
            entry_uri = entry.get("uri", "")
            if not entry_uri.endswith(".md"):
                continue
            files_scanned += 1

            abstract = entry.get("abstract", "")
            if len(abstract) <= self.max_abstract_length:
                continue

            truncated = self._truncate_abstract(abstract)
            # Characters actually removed by truncation (not merely the excess
            # over the limit, which under-counts what the rewrite saves).
            saved = len(abstract) - len(truncated)
            verbose_files.append(
                {
                    "uri": entry_uri,
                    "current_length": len(abstract),
                    "excess": len(abstract) - self.max_abstract_length,
                }
            )

            if dry_run:
                # Count what WOULD be compressed without touching storage.
                files_compressed += 1
                chars_saved += saved
            else:
                try:
                    await viking_fs.write_metadata(entry_uri, {"abstract": truncated}, ctx=ctx)
                    files_compressed += 1
                    chars_saved += saved
                except Exception as e:
                    # Best-effort: a single failed entry must not abort the scan.
                    logger.warning("Failed to compress %s: %s", entry_uri, e)

        return {
            "status": "ok",
            "files_scanned": files_scanned,
            "files_compressed": files_compressed,
            "chars_saved": chars_saved,
            "dry_run": dry_run,
            "verbose_files": verbose_files[:20],
        }
0 commit comments