Skip to content

Commit f7f3573

Browse files
Matt Van Hornclaude
authored andcommitted
feat(session): add batch memory compression CLI command
Add `ov compress` command that scans a directory for memories with verbose abstracts and truncates them to a target length. - CompressService: scans directory, filters by abstract length, truncates excess (or dry-run preview) - POST /v1/content/compress endpoint following reindex pattern - `ov compress viking://user/memories/ --max-abstract-length 128 --dry-run` - Returns stats: files_scanned, files_compressed, chars_saved Relates to #350, #578 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a092d64 commit f7f3573

File tree

6 files changed

+206
-0
lines changed

6 files changed

+206
-0
lines changed

crates/ov_cli/src/client.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,20 @@ impl HttpClient {
328328
self.post("/api/v1/content/reindex", &body).await
329329
}
330330

331+
pub async fn compress(
332+
&self,
333+
uri: &str,
334+
max_abstract_length: u32,
335+
dry_run: bool,
336+
) -> Result<serde_json::Value> {
337+
let body = serde_json::json!({
338+
"uri": uri,
339+
"max_abstract_length": max_abstract_length,
340+
"dry_run": dry_run,
341+
});
342+
self.post("/api/v1/content/compress", &body).await
343+
}
344+
331345
/// Download file as raw bytes
332346
pub async fn get_bytes(&self, uri: &str) -> Result<Vec<u8>> {
333347
let url = format!("{}/api/v1/content/download", self.base_url);

crates/ov_cli/src/commands/content.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,19 @@ pub async fn reindex(
5151
Ok(())
5252
}
5353

54+
pub async fn compress(
55+
client: &HttpClient,
56+
uri: &str,
57+
max_abstract_length: u32,
58+
dry_run: bool,
59+
output_format: OutputFormat,
60+
compact: bool,
61+
) -> Result<()> {
62+
let result = client.compress(uri, max_abstract_length, dry_run).await?;
63+
crate::output::output_success(result, output_format, compact);
64+
Ok(())
65+
}
66+
5467
pub async fn get(client: &HttpClient, uri: &str, local_path: &str) -> Result<()> {
5568
// Check if target path already exists
5669
let path = Path::new(local_path);

crates/ov_cli/src/main.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,17 @@ enum Commands {
332332
#[arg(long, default_value = "true")]
333333
wait: bool,
334334
},
335+
/// Compress verbose memory abstracts in a directory
336+
Compress {
337+
/// Viking URI (directory to scan)
338+
uri: String,
339+
/// Maximum abstract length in characters
340+
#[arg(long, default_value = "128")]
341+
max_abstract_length: u32,
342+
/// Preview changes without modifying anything
343+
#[arg(long)]
344+
dry_run: bool,
345+
},
335346
/// Download file to local path (supports binaries/images)
336347
Get {
337348
/// Viking URI
@@ -750,6 +761,11 @@ async fn main() {
750761
regenerate,
751762
wait,
752763
} => handle_reindex(uri, regenerate, wait, ctx).await,
764+
Commands::Compress {
765+
uri,
766+
max_abstract_length,
767+
dry_run,
768+
} => handle_compress(uri, max_abstract_length, dry_run, ctx).await,
753769
Commands::Get { uri, local_path } => handle_get(uri, local_path, ctx).await,
754770
Commands::Find {
755771
query,
@@ -1193,6 +1209,24 @@ async fn handle_reindex(uri: String, regenerate: bool, wait: bool, ctx: CliConte
11931209
.await
11941210
}
11951211

1212+
async fn handle_compress(
1213+
uri: String,
1214+
max_abstract_length: u32,
1215+
dry_run: bool,
1216+
ctx: CliContext,
1217+
) -> Result<()> {
1218+
let client = ctx.get_client();
1219+
commands::content::compress(
1220+
&client,
1221+
&uri,
1222+
max_abstract_length,
1223+
dry_run,
1224+
ctx.output_format,
1225+
ctx.compact,
1226+
)
1227+
.await
1228+
}
1229+
11961230
async fn handle_get(uri: String, local_path: String, ctx: CliContext) -> Result<()> {
11971231
let client = ctx.get_client();
11981232
commands::content::get(&client, &uri, &local_path).await

openviking/server/routers/content.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ class ReindexRequest(BaseModel):
2828
wait: bool = True
2929

3030

31+
class CompressRequest(BaseModel):
32+
"""Request to compress memory abstracts in a directory."""
33+
34+
uri: str
35+
max_abstract_length: int = 128
36+
dry_run: bool = False
37+
38+
3139
router = APIRouter(prefix="/api/v1/content", tags=["content"])
3240

3341

@@ -172,6 +180,33 @@ async def reindex(
172180
)
173181

174182

183+
@router.post("/compress")
184+
async def compress(
185+
request: CompressRequest = Body(...),
186+
_ctx: RequestContext = Depends(get_request_context),
187+
):
188+
"""Compress verbose memory abstracts in a directory.
189+
190+
Scans the target directory for memory files with abstracts exceeding
191+
max_abstract_length and truncates them. Use dry_run=True to preview
192+
what would be compressed without modifying anything.
193+
"""
194+
from openviking.utils.compress_service import CompressService
195+
196+
service = CompressService(max_abstract_length=request.max_abstract_length)
197+
result = await service.compress_directory(
198+
uri=request.uri,
199+
ctx=_ctx,
200+
dry_run=request.dry_run,
201+
)
202+
if result.get("status") == "error":
203+
return Response(
204+
status="error",
205+
error=ErrorInfo(code="COMPRESS_FAILED", message=result.get("message", "")),
206+
)
207+
return Response(status="ok", result=result)
208+
209+
175210
async def _do_reindex(
176211
service,
177212
uri: str,
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
2+
# SPDX-License-Identifier: AGPL-3.0
3+
"""Compress service for batch memory abstract reduction."""
4+
5+
from typing import Any, Dict, List
6+
7+
from openviking.server.identity import RequestContext
8+
from openviking.storage.viking_fs import get_viking_fs
9+
from openviking_cli.utils.logger import get_logger
10+
11+
logger = get_logger(__name__)
12+
13+
14+
class CompressService:
15+
"""Scan a directory and re-summarize memories with abstracts exceeding a target length."""
16+
17+
def __init__(self, max_abstract_length: int = 128):
18+
self.max_abstract_length = max_abstract_length
19+
20+
async def compress_directory(
21+
self,
22+
uri: str,
23+
ctx: RequestContext,
24+
dry_run: bool = False,
25+
) -> Dict[str, Any]:
26+
"""Scan directory for memories with verbose abstracts and re-summarize.
27+
28+
Returns stats: files_scanned, files_compressed, estimated_tokens_saved.
29+
"""
30+
viking_fs = get_viking_fs()
31+
if not viking_fs:
32+
return {"status": "error", "message": "VikingFS not available"}
33+
34+
try:
35+
entries = await viking_fs.list_directory(uri, ctx=ctx)
36+
except Exception as e:
37+
logger.error("Failed to list directory %s: %s", uri, e)
38+
return {"status": "error", "message": str(e)}
39+
40+
files_scanned = 0
41+
files_compressed = 0
42+
chars_saved = 0
43+
verbose_files: List[Dict[str, Any]] = []
44+
45+
for entry in entries:
46+
entry_uri = entry.get("uri", "")
47+
if not entry_uri.endswith(".md"):
48+
continue
49+
files_scanned += 1
50+
51+
abstract = entry.get("abstract", "")
52+
if len(abstract) <= self.max_abstract_length:
53+
continue
54+
55+
excess = len(abstract) - self.max_abstract_length
56+
verbose_files.append(
57+
{
58+
"uri": entry_uri,
59+
"current_length": len(abstract),
60+
"excess": excess,
61+
}
62+
)
63+
64+
if not dry_run:
65+
try:
66+
truncated = abstract[: self.max_abstract_length].rsplit(" ", 1)[0] + "..."
67+
await viking_fs.write_metadata(entry_uri, {"abstract": truncated}, ctx=ctx)
68+
files_compressed += 1
69+
chars_saved += excess
70+
except Exception as e:
71+
logger.warning("Failed to compress %s: %s", entry_uri, e)
72+
else:
73+
files_compressed += 1
74+
chars_saved += excess
75+
76+
return {
77+
"status": "ok",
78+
"files_scanned": files_scanned,
79+
"files_compressed": files_compressed,
80+
"chars_saved": chars_saved,
81+
"dry_run": dry_run,
82+
"verbose_files": verbose_files[:20],
83+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
2+
# SPDX-License-Identifier: AGPL-3.0
3+
"""Tests for CompressService."""
4+
5+
import pytest
6+
7+
from openviking.utils.compress_service import CompressService
8+
9+
10+
def test_compress_service_init_defaults():
11+
service = CompressService()
12+
assert service.max_abstract_length == 128
13+
14+
15+
def test_compress_service_init_custom():
16+
service = CompressService(max_abstract_length=256)
17+
assert service.max_abstract_length == 256
18+
19+
20+
@pytest.mark.asyncio
21+
async def test_compress_directory_no_viking_fs(monkeypatch):
22+
"""Returns error when VikingFS is not available."""
23+
monkeypatch.setattr("openviking.utils.compress_service.get_viking_fs", lambda: None)
24+
service = CompressService()
25+
result = await service.compress_directory("viking://user/memories/", ctx=None)
26+
assert result["status"] == "error"
27+
assert "VikingFS" in result["message"]

0 commit comments

Comments
 (0)