Skip to content

Commit 175ce6a

Browse files
feat(save_memory): enrich observations via LLM metadata and extract knowledge
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 5aa2317 commit 175ce6a

File tree

6 files changed

+184
-12
lines changed

6 files changed

+184
-12
lines changed

crates/core/src/observation/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ use std::fmt;
2525

2626
use serde::{Deserialize, Serialize};
2727

28+
/// Extracted metadata fields for enriching save_memory observations.
29+
pub struct ObservationMetadata {
30+
pub facts: Vec<String>,
31+
pub concepts: Vec<Concept>,
32+
pub keywords: Vec<String>,
33+
pub files_read: Vec<String>,
34+
pub files_modified: Vec<String>,
35+
}
36+
2837
/// Ordinal position of a prompt within a session.
2938
///
3039
/// Semantically distinct from token counts or other numeric identifiers —

crates/llm/src/ai_types.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,18 @@ pub struct ObservationJson {
124124
pub struct SummaryJson {
125125
pub summary: String,
126126
}
127+
128+
/// LLM response for metadata enrichment of save_memory observations.
129+
#[derive(Deserialize)]
130+
pub struct MetadataJson {
131+
#[serde(default, deserialize_with = "null_or_invalid_as_default_vec")]
132+
pub facts: Vec<String>,
133+
#[serde(default, deserialize_with = "null_or_invalid_as_default_vec")]
134+
pub concepts: Vec<String>,
135+
#[serde(default, deserialize_with = "null_or_invalid_as_default_vec")]
136+
pub keywords: Vec<String>,
137+
#[serde(default, deserialize_with = "null_or_invalid_as_default_vec")]
138+
pub files_read: Vec<String>,
139+
#[serde(default, deserialize_with = "null_or_invalid_as_default_vec")]
140+
pub files_modified: Vec<String>,
141+
}

crates/llm/src/observation.rs

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
use chrono::Utc;
22
use opencode_mem_core::{
3-
Concept, NoiseLevel, Observation, ObservationInput, ObservationType, sanitize_input,
3+
Concept, NoiseLevel, Observation, ObservationInput, ObservationMetadata, ObservationType,
4+
sanitize_input,
45
};
56
use std::str::FromStr as _;
67

7-
use crate::ai_types::{ChatRequest, Message, ObservationJson, ResponseFormat, ResponseFormatType};
8+
use crate::ai_types::{
9+
ChatRequest, Message, MetadataJson, ObservationJson, ResponseFormat, ResponseFormatType,
10+
};
811
use crate::client::LlmClient;
912
use crate::compression_prompt::build_compression_prompt;
1013
use crate::error::LlmError;
@@ -176,4 +179,65 @@ impl LlmClient {
176179
&candidate_ids,
177180
)
178181
}
182+
183+
/// Extract structured metadata from an observation's title and narrative.
184+
///
185+
/// # Errors
186+
/// Returns an error if the API call or JSON parsing fails.
187+
pub async fn enrich_observation_metadata(
188+
&self,
189+
title: &str,
190+
narrative: &str,
191+
) -> Result<ObservationMetadata, LlmError> {
192+
let prompt = format!(
193+
"Extract structured metadata from this observation.\n\n\
194+
Title: {title}\n\
195+
Narrative: {narrative}\n\n\
196+
Return JSON with these fields:\n\
197+
- \"facts\": array of specific facts (file paths, function names, decisions, concrete details)\n\
198+
- \"concepts\": array from [{concepts}]\n\
199+
- \"keywords\": array of search keywords (3-8 terms)\n\
200+
- \"files_read\": array of file paths mentioned as read/referenced\n\
201+
- \"files_modified\": array of file paths mentioned as modified/created\n\n\
202+
If a field has no relevant data, return an empty array.",
203+
concepts = Concept::ALL_VARIANTS_STR,
204+
);
205+
206+
let request = ChatRequest {
207+
model: self.model(),
208+
messages: vec![Message {
209+
role: "user".to_owned(),
210+
content: prompt,
211+
}],
212+
response_format: ResponseFormat {
213+
format_type: ResponseFormatType::JsonObject,
214+
},
215+
max_tokens: None,
216+
};
217+
218+
let response = self.chat_completion(&request).await?;
219+
let stripped = opencode_mem_core::strip_markdown_json(&response);
220+
let meta: MetadataJson =
221+
serde_json::from_str(stripped).map_err(|e| LlmError::JsonParse {
222+
context: format!(
223+
"metadata enrichment (content: {})",
224+
response.get(..300).unwrap_or(&response)
225+
),
226+
source: e,
227+
})?;
228+
229+
let concepts = meta
230+
.concepts
231+
.iter()
232+
.filter_map(|s| Concept::from_str(s).ok())
233+
.collect();
234+
235+
Ok(ObservationMetadata {
236+
facts: meta.facts,
237+
concepts,
238+
keywords: meta.keywords,
239+
files_read: meta.files_read,
240+
files_modified: meta.files_modified,
241+
})
242+
}
179243
}

crates/service/src/observation_service/save_memory.rs

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Direct memory storage — bypasses LLM compression pipeline.
22
33
use opencode_mem_core::{NoiseLevel, Observation, ObservationType, sanitize_input};
4+
use opencode_mem_storage::traits::ObservationStore;
45

56
use super::{ObservationService, SaveMemoryResult};
67
use crate::ServiceError;
@@ -42,13 +43,12 @@ impl ObservationService {
4243
}
4344

4445
// Project filter check (Privacy)
45-
if let Some(p) = project {
46-
if let Some(ref filter) = self.project_filter {
47-
if filter.is_excluded(p) {
48-
tracing::info!(project = %p, "Skipping save_memory — project is excluded by privacy policy");
49-
return Ok(SaveMemoryResult::Filtered);
50-
}
51-
}
46+
if let Some(p) = project
47+
&& let Some(ref filter) = self.project_filter
48+
&& filter.is_excluded(p)
49+
{
50+
tracing::info!(project = %p, "Skipping save_memory — project is excluded by privacy policy");
51+
return Ok(SaveMemoryResult::Filtered);
5252
}
5353

5454
let title_str = match title {
@@ -92,8 +92,61 @@ impl ObservationService {
9292

9393
let result = self.persist_and_notify(&obs, None).await?;
9494
match result {
95-
Some((persisted_obs, _was_new)) => Ok(SaveMemoryResult::Created(persisted_obs)),
95+
Some((persisted_obs, _was_new)) => {
96+
self.spawn_enrichment(persisted_obs.clone());
97+
Ok(SaveMemoryResult::Created(persisted_obs))
98+
}
9699
None => Ok(SaveMemoryResult::Duplicate(obs)),
97100
}
98101
}
102+
103+
fn spawn_enrichment(&self, obs: Observation) {
104+
let llm = self.llm.clone();
105+
let storage = self.storage.clone();
106+
let svc = self.clone();
107+
108+
tokio::spawn(async move {
109+
let narrative = obs.narrative.as_deref().unwrap_or("");
110+
if narrative.is_empty() && obs.title.is_empty() {
111+
return;
112+
}
113+
114+
match llm.enrich_observation_metadata(&obs.title, narrative).await {
115+
Ok(metadata) => {
116+
let result = storage
117+
.guarded(|| storage.update_observation_metadata(obs.id.as_ref(), &metadata))
118+
.await;
119+
if let Err(e) = result {
120+
tracing::warn!(
121+
observation_id = %obs.id,
122+
error = %e,
123+
"Failed to persist enriched metadata"
124+
);
125+
} else {
126+
tracing::info!(
127+
observation_id = %obs.id,
128+
facts = metadata.facts.len(),
129+
keywords = metadata.keywords.len(),
130+
"Enriched save_memory observation with metadata"
131+
);
132+
}
133+
}
134+
Err(e) => {
135+
tracing::warn!(
136+
observation_id = %obs.id,
137+
error = %e,
138+
"LLM metadata enrichment failed"
139+
);
140+
}
141+
}
142+
143+
if let Err(e) = svc.extract_knowledge(&obs).await {
144+
tracing::warn!(
145+
observation_id = %obs.id,
146+
error = %e,
147+
"Knowledge extraction failed for save_memory observation"
148+
);
149+
}
150+
});
151+
}
99152
}

crates/storage/src/pg_storage/observations.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use super::*;
55
use crate::error::StorageError;
66
use crate::traits::ObservationStore;
77
use async_trait::async_trait;
8-
use opencode_mem_core::{Observation, SearchResult};
8+
use opencode_mem_core::{Observation, ObservationMetadata, SearchResult};
99

1010
impl PgStorage {
1111
async fn update_observation_fields(
@@ -372,4 +372,27 @@ impl ObservationStore for PgStorage {
372372
tx.commit().await?;
373373
Ok(())
374374
}
375+
376+
async fn update_observation_metadata(
377+
&self,
378+
id: &str,
379+
metadata: &ObservationMetadata,
380+
) -> Result<(), StorageError> {
381+
let concepts_str: Vec<String> = metadata.concepts.iter().map(|c| c.to_string()).collect();
382+
sqlx::query(
383+
"UPDATE observations \
384+
SET facts = $1, concepts = $2, keywords = $3, \
385+
files_read = $4, files_modified = $5 \
386+
WHERE id = $6",
387+
)
388+
.bind(serde_json::to_value(&metadata.facts)?)
389+
.bind(serde_json::to_value(&concepts_str)?)
390+
.bind(serde_json::to_value(&metadata.keywords)?)
391+
.bind(serde_json::to_value(&metadata.files_read)?)
392+
.bind(serde_json::to_value(&metadata.files_modified)?)
393+
.bind(id)
394+
.execute(&self.pool)
395+
.await?;
396+
Ok(())
397+
}
375398
}

crates/storage/src/traits/observation.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use async_trait::async_trait;
2-
use opencode_mem_core::{Observation, SearchResult};
2+
use opencode_mem_core::{Observation, ObservationMetadata, SearchResult};
33

44
use crate::error::StorageError;
55

@@ -67,4 +67,12 @@ pub trait ObservationStore: Send + Sync {
6767
newer: &Observation,
6868
force_newer: bool,
6969
) -> Result<(), StorageError>;
70+
71+
/// Update only metadata fields (facts, concepts, keywords, files_read, files_modified)
72+
/// on an existing observation.
73+
async fn update_observation_metadata(
74+
&self,
75+
id: &str,
76+
metadata: &ObservationMetadata,
77+
) -> Result<(), StorageError>;
7078
}

0 commit comments

Comments
 (0)