|
1 | 1 | use chrono::Utc; |
2 | 2 | use opencode_mem_core::{ |
3 | | - Concept, NoiseLevel, Observation, ObservationInput, ObservationType, sanitize_input, |
| 3 | + Concept, NoiseLevel, Observation, ObservationInput, ObservationMetadata, ObservationType, |
| 4 | + sanitize_input, |
4 | 5 | }; |
5 | 6 | use std::str::FromStr as _; |
6 | 7 |
|
7 | | -use crate::ai_types::{ChatRequest, Message, ObservationJson, ResponseFormat, ResponseFormatType}; |
| 8 | +use crate::ai_types::{ |
| 9 | + ChatRequest, Message, MetadataJson, ObservationJson, ResponseFormat, ResponseFormatType, |
| 10 | +}; |
8 | 11 | use crate::client::LlmClient; |
9 | 12 | use crate::compression_prompt::build_compression_prompt; |
10 | 13 | use crate::error::LlmError; |
@@ -176,4 +179,65 @@ impl LlmClient { |
176 | 179 | &candidate_ids, |
177 | 180 | ) |
178 | 181 | } |
| 182 | + |
| 183 | + /// Extract structured metadata from an observation's title and narrative. |
| 184 | + /// |
| 185 | + /// # Errors |
| 186 | + /// Returns an error if the API call or JSON parsing fails. |
| 187 | + pub async fn enrich_observation_metadata( |
| 188 | + &self, |
| 189 | + title: &str, |
| 190 | + narrative: &str, |
| 191 | + ) -> Result<ObservationMetadata, LlmError> { |
| 192 | + let prompt = format!( |
| 193 | + "Extract structured metadata from this observation.\n\n\ |
| 194 | + Title: {title}\n\ |
| 195 | + Narrative: {narrative}\n\n\ |
| 196 | + Return JSON with these fields:\n\ |
| 197 | + - \"facts\": array of specific facts (file paths, function names, decisions, concrete details)\n\ |
| 198 | + - \"concepts\": array from [{concepts}]\n\ |
| 199 | + - \"keywords\": array of search keywords (3-8 terms)\n\ |
| 200 | + - \"files_read\": array of file paths mentioned as read/referenced\n\ |
| 201 | + - \"files_modified\": array of file paths mentioned as modified/created\n\n\ |
| 202 | + If a field has no relevant data, return an empty array.", |
| 203 | + concepts = Concept::ALL_VARIANTS_STR, |
| 204 | + ); |
| 205 | + |
| 206 | + let request = ChatRequest { |
| 207 | + model: self.model(), |
| 208 | + messages: vec![Message { |
| 209 | + role: "user".to_owned(), |
| 210 | + content: prompt, |
| 211 | + }], |
| 212 | + response_format: ResponseFormat { |
| 213 | + format_type: ResponseFormatType::JsonObject, |
| 214 | + }, |
| 215 | + max_tokens: None, |
| 216 | + }; |
| 217 | + |
| 218 | + let response = self.chat_completion(&request).await?; |
| 219 | + let stripped = opencode_mem_core::strip_markdown_json(&response); |
| 220 | + let meta: MetadataJson = |
| 221 | + serde_json::from_str(stripped).map_err(|e| LlmError::JsonParse { |
| 222 | + context: format!( |
| 223 | + "metadata enrichment (content: {})", |
| 224 | + response.get(..300).unwrap_or(&response) |
| 225 | + ), |
| 226 | + source: e, |
| 227 | + })?; |
| 228 | + |
| 229 | + let concepts = meta |
| 230 | + .concepts |
| 231 | + .iter() |
| 232 | + .filter_map(|s| Concept::from_str(s).ok()) |
| 233 | + .collect(); |
| 234 | + |
| 235 | + Ok(ObservationMetadata { |
| 236 | + facts: meta.facts, |
| 237 | + concepts, |
| 238 | + keywords: meta.keywords, |
| 239 | + files_read: meta.files_read, |
| 240 | + files_modified: meta.files_modified, |
| 241 | + }) |
| 242 | + } |
179 | 243 | } |
0 commit comments