@@ -13,6 +13,30 @@ def to_snake(s: str) -> str:
1313 return s [:48 ] or "fact"
1414
1515
16+ def _looks_like_food_preference (text : str ) -> bool :
17+ t = (text or "" ).strip ().lower ()
18+ if not t :
19+ return False
20+ # quick guardrails against obvious technical/professional phrases
21+ technical_markers = (
22+ "python" , "javascript" , "typescript" , "rust" , "golang" , "java" , "c++" , "sql" ,
23+ "coding" , "code" , "repo" , "framework" , "api" , "model" , "research" , "paper" ,
24+ )
25+ if any (m in t for m in technical_markers ):
26+ return False
27+
28+ # explicit food/drink context words
29+ food_context = (
30+ "food" , "foods" , "snack" , "snacks" , "drink" , "drinks" , "tea" , "coffee" , "fruit" ,
31+ "meal" , "dessert" , "breakfast" , "lunch" , "dinner" , "juice" ,
32+ )
33+ if any (m in t for m in food_context ):
34+ return True
35+
36+ # single-item preference strings (e.g., "blueberries") are treated as likely food preferences
37+ return len (t .split ()) <= 3
38+
39+
1640def heuristics (user_text : str , assistant_text : str = "" ) -> Dict [str , List [Dict [str , Any ]]]:
1741 t = (user_text or "" ).strip ()
1842 tl = t .lower ()
@@ -47,6 +71,12 @@ def heuristics(user_text: str, assistant_text: str = "") -> Dict[str, List[Dict[
4771 if m :
4872 facts .append ({"entity" : "user" , "key" : "favorite_drink" , "value" : m .group (1 ).strip ()[:48 ], "kind" : "preference" , "confidence" : 0.97 })
4973
74+ m = re .search (r"\bi\s+(?:love|like|prefer|am\s+into)\s+([a-zA-Z0-9 _'-]{2,48})" , t , flags = re .IGNORECASE )
75+ if m :
76+ pref_text = m .group (1 ).strip ()[:48 ]
77+ pref_key = "favorite_food" if _looks_like_food_preference (pref_text ) else "user_preference"
78+ facts .append ({"entity" : "user" , "key" : pref_key , "value" : pref_text , "kind" : "preference" , "confidence" : 0.84 })
79+
5080 if re .search (r"\bwhen\s+i\s+ask\s+for\s+code" , tl ) and "python" in tl :
5181 facts .append ({"entity" : "user" , "key" : "coding_style" , "value" : "Python 3.11+ and type hints" , "kind" : "preference" , "confidence" : 0.9 })
5282
@@ -63,10 +93,47 @@ def heuristics(user_text: str, assistant_text: str = "") -> Dict[str, List[Dict[
6393
def should_call_llm(user_text: str, assistant_text: str = "") -> bool:
    """Decide whether the user message is worth sending to the LLM extractor.

    Returns True when the lower-cased user text contains any of the trigger
    phrases below, or one of the outcome words "worked", "fixed" or "solved".
    Matching is by plain substring. ``assistant_text`` is accepted for
    signature compatibility but is not inspected.
    """
    # Normalise typographic apostrophes so "don’t" / "I’m into" hit the same
    # triggers as their ASCII spellings.
    tl = (user_text or "").lower().replace("\u2019", "'").replace("\u2018", "'")
    triggers = (
        "my ", "i am", "i prefer", "i like", "i love", "i hate", "i'm into", "im into",
        "call me", "timezone", "for this session", "from now on", "always", "don't", "dont",
        "as a ", "i work as", "i teach", "i research", "i code in", "please remember",
    )
    if any(trigger in tl for trigger in triggers):
        return True
    return any(word in tl for word in ("worked", "fixed", "solved"))
68102
69103
104+ def _coerce_llm_payload (data : Dict [str , Any ]) -> Dict [str , List [Dict [str , Any ]]]:
105+ """Normalize alternate JSON shapes into canonical {'facts': [...], 'skills': [...]} payload."""
106+ if not isinstance (data , dict ):
107+ return {"facts" : [], "skills" : []}
108+
109+ facts = data .get ("facts" , [])
110+ skills = data .get ("skills" , [])
111+
112+ if not isinstance (facts , list ):
113+ facts = []
114+ if not isinstance (skills , list ):
115+ skills = []
116+
117+ # Support generic single-object memory outputs, e.g.
118+ # {"memory_text":"I love blueberries","category":"food","emotion":"positive"}
119+ if not facts :
120+ memory_text = str (data .get ("memory_text" , "" )).strip ()
121+ category = to_snake (str (data .get ("category" , "" )).strip ())
122+ if memory_text and category :
123+ mapped_key = "favorite_food" if category in {"food" , "drink" , "snack" } else "user_preference"
124+ facts .append (
125+ {
126+ "entity" : "user" ,
127+ "key" : mapped_key ,
128+ "value" : memory_text [:120 ],
129+ "kind" : "preference" ,
130+ "confidence" : 0.82 ,
131+ }
132+ )
133+
134+ return {"facts" : facts [:8 ], "skills" : skills [:1 ]}
135+
136+
70137async def llm_extract (client , user_text : str , assistant_text : str = "" , tool_summaries : List [str ] | None = None ) -> Dict [str , List [Dict [str , Any ]]]:
71138 if not MEMORY_EXTRACTION_ENABLED or client is None :
72139 return {"facts" : [], "skills" : []}
@@ -95,21 +162,38 @@ async def llm_extract(client, user_text: str, assistant_text: str = "", tool_sum
95162 except Exception :
96163 return {"facts" : [], "skills" : []}
97164
98- facts = data .get ("facts" , []) if isinstance (data , dict ) else []
99- skills = data .get ("skills" , []) if isinstance (data , dict ) else []
100- if not isinstance (facts , list ):
101- facts = []
102- if not isinstance (skills , list ):
103- skills = []
104- return {"facts" : facts [:8 ], "skills" : skills [:1 ]}
165+ return _coerce_llm_payload (data )
166+
167+
def _normalize_key_value(key: str, value: str) -> tuple[str, str]:
    """Return a ``(key, value)`` pair with the key mapped to its canonical name.

    The key is first passed through ``to_snake``; if the result is one of the
    known aliases below it is replaced by the canonical key, otherwise it is
    returned as-is. The value is stripped of surrounding whitespace and
    truncated to 120 characters (a falsy value becomes an empty string).
    """
    canonical_by_alias = {
        "fav_color": "favorite_color",
        "colour": "favorite_color",
        "favorite_colour": "favorite_color",
        "fav_drink": "favorite_drink",
        "favorite_beverage": "favorite_drink",
        "fav_food": "favorite_food",
        "favorite_snack": "favorite_food",
        "liked_food": "favorite_food",
        "food_preference": "favorite_food",
        "job": "work_role",
        "occupation": "work_role",
        "profession": "work_role",
        "risk": "risk_profile",
        "risk_tolerance": "risk_profile",
        "risk_appetite": "risk_profile",
        "style": "communication_style",
        "communication_preference": "communication_style",
    }

    snake_key = to_snake(key)
    cleaned_value = (value or "").strip()[:120]
    return canonical_by_alias.get(snake_key, snake_key), cleaned_value
105186
106187
107188def sanitize (data : Dict [str , List [Dict [str , Any ]]]) -> Dict [str , List [Dict [str , Any ]]]:
108189 out_facts , out_skills = [], []
109- allow = {"timezone" , "preferred_name" , "output_format" , "favorite_color" , "favorite_drink" , "dog_name" , "default_location" , "name" , "coding_style" , "run_memory_test_command" }
190+ allow = {
191+ "timezone" , "preferred_name" , "output_format" , "favorite_color" , "favorite_drink" , "favorite_food" ,
192+ "dog_name" , "default_location" , "name" , "coding_style" , "run_memory_test_command" ,
193+ "work_role" , "communication_style" , "risk_profile" , "primary_goal" , "user_preference" ,
194+ }
110195 for f in data .get ("facts" , []) or []:
111- key = to_snake (str (f .get ("key" , "" )))
112- value = str (f .get ("value" , "" )).strip ()[:120 ]
196+ key , value = _normalize_key_value (str (f .get ("key" , "" )), str (f .get ("value" , "" )))
113197 if not key or not value :
114198 continue
115199 conf = max (0.0 , min (1.0 , float (f .get ("confidence" , 0.6 ) or 0.6 )))
0 commit comments