Skip to content
Draft
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
5cd6068
deps(website): add @xenova/transformers and @mlc-ai/web-llm
arolariu May 6, 2026
7d0b8d4
chore(invoice-ai): scaffold AI module barrel placeholder
arolariu May 6, 2026
7530327
feat(invoice-ai): scaffold shared types + barrel placeholder
arolariu May 6, 2026
db6bcdf
feat(invoice-ai): hardware eligibility gate for Layer 2 slot LLM
arolariu May 6, 2026
300ca58
fix(invoice-ai): close eligibility-gate logic gap + add JSDoc and bra…
arolariu May 6, 2026
d9ef20c
feat(invoice-ai): intent catalog with 10 v1 entries
arolariu May 6, 2026
21c274f
feat(invoice-ai): locale-aware slot lexicon for timeframe + topK
arolariu May 6, 2026
96c1a00
feat(invoice-ai): intent resolver with strict slot validation
arolariu May 6, 2026
71e24f8
feat(invoice-ai): seed phrases for embedding classifier (en/ro/fr)
arolariu May 6, 2026
cfee2ce
test(invoice-ai): aggregator fixtures (empty, single-currency, multi-…
arolariu May 6, 2026
a085077
feat(invoice-ai): shared aggregator helpers (date windows + currency)
arolariu May 6, 2026
560d3ed
feat(invoice-ai): totalSpend aggregator
arolariu May 6, 2026
27c0077
feat(invoice-ai): invoiceCount aggregator with optional category brea…
arolariu May 6, 2026
8b2a3de
feat(invoice-ai): topSpendingByCategory aggregator
arolariu May 6, 2026
e185c0a
feat(invoice-ai): topMerchantsByCount aggregator
arolariu May 6, 2026
be50bb2
feat(invoice-ai): topMerchantsBySpend aggregator (per-currency top-K)
arolariu May 6, 2026
8bc5b95
feat(invoice-ai): topProductsByCount aggregator
arolariu May 6, 2026
509b319
feat(invoice-ai): topProductsBySpend aggregator
arolariu May 6, 2026
2a105e7
feat(invoice-ai): spendComparison aggregator with deltaPct=null guard
arolariu May 6, 2026
ac14ab8
feat(invoice-ai): averageSpendPerVisit aggregator with optional merch…
arolariu May 6, 2026
d3cd838
feat(invoice-ai): categoryBreakdown aggregator (percent-of-total per …
arolariu May 6, 2026
9d82db9
feat(invoice-ai): aggregator registry with exhaustive intent dispatch
arolariu May 6, 2026
26b7d41
feat(invoice-ai): InvoiceAssistant.* i18n namespace (en/ro/fr)
arolariu May 6, 2026
ff7bbdb
feat(invoice-ai): answer renderer dispatches StructuredAnswer to pros…
arolariu May 6, 2026
36fc098
feat(invoice-ai): viz primitives — bar/single-stat/comparison/donut
arolariu May 6, 2026
f9070b3
feat(invoice-ai): session-only state machine reducer
arolariu May 6, 2026
6b4986e
feat(invoice-ai): build-time embeddings generator + placeholder seed …
arolariu May 6, 2026
2741186
feat(invoice-ai): Layer 1 embedding worker (multilingual-e5-small)
arolariu May 6, 2026
d35d7da
feat(invoice-ai): embedding host factory using createWorkerHost
arolariu May 6, 2026
43c5fc5
feat(invoice-ai): Layer 2 slot extractor worker (Qwen2.5-1.5B in-proc…
arolariu May 6, 2026
f9d1a0a
feat(invoice-ai): slot extractor host factory using createWorkerHost
arolariu May 6, 2026
ea47e6b
feat(invoice-ai): useInvoiceAssistant hook ties reducer + hosts + pip…
arolariu May 6, 2026
bab0243
feat(invoice-ai): AssistantMessage component renders one history entry
arolariu May 6, 2026
349269b
feat(invoice-ai): AssistantPanel composes input + history + layer2 CTA
arolariu May 6, 2026
ffaf85e
feat(invoice-ai): wire AssistantPanel into GenerativeView chat tab
arolariu May 6, 2026
ded787b
chore(invoice-ai): regenerate seed embeddings + fix script imports fo…
arolariu May 6, 2026
1820edd
tools(invoice-ai): calibration script for embedding confidence thresh…
arolariu May 6, 2026
c677b99
test(invoice-ai): Playwright E2E spec for the assistant tab
arolariu May 6, 2026
a8bd811
fix(invoice-ai): restore corrupt host files + slotHost cleanup + retr…
arolariu May 6, 2026
2f03231
fix(invoice-ai): disable submit button during slot-extracting state too
arolariu May 6, 2026
d3870d9
fix(invoice-ai): three-layer fix for Transformers.js Turbopack crash …
arolariu May 7, 2026
172c48e
refactor(invoice-ai): migrate Layer 1 from @xenova/transformers to @h…
arolariu May 7, 2026
8caa643
chore: ignore Clerk local config directory
arolariu May 7, 2026
d6dc375
fix(invoice-ai): three UX fixes — singleton hosts, empty-corpus, inel…
arolariu May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
554 changes: 546 additions & 8 deletions package-lock.json

Large diffs are not rendered by default.

70 changes: 70 additions & 0 deletions scripts/calibrate-assistant-embeddings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
* @fileoverview Calibration tool for the embedding-classifier confidence thresholds.
* @module scripts/calibrate-assistant-embeddings
*
* @remarks
* Reports the intra-class vs inter-class cosine similarity distribution
* over the 300 seed phrases (pairs are only compared within the same locale)
* and recommends confidence thresholds.
*
* Manual: node scripts/calibrate-assistant-embeddings.ts
*/

import seedRows from "../sites/arolariu.ro/src/app/domains/invoices/_components/ai/workers/seedEmbeddings.json" with {type: "json"};

/** One entry of `seedEmbeddings.json`: a seed phrase together with its embedding vector. */
type Row = {
  /** Locale tag the phrase belongs to; only rows with equal locales are compared. */
  locale: string;
  /** Intent label; equal labels make a pair "intra-class", different labels "inter-class". */
  intent: string;
  /** The seed phrase text. */
  phrase: string;
  /** Embedding vector for the phrase. */
  embedding: number[];
};

/**
 * Cosine similarity of two numeric vectors.
 *
 * @remarks
 * Only the first `min(a.length, b.length)` components take part; any extra
 * components of the longer vector are ignored.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The cosine of the angle between the (truncated) vectors, or `0`
 * when either has zero magnitude (including the empty-vector case).
 */
function cosineSim(a: number[], b: number[]): number {
  const overlap = Math.min(a.length, b.length);
  let dot = 0;
  let normSqA = 0;
  let normSqB = 0;
  let idx = 0;
  while (idx < overlap) {
    const x = a[idx]!;
    const y = b[idx]!;
    dot += x * y;
    normSqA += x * x;
    normSqB += y * y;
    idx += 1;
  }
  const magnitude = Math.sqrt(normSqA) * Math.sqrt(normSqB);
  // A zero-magnitude vector has no direction; report 0 rather than dividing by zero.
  if (magnitude === 0) {
    return 0;
  }
  return dot / magnitude;
}

const rows = seedRows as Row[];
const intra: number[] = [];
const inter: number[] = [];

// Walk every ordered pair of distinct rows. Pairs from different locales are
// ignored; the remaining similarities are bucketed by whether the two rows
// share an intent (intra-class) or not (inter-class).
for (const left of rows) {
  for (const right of rows) {
    const sameRow = left === right;
    if (sameRow || left.locale !== right.locale) continue;
    const similarity = cosineSim(left.embedding, right.embedding);
    if (left.intent === right.intent) {
      intra.push(similarity);
    } else {
      inter.push(similarity);
    }
  }
}

/**
 * Summary statistics for a list of numbers.
 *
 * @remarks
 * The input is copied before sorting, so `arr` is left untouched. The
 * percentiles are read from the ascending copy at index
 * `floor(length * fraction)`.
 *
 * @param arr - Values to summarise.
 * @returns Mean, minimum, maximum, 10th and 90th percentile; all zeros when
 * `arr` is empty.
 */
function stats(arr: number[]): {mean: number; min: number; max: number; p10: number; p90: number} {
  const count = arr.length;
  if (count === 0) {
    return {mean: 0, min: 0, max: 0, p10: 0, p90: 0};
  }

  const ascending = Array.from(arr).sort((lo, hi) => lo - hi);

  let total = 0;
  for (const value of ascending) {
    total += value;
  }

  const valueAtFraction = (fraction: number): number => ascending[Math.floor(count * fraction)]!;

  return {
    mean: total / count,
    min: ascending[0]!,
    max: ascending[count - 1]!,
    p10: valueAtFraction(0.1),
    p90: valueAtFraction(0.9),
  };
}

const intraStats = stats(intra);
const interStats = stats(inter);

// The suggested thresholds are the low tail of same-intent similarity and the
// high tail of different-intent similarity, each shown with two decimals.
const canonicalThreshold = intraStats.p10.toFixed(2);
const uncertainThreshold = interStats.p90.toFixed(2);

console.log("=== Calibration Report ===");
console.log(`Seed rows: ${rows.length} (across locales).`);
console.log(`Intra-class pairs: ${intra.length}`);
console.log(`Inter-class pairs: ${inter.length}`);
console.log("");

console.log("Intra-class (same intent, same locale):", intraStats);
console.log("Inter-class (different intent, same locale):", interStats);
console.log("");

console.log("Recommended thresholds:");
console.log(` canonical (>=): ${canonicalThreshold} (10th percentile of intra-class)`);
console.log(` uncertain (>=): ${uncertainThreshold} (90th percentile of inter-class)`);
console.log("");

console.log("Update CONFIDENCE_THRESHOLDS in types.ts accordingly.");
53 changes: 53 additions & 0 deletions scripts/generate.embeddings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**
* @fileoverview Build-time embeddings generator for the invoice AI assistant.
* @module scripts/generate.embeddings
*
* @remarks
* Loads multilingual-e5-small via Transformers.js, encodes all 300 seed
* phrases (10 intents x 10 phrasings x 3 locales) with `query: ` prefix +
* mean-pool + L2-normalize, and writes the matrix to:
* sites/arolariu.ro/src/app/domains/invoices/_components/ai/workers/seedEmbeddings.json
*
* First run downloads the 118 MB model into the Transformers.js cache.
* Re-run whenever seedPhrases.{en,ro,fr}.ts changes.
*
* Manual: node scripts/generate.embeddings.ts
*/

import {pipeline} from "@xenova/transformers";
import * as fs from "node:fs/promises";
import * as path from "node:path";
import {SEED_PHRASES_EN} from "../sites/arolariu.ro/src/app/domains/invoices/_components/ai/intents/seedPhrases.en.ts";
import {SEED_PHRASES_RO} from "../sites/arolariu.ro/src/app/domains/invoices/_components/ai/intents/seedPhrases.ro.ts";
import {SEED_PHRASES_FR} from "../sites/arolariu.ro/src/app/domains/invoices/_components/ai/intents/seedPhrases.fr.ts";

/** Locale tags for which seed phrases are encoded by this script. */
type Locale =
  | "en"
  | "ro"
  | "fr";

/**
 * Encodes every seed phrase of every locale and writes the resulting rows to
 * `seedEmbeddings.json`.
 *
 * @remarks
 * Phrases are encoded one at a time, each prefixed with `query: `, using mean
 * pooling and normalization. The output path is relative, so it is resolved
 * against the current working directory — run the script from the repository
 * root (`node scripts/generate.embeddings.ts`).
 *
 * @returns A promise that settles once the JSON file has been written; it
 * rejects if loading the model, encoding a phrase, or writing the file fails.
 */
async function main(): Promise<void> {
  // The extractor is deliberately left untyped; only its call result's `data` is used below.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const extractor: any = await pipeline("feature-extraction", "Xenova/multilingual-e5-small");
  const allLocales: Array<[Locale, typeof SEED_PHRASES_EN]> = [
    ["en", SEED_PHRASES_EN],
    ["ro", SEED_PHRASES_RO],
    ["fr", SEED_PHRASES_FR],
  ];
  const rows: Array<{locale: Locale; intent: string; phrase: string; embedding: number[]}> = [];
  for (const [locale, phrases] of allLocales) {
    for (const intent of Object.keys(phrases)) {
      for (const phrase of phrases[intent as keyof typeof phrases]) {
        const out = await extractor(`query: ${phrase}`, {pooling: "mean", normalize: true});
        // Copy the typed array into a plain number[] so it serializes as a JSON array.
        rows.push({locale, intent, phrase, embedding: Array.from(out.data as Float32Array)});
      }
    }
  }
  const target = path.join(
    "sites/arolariu.ro/src/app/domains/invoices/_components/ai/workers/seedEmbeddings.json",
  );
  await fs.writeFile(target, JSON.stringify(rows, null, 0), "utf8");
  console.log(`Wrote ${rows.length} embeddings to ${target}.`);
}

// Log whatever made the generator fail and terminate with a non-zero exit code.
const exitWithFailure = (reason: unknown): void => {
  console.error(reason);
  process.exit(1);
};

main().catch(exitWithFailure);
78 changes: 78 additions & 0 deletions sites/arolariu.ro/messages/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -4367,6 +4367,84 @@
"uploadScans": "Upload Scans",
"viewScans": "View Scans"
},
"InvoiceAssistant": {
"panel": {
"title": "Ask your invoices",
"subtitle": "Analytical questions over your local receipts. Your data stays in your browser.",
"inputPlaceholder": "Ask a question — e.g. 'Top merchants last month'",
"submit": "Ask",
"callStatus": "Last answer"
},
"states": {
"capabilityCheck": "Preparing assistant…",
"embeddingLoading": "Downloading multilingual analyzer ({progress}%) — works on most devices.",
"embeddingFailed": "Couldn't load the analyzer. Check your connection and try again.",
"embeddingReady": "Assistant ready.",
"classifying": "Analyzing your question…",
"slotExtracting": "Working through paraphrasing — this takes a few seconds for tricky phrasings.",
"slotLlmTimeout": "Took too long to understand that one. Want to try again?",
"slotLlmUnavailable": "I couldn't quite catch that. Try a clearer phrasing, or enable enhanced understanding (~1 GB) for paraphrase support.",
"aggregatorError": "Something went wrong calculating your answer. Try a different question.",
"workersUnavailable": "Your browser doesn't support Web Workers. Use a modern browser to access the assistant.",
"outOfScope": "I can answer questions about: top categories, top merchants, totals, comparisons, averages, breakdowns. Try one of these:"
},
"layer2": {
"ctaTitle": "Enhanced understanding available",
"ctaSubtitle": "Download the optional 1 GB language model to unlock paraphrase support.",
"ctaButton": "Enable",
"downloading": "Downloading enhanced model ({progress}%)…",
"ready": "Enhanced understanding active",
"failed": "Enhanced model download failed",
"retry": "Retry",
"unavailableTooltip": "Enhanced understanding requires WebGPU + at least 4 GB device memory + 2 GB free storage."
},
"actions": {
"retry": "Try again",
"reset": "Reset assistant",
"cancel": "Cancel",
"useExample": "Try this question"
},
"answers": {
"totalSpend": "In {timeframe} you spent a total of {amount} across {count, plural, one {# receipt} other {# receipts}}.",
"totalSpendEmpty": "You have no receipts in {timeframe}. Try {alternatives}.",
"totalSpendCategory": "In {timeframe} you spent {amount} on {category} across {count, plural, one {# receipt} other {# receipts}}.",
"invoiceCount": "You have {count, plural, one {# receipt} other {# receipts}} from {timeframe}.",
"invoiceCountWithBreakdown": "You have {count, plural, one {# receipt} other {# receipts}} from {timeframe} — {breakdown}.",
"invoiceCountEmpty": "You have no receipts in {timeframe}. Try {alternatives}.",
"topSpendingByCategory": "In {timeframe} you spent the most on {topList}.",
"topMerchantsByCount": "In {timeframe}, your top {topK} stores by visit count: {topList}.",
"topMerchantsBySpend": "Your highest-spend merchants in {timeframe}: {topList}.",
"topProductsByCount": "Most-purchased items in {timeframe}: {topList}.",
"topProductsBySpend": "You spent the most on: {topList}.",
"spendComparison": "You spent {amountA} in {timeframeA} vs {amountB} in {timeframeB} — that's {direction} ({deltaPct}%).",
"spendComparisonNoChange": "You spent {amountA} in {timeframeA} vs {amountB} in {timeframeB} — about the same.",
"averageSpendPerVisit": "Across {sampleSize, plural, one {# visit} other {# visits}} in {timeframe}, your average basket was {amount}.",
"averageSpendPerVisitMerchant": "Across {sampleSize, plural, one {# visit} other {# visits}} at {merchantName} in {timeframe}, your average basket was {amount}.",
"categoryBreakdown": "Your {timeframe} spending breakdown: {topList}."
},
"timeframes": {
"this-week": "this week",
"last-week": "last week",
"this-month": "this month",
"last-month": "last month",
"last-3-months": "the past 3 months",
"last-6-months": "the past 6 months",
"this-quarter": "this quarter",
"last-quarter": "last quarter",
"this-year": "this year",
"last-year": "last year",
"all-time": "all time",
"custom": "the selected period"
},
"exampleChips": {
"topMerchantsThisMonth": "Top merchants this month",
"totalGrocerySpendLastMonth": "Total grocery spend last month",
"topCategoriesLastQuarter": "Top categories last quarter",
"averageBasketSize": "Average basket size",
"monthVsLastMonth": "This month vs last month",
"topProducts": "Most-purchased products"
}
},
"Profile": {
"header": {
"completionHint": "Complete your profile to unlock all features",
Expand Down
78 changes: 78 additions & 0 deletions sites/arolariu.ro/messages/fr.json
Original file line number Diff line number Diff line change
Expand Up @@ -4367,6 +4367,84 @@
"uploadScans": "Télécharger des Scans",
"viewScans": "Voir les Scans"
},
"InvoiceAssistant": {
"panel": {
"title": "Interrogez vos factures",
"subtitle": "Questions analytiques sur vos reçus locaux. Vos données restent dans votre navigateur.",
"inputPlaceholder": "Posez une question — ex : 'Top marchands le mois dernier'",
"submit": "Demander",
"callStatus": "Dernière réponse"
},
"states": {
"capabilityCheck": "Préparation de l'assistant…",
"embeddingLoading": "Téléchargement de l'analyseur multilingue ({progress}%) — fonctionne sur la plupart des appareils.",
"embeddingFailed": "Impossible de charger l'analyseur. Vérifiez votre connexion et réessayez.",
"embeddingReady": "Assistant prêt.",
"classifying": "Analyse de votre question…",
"slotExtracting": "Travail sur la paraphrase — cela prend quelques secondes pour les formulations complexes.",
"slotLlmTimeout": "Trop long à comprendre. Voulez-vous réessayer ?",
"slotLlmUnavailable": "Je n'ai pas tout à fait saisi. Essayez une formulation plus claire ou activez la compréhension améliorée (~1 Go) pour le support de paraphrase.",
"aggregatorError": "Quelque chose s'est mal passé lors du calcul de votre réponse. Essayez une autre question.",
"workersUnavailable": "Votre navigateur ne prend pas en charge les Web Workers. Utilisez un navigateur moderne pour accéder à l'assistant.",
"outOfScope": "Je peux répondre aux questions sur : top catégories, top marchands, totaux, comparaisons, moyennes, répartitions. Essayez l'une de celles-ci :"
},
"layer2": {
"ctaTitle": "Compréhension améliorée disponible",
"ctaSubtitle": "Téléchargez le modèle linguistique optionnel de 1 Go pour débloquer le support de paraphrase.",
"ctaButton": "Activer",
"downloading": "Téléchargement du modèle amélioré ({progress}%)…",
"ready": "Compréhension améliorée active",
"failed": "Échec du téléchargement du modèle amélioré",
"retry": "Réessayer",
"unavailableTooltip": "La compréhension améliorée nécessite WebGPU + au moins 4 Go de mémoire + 2 Go d'espace libre."
},
"actions": {
"retry": "Réessayer",
"reset": "Réinitialiser l'assistant",
"cancel": "Annuler",
"useExample": "Essayer cette question"
},
"answers": {
"totalSpend": "En {timeframe} vous avez dépensé un total de {amount} sur {count, plural, one {# reçu} other {# reçus}}.",
"totalSpendEmpty": "Vous n'avez aucun reçu pour {timeframe}. Essayez {alternatives}.",
"totalSpendCategory": "En {timeframe} vous avez dépensé {amount} en {category} sur {count, plural, one {# reçu} other {# reçus}}.",
"invoiceCount": "Vous avez {count, plural, one {# reçu} other {# reçus}} pour {timeframe}.",
"invoiceCountWithBreakdown": "Vous avez {count, plural, one {# reçu} other {# reçus}} pour {timeframe} — {breakdown}.",
"invoiceCountEmpty": "Vous n'avez aucun reçu pour {timeframe}. Essayez {alternatives}.",
"topSpendingByCategory": "En {timeframe} vous avez le plus dépensé en {topList}.",
"topMerchantsByCount": "En {timeframe}, vos {topK} meilleurs magasins par visites : {topList}.",
"topMerchantsBySpend": "Vos marchands aux dépenses les plus élevées en {timeframe} : {topList}.",
"topProductsByCount": "Articles les plus achetés en {timeframe} : {topList}.",
"topProductsBySpend": "Vous avez le plus dépensé en : {topList}.",
"spendComparison": "Vous avez dépensé {amountA} en {timeframeA} vs {amountB} en {timeframeB} — soit {direction} ({deltaPct}%).",
"spendComparisonNoChange": "Vous avez dépensé {amountA} en {timeframeA} vs {amountB} en {timeframeB} — à peu près identique.",
"averageSpendPerVisit": "Sur {sampleSize, plural, one {# visite} other {# visites}} en {timeframe}, votre panier moyen était de {amount}.",
"averageSpendPerVisitMerchant": "Sur {sampleSize, plural, one {# visite} other {# visites}} chez {merchantName} en {timeframe}, votre panier moyen était de {amount}.",
"categoryBreakdown": "Répartition de vos dépenses pour {timeframe} : {topList}."
},
"timeframes": {
"this-week": "cette semaine",
"last-week": "la semaine dernière",
"this-month": "ce mois",
"last-month": "le mois dernier",
"last-3-months": "les 3 derniers mois",
"last-6-months": "les 6 derniers mois",
"this-quarter": "ce trimestre",
"last-quarter": "le trimestre dernier",
"this-year": "cette année",
"last-year": "l'année dernière",
"all-time": "depuis toujours",
"custom": "la période sélectionnée"
},
"exampleChips": {
"topMerchantsThisMonth": "Top marchands ce mois",
"totalGrocerySpendLastMonth": "Dépenses totales en alimentaire le mois dernier",
"topCategoriesLastQuarter": "Top catégories le trimestre dernier",
"averageBasketSize": "Taille moyenne du panier",
"monthVsLastMonth": "Ce mois vs le mois dernier",
"topProducts": "Produits les plus achetés"
}
},
"Profile": {
"header": {
"completionHint": "Complétez votre profil pour débloquer toutes les fonctionnalités",
Expand Down
Loading
Loading