Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ This creates:

The generated `mcp.json` includes all environment variables (empty = not configured). Edit the file to fill in your values.

⚠️ **Configure embedding BEFORE the MCP server starts for the first time.** Tables are created on first startup with the configured dimension.
💡 **Embedding dimension** defaults to `EMBEDDING_DIM=0` (auto-infer): on every startup with `EMBEDDING_DIM=0`, Memoria probes your embedding service and uses the returned vector length. Set `EMBEDDING_DIM` explicitly if the service may be unavailable at boot time. Either way, the dimension is locked into the database schema on first run — changing it later requires dropping the schema.

### 4. Restart & verify

Expand Down Expand Up @@ -529,8 +529,7 @@ Leave all empty to use local embedding (all-MiniLM-L6-v2, dim=384).
**💡 Local Embedding Tips:**
Local embedding requires building from source with `--features local-embedding` (pre-built binaries don't include it). See [Local Embedding Guide](skills/local-embedding/SKILL.md) for build instructions, supported models, and troubleshooting.

**⚠️ CRITICAL: Configure embedding BEFORE the MCP server starts for the first time.**
Tables are created on first startup with the configured dimension. Changing it later requires re-creating the embedding column (destructive).
**💡 Embedding dimension is auto-inferred by default** (`EMBEDDING_DIM=0`): Memoria probes your embedding service on startup to detect the correct dimension. The dimension is then locked into the database schema — changing models later requires dropping the schema (destructive). Set `EMBEDDING_DIM` explicitly if you need deterministic startup without a probe call.

---

Expand Down
66 changes: 66 additions & 0 deletions memoria/crates/memoria-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,16 +327,25 @@ async fn cmd_serve(db_url: Option<String>, port: u16, master_key: String) -> Res
cfg.db_url = v;
}

// Auto-infer embedding dimension when EMBEDDING_DIM=0 (or unset).
// Probes the embedding service with a test call and uses the returned
// vector length as the actual dimension.
if cfg.embedding_dim == 0 && cfg.has_embedding() {
cfg.embedding_dim = probe_embedding_dim(&cfg).await?;
}

tracing::info!(
db_url = %cfg.db_url, port = port,
instance_id = %cfg.instance_id,
embedding_dim = cfg.embedding_dim,
has_llm = cfg.has_llm(), has_embedding = cfg.has_embedding(),
governance_plugin_binding = %cfg.governance_plugin_binding,
"Starting Memoria API server"
);

let store = SqlMemoryStore::connect(&cfg.db_url, cfg.embedding_dim).await?;
store.migrate().await?;
store.check_embedding_dim_compat().await?;

let pool = MySqlPool::connect(&cfg.db_url).await?;
let git = Arc::new(GitForDataService::new(pool, &cfg.db_name));
Expand Down Expand Up @@ -431,9 +440,15 @@ async fn cmd_mcp(
cfg.db_name = v;
}

// Auto-infer embedding dimension when EMBEDDING_DIM=0 (or unset).
if cfg.embedding_dim == 0 && cfg.has_embedding() {
cfg.embedding_dim = probe_embedding_dim(&cfg).await?;
}

tracing::info!(
db_url = %cfg.db_url,
embedding_provider = %cfg.embedding_provider,
embedding_dim = cfg.embedding_dim,
has_llm = cfg.has_llm(),
governance_plugin_binding = %cfg.governance_plugin_binding,
user = %cfg.user,
Expand All @@ -442,6 +457,7 @@ async fn cmd_mcp(

let store = SqlMemoryStore::connect(&cfg.db_url, cfg.embedding_dim).await?;
store.migrate().await?;
store.check_embedding_dim_compat().await?;

let pool = MySqlPool::connect(&cfg.db_url).await?;
let git = Arc::new(GitForDataService::new(pool, &cfg.db_name));
Expand Down Expand Up @@ -788,6 +804,56 @@ fn cmd_plugin_dev_keygen(dir: &Path) -> Result<()> {

// ── Shared helpers ────────────────────────────────────────────────────────────

/// Ask the configured embedding service how long its vectors are.
///
/// Used by the startup paths when `EMBEDDING_DIM=0`: a single embedding
/// request is issued for a short fixed probe string, and the length of the
/// returned vector is reported as the dimension.
///
/// # Errors
/// Fails when the embedding call itself returns an error, or when it
/// succeeds but yields an empty vector. Both messages advise setting
/// `EMBEDDING_DIM` explicitly.
async fn probe_embedding_dim(cfg: &memoria_service::Config) -> Result<usize> {
    use memoria_core::interfaces::EmbeddingProvider;
    use memoria_embedding::HttpEmbedder;

    // Throw-away embedder used only for the probe. The trailing `0` is a
    // placeholder dimension; presumably `embed()` does not depend on it
    // (NOTE(review): confirm against `HttpEmbedder`).
    let probe_client = HttpEmbedder::new(
        &cfg.embedding_base_url,
        &cfg.embedding_api_key,
        &cfg.embedding_model,
        0,
    );

    tracing::info!(
        model = %cfg.embedding_model,
        base_url = %cfg.embedding_base_url,
        "EMBEDDING_DIM=0: probing embedding service to auto-infer dimension"
    );

    let probe_vector = match probe_client.embed("dimension probe").await {
        Ok(values) => values,
        Err(e) => {
            return Err(anyhow::anyhow!(
                "EMBEDDING_DIM=0 but the embedding probe failed: {e}. \
                 Set EMBEDDING_DIM explicitly (e.g. EMBEDDING_DIM=768 for \
                 nomic-embed-text, EMBEDDING_DIM=1024 for BAAI/bge-m3) or \
                 check that your embedding service is reachable."
            ));
        }
    };

    match probe_vector.len() {
        // A zero-length vector cannot define a usable column dimension.
        0 => Err(anyhow::anyhow!(
            "EMBEDDING_DIM=0: embedding service returned an empty vector. \
             Set EMBEDDING_DIM explicitly."
        )),
        inferred => {
            tracing::info!(embedding_dim = inferred, "Auto-inferred embedding dimension");
            Ok(inferred)
        }
    }
}

fn build_embedder(
cfg: &memoria_service::Config,
) -> Option<Arc<dyn memoria_core::interfaces::EmbeddingProvider>> {
Expand Down
2 changes: 1 addition & 1 deletion memoria/crates/memoria-service/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl Config {
let embedding_dim = std::env::var("EMBEDDING_DIM")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1024usize);
.unwrap_or(0usize); // 0 = auto-infer from embedding service at startup

let llm_api_key = std::env::var("LLM_API_KEY").ok().filter(|s| !s.is_empty());

Expand Down
47 changes: 47 additions & 0 deletions memoria/crates/memoria-storage/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,53 @@ impl SqlMemoryStore {
Ok(())
}

/// Check that the configured embedding dimension matches the dimension
/// already stored in the database schema.
///
/// If `mem_memories` already exists with a different dimension, returning
/// an error here is far better than silently failing on the first INSERT.
/// Called after `migrate()` so the table is guaranteed to exist.
///
/// # Errors
/// Returns [`MemoriaError::Internal`] when a mismatch is detected,
/// with a human-readable message explaining how to resolve it.
pub async fn check_embedding_dim_compat(&self) -> Result<(), MemoriaError> {
// Query the actual column type stored in the schema, e.g. "vecf32(768)"
let col_type: Option<String> = sqlx::query_scalar(
"SELECT column_type \
FROM information_schema.columns \
WHERE table_schema = DATABASE() \
AND table_name = 'mem_memories' \
AND column_name = 'embedding'",
)
.fetch_optional(&self.pool)
.await
.map_err(db_err)?;

if let Some(ct) = col_type {
// Parse "vecf32(768)" → Some(768)
if let Some(schema_dim) = ct
.trim_start_matches("vecf32(")
.trim_end_matches(')')
.parse::<usize>()
.ok()
{
if schema_dim != self.embedding_dim {
return Err(MemoriaError::Internal(format!(
"Embedding dimension mismatch: the database schema has \
{}d vectors but Memoria is configured for {}d. \
To fix: either set EMBEDDING_DIM={} to match the \
existing schema, or drop the database (data loss) and \
restart to rebuild with the new dimension.",
schema_dim, self.embedding_dim, schema_dim
)));
}
}
}

Ok(())
}

// ── Audit log ─────────────────────────────────────────────────────────────

/// Create a safety snapshot before destructive operations. Best-effort.
Expand Down
2 changes: 1 addition & 1 deletion skills/deployment/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Services: API on `:8100`, MatrixOne on `:6001`. Verify: `curl http://localhost:8
| `MEMORIA_EMBEDDING_MODEL` | `all-MiniLM-L6-v2` | Model name |
| `MEMORIA_EMBEDDING_API_KEY` | — | Required if provider is `openai` |
| `MEMORIA_EMBEDDING_BASE_URL` | — | Custom endpoint (OpenAI-compatible) |
| `MEMORIA_EMBEDDING_DIM` | `0` (auto) | Embedding dimension |
| `MEMORIA_EMBEDDING_DIM` | `0` (auto) | Embedding dimension. `0` = auto-infer: Memoria probes the embedding service on startup and uses the returned vector length. Set explicitly (e.g. `768`, `1024`) to skip the probe or when the embedding service may be unavailable at boot time. |

### Distributed

Expand Down
Loading