diff --git a/README.md b/README.md
index 523030aa0..37609339a 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,10 @@ If you encounter issues, check the [FAQ](https://github.com/mindcraft-bots/mindc
 
 You can configure project details in `settings.js`. [See file.](settings.js)
 
+### Remote UI access
+
+Set `"host_public": true` in `settings.js` when you need the web UI reachable from other machines; this binds the MindServer to `0.0.0.0`. Make sure your firewall only exposes the port to trusted networks.
+
 You can configure the agent's name, model, and prompts in their profile like `andy.json`.
 
 The model can be specified with the `model` field, with values like `model: "gemini-2.5-pro"`. You will need the correct API key for the API provider you choose. See all supported APIs below.
diff --git a/main.js b/main.js
index 4402cb964..42225b90a 100644
--- a/main.js
+++ b/main.js
@@ -63,7 +63,7 @@ if (process.env.LOG_ALL) {
     settings.log_all_prompts = process.env.LOG_ALL;
 }
 
-Mindcraft.init(false, settings.mindserver_port, settings.auto_open_ui);
+Mindcraft.init(settings.host_public ?? false, settings.mindserver_port, settings.auto_open_ui);
 
 for (let profile of settings.profiles) {
     const profile_json = JSON.parse(readFileSync(profile, 'utf8'));
diff --git a/settings.js b/settings.js
index e59457db6..6c728de76 100644
--- a/settings.js
+++ b/settings.js
@@ -6,6 +6,7 @@ const settings = {
 
     // the mindserver manages all agents and hosts the UI
     "mindserver_port": 8080,
+    "host_public": false, // set true to bind the UI to 0.0.0.0 for remote access
     "auto_open_ui": true, // opens UI in browser on startup
 
     "base_profile": "assistant", // survival, assistant, creative, or god_mode
diff --git a/src/agent/library/skill_library.js b/src/agent/library/skill_library.js
index 4470586f1..b803e3e24 100644
--- a/src/agent/library/skill_library.js
+++ b/src/agent/library/skill_library.js
@@ -1,6 +1,7 @@
 import { cosineSimilarity } from '../../utils/math.js';
 import { getSkillDocs } from './index.js';
 import { wordOverlapScore } from '../../utils/text.js';
+import { embedWithProgress } from '../../utils/rate_limiter.js';
 
 export class SkillLibrary {
     constructor(agent,embedding_model) {
@@ -15,13 +16,27 @@
         this.skill_docs = skillDocs;
         if (this.embedding_model) {
             try {
-                const embeddingPromises = skillDocs.map((doc) => {
-                    return (async () => {
-                        let func_name_desc = doc.split('\n').slice(0, 2).join('');
-                        this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
-                    })();
-                });
-                await Promise.all(embeddingPromises);
+                const docsToEmbed = skillDocs.map(doc => ({
+                    doc,
+                    text: doc.split('\n').slice(0, 2).join('')
+                }));
+
+                const modelName = this.embedding_model.model_name || this.embedding_model.constructor?.name || 'unknown';
+
+                const embeddings = await embedWithProgress(
+                    docsToEmbed,
+                    async (text) => await this.embedding_model.embed(text),
+                    'skills',
+                    {
+                        cacheKey: 'skills',
+                        modelName: modelName,
+                        getTextFn: (item) => item.text
+                    }
+                );
+
+                for (const [item, embedding] of embeddings) {
+                    this.skill_docs_embeddings[item.doc] = embedding;
+                }
             } catch (error) {
                 console.warn('Error with embedding model, using word-overlap instead.');
                 this.embedding_model = null;
diff --git a/src/models/prompter.js b/src/models/prompter.js
index 6ee93b2e7..8474b9925 100644
--- a/src/models/prompter.js
+++ b/src/models/prompter.js
@@ -110,8 +110,8 @@
 
     async initExamples() {
         try {
-            this.convo_examples = new Examples(this.embedding_model, settings.num_examples);
-            this.coding_examples = new Examples(this.embedding_model, settings.num_examples);
+            this.convo_examples = new Examples(this.embedding_model, settings.num_examples, 'convo_examples');
+            this.coding_examples = new Examples(this.embedding_model, settings.num_examples, 'coding_examples');
 
             // Wait for both examples to load before proceeding
             await Promise.all([
diff --git a/src/models/replicate.js b/src/models/replicate.js
index aa296c57d..6146a9e60 100644
--- a/src/models/replicate.js
+++ b/src/models/replicate.js
@@ -24,16 +24,35 @@
         const prompt = toSinglePrompt(turns, null, stop_seq);
         let model_name = this.model_name || 'meta/meta-llama-3-70b-instruct';
 
-        const input = {
-            prompt,
-            system_prompt: systemMessage,
-            ...(this.params || {})
-        };
+        // Detect model type to use correct input format
+        const isGemini = model_name.includes('gemini');
+        const isLlama = model_name.includes('llama') || model_name.includes('meta/');
+
+        let input;
+        if (isGemini) {
+            // Gemini models use system_instruction and expect the full prompt with system message
+            const fullPrompt = systemMessage + '\n\n' + prompt;
+            input = {
+                prompt: fullPrompt,
+                ...(this.params || {})
+            };
+        } else {
+            // Llama and other models use system_prompt
+            input = {
+                prompt,
+                system_prompt: systemMessage,
+                ...(this.params || {})
+            };
+        }
+
         let res = null;
         try {
             console.log('Awaiting Replicate API response...');
+            console.log(' Model:', model_name, isGemini ? '(Gemini format)' : '(Llama format)');
             let result = '';
+            let eventCount = 0;
             for await (const event of this.replicate.stream(model_name, { input })) {
+                eventCount++;
                 result += event;
                 if (result === '') break;
                 if (result.includes(stop_seq)) {
@@ -42,19 +61,65 @@
                 }
             }
             res = result;
+            console.log('Received. Events:', eventCount, 'Response length:', res.length);
+            console.log('Response:', res.substring(0, 500));
+            if (!res || res.trim() === '') {
+                console.log('WARNING: Empty response from model');
+            }
         } catch (err) {
-            console.log(err);
+            console.log('Replicate error:', err);
             res = 'My brain disconnected, try again.';
         }
-        console.log('Received.');
         return res;
     }
 
     async embed(text) {
-        const output = await this.replicate.run(
-            this.model_name || "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47",
-            { input: {text} }
+        // Always use a dedicated embedding model, not the chat model
+        const DEFAULT_EMBEDDING_MODEL = "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47";
+
+        // Validate text input
+        if (!text || typeof text !== 'string') {
+            throw new Error('Text is required for embedding');
+        }
+
+        // Check if model_name is an embedding model or a chat model
+        // Chat models (like meta/meta-llama-3-70b-instruct) won't work for embeddings
+        const isEmbeddingModel = this.model_name && (
+            this.model_name.includes('embed') ||
+            this.model_name.includes('gte') ||
+            this.model_name.includes('e5-')
         );
-        return output.vectors;
+        const embeddingModel = isEmbeddingModel ? this.model_name : DEFAULT_EMBEDDING_MODEL;
+
+        // Helper to extract embedding from various output formats
+        const extractEmbedding = (output) => {
+            if (output.vectors) {
+                return output.vectors;
+            } else if (Array.isArray(output)) {
+                // Some models return the embedding array directly
+                return output;
+            } else if (output.embedding) {
+                return output.embedding;
+            } else if (output.embeddings) {
+                return Array.isArray(output.embeddings[0]) ? output.embeddings[0] : output.embeddings;
+            }
+            return null;
+        };
+
+        // Try different input formats since models have varying expectations
+        try {
+            const output = await this.replicate.run(
+                embeddingModel,
+                { input: { text } }
+            );
+            const embedding = extractEmbedding(output);
+            if (embedding) {
+                return embedding;
+            }
+            throw new Error('Unknown embedding output format');
+        } catch (err) {
+            console.error('Replicate embed error:', err.message || err);
+            throw err;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/utils/embedding_cache.js b/src/utils/embedding_cache.js
new file mode 100644
index 000000000..832bfaa9f
--- /dev/null
+++ b/src/utils/embedding_cache.js
@@ -0,0 +1,125 @@
+/**
+ * Persistent cache for embeddings to avoid re-computing on restart
+ */
+
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
+import { createHash } from 'crypto';
+import path from 'path';
+
+const CACHE_DIR = './bots/.cache';
+const CACHE_VERSION = 1; // Bump this if cache format changes
+
+/**
+ * Get a hash of the content for cache keying
+ */
+function hashContent(content) {
+    return createHash('md5').update(content).digest('hex');
+}
+
+/**
+ * Load embeddings from cache
+ * @param {string} cacheKey - Unique key for this cache (e.g., 'examples', 'skills')
+ * @param {string} modelName - Model name to invalidate cache if model changes
+ * @returns {Object|null} Cached embeddings or null if not found/invalid
+ */
+export function loadEmbeddingCache(cacheKey, modelName) {
+    const cachePath = path.join(CACHE_DIR, `${cacheKey}_embeddings.json`);
+
+    try {
+        if (!existsSync(cachePath)) {
+            return null;
+        }
+
+        const cache = JSON.parse(readFileSync(cachePath, 'utf8'));
+
+        // Validate cache version and model
+        if (cache.version !== CACHE_VERSION || cache.model !== modelName) {
+            console.log(`Embedding cache for ${cacheKey} invalidated (model or version changed)`);
+            return null;
+        }
+
+        console.log(`Loaded ${Object.keys(cache.embeddings).length} cached embeddings for ${cacheKey}`);
+        return cache.embeddings;
+    } catch (err) {
+        console.warn(`Failed to load embedding cache for ${cacheKey}:`, err.message);
+        return null;
+    }
+}
+
+/**
+ * Save embeddings to cache
+ * @param {string} cacheKey - Unique key for this cache
+ * @param {string} modelName - Model name for cache invalidation
+ * @param {Object} embeddings - Map of text -> embedding
+ */
+export function saveEmbeddingCache(cacheKey, modelName, embeddings) {
+    const cachePath = path.join(CACHE_DIR, `${cacheKey}_embeddings.json`);
+
+    try {
+        mkdirSync(CACHE_DIR, { recursive: true });
+
+        const cache = {
+            version: CACHE_VERSION,
+            model: modelName,
+            timestamp: new Date().toISOString(),
+            embeddings: embeddings
+        };
+
+        writeFileSync(cachePath, JSON.stringify(cache), 'utf8');
+        console.log(`Saved ${Object.keys(embeddings).length} embeddings to cache for ${cacheKey}`);
+    } catch (err) {
+        console.warn(`Failed to save embedding cache for ${cacheKey}:`, err.message);
+    }
+}
+
+/**
+ * Get embeddings with caching support
+ * @param {Array} items - Items to embed
+ * @param {Function} getTextFn - Function to extract text from item: (item) => string
+ * @param {Function} embedFn - Async function to embed text: (text) => embedding
+ * @param {string} cacheKey - Cache key for this set of embeddings
+ * @param {string} modelName - Model name for cache invalidation
+ * @param {Function} progressFn - Optional progress callback: (current, total, item) => void
+ * @returns {Promise} Map of item -> embedding
+ */
+export async function getEmbeddingsWithCache(items, getTextFn, embedFn, cacheKey, modelName, progressFn = null) {
+    const results = new Map();
+    const cachedEmbeddings = loadEmbeddingCache(cacheKey, modelName) || {};
+    const toEmbed = [];
+
+    // Check what's already cached
+    for (const item of items) {
+        const text = getTextFn(item);
+        const hash = hashContent(text);
+
+        if (cachedEmbeddings[hash]) {
+            results.set(item, cachedEmbeddings[hash]);
+        } else {
+            toEmbed.push({ item, text, hash });
+        }
+    }
+
+    if (toEmbed.length === 0) {
+        console.log(`${cacheKey}: All ${items.length} embeddings loaded from cache`);
+        return results;
+    }
+
+    console.log(`${cacheKey}: Embedding ${toEmbed.length} items (${items.length - toEmbed.length} cached)...`);
+
+    // Embed missing items
+    const newEmbeddings = {};
+    for (let i = 0; i < toEmbed.length; i++) {
+        const { item, text, hash } = toEmbed[i];
+
+        const embedding = await embedFn(text);
+        results.set(item, embedding);
+        newEmbeddings[hash] = embedding;
+        cachedEmbeddings[hash] = embedding;
+    }
+
+    // Save updated cache
+    saveEmbeddingCache(cacheKey, modelName, cachedEmbeddings);
+    console.log(`${cacheKey}: Done (${toEmbed.length} embedded, ${results.size} total)`);
+
+    return results;
+}
diff --git a/src/utils/examples.js b/src/utils/examples.js
index 470663d20..889584668 100644
--- a/src/utils/examples.js
+++ b/src/utils/examples.js
@@ -1,12 +1,14 @@
 import { cosineSimilarity } from './math.js';
 import { stringifyTurns, wordOverlapScore } from './text.js';
+import { embedWithProgress } from './rate_limiter.js';
 
 export class Examples {
-    constructor(model, select_num=2) {
+    constructor(model, select_num=2, cacheKey='examples') {
         this.examples = [];
         this.model = model;
         this.select_num = select_num;
         this.embeddings = {};
+        this.cacheKey = cacheKey;
     }
 
     turnsToText(turns) {
@@ -26,17 +28,23 @@
             return;
 
         try {
-            // Create array of promises first
-            const embeddingPromises = examples.map(example => {
-                const turn_text = this.turnsToText(example);
-                return this.model.embed(turn_text)
-                    .then(embedding => {
-                        this.embeddings[turn_text] = embedding;
-                    });
-            });
+            const textsToEmbed = examples.map(example => this.turnsToText(example));
+            const modelName = this.model.model_name || this.model.constructor?.name || 'unknown';
 
-            // Wait for all embeddings to complete
-            await Promise.all(embeddingPromises);
+            const embeddings = await embedWithProgress(
+                textsToEmbed,
+                async (text) => await this.model.embed(text),
+                this.cacheKey,
+                {
+                    cacheKey: this.cacheKey,
+                    modelName: modelName,
+                    getTextFn: (text) => text
+                }
+            );
+
+            for (const [text, embedding] of embeddings) {
+                this.embeddings[text] = embedding;
+            }
         } catch (err) {
             console.warn('Error with embedding model, using word-overlap instead.');
             this.model = null;
diff --git a/src/utils/rate_limiter.js b/src/utils/rate_limiter.js
new file mode 100644
index 000000000..f9bf5a3c0
--- /dev/null
+++ b/src/utils/rate_limiter.js
@@ -0,0 +1,105 @@
+/**
+ * Utility for rate-limited operations with exponential backoff retry
+ */
+
+import { getEmbeddingsWithCache } from './embedding_cache.js';
+
+/**
+ * Execute an async function with exponential backoff retry on rate limit errors
+ * @param {Function} fn - Async function to execute
+ * @param {Object} options - Options
+ * @param {number} options.maxRetries - Maximum number of retries (default: 5)
+ * @param {number} options.initialDelay - Initial delay in ms (default: 1000)
+ * @param {number} options.maxDelay - Maximum delay in ms (default: 60000)
+ * @returns {Promise} Result of the function
+ */
+export async function withRetry(fn, options = {}) {
+    const { maxRetries = 5, initialDelay = 1000, maxDelay = 60000 } = options;
+    let lastError;
+
+    for (let attempt = 0; attempt <= maxRetries; attempt++) {
+        try {
+            return await fn();
+        } catch (err) {
+            lastError = err;
+            const errMsg = err.message || String(err);
+
+            // Check if it's a rate limit error
+            const isRateLimit = errMsg.includes('429') ||
+                errMsg.includes('rate limit') ||
+                errMsg.includes('Too Many Requests') ||
+                errMsg.includes('throttled');
+
+            if (!isRateLimit || attempt === maxRetries) {
+                throw err;
+            }
+
+            // Parse retry_after from error if available, otherwise use exponential backoff
+            let delay = initialDelay * Math.pow(2, attempt);
+            const retryAfterMatch = errMsg.match(/retry.after[^\d]*(\d+)/i);
+            if (retryAfterMatch) {
+                delay = parseInt(retryAfterMatch[1]) * 1000 + 1000; // Add 1s buffer
+            }
+            delay = Math.min(delay, maxDelay);
+
+            console.log(`Rate limited, retrying in ${(delay/1000).toFixed(1)}s (attempt ${attempt + 1}/${maxRetries})...`);
+            await new Promise(resolve => setTimeout(resolve, delay));
+        }
+    }
+    throw lastError;
+}
+
+/**
+ * Process items with embedding, showing progress, handling rate limits, and caching
+ * @param {Array} items - Items to process
+ * @param {Function} embedFn - Async function to embed an item: (item, index) => embedding
+ * @param {string} label - Label for progress display (e.g., "examples", "skills")
+ * @param {Object} options - Options including retry options and cache settings
+ * @param {string} options.cacheKey - Cache key for persistent storage
+ * @param {string} options.modelName - Model name for cache invalidation
+ * @param {Function} options.getTextFn - Function to extract text from item for caching
+ * @returns {Promise} Map of item -> embedding
+ */
+export async function embedWithProgress(items, embedFn, label = 'items', options = {}) {
+    const { cacheKey, modelName, getTextFn } = options;
+    const total = items.length;
+
+    if (total === 0) return new Map();
+
+    // If caching is enabled, use the cache system
+    if (cacheKey && modelName && getTextFn) {
+        const embedWithRetry = async (text) => {
+            return await withRetry(() => embedFn(text), options);
+        };
+
+        const results = await getEmbeddingsWithCache(
+            items,
+            getTextFn,
+            embedWithRetry,
+            cacheKey,
+            modelName,
+            null // No per-item progress to avoid spam
+        );
+
+        return results;
+    }
+
+    // Fallback to non-cached embedding
+    const results = new Map();
+    console.log(`${label}: Embedding ${total} items...`);
+
+    for (let i = 0; i < total; i++) {
+        const item = items[i];
+
+        try {
+            const embedding = await withRetry(() => embedFn(item, i), options);
+            results.set(item, embedding);
+        } catch (err) {
+            console.error(`${label}: Failed to embed item ${i + 1}: ${err.message}`);
+            throw err;
+        }
+    }
+
+    console.log(`${label}: Done (${total} embedded)`);
+    return results;
+}
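
For reviewers who want to exercise the new utilities outside the agent, here is a minimal sketch (not part of the patch). It assumes it is run from the repo root as an ES module; `fakeEmbed` is a hypothetical stand-in for any model object's async `embed(text)` method, such as the one in `src/models/replicate.js`.

```js
// Minimal sketch, not part of the patch. fakeEmbed is a hypothetical stand-in
// for model.embed(text); the import path assumes the repo root as the working directory.
import { withRetry, embedWithProgress } from './src/utils/rate_limiter.js';

const fakeEmbed = async (text) => Array.from({ length: 4 }, (_, i) => text.length + i);

// Single call with exponential backoff on 429 / rate-limit style errors.
const vector = await withRetry(() => fakeEmbed('hello world'), { maxRetries: 3, initialDelay: 500 });
console.log(vector);

// Batch embedding with retry plus the persistent cache in bots/.cache/demo_embeddings.json.
const docs = ['first skill doc', 'second skill doc'];
const embeddings = await embedWithProgress(
    docs,
    fakeEmbed,
    'demo',
    { cacheKey: 'demo', modelName: 'fake-embedder', getTextFn: (doc) => doc }
);
console.log(embeddings.get(docs[0])); // re-running should load this from the cache instead of re-embedding
```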
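
For reference, the cache file that `saveEmbeddingCache` writes to `bots/.cache/<cacheKey>_embeddings.json` (and that `loadEmbeddingCache` validates) looks roughly like the sketch below; all values are illustrative. Entries under `embeddings` are keyed by the md5 hash of the embedded text, and a mismatch in either `version` or `model` invalidates the whole file.

```json
{
    "version": 1,
    "model": "fake-embedder",
    "timestamp": "2024-01-01T00:00:00.000Z",
    "embeddings": {
        "5eb63bbbe01eeed093cb22bb8f5acdc3": [0.012, -0.034, 0.171]
    }
}
```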