diff --git a/README.md b/README.md
index 523030aa0..37609339a 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,10 @@ If you encounter issues, check the [FAQ](https://github.com/mindcraft-bots/mindc
 
 You can configure project details in `settings.js`. [See file.](settings.js)
 
+### Remote UI access
+
+Set `"host_public": true` in `settings.js` when you need the web UI reachable from other machines; this binds the MindServer to `0.0.0.0`. Make sure your firewall only exposes the port to trusted networks.
+
 You can configure the agent's name, model, and prompts in their profile like `andy.json`.
 
 The model can be specified with the `model` field, with values like `model: "gemini-2.5-pro"`. You will need the correct API key for the API provider you choose. See all supported APIs below.
diff --git a/main.js b/main.js
index 4402cb964..42225b90a 100644
--- a/main.js
+++ b/main.js
@@ -63,7 +63,7 @@ if (process.env.LOG_ALL) {
     settings.log_all_prompts = process.env.LOG_ALL;
 }
 
-Mindcraft.init(false, settings.mindserver_port, settings.auto_open_ui);
+Mindcraft.init(settings.host_public ?? false, settings.mindserver_port, settings.auto_open_ui);
 
 for (let profile of settings.profiles) {
     const profile_json = JSON.parse(readFileSync(profile, 'utf8'));
diff --git a/settings.js b/settings.js
index e59457db6..6c728de76 100644
--- a/settings.js
+++ b/settings.js
@@ -6,6 +6,7 @@ const settings = {
 
     // the mindserver manages all agents and hosts the UI
     "mindserver_port": 8080,
+    "host_public": false, // set true to bind the UI to 0.0.0.0 for remote access
     "auto_open_ui": true, // opens UI in browser on startup
 
     "base_profile": "assistant", // survival, assistant, creative, or god_mode
diff --git a/src/agent/library/skill_library.js b/src/agent/library/skill_library.js
index 4470586f1..b803e3e24 100644
--- a/src/agent/library/skill_library.js
+++ b/src/agent/library/skill_library.js
@@ -1,6 +1,7 @@
 import { cosineSimilarity } from '../../utils/math.js';
 import { getSkillDocs } from './index.js';
 import { wordOverlapScore } from '../../utils/text.js';
+import { embedWithProgress } from '../../utils/rate_limiter.js';
 
 export class SkillLibrary {
     constructor(agent,embedding_model) {
@@ -15,13 +16,27 @@
         this.skill_docs = skillDocs;
         if (this.embedding_model) {
             try {
-                const embeddingPromises = skillDocs.map((doc) => {
-                    return (async () => {
-                        let func_name_desc = doc.split('\n').slice(0, 2).join('');
-                        this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
-                    })();
-                });
-                await Promise.all(embeddingPromises);
+                const docsToEmbed = skillDocs.map(doc => ({
+                    doc,
+                    text: doc.split('\n').slice(0, 2).join('')
+                }));
+
+                const modelName = this.embedding_model.model_name || this.embedding_model.constructor?.name || 'unknown';
+
+                const embeddings = await embedWithProgress(
+                    docsToEmbed,
+                    async (text) => await this.embedding_model.embed(text),
+                    'skills',
+                    {
+                        cacheKey: 'skills',
+                        modelName: modelName,
+                        getTextFn: (item) => item.text
+                    }
+                );
+
+                for (const [item, embedding] of embeddings) {
+                    this.skill_docs_embeddings[item.doc] = embedding;
+                }
             } catch (error) {
                 console.warn('Error with embedding model, using word-overlap instead.');
                 this.embedding_model = null;
diff --git a/src/models/prompter.js b/src/models/prompter.js
index 6ee93b2e7..8474b9925 100644
--- a/src/models/prompter.js
+++ b/src/models/prompter.js
@@ -110,8 +110,8 @@
 
     async initExamples() {
         try {
-            this.convo_examples = new Examples(this.embedding_model, settings.num_examples);
-            this.coding_examples = new Examples(this.embedding_model, settings.num_examples);
+            this.convo_examples = new Examples(this.embedding_model, settings.num_examples, 'convo_examples');
+            this.coding_examples = new Examples(this.embedding_model, settings.num_examples, 'coding_examples');
 
             // Wait for both examples to load before proceeding
             await Promise.all([
diff --git a/src/models/replicate.js b/src/models/replicate.js
index aa296c57d..6146a9e60 100644
--- a/src/models/replicate.js
+++ b/src/models/replicate.js
@@ -24,16 +24,35 @@
         const prompt = toSinglePrompt(turns, null, stop_seq);
         let model_name = this.model_name || 'meta/meta-llama-3-70b-instruct';
 
-        const input = {
-            prompt,
-            system_prompt: systemMessage,
-            ...(this.params || {})
-        };
+        // Detect model type to use correct input format
+        const isGemini = model_name.includes('gemini');
+        const isLlama = model_name.includes('llama') || model_name.includes('meta/');
+
+        let input;
+        if (isGemini) {
+            // Gemini models use system_instruction and expect the full prompt with system message
+            const fullPrompt = systemMessage + '\n\n' + prompt;
+            input = {
+                prompt: fullPrompt,
+                ...(this.params || {})
+            };
+        } else {
+            // Llama and other models use system_prompt
+            input = {
+                prompt,
+                system_prompt: systemMessage,
+                ...(this.params || {})
+            };
+        }
+
         let res = null;
         try {
             console.log('Awaiting Replicate API response...');
+            console.log(' Model:', model_name, isGemini ? '(Gemini format)' : '(Llama format)');
             let result = '';
+            let eventCount = 0;
             for await (const event of this.replicate.stream(model_name, { input })) {
+                eventCount++;
                 result += event;
                 if (result === '') break;
                 if (result.includes(stop_seq)) {
@@ -42,19 +61,65 @@
                 }
             }
             res = result;
+            console.log('Received. Events:', eventCount, 'Response length:', res.length);
+            console.log('Response:', res.substring(0, 500));
+            if (!res || res.trim() === '') {
+                console.log('WARNING: Empty response from model');
+            }
         } catch (err) {
-            console.log(err);
+            console.log('Replicate error:', err);
             res = 'My brain disconnected, try again.';
         }
-        console.log('Received.');
         return res;
     }
 
     async embed(text) {
-        const output = await this.replicate.run(
-            this.model_name || "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47",
-            { input: {text} }
+        // Always use a dedicated embedding model, not the chat model
+        const DEFAULT_EMBEDDING_MODEL = "mark3labs/embeddings-gte-base:d619cff29338b9a37c3d06605042e1ff0594a8c3eff0175fd6967f5643fc4d47";
+
+        // Validate text input
+        if (!text || typeof text !== 'string') {
+            throw new Error('Text is required for embedding');
+        }
+
+        // Check if model_name is an embedding model or a chat model
+        // Chat models (like meta/meta-llama-3-70b-instruct) won't work for embeddings
+        const isEmbeddingModel = this.model_name && (
+            this.model_name.includes('embed') ||
+            this.model_name.includes('gte') ||
+            this.model_name.includes('e5-')
         );
-        return output.vectors;
+        const embeddingModel = isEmbeddingModel ? this.model_name : DEFAULT_EMBEDDING_MODEL;
+
+        // Helper to extract embedding from various output formats
+        const extractEmbedding = (output) => {
+            if (output.vectors) {
+                return output.vectors;
+            } else if (Array.isArray(output)) {
+                // Some models return the embedding array directly
+                return output;
+            } else if (output.embedding) {
+                return output.embedding;
+            } else if (output.embeddings) {
+                return Array.isArray(output.embeddings[0]) ? output.embeddings[0] : output.embeddings;
+            }
+            return null;
+        };
+
+        // Try different input formats since models have varying expectations
+        try {
+            const output = await this.replicate.run(
+                embeddingModel,
+                { input: { text } }
+            );
+            const embedding = extractEmbedding(output);
+            if (embedding) {
+                return embedding;
+            }
+            throw new Error('Unknown embedding output format');
+        } catch (err) {
+            console.error('Replicate embed error:', err.message || err);
+            throw err;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/utils/embedding_cache.js b/src/utils/embedding_cache.js
new file mode 100644
index 000000000..832bfaa9f
--- /dev/null
+++ b/src/utils/embedding_cache.js
@@ -0,0 +1,125 @@
+/**
+ * Persistent cache for embeddings to avoid re-computing on restart
+ */
+
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
+import { createHash } from 'crypto';
+import path from 'path';
+
+const CACHE_DIR = './bots/.cache';
+const CACHE_VERSION = 1; // Bump this if cache format changes
+
+/**
+ * Get a hash of the content for cache keying
+ */
+function hashContent(content) {
+    return createHash('md5').update(content).digest('hex');
+}
+
+/**
+ * Load embeddings from cache
+ * @param {string} cacheKey - Unique key for this cache (e.g., 'examples', 'skills')
+ * @param {string} modelName - Model name to invalidate cache if model changes
+ * @returns {Object|null} Cached embeddings or null if not found/invalid
+ */
+export function loadEmbeddingCache(cacheKey, modelName) {
+    const cachePath = path.join(CACHE_DIR, `${cacheKey}_embeddings.json`);
+
+    try {
+        if (!existsSync(cachePath)) {
+            return null;
+        }
+
+        const cache = JSON.parse(readFileSync(cachePath, 'utf8'));
+
+        // Validate cache version and model
+        if (cache.version !== CACHE_VERSION || cache.model !== modelName) {
+            console.log(`Embedding cache for ${cacheKey} invalidated (model or version changed)`);
+            return null;
+        }
+
+        console.log(`Loaded ${Object.keys(cache.embeddings).length} cached embeddings for ${cacheKey}`);
+        return cache.embeddings;
+    } catch (err) {
+        console.warn(`Failed to load embedding cache for ${cacheKey}:`, err.message);
+        return null;
+    }
+}
+
+/**
+ * Save embeddings to cache
+ * @param {string} cacheKey - Unique key for this cache
+ * @param {string} modelName - Model name for cache invalidation
+ * @param {Object} embeddings - Map of text -> embedding
+ */
+export function saveEmbeddingCache(cacheKey, modelName, embeddings) {
+    const cachePath = path.join(CACHE_DIR, `${cacheKey}_embeddings.json`);
+
+    try {
+        mkdirSync(CACHE_DIR, { recursive: true });
+
+        const cache = {
+            version: CACHE_VERSION,
+            model: modelName,
+            timestamp: new Date().toISOString(),
+            embeddings: embeddings
+        };
+
+        writeFileSync(cachePath, JSON.stringify(cache), 'utf8');
+        console.log(`Saved ${Object.keys(embeddings).length} embeddings to cache for ${cacheKey}`);
+    } catch (err) {
+        console.warn(`Failed to save embedding cache for ${cacheKey}:`, err.message);
+    }
+}
+
+/**
+ * Get embeddings with caching support
+ * @param {Array} items - Items to embed
+ * @param {Function} getTextFn - Function to extract text from item: (item) => string
+ * @param {Function} embedFn - Async function to embed text: (text) => embedding
+ * @param {string} cacheKey - Cache key for this set of embeddings
+ * @param {string} modelName - Model name for cache invalidation
+ * @param {Function} progressFn - Optional progress callback: (current, total, item) => void
+ * @returns {Promise} Map of item -> embedding
+ */
+export async function getEmbeddingsWithCache(items, getTextFn, embedFn, cacheKey, modelName, progressFn = null) {
+    const results = new Map();
+    const cachedEmbeddings = loadEmbeddingCache(cacheKey, modelName) || {};
+    const toEmbed = [];
+
+    // Check what's already cached
+    for (const item of items) {
+        const text = getTextFn(item);
+        const hash = hashContent(text);
+
+        if (cachedEmbeddings[hash]) {
+            results.set(item, cachedEmbeddings[hash]);
+        } else {
+            toEmbed.push({ item, text, hash });
+        }
+    }
+
+    if (toEmbed.length === 0) {
+        console.log(`${cacheKey}: All ${items.length} embeddings loaded from cache`);
+        return results;
+    }
+
+    console.log(`${cacheKey}: Embedding ${toEmbed.length} items (${items.length - toEmbed.length} cached)...`);
+
+    // Embed missing items
+    const newEmbeddings = {};
+    for (let i = 0; i < toEmbed.length; i++) {
+        const { item, text, hash } = toEmbed[i];
+
+        const embedding = await embedFn(text);
+        results.set(item, embedding);
+        newEmbeddings[hash] = embedding;
+        cachedEmbeddings[hash] = embedding;
+    }
+
+    // Save updated cache
+    saveEmbeddingCache(cacheKey, modelName, cachedEmbeddings);
+    console.log(`${cacheKey}: Done (${toEmbed.length} embedded, ${results.size} total)`);
+
+    return results;
+}
diff --git a/src/utils/examples.js b/src/utils/examples.js
index 470663d20..889584668 100644
--- a/src/utils/examples.js
+++ b/src/utils/examples.js
@@ -1,12 +1,14 @@
 import { cosineSimilarity } from './math.js';
 import { stringifyTurns, wordOverlapScore } from './text.js';
+import { embedWithProgress } from './rate_limiter.js';
 
 export class Examples {
-    constructor(model, select_num=2) {
+    constructor(model, select_num=2, cacheKey='examples') {
         this.examples = [];
         this.model = model;
         this.select_num = select_num;
         this.embeddings = {};
+        this.cacheKey = cacheKey;
     }
 
     turnsToText(turns) {
@@ -26,17 +28,23 @@
             return;
 
         try {
-            // Create array of promises first
-            const embeddingPromises = examples.map(example => {
-                const turn_text = this.turnsToText(example);
-                return this.model.embed(turn_text)
-                    .then(embedding => {
-                        this.embeddings[turn_text] = embedding;
-                    });
-            });
+            const textsToEmbed = examples.map(example => this.turnsToText(example));
+            const modelName = this.model.model_name || this.model.constructor?.name || 'unknown';
 
-            // Wait for all embeddings to complete
-            await Promise.all(embeddingPromises);
+            const embeddings = await embedWithProgress(
+                textsToEmbed,
+                async (text) => await this.model.embed(text),
+                this.cacheKey,
+                {
+                    cacheKey: this.cacheKey,
+                    modelName: modelName,
+                    getTextFn: (text) => text
+                }
+            );
+
+            for (const [text, embedding] of embeddings) {
+                this.embeddings[text] = embedding;
+            }
         } catch (err) {
             console.warn('Error with embedding model, using word-overlap instead.');
             this.model = null;
diff --git a/src/utils/rate_limiter.js b/src/utils/rate_limiter.js
new file mode 100644
index 000000000..f9bf5a3c0
--- /dev/null
+++ b/src/utils/rate_limiter.js
@@ -0,0 +1,105 @@
+/**
+ * Utility for rate-limited operations with exponential backoff retry
+ */
+
+import { getEmbeddingsWithCache } from './embedding_cache.js';
+
+/**
+ * Execute an async function with exponential backoff retry on rate limit errors
+ * @param {Function} fn - Async function to execute
+ * @param {Object} options - Options
+ * @param {number} options.maxRetries - Maximum number of retries (default: 5)
+ * @param {number} options.initialDelay - Initial delay in ms (default: 1000)
+ * @param {number} options.maxDelay - Maximum delay in ms (default: 60000)
+ * @returns {Promise} Result of the function
+ */
+export async function withRetry(fn, options = {}) {
+    const { maxRetries = 5, initialDelay = 1000, maxDelay = 60000 } = options;
+    let lastError;
+
+    for (let attempt = 0; attempt <= maxRetries; attempt++) {
+        try {
+            return await fn();
+        } catch (err) {
+            lastError = err;
+            const errMsg = err.message || String(err);
+
+            // Check if it's a rate limit error
+            const isRateLimit = errMsg.includes('429') ||
+                errMsg.includes('rate limit') ||
+                errMsg.includes('Too Many Requests') ||
+                errMsg.includes('throttled');
+
+            if (!isRateLimit || attempt === maxRetries) {
+                throw err;
+            }
+
+            // Parse retry_after from error if available, otherwise use exponential backoff
+            let delay = initialDelay * Math.pow(2, attempt);
+            const retryAfterMatch = errMsg.match(/retry.after[^\d]*(\d+)/i);
+            if (retryAfterMatch) {
+                delay = parseInt(retryAfterMatch[1]) * 1000 + 1000; // Add 1s buffer
+            }
+            delay = Math.min(delay, maxDelay);
+
+            console.log(`Rate limited, retrying in ${(delay/1000).toFixed(1)}s (attempt ${attempt + 1}/${maxRetries})...`);
+            await new Promise(resolve => setTimeout(resolve, delay));
+        }
+    }
+    throw lastError;
+}
+
+/**
+ * Process items with embedding, showing progress, handling rate limits, and caching
+ * @param {Array} items - Items to process
+ * @param {Function} embedFn - Async function to embed an item: (item, index) => embedding
+ * @param {string} label - Label for progress display (e.g., "examples", "skills")
+ * @param {Object} options - Options including retry options and cache settings
+ * @param {string} options.cacheKey - Cache key for persistent storage
+ * @param {string} options.modelName - Model name for cache invalidation
+ * @param {Function} options.getTextFn - Function to extract text from item for caching
+ * @returns {Promise} Map of item -> embedding
+ */
+export async function embedWithProgress(items, embedFn, label = 'items', options = {}) {
+    const { cacheKey, modelName, getTextFn } = options;
+    const total = items.length;
+
+    if (total === 0) return new Map();
+
+    // If caching is enabled, use the cache system
+    if (cacheKey && modelName && getTextFn) {
+        const embedWithRetry = async (text) => {
+            return await withRetry(() => embedFn(text), options);
+        };
+
+        const results = await getEmbeddingsWithCache(
+            items,
+            getTextFn,
+            embedWithRetry,
+            cacheKey,
+            modelName,
+            null // No per-item progress to avoid spam
+        );
+
+        return results;
+    }
+
+    // Fallback to non-cached embedding
+    const results = new Map();
+    console.log(`${label}: Embedding ${total} items...`);
+
+    for (let i = 0; i < total; i++) {
+        const item = items[i];
+
+        try {
+            const embedding = await withRetry(() => embedFn(item, i), options);
+            results.set(item, embedding);
+        } catch (err) {
+            console.error(`${label}: Failed to embed item ${i + 1}: ${err.message}`);
+            throw err;
+        }
+    }
+
+    console.log(`${label}: Done (${total} embedded)`);
+    return results;
+}
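
For reviewers who want to exercise the new utilities outside the agent, here is a minimal sketch (not part of the patch). It assumes it is run from the repo root as an ES module; `fakeEmbed` is a hypothetical stand-in for any model object's async `embed(text)` method, such as the one in `src/models/replicate.js`.

```js
// Minimal sketch, not part of the patch. fakeEmbed is a hypothetical stand-in
// for model.embed(text); the import path assumes the repo root as the working directory.
import { withRetry, embedWithProgress } from './src/utils/rate_limiter.js';

const fakeEmbed = async (text) => Array.from({ length: 4 }, (_, i) => text.length + i);

// Single call with exponential backoff on 429 / rate-limit style errors.
const vector = await withRetry(() => fakeEmbed('hello world'), { maxRetries: 3, initialDelay: 500 });
console.log(vector);

// Batch embedding with retry plus the persistent cache in bots/.cache/demo_embeddings.json.
const docs = ['first skill doc', 'second skill doc'];
const embeddings = await embedWithProgress(
    docs,
    fakeEmbed,
    'demo',
    { cacheKey: 'demo', modelName: 'fake-embedder', getTextFn: (doc) => doc }
);
console.log(embeddings.get(docs[0])); // re-running should load this from the cache instead of re-embedding
```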
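
For reference, the cache file that `saveEmbeddingCache` writes to `bots/.cache/<cacheKey>_embeddings.json` (and that `loadEmbeddingCache` validates) looks roughly like the sketch below; all values are illustrative. Entries under `embeddings` are keyed by the md5 hash of the embedded text, and a mismatch in either `version` or `model` invalidates the whole file.

```json
{
    "version": 1,
    "model": "fake-embedder",
    "timestamp": "2024-01-01T00:00:00.000Z",
    "embeddings": {
        "5eb63bbbe01eeed093cb22bb8f5acdc3": [0.012, -0.034, 0.171]
    }
}
```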