THU-MAIC · cosarah · Mar 23, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/app/api/generate/agent-profiles/route.ts b/app/api/generate/agent-profiles/route.ts
@@ -36,6 +36,8 @@ interface RequestBody {
   sceneOutlines?: { title: string; description?: string }[];
   language: string;
   availableAvatars: string[];
+  avatarDescriptions?: Array<{ path: string; desc: string }>;
+  availableVoices?: Array<{ providerId: string; voiceId: string; voiceName: string }>;
 }
 
 function stripCodeFences(text: string): string {
@@ -50,7 +52,14 @@ function stripCodeFences(text: string): string {
 export async function POST(req: NextRequest) {
   try {
     const body = (await req.json()) as RequestBody;
-    const { stageInfo, sceneOutlines, language, availableAvatars } = body;
+    const {
+      stageInfo,
+      sceneOutlines,
+      language,
+      availableAvatars,
+      avatarDescriptions,
+      availableVoices,
+    } = body;
 
     // ── Validate required fields ──
     if (!stageInfo?.name) {
@@ -79,6 +88,27 @@ export async function POST(req: NextRequest) {
 
     const systemPrompt = `You are an expert instructional designer. Generate agent profiles for a multi-agent classroom simulation. Decide the appropriate number of agents (typically 3-5) based on the course content and complexity. Return ONLY valid JSON, no markdown or explanation.`;
 
+    // Voice prompt pieces: each stays '' unless the request supplied at least one voice.
+    let voiceListStr = '';
+    if (availableVoices && availableVoices.length > 0) {
+      const voiceOptions = availableVoices.map(({ providerId, voiceId, voiceName }) => ({
+        id: `${providerId}::${voiceId}`,
+        name: voiceName,
+      }));
+      voiceListStr = JSON.stringify(voiceOptions);
+    }
+    const hasVoiceList = voiceListStr !== '';
+
+    const voicePrompt = !hasVoiceList
+      ? ''
+      : `- Each agent should be assigned a voice that matches their persona from this list: ${voiceListStr}
+  - Pick a voice that suits the agent's personality and role (e.g. authoritative voice for teacher, lively voice for energetic student)
+  - Try to use different voices for each agent`;
+
+    const voiceJsonField = !hasVoiceList
+      ? ''
+      : ',\n      "voice": "string (voice id from available list, e.g. \'qwen-tts::Cherry\')"';
+
     const userPrompt = `Generate agent profiles for the following course:
 
 Course name: ${stageInfo.name}
@@ -90,10 +120,13 @@ Requirements:
 - Priority values: teacher=10 (highest), assistant=7, student=4-6
 - Each agent needs: name, role, persona (2-3 sentences describing personality and teaching/learning style)
 - Names and personas must be in language: ${language}
-- Each agent must be assigned one avatar from this list: ${JSON.stringify(availableAvatars)}
+- Each agent must be assigned one avatar from this list: ${JSON.stringify(avatarDescriptions && avatarDescriptions.length > 0 ? avatarDescriptions.map((a) => ({ path: a.path, description: a.desc })) : availableAvatars)}
+  - Pick an avatar that visually matches the agent's personality and role
   - Try to use different avatars for each agent
+  - Use the "path" value as the avatar field in the output
 - Each agent must be assigned one color from this list: ${JSON.stringify(COLOR_PALETTE)}
   - Each agent must have a different color
+${voicePrompt}
 
 Return a JSON object with this exact structure:
 {
@@ -104,7 +137,7 @@ Return a JSON object with this exact structure:
       "persona": "string (2-3 sentences)",
       "avatar": "string (from available list)",
       "color": "string (hex color from palette)",
-      "priority": number (10 for teacher, 7 for assistant, 4-6 for student)
+      "priority": number (10 for teacher, 7 for assistant, 4-6 for student)${voiceJsonField}
     }
   ]
 }`;
@@ -130,6 +163,7 @@ Return a JSON object with this exact structure:
         avatar: string;
         color: string;
         priority: number;
+        voice?: string;
       }>;
     };
 
@@ -161,16 +195,28 @@ Return a JSON object with this exact structure:
     }
 
     // ── Build output with IDs ──
-    const agents = parsed.agents.map((agent, index) => ({
-      id: `gen-${nanoid(8)}`,
-      name: agent.name,
-      role: agent.role,
-      persona: agent.persona,
-      avatar: agent.avatar || availableAvatars[index % availableAvatars.length],
-      color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length],
-      priority:
-        agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 7 : 5),
-    }));
+    const agents = parsed.agents.map((agent, index) => {
+      // Parse the model's "providerId::voiceId" string. Split on the first "::" only (a voiceId
+      // may itself contain "::") and skip non-string values instead of throwing on them.
+      let voiceConfig: { providerId: string; voiceId: string } | undefined;
+      const sep = typeof agent.voice === 'string' ? agent.voice.indexOf('::') : -1;
+      if (agent.voice && sep > 0) {
+        const voiceId = agent.voice.slice(sep + 2);
+        if (voiceId) voiceConfig = { providerId: agent.voice.slice(0, sep), voiceId };
+      }
+
+      return {
+        id: `gen-${nanoid(8)}`,
+        name: agent.name,
+        role: agent.role,
+        persona: agent.persona,
+        avatar: agent.avatar || availableAvatars[index % availableAvatars.length],
+        color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length],
+        priority:
+          agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 7 : 5),
+        ...(voiceConfig ? { voiceConfig } : {}),
+      };
+    });
 
     log.info(`Successfully generated ${agents.length} agent profiles for "${stageInfo.name}"`);
 

diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx
@@ -11,6 +11,7 @@ import { cn } from '@/lib/utils';
 import { useStageStore } from '@/lib/store/stage';
 import { useSettingsStore } from '@/lib/store/settings';
 import { useAgentRegistry } from '@/lib/orchestration/registry/store';
+import { getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver';
 import { useI18n } from '@/lib/hooks/use-i18n';
 import {
   loadImageMapping,
@@ -379,28 +380,77 @@ function GenerationPreviewContent() {
 
         try {
           const allAvatars = [
-            '/avatars/assist.png',
-            '/avatars/assist-2.png',
-            '/avatars/clown.png',
-            '/avatars/clown-2.png',
-            '/avatars/curious.png',
-            '/avatars/curious-2.png',
-            '/avatars/note-taker.png',
-            '/avatars/note-taker-2.png',
-            '/avatars/teacher.png',
-            '/avatars/teacher-2.png',
-            '/avatars/thinker.png',
-            '/avatars/thinker-2.png',
+            {
+              path: '/avatars/teacher.png',
+              desc: 'Male teacher with glasses, holding a book, green background',
+            },
+            {
+              path: '/avatars/teacher-2.png',
+              desc: 'Female teacher with long dark hair, blue traditional outfit, gentle expression',
+            },
+            {
+              path: '/avatars/assist.png',
+              desc: 'Young female assistant with glasses, pink background, friendly smile',
+            },
+            {
+              path: '/avatars/assist-2.png',
+              desc: 'Young female in orange top and purple overalls, cheerful and approachable',
+            },
+            {
+              path: '/avatars/clown.png',
+              desc: 'Energetic girl with glasses pointing up, green shirt, lively and fun',
+            },
+            {
+              path: '/avatars/clown-2.png',
+              desc: 'Playful girl with curly hair doing rock gesture, blue shirt, humorous vibe',
+            },
+            {
+              path: '/avatars/curious.png',
+              desc: 'Surprised boy with glasses, hand on cheek, curious expression',
+            },
+            {
+              path: '/avatars/curious-2.png',
+              desc: 'Boy with backpack holding a book and question mark bubble, inquisitive',
+            },
+            {
+              path: '/avatars/note-taker.png',
+              desc: 'Studious boy with glasses, blue shirt, calm and organized',
+            },
+            {
+              path: '/avatars/note-taker-2.png',
+              desc: 'Active boy with yellow backpack waving, blue outfit, enthusiastic learner',
+            },
+            {
+              path: '/avatars/thinker.png',
+              desc: 'Thoughtful girl with hand on chin, purple background, contemplative',
+            },
+            {
+              path: '/avatars/thinker-2.png',
+              desc: 'Girl reading a book intently, long dark hair, intellectual and focused',
+            },
           ];
 
+          // Flatten each provider's voices into { providerId, voiceId, voiceName } entries.
+          const getAvailableVoicesForGeneration = () =>
+            getAvailableProvidersWithVoices(settings.ttsProvidersConfig).flatMap(
+              ({ providerId, voices }) =>
+                voices.map(({ id, name }) => ({
+                  providerId,
+                  voiceId: id,
+                  voiceName: name,
+                })),
+            );
+
           // No outlines yet — agent generation uses only stage name + description
           const agentResp = await fetch('/api/generate/agent-profiles', {
             method: 'POST',
             headers: getApiHeaders(),
             body: JSON.stringify({
               stageInfo: { name: stage.name, description: stage.description },
               language: currentSession.requirements.language || 'zh-CN',
-              availableAvatars: allAvatars,
+              availableAvatars: allAvatars.map((a) => a.path),
+              avatarDescriptions: allAvatars.map((a) => ({ path: a.path, desc: a.desc })),
+              availableVoices: getAvailableVoicesForGeneration(),
             }),
             signal,
           });