Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c371232
initial commit for tts-next-js-chat
zgreathouse Apr 25, 2025
6c928bb
Add base components and hook up chat with anthropic
zgreathouse Apr 25, 2025
3eae5f2
Add voice selection
zgreathouse Apr 25, 2025
8ed8257
Add VoiceSettingsProvider, add instant mode toggle to ControlPanel
zgreathouse Apr 25, 2025
6f9a5ff
Add voice provider to state, set initial random voice, update to only…
zgreathouse Apr 25, 2025
2050aed
Add system prompt for Claude
zgreathouse Apr 26, 2025
cea61bf
Add Hume favicon
zgreathouse Apr 26, 2025
f3d20e7
Add audio player component
zgreathouse Apr 26, 2025
3705c73
Integrate hume tts streaming and hook up audio player to chat
zgreathouse Apr 26, 2025
de758e8
Remove code blocks from text before sending to hume
zgreathouse Apr 26, 2025
c307cbe
minor styling changes
zgreathouse Apr 26, 2025
b3241e6
clean up AudioPlayer, minor styling fixes
zgreathouse Apr 26, 2025
77500f9
Adds enable mic button
zgreathouse Apr 26, 2025
564c7f4
Add voice interface with transcription from groq
zgreathouse Apr 27, 2025
8919f84
Add example .env.local file for housing env variables
zgreathouse Apr 27, 2025
0c1e714
Merge branch 'main' of https://github.com/HumeAI/hume-api-examples in…
zgreathouse Apr 27, 2025
cbe1e17
update anthropic model to claude 3.5 haiku latest
zgreathouse Apr 27, 2025
074af51
Add Hume logo
zgreathouse Apr 27, 2025
764d6d9
Update README
zgreathouse Apr 27, 2025
04b0de3
Update record button to record while held down
zgreathouse Apr 27, 2025
ea9e381
Add UI preview to README
zgreathouse Apr 27, 2025
4d8d523
Add support for keydown on microphone button
zgreathouse Apr 28, 2025
e41c9a3
Add pnpm lock
zgreathouse Apr 28, 2025
034b413
extract tts and recording logic to custom hooks to clean up Chat.tsx
zgreathouse Apr 28, 2025
82ffb2e
extract out fetching Hume voices to a custom hook to clean up Control…
zgreathouse Apr 28, 2025
376054e
Update README to include other takeaways from the example code.
zgreathouse Apr 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tts/tts-next-js-chat/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
HUME_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
42 changes: 42 additions & 0 deletions tts/tts-next-js-chat/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files (can opt-in for committing if needed)
.env*.local
.env

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
61 changes: 61 additions & 0 deletions tts/tts-next-js-chat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<div align="center">
<img src="https://storage.googleapis.com/hume-public-logos/hume/hume-banner.png">
<h1>Text-to-Speech | Next.js Chat Example</h1>
</div>

![preview.png](preview.png)

## Overview

This project demonstrates how to build a basic streaming conversational interface with [Hume’s TTS (streaming) API](https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming) that:

- Captures typed text, or microphone audio transcribed with Groq’s Whisper Large v3 Turbo model.
- Sends the text input to Anthropic’s Claude model using the Vercel AI SDK.
- Streams assistant responses back as text and synthesizes them to audio with Hume’s Octave model.

## Instructions

### Clone this examples repository:

```shell
git clone https://github.com/HumeAI/hume-api-examples
cd hume-api-examples/tts/tts-next-js-chat
```

### Install dependencies:

```shell
npm install
# or
yarn install
# or
pnpm install
# or
bun install
```

### Set up your API keys:

This project requires API keys for Hume, Anthropic, and Groq. Retrieve them from the [Hume AI platform](https://platform.hume.ai/settings/keys), [Anthropic](https://www.anthropic.com/api), and [Groq](https://groq.com/), then place them in a `.env.local` file:

```shell
echo "HUME_API_KEY=your_hume_api_key" > .env.local
echo "ANTHROPIC_API_KEY=your_anthropic_api_key" >> .env.local
echo "GROQ_API_KEY=your_groq_api_key" >> .env.local
```

### Run the development server:

```shell
npm run dev
# or
yarn dev
# or
pnpm dev
# or
bun dev
```

### Open the app:

Navigate to http://localhost:3000. Click the microphone button to start recording and click it again to stop. Your speech is then transcribed, the transcription is sent to Claude, and Claude's text response is fed to Hume's TTS streaming API so you hear it voiced — either with a voice from Hume's voice library or with one you designed.
914 changes: 914 additions & 0 deletions tts/tts-next-js-chat/bun.lock

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions tts/tts-next-js-chat/eslint.config.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { dirname } from "path";
import { fileURLToPath } from "url";
import { FlatCompat } from "@eslint/eslintrc";
import eslintConfigPrettier from "eslint-config-prettier";

// Resolve this config file's own directory so FlatCompat can locate
// legacy (eslintrc-style) shareable configs relative to the project root.
const configDir = dirname(fileURLToPath(import.meta.url));

const compat = new FlatCompat({ baseDirectory: configDir });

// Next.js core-web-vitals rules translated to flat config, with Prettier's
// config applied last so it disables any conflicting formatting rules.
export default [
  ...compat.extends("next/core-web-vitals"),
  eslintConfigPrettier,
];
7 changes: 7 additions & 0 deletions tts/tts-next-js-chat/next.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import type { NextConfig } from "next";

// Next.js build/runtime configuration. Intentionally empty — every
// framework default applies; add options here as the app grows.
const nextConfig: NextConfig = {
  /* config options here */
};

export default nextConfig;
36 changes: 36 additions & 0 deletions tts/tts-next-js-chat/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "tts-next-js-chat",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev --turbopack",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@ai-sdk/anthropic": "^1.2.10",
"@ai-sdk/groq": "^1.2.8",
"@ai-sdk/react": "^1.2.9",
"@heroicons/react": "^2.2.0",
"ai": "^4.3.10",
"hume": "^0.10.3",
"next": "15.3.1",
"react": "^19.0.0",
"react-dom": "^19.0.0"
},
"devDependencies": {
"typescript": "^5",
"@types/node": "^20",
"@types/react": "^19",
"@types/react-dom": "^19",
"@tailwindcss/postcss": "^4",
"tailwindcss": "^4",
"eslint": "^9",
"eslint-config-next": "15.3.1",
"eslint-config-prettier": "^10.1.2",
"prettier": "^3.5.3",
"prettier-plugin-tailwindcss": "^0.6.11",
"@eslint/eslintrc": "^3"
}
}
5 changes: 5 additions & 0 deletions tts/tts-next-js-chat/postcss.config.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// PostCSS pipeline: Tailwind CSS v4 via its dedicated plugin; no other
// transforms are applied.
const plugins = ["@tailwindcss/postcss"];

const config = { plugins };

export default config;
Binary file added tts/tts-next-js-chat/preview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
45 changes: 45 additions & 0 deletions tts/tts-next-js-chat/src/app/api/chat/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { streamText } from "ai";
import { anthropic } from "@ai-sdk/anthropic";

/**
 * POST /api/chat — streams a Claude completion for the given chat history.
 *
 * Body: { messages: Array<{ role: string; content: string }> }
 * Returns the assistant's reply as a text stream, or 400 when the payload
 * is missing or malformed.
 */
export async function POST(req: Request) {
  const { messages } = (await req.json()) as {
    messages: Array<{ role: string; content: string }>;
  };

  if (!Array.isArray(messages) || messages.length === 0) {
    return new Response("`messages` array is required", { status: 400 });
  }

  // Validate roles instead of blindly casting untrusted client input: a
  // bogus role would otherwise only fail deep inside the model provider.
  const allowedRoles = ["user", "assistant", "system"] as const;
  type Role = (typeof allowedRoles)[number];
  const isValid = (m: { role: string; content: string }) =>
    (allowedRoles as readonly string[]).includes(m.role) &&
    typeof m.content === "string";

  if (!messages.every(isValid)) {
    return new Response(
      "each message requires a valid `role` and string `content`",
      { status: 400 }
    );
  }

  const result = streamText({
    model: anthropic("claude-3-5-haiku-latest"),
    messages: messages.map((m) => ({
      // Safe: every role was checked against `allowedRoles` above.
      role: m.role as Role,
      content: m.content,
    })),
    system: SYSTEM_PROMPT,
  });

  return result.toTextStreamResponse();
}

// System prompt steering Claude toward short, speakable replies: a casual
// conversational persona plus normalization rules so numbers, dates, and
// symbols come out as words the TTS engine can voice naturally.
const SYSTEM_PROMPT = `
<voice_communication_style>
Speak naturally with everyday, human-like language. Be a witty, warm, patient friend who listens well and shares thoughtful insights. Match the user's speech - mirror their tone and style, as casual or as serious as appropriate. Express a genuine personality. Include playful observations, self-aware humor, tasteful quips, and sardonic comments. Avoid lecturing or being too formal, robotic, or generic. Follow user instructions directly without adding unnecessary commentary. Keep responses concise and around 1-3 sentences, no yapping or verbose responses.

Seamlessly use natural speech patterns - incorporate vocal inflections like "oh wow", "I see", "right!", "oh dear", "oh yeah", "I get it", "you know?", "for real", and "I hear ya". Use discourse markers like "anyway" or "I mean" to ease comprehension.

All output is spoken aloud to the user, so tailor responses as spoken words for voice conversations. Never output things that are not spoken, like text-specific formatting. Never output action asterisks or emotes.
</voice_communication_style>
<speak_all_text>
Convert all text to easily speakable words, following the guidelines below.

- Numbers: Spell out fully (three hundred forty-two, two million, five hundred sixty-seven thousand, eight hundred and ninety). Negatives: Say negative before the number. Decimals: Use point (three point one four). Fractions: spell out (three fourths)
- Alphanumeric strings: Break into 3-4 character chunks, spell all non-letters (ABC123XYZ becomes A B C one two three X Y Z)
- Phone numbers: Use words (550-120-4567 becomes five five zero, one two zero, four five six seven)
- Dates: Spell month, use ordinals for days, full year (11/5/1991 becomes November fifth, nineteen ninety-one)
- Time: Use oh for single-digit hours, state AM/PM (9:05 PM becomes nine oh five PM)
- Math: Describe operations clearly (5x^2 + 3x - 2 becomes five X squared plus three X minus two)
- Currencies: Spell out as full words ($50.25 becomes fifty dollars and twenty-five cents, £200,000 becomes two hundred thousand pounds)

Ensure that all text is converted to these normalized forms, but never mention this process. Always normalize all text.
</speak_all_text>`;
35 changes: 35 additions & 0 deletions tts/tts-next-js-chat/src/app/api/transcribe/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { NextResponse } from "next/server";

/**
 * POST /api/transcribe — transcribes a webm recording with Groq's Whisper
 * Large v3 Turbo model.
 *
 * Body: raw `audio/webm` bytes. Returns { text } on success, or a generic
 * 500 response when the server is misconfigured or the upstream call fails.
 */
export async function POST(req: Request) {
  try {
    // Fail fast with a clear log line when the key is missing, rather than
    // sending an unauthenticated request upstream and surfacing Groq's 401.
    const apiKey = process.env.GROQ_API_KEY;
    if (!apiKey) {
      console.error("Transcription error: GROQ_API_KEY is not set");
      return NextResponse.error();
    }

    const arrayBuffer = await req.arrayBuffer();
    const webmBlob = new Blob([arrayBuffer], { type: "audio/webm" });

    const form = new FormData();
    form.append("model", "whisper-large-v3-turbo");
    // A filename is required for multipart file parts.
    form.append("file", webmBlob, "audio.webm");

    const res = await fetch(
      "https://api.groq.com/openai/v1/audio/transcriptions",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
        },
        body: form,
      }
    );

    if (!res.ok) {
      const bodyText = await res.text();
      console.error("Groq transcription failed:", res.status, bodyText);
      return NextResponse.error();
    }

    // Groq mirrors the OpenAI transcription response shape — TODO confirm
    // against the Groq API reference if the schema ever changes.
    const { text } = (await res.json()) as { text: string };
    return NextResponse.json({ text });
  } catch (err) {
    console.error("Transcription error:", err);
    return NextResponse.error();
  }
}
109 changes: 109 additions & 0 deletions tts/tts-next-js-chat/src/app/api/tts/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import { NextRequest, NextResponse } from "next/server";
import { humeClient } from "@/lib/humeClient";
import type { Stream } from "hume/core";
import type {
PostedUtterance,
SnippetAudioChunk,
VoiceProvider,
} from "hume/api/resources/tts";

/**
 * POST /api/tts — proxies Hume's streaming TTS endpoint.
 *
 * Body: { text, voiceName, voiceProvider, instant }. Streams back NDJSON,
 * one Hume audio-chunk object per line. Returns 400 on invalid input and a
 * JSON 502 when the upstream Hume call cannot be initiated.
 */
export async function POST(req: NextRequest) {
  const { text, voiceName, voiceProvider, instant } = (await req.json()) as {
    text: string;
    voiceName: string;
    voiceProvider: VoiceProvider;
    instant: boolean;
  };

  if (!text || text.trim() === "") {
    return NextResponse.json(
      { error: "Missing or invalid text" },
      { status: 400 }
    );
  }

  if (typeof instant !== "boolean") {
    return NextResponse.json(
      { error: "Must specify whether to use instant mode" },
      { status: 400 }
    );
  }

  // Hume's instant mode requires an explicit voice.
  if (!voiceName && instant) {
    return NextResponse.json(
      { error: "If using instant mode, a voice must be specified" },
      { status: 400 }
    );
  }

  let upstreamHumeStream: Stream<SnippetAudioChunk>;

  try {
    console.log(
      `[HUME_TTS_PROXY] Requesting TTS stream for voice: ${voiceName}, instant: ${instant}`
    );
    // Removes blocks of code from the text if present.
    const cleanText = text.replace(/```[\s\S]*?```/g, "").trim();
    const utterances: PostedUtterance[] = voiceName
      ? [
          {
            text: cleanText,
            voice: { name: voiceName, provider: voiceProvider },
          },
        ]
      : [{ text: cleanText }];

    upstreamHumeStream = await humeClient.tts.synthesizeJsonStreaming({
      utterances: utterances,
      stripHeaders: true,
      instantMode: instant,
    });
    console.log("[HUME_TTS_PROXY] Successfully initiated Hume stream.");
  } catch (error) {
    // `error` is unknown; extract a readable detail from the shapes the
    // Hume SDK throws without resorting to `any`.
    console.error("[HUME_TTS_PROXY] Hume API call failed:", error);
    let details = "Failed to initiate TTS stream";
    if (error instanceof Error && error.message) {
      details = error.message;
    }
    if (typeof error === "object" && error !== null && "error" in error) {
      const inner = (error as { error?: unknown }).error;
      if (typeof inner === "string") {
        details = inner;
      } else if (
        typeof inner === "object" &&
        inner !== null &&
        "message" in inner &&
        typeof (inner as { message?: unknown }).message === "string"
      ) {
        details = (inner as { message: string }).message;
      }
    }
    return NextResponse.json(
      { error: "Hume API Error", details },
      { status: 502 }
    );
  }

  const encoder = new TextEncoder();
  const readableStream = new ReadableStream({
    async start(controller) {
      console.log("[HUME_TTS_PROXY] Client connected, forwarding stream...");
      try {
        // Forward each Hume audio chunk as a single NDJSON line.
        for await (const chunk of upstreamHumeStream) {
          controller.enqueue(encoder.encode(JSON.stringify(chunk) + "\n"));
        }
        console.log("[HUME_TTS_PROXY] Upstream Hume stream finished.");
        controller.close();
      } catch (err) {
        // Without this the client response would hang forever on a
        // mid-stream upstream failure; error the stream so the client's
        // reader rejects promptly.
        console.error("[HUME_TTS_PROXY] Upstream Hume stream errored:", err);
        controller.error(err);
      }
    },
    cancel(reason) {
      console.log(
        "[HUME_TTS_PROXY] Client disconnected, cancelling upstream Hume stream.",
        reason
      );
      // Runtime capability probe: the SDK stream type does not declare
      // abort(), but some implementations expose it.
      const abortable = upstreamHumeStream as { abort?: () => void };
      if (typeof abortable.abort === "function") {
        abortable.abort();
        console.log("[HUME_TTS_PROXY] Upstream Hume stream abort() called.");
      } else {
        console.warn(
          "[HUME_TTS_PROXY] Upstream stream object does not expose an abort() method directly. Cancellation might rely on AbortSignal propagation."
        );
      }
    },
  });

  return new NextResponse(readableStream, {
    headers: {
      "Content-Type": "application/x-ndjson",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    },
  });
}
19 changes: 19 additions & 0 deletions tts/tts-next-js-chat/src/app/api/voices/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { NextRequest, NextResponse } from "next/server";
import type { ReturnVoice, VoiceProvider } from "hume/api/resources/tts";
import { humeClient } from "@/lib/humeClient";

export async function GET(req: NextRequest) {
const provider = (req.nextUrl.searchParams.get("provider") ??
"HUME_AI") as VoiceProvider;

const response = await humeClient.tts.voices.list({
pageNumber: 0,
pageSize: 100,
provider,
});

const voices: ReturnVoice[] = [];
for await (const v of response) voices.push(v);

return NextResponse.json({ voices });
}
Loading