Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ npm publish
```bash
npm install # Install dependencies
npm run dev # Development with hot reload
npm test # Run tests
npm run build # Build for production
npm start # Start production server
```
Expand Down
10 changes: 8 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"start": "node dist/server.js",
"dev": "tsx watch src/server.ts",
"prepublishOnly": "npm run build",
"test": "echo \"No tests yet\" && exit 0"
"test": "node --import tsx --test"
},
"files": [
"dist/**/*",
Expand Down
49 changes: 41 additions & 8 deletions src/providers/gemini.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
import { GoogleGenAI } from "@google/genai";
import { GenerateImageArgs, GeneratedImage } from "../types.js";
import { GenerateImageArgs, GeneratedImage, Provider } from "../types.js";
import { saveBase64Image } from "../utils/fs.js";
import { z } from "zod";

const DEFAULT_MODEL = "gemini-2.5-flash-image-preview";

export async function generateImageGemini(args: GenerateImageArgs): Promise<GeneratedImage> {
const apiKey = process.env.GOOGLE_API_KEY;
/**
 * Optional collaborators that `generateImageGemini` accepts in place of its
 * real dependencies. Every field may be omitted; an omitted field falls back
 * to the production default named on that field.
 */
type GeminiDeps = Partial<{
  /** Client constructor used instead of the imported `GoogleGenAI`. */
  GoogleGenAI: typeof GoogleGenAI;
  /** Image writer used instead of the imported `saveBase64Image`. */
  saveBase64Image: typeof saveBase64Image;
  /** Clock that supplies the numeric filename prefix instead of `Date.now`. */
  now: () => number;
  /** Environment consulted instead of `process.env`. */
  env: NodeJS.ProcessEnv;
}>;

async function generateImageGemini(
args: GenerateImageArgs,
deps: GeminiDeps = {}
): Promise<GeneratedImage> {
const env = deps.env ?? process.env;
const apiKey = env.GOOGLE_API_KEY;
if (!apiKey) throw new Error("Missing GOOGLE_API_KEY environment variable");

const model = args.model || process.env.GOOGLE_IMAGE_MODEL || DEFAULT_MODEL;
const ai = new GoogleGenAI({ apiKey });
const model = args.model || env.GOOGLE_IMAGE_MODEL || DEFAULT_MODEL;
const GoogleGenAIClient = deps.GoogleGenAI ?? GoogleGenAI;
const ai = new GoogleGenAIClient({ apiKey });

// The new SDK accepts contents as a string for simple prompts
const response = await ai.models.generateContent({
Expand Down Expand Up @@ -47,14 +60,16 @@ export async function generateImageGemini(args: GenerateImageArgs): Promise<Gene
}

const ext = mimeType.includes("png") ? "png" : mimeType.includes("jpeg") || mimeType.includes("jpg") ? "jpg" : mimeType.includes("webp") ? "webp" : "png";
const outputDir = process.env.OUTPUT_DIR || "outputs";
const outputDir = env.OUTPUT_DIR || "outputs";
const name = (args.filenameHint || "gemini-image")
.toLowerCase()
.replace(/[^a-z0-9-_]+/g, "-")
.replace(/-+/g, "-")
.replace(/^-|-$/g, "");
const filename = `${Date.now()}-${name || "image"}.${ext}`;
const path = saveBase64Image(base64, outputDir, filename);
const now = deps.now ?? Date.now;
const filename = `${now()}-${name || "image"}.${ext}`;
const save = deps.saveBase64Image ?? saveBase64Image;
const path = save(base64, outputDir, filename);

return {
path,
Expand All @@ -65,3 +80,21 @@ export async function generateImageGemini(args: GenerateImageArgs): Promise<Gene
};
}

/**
 * Zod field validators describing the arguments accepted by the Gemini tool.
 * Only `prompt` is mandatory.
 */
const geminiInputSchema = {
  prompt: z.string(),
  model: z.string().optional(),
  returnBase64: z.boolean().optional(),
  filenameHint: z.string().optional(),
};

/**
 * Provider descriptor for Google Gemini: pairs the tool metadata
 * (name, description, input schema) with the function that performs the
 * generation.
 */
export const geminiProvider: Provider = {
  id: "gemini",
  displayName: "Google Gemini",
  tool: {
    name: "image.generate.gemini",
    description:
      `Generate an image using Google Gemini via @google/genai (default ${DEFAULT_MODEL}). Requires GOOGLE_API_KEY.`,
    inputSchema: geminiInputSchema,
  },
  generateImage: generateImageGemini,
  // NOTE(review): presumably the provider label reported back in tool
  // responses — confirm against the consumer of `Provider`.
  responseProviderLabel: "google(gemini)",
};

61 changes: 51 additions & 10 deletions src/providers/google.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import { GenerateImageArgs, GeneratedImage } from "../types.js";
import { GenerateImageArgs, GeneratedImage, Provider } from "../types.js";
import { saveBase64Image } from "../utils/fs.js";
import { z } from "zod";

/**
 * Optional collaborators that `generateImageGoogle` accepts in place of its
 * real dependencies. Every field may be omitted; an omitted field falls back
 * to the production default named on that field.
 */
type GoogleDeps = Partial<{
  /** HTTP function used for the endpoint request instead of the global `fetch`. */
  fetch: typeof fetch;
  /** Image writer used instead of the imported `saveBase64Image`. */
  saveBase64Image: typeof saveBase64Image;
  /** Clock that supplies the numeric filename prefix instead of `Date.now`. */
  now: () => number;
  /** Environment consulted instead of `process.env`. */
  env: NodeJS.ProcessEnv;
}>;

/**
* Placeholder Google image generation provider.
Expand All @@ -17,9 +25,13 @@ import { saveBase64Image } from "../utils/fs.js";
* }
* and returns JSON with a base64-encoded image under `image.base64` and optional mimeType.
*/
export async function generateImageGoogle(args: GenerateImageArgs): Promise<GeneratedImage> {
const apiKey = process.env.GOOGLE_API_KEY;
const endpoint = process.env.GOOGLE_IMAGEN_ENDPOINT;
async function generateImageGoogle(
args: GenerateImageArgs,
deps: GoogleDeps = {}
): Promise<GeneratedImage> {
const env = deps.env ?? process.env;
const apiKey = env.GOOGLE_API_KEY;
const endpoint = env.GOOGLE_IMAGEN_ENDPOINT;
if (!apiKey) throw new Error("Missing GOOGLE_API_KEY environment variable");
if (!endpoint)
throw new Error(
Expand All @@ -39,8 +51,8 @@ export async function generateImageGoogle(args: GenerateImageArgs): Promise<Gene
const size = args.size
? args.size
: args.width && args.height
? `${args.width}x${args.height}`
: undefined;
? `${args.width}x${args.height}`
: undefined;

const payload = {
prompt: args.prompt,
Expand All @@ -55,7 +67,8 @@ export async function generateImageGoogle(args: GenerateImageArgs): Promise<Gene
model: args.model,
};

const res = await fetch(endpoint, {
const fetchFn = deps.fetch ?? fetch;
const res = await fetchFn(endpoint, {
method: "POST",
headers,
body: JSON.stringify(payload),
Expand All @@ -74,14 +87,16 @@ export async function generateImageGoogle(args: GenerateImageArgs): Promise<Gene
}

const ext = mimeType.includes("png") ? "png" : mimeType.includes("jpeg") ? "jpg" : mimeType.includes("webp") ? "webp" : "png";
const outputDir = process.env.OUTPUT_DIR || "outputs";
const outputDir = env.OUTPUT_DIR || "outputs";
const name = (args.filenameHint || "google-image")
.toLowerCase()
.replace(/[^a-z0-9-_]+/g, "-")
.replace(/-+/g, "-")
.replace(/^-|-$/g, "");
const filename = `${Date.now()}-${name || "image"}.${ext}`;
const path = saveBase64Image(base64, outputDir, filename);
const now = deps.now ?? Date.now;
const filename = `${now()}-${name || "image"}.${ext}`;
const save = deps.saveBase64Image ?? saveBase64Image;
const path = save(base64, outputDir, filename);

return {
path,
Expand All @@ -92,3 +107,29 @@ export async function generateImageGoogle(args: GenerateImageArgs): Promise<Gene
};
}

/**
 * Zod field validators describing the arguments accepted by the Google tool.
 * Only `prompt` is mandatory; `width` and `height` must be positive integers
 * when given.
 */
const googleInputSchema = {
  prompt: z.string(),
  negativePrompt: z.string().optional(),
  width: z.number().int().positive().optional(),
  height: z.number().int().positive().optional(),
  size: z.string().optional(),
  format: z.enum(["png", "jpeg", "jpg", "webp"]).optional(),
  seed: z.number().int().optional(),
  quality: z.string().optional(),
  style: z.string().optional(),
  background: z.enum(["transparent", "solid"]).optional(),
  model: z.string().optional(),
  returnBase64: z.boolean().optional(),
  filenameHint: z.string().optional(),
};

/**
 * Provider descriptor for the Google image endpoint: pairs the tool metadata
 * (name, description, input schema) with the function that performs the
 * generation.
 */
export const googleProvider: Provider = {
  id: "google",
  displayName: "Google",
  tool: {
    name: "image.generate.google",
    description:
      "Generate an image using Google (e.g., Imagen 3). Requires GOOGLE_API_KEY and GOOGLE_IMAGEN_ENDPOINT. Returns a saved file path and optional base64.",
    inputSchema: googleInputSchema,
  },
  generateImage: generateImageGoogle,
};

65 changes: 53 additions & 12 deletions src/providers/openai.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,47 @@
import OpenAI from "openai";
import { GenerateImageArgs, GeneratedImage } from "../types.js";
import { GenerateImageArgs, GeneratedImage, Provider } from "../types.js";
import { saveBase64Image } from "../utils/fs.js";
import { z } from "zod";

const DEFAULT_MODEL = "gpt-image-1"; // supports images.generate

export async function generateImageOpenAI(args: GenerateImageArgs): Promise<GeneratedImage> {
const apiKey = process.env.OPENAI_API_KEY;
/**
 * Optional collaborators that `generateImageOpenAI` accepts in place of its
 * real dependencies. Every field may be omitted; an omitted field falls back
 * to the production default named on that field.
 */
type OpenAIDeps = Partial<{
  /** Client constructor used instead of the imported `OpenAI`. */
  OpenAI: typeof OpenAI;
  /** Image writer used instead of the imported `saveBase64Image`. */
  saveBase64Image: typeof saveBase64Image;
  /** Clock that supplies the numeric filename prefix instead of `Date.now`. */
  now: () => number;
  /** Environment consulted instead of `process.env`. */
  env: NodeJS.ProcessEnv;
}>;

async function generateImageOpenAI(
args: GenerateImageArgs,
deps: OpenAIDeps = {}
): Promise<GeneratedImage> {
const env = deps.env ?? process.env;
const apiKey = env.OPENAI_API_KEY;
if (!apiKey) {
throw new Error("Missing OPENAI_API_KEY environment variable");
}

const client = new OpenAI({ apiKey });
const OpenAIClient = deps.OpenAI ?? OpenAI;
const client = new OpenAIClient({ apiKey });

const format = (args.format ?? "png").toLowerCase();
const ext = format === "jpeg" ? "jpg" : (format as string);
const mimeType =
format === "png"
? "image/png"
: format === "jpg" || format === "jpeg"
? "image/jpeg"
: "image/webp";
? "image/jpeg"
: "image/webp";

// size priority: explicit size string > width/height > default
const size = args.size
? args.size
: args.width && args.height
? `${args.width}x${args.height}`
: "1024x1024";
? `${args.width}x${args.height}`
: "1024x1024";

const model = args.model || process.env.OPENAI_IMAGE_MODEL || DEFAULT_MODEL;
const model = args.model || env.OPENAI_IMAGE_MODEL || DEFAULT_MODEL;

// Build request parameters based on model capabilities
const requestParams: any = {
Expand Down Expand Up @@ -66,14 +79,16 @@ export async function generateImageOpenAI(args: GenerateImageArgs): Promise<Gene
}

const base64 = data.b64_json as string;
const outputDir = process.env.OUTPUT_DIR || "outputs";
const outputDir = env.OUTPUT_DIR || "outputs";
const name = (args.filenameHint || "openai-image")
.toLowerCase()
.replace(/[^a-z0-9-_]+/g, "-")
.replace(/-+/g, "-")
.replace(/^-|-$/g, "");
const filename = `${Date.now()}-${name || "image"}.${ext}`;
const path = saveBase64Image(base64, outputDir, filename);
const now = deps.now ?? Date.now;
const filename = `${now()}-${name || "image"}.${ext}`;
const save = deps.saveBase64Image ?? saveBase64Image;
const path = save(base64, outputDir, filename);

return {
path,
Expand All @@ -84,3 +99,29 @@ export async function generateImageOpenAI(args: GenerateImageArgs): Promise<Gene
};
}

/**
 * Zod field validators describing the arguments accepted by the OpenAI tool.
 * Only `prompt` is mandatory; `width` and `height` must be positive integers
 * when given, and `quality` is restricted to "standard" or "hd".
 */
const openaiInputSchema = {
  prompt: z.string(),
  negativePrompt: z.string().optional(),
  width: z.number().int().positive().optional(),
  height: z.number().int().positive().optional(),
  size: z.string().optional(),
  format: z.enum(["png", "jpeg", "jpg", "webp"]).optional(),
  seed: z.number().int().optional(),
  quality: z.enum(["standard", "hd"]).optional(),
  style: z.string().optional(),
  background: z.enum(["transparent", "solid"]).optional(),
  model: z.string().optional(),
  returnBase64: z.boolean().optional(),
  filenameHint: z.string().optional(),
};

/**
 * Provider descriptor for OpenAI: pairs the tool metadata (name, description,
 * input schema) with the function that performs the generation.
 */
export const openaiProvider: Provider = {
  id: "openai",
  displayName: "OpenAI",
  tool: {
    name: "image.generate.openai",
    description:
      `Generate an image using OpenAI (default model ${DEFAULT_MODEL}). Returns a saved file path and optional base64.`,
    inputSchema: openaiInputSchema,
  },
  generateImage: generateImageOpenAI,
};

Loading