From ac19e8e22189205d51ed38ec3313b2fb204b949e Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Mon, 11 Nov 2024 23:50:25 -0800 Subject: [PATCH 1/6] feat(client): endpoint type definitions --- .gitattributes | 1 + .../app/comfy/image-to-image/page.tsx | 4 +- .../app/comfy/image-to-video/page.tsx | 4 +- .../app/comfy/text-to-image/page.tsx | 4 +- apps/demo-nextjs-app-router/app/page.tsx | 16 +- .../app/whisper/page.tsx | 4 +- libs/client/package.json | 15 +- libs/client/src/client.ts | 36 +- libs/client/src/index.ts | 25 +- libs/client/src/queue.ts | 2 +- libs/client/src/request.ts | 2 +- libs/client/src/response.ts | 2 +- libs/client/src/streaming.ts | 21 +- libs/client/src/types/client.ts | 14 + libs/client/src/{types.ts => types/common.ts} | 0 libs/client/src/types/endpoints.ts | 12840 ++++++++++++++++ tsconfig.base.json | 1 + typedoc.json | 2 +- 18 files changed, 12930 insertions(+), 63 deletions(-) create mode 100644 libs/client/src/types/client.ts rename libs/client/src/{types.ts => types/common.ts} (100%) create mode 100644 libs/client/src/types/endpoints.ts diff --git a/.gitattributes b/.gitattributes index f4a319e..6ff7ce1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ package-lock.json linguist-generated docs/reference/** linguist-generated +libs/client/src/types/endpoints.ts linguist-generated diff --git a/apps/demo-nextjs-app-router/app/comfy/image-to-image/page.tsx b/apps/demo-nextjs-app-router/app/comfy/image-to-image/page.tsx index 4be2197..c5cbca2 100644 --- a/apps/demo-nextjs-app-router/app/comfy/image-to-image/page.tsx +++ b/apps/demo-nextjs-app-router/app/comfy/image-to-image/page.tsx @@ -1,7 +1,7 @@ /* eslint-disable @next/next/no-img-element */ "use client"; -import { createFalClient } from "@fal-ai/client"; +import { createFalClient, Result } from "@fal-ai/client"; import { useMemo, useState } from "react"; const fal = createFalClient({ @@ -80,7 +80,7 @@ export default function ComfyImageToImagePage() { setLoading(true); const start = Date.now(); try { - const { data } = await fal.subscribe( + const { data }: Result = await fal.subscribe( "comfy/fal-ai/image-to-image", { input: { diff --git a/apps/demo-nextjs-app-router/app/comfy/image-to-video/page.tsx b/apps/demo-nextjs-app-router/app/comfy/image-to-video/page.tsx index af75f01..33b8fbf 100644 --- a/apps/demo-nextjs-app-router/app/comfy/image-to-video/page.tsx +++ b/apps/demo-nextjs-app-router/app/comfy/image-to-video/page.tsx @@ -1,6 +1,6 @@ "use client"; -import { createFalClient } from "@fal-ai/client"; +import { createFalClient, Result } from "@fal-ai/client"; import { useMemo, useState } from "react"; const fal = createFalClient({ @@ -75,7 +75,7 @@ export default function ComfyImageToVideoPage() { setLoading(true); const start = Date.now(); try { - const { data } = await fal.subscribe( + const { data }: Result = await fal.subscribe( "comfy/fal-ai/image-to-video", { input: { diff --git a/apps/demo-nextjs-app-router/app/comfy/text-to-image/page.tsx b/apps/demo-nextjs-app-router/app/comfy/text-to-image/page.tsx index 0545844..20c3656 100644 --- a/apps/demo-nextjs-app-router/app/comfy/text-to-image/page.tsx +++ b/apps/demo-nextjs-app-router/app/comfy/text-to-image/page.tsx @@ -1,6 +1,6 @@ "use client"; -import { createFalClient } from "@fal-ai/client"; +import { createFalClient, Result } from "@fal-ai/client"; import { useMemo, useState } from "react"; const fal = createFalClient({ @@ -78,7 +78,7 @@ export default function ComfyTextToImagePage() { setLoading(true); const start = Date.now(); try 
{ - const { data } = await fal.subscribe( + const { data }: Result = await fal.subscribe( "comfy/fal-ai/text-to-image", { input: { diff --git a/apps/demo-nextjs-app-router/app/page.tsx b/apps/demo-nextjs-app-router/app/page.tsx index 9d841e3..56ca04e 100644 --- a/apps/demo-nextjs-app-router/app/page.tsx +++ b/apps/demo-nextjs-app-router/app/page.tsx @@ -1,6 +1,7 @@ "use client"; import { createFalClient } from "@fal-ai/client"; +import { IllusionDiffusionOutput } from "@fal-ai/client/endpoints"; import { useMemo, useState } from "react"; const fal = createFalClient({ @@ -9,16 +10,6 @@ const fal = createFalClient({ // proxyUrl: 'http://localhost:3333/api/fal/proxy', // or your own external proxy }); -type Image = { - url: string; - file_name: string; - file_size: number; -}; -type Output = { - image: Image; -}; -// @snippet:end - type ErrorProps = { error: any; }; @@ -48,7 +39,7 @@ export default function Home() { // Result state const [loading, setLoading] = useState(false); const [error, setError] = useState(null); - const [result, setResult] = useState(null); + const [result, setResult] = useState(null); const [logs, setLogs] = useState([]); const [elapsedTime, setElapsedTime] = useState(0); // @snippet:end @@ -71,12 +62,13 @@ export default function Home() { }; const generateImage = async () => { + if (!imageFile) return; reset(); // @snippet:start("client.queue.subscribe") setLoading(true); const start = Date.now(); try { - const result = await fal.subscribe("fal-ai/illusion-diffusion", { + const result = await fal.subscribe("fal-ai/illusion-diffusion", { input: { prompt, image_url: imageFile, diff --git a/apps/demo-nextjs-app-router/app/whisper/page.tsx b/apps/demo-nextjs-app-router/app/whisper/page.tsx index 561daa8..5e3de00 100644 --- a/apps/demo-nextjs-app-router/app/whisper/page.tsx +++ b/apps/demo-nextjs-app-router/app/whisper/page.tsx @@ -108,10 +108,10 @@ export default function WhisperDemo() { setLoading(true); const start = Date.now(); try { - const result = await fal.subscribe("fal-ai/whisper", { + const result = await fal.subscribe("fal-ai/wizper", { input: { - file_name: "recording.wav", audio_url: audioFile, + version: "3", }, logs: true, onQueueUpdate(update) { diff --git a/libs/client/package.json b/libs/client/package.json index 8961573..b4c7b65 100644 --- a/libs/client/package.json +++ b/libs/client/package.json @@ -1,7 +1,7 @@ { "name": "@fal-ai/client", "description": "The fal.ai client for JavaScript and TypeScript", - "version": "1.0.4", + "version": "1.1.0-alpha.0", "license": "MIT", "repository": { "type": "git", @@ -15,6 +15,19 @@ "ml", "typescript" ], + "exports": { + ".": "./src/index.js", + "./endpoints": "./src/types/endpoints.js" + }, + "typesVersions": { + "*": { + "endpoints": [ + "src/types/endpoints.d.ts" + ] + } + }, + "main": "./src/index.js", + "types": "./src/index.d.ts", "dependencies": { "@msgpack/msgpack": "^3.0.0-beta2", "eventsource-parser": "^1.1.2", diff --git a/libs/client/src/client.ts b/libs/client/src/client.ts index 4cd241a..18bd329 100644 --- a/libs/client/src/client.ts +++ b/libs/client/src/client.ts @@ -5,7 +5,8 @@ import { buildUrl, dispatchRequest } from "./request"; import { resultResponseHandler } from "./response"; import { createStorageClient, StorageClient } from "./storage"; import { createStreamingClient, StreamingClient } from "./streaming"; -import { Result, RunOptions } from "./types"; +import { EndpointType, InputType, OutputType } from "./types/client"; +import { Result, RunOptions } from "./types/common"; /** * The 
main client type, it provides access to simple API model usage, @@ -44,10 +45,10 @@ export interface FalClient { * @param endpointId the registered function revision id or alias. * @returns the remote function output */ - run<Output = any, Input = Record<string, any>>( - endpointId: string, - options: RunOptions<Input>, - ): Promise<Result<Output>>; + run<Id extends EndpointType>( + endpointId: Id, + options: RunOptions<InputType<Id>>, + ): Promise<Result<OutputType<Id>>>; /** * Subscribes to updates for a specific request in the queue. @@ -56,10 +57,10 @@ export interface FalClient { * @param options - Options to configure how the request is run and how updates are received. * @returns A promise that resolves to the result of the request once it's completed. */ - subscribe<Output = any, Input = Record<string, any>>( - endpointId: string, - options: RunOptions<Input> & QueueSubscribeOptions, - ): Promise<Result<Output>>; + subscribe<Id extends EndpointType>( + endpointId: Id, + options: RunOptions<InputType<Id>> & QueueSubscribeOptions, + ): Promise<Result<OutputType<Id>>>; /** * Calls a fal app that supports streaming and provides a streaming-capable @@ -90,27 +91,24 @@ export function createFalClient(userConfig: Config = {}): FalClient { storage, streaming, stream: streaming.stream, - async run<Output = any, Input = Record<string, any>>( - endpointId: string, - options: RunOptions<Input> = {}, - ): Promise<Result<Output>> { + async run<Id extends EndpointType>( + endpointId: Id, + options: RunOptions<InputType<Id>> = {}, + ): Promise<Result<OutputType<Id>>> { const input = options.input ? await storage.transformInput(options.input) : undefined; - return dispatchRequest<Input, Result<Output>>({ + return dispatchRequest<InputType<Id>, Result<OutputType<Id>>>({ method: options.method, targetUrl: buildUrl(endpointId, options), - input: input as Input, + input: input as InputType<Id>, config: { ...config, responseHandler: resultResponseHandler, }, }); }, - async subscribe<Output = any, Input = Record<string, any>>( - endpointId: string, - options: RunOptions<Input> & QueueSubscribeOptions = {}, - ): Promise<Result<Output>> { + subscribe: async (endpointId, options) => { const { request_id: requestId } = await queue.submit(endpointId, options); if (options.onEnqueue) { options.onEnqueue(requestId); diff --git a/libs/client/src/index.ts b/libs/client/src/index.ts index 18554fb..b3b8ccc 100644 --- a/libs/client/src/index.ts +++ b/libs/client/src/index.ts @@ -1,7 +1,8 @@ import { createFalClient, type FalClient } from "./client"; import { Config } from "./config"; import { StreamOptions } from "./streaming"; -import { RunOptions } from "./types"; +import { EndpointType, InputType } from "./types/client"; +import { RunOptions } from "./types/common"; export { createFalClient, type FalClient } from "./client"; export { withMiddleware, withProxy } from "./middleware"; @@ -12,12 +13,12 @@ export { ApiError, ValidationError } from "./response"; export type { ResponseHandler } from "./response"; export type { StorageClient } from "./storage"; export type { FalStream, StreamingClient } from "./streaming"; -export * from "./types"; +export * from "./types/common"; export type { QueueStatus, ValidationErrorInfo, WebHookResponse, -} from "./types"; +} from "./types/common"; export { parseEndpointId } from "./utils"; type SingletonFalClient = { @@ -46,14 +47,20 @@ export const fal: SingletonFalClient = (function createSingletonFalClient() { get streaming() { return currentInstance.streaming; }, - run(id: string, options: RunOptions) { - return currentInstance.run(id, options); + run<Id extends EndpointType>(id: Id, options: RunOptions<InputType<Id>>) { + return currentInstance.run(id, options); }, - subscribe(endpointId: string, options: RunOptions) { - return currentInstance.subscribe(endpointId, options); + subscribe<Id extends EndpointType>( + endpointId: Id, + options: RunOptions<InputType<Id>>, + ) { + return currentInstance.subscribe(endpointId, options); }, - stream(endpointId: string, options: StreamOptions) { - return currentInstance.stream(endpointId,
options); + stream<Id extends EndpointType>( + endpointId: Id, + options: StreamOptions<InputType<Id>>, + ) { + return currentInstance.stream(endpointId, options); + }, } satisfies SingletonFalClient; })(); diff --git a/libs/client/src/queue.ts b/libs/client/src/queue.ts index 165230e..922f2ed 100644 --- a/libs/client/src/queue.ts +++ b/libs/client/src/queue.ts @@ -10,7 +10,7 @@ import { RequestLog, Result, RunOptions, -} from "./types"; +} from "./types/common"; import { parseEndpointId } from "./utils"; export type QueuePriority = "low" | "normal"; diff --git a/libs/client/src/request.ts b/libs/client/src/request.ts index 9b55e50..fdbf943 100644 --- a/libs/client/src/request.ts +++ b/libs/client/src/request.ts @@ -1,7 +1,7 @@ import { RequiredConfig } from "./config"; import { ResponseHandler } from "./response"; import { getUserAgent, isBrowser } from "./runtime"; -import { RunOptions, UrlOptions } from "./types"; +import { RunOptions, UrlOptions } from "./types/common"; import { ensureEndpointIdFormat, isValidUrl } from "./utils"; const isCloudflareWorkers = diff --git a/libs/client/src/response.ts b/libs/client/src/response.ts index e310450..1145fb3 100644 --- a/libs/client/src/response.ts +++ b/libs/client/src/response.ts @@ -1,5 +1,5 @@ import { RequiredConfig } from "./config"; -import { Result, ValidationErrorInfo } from "./types"; +import { Result, ValidationErrorInfo } from "./types/common"; export type ResponseHandler<Output> = (response: Response) => Promise<Output>; diff --git a/libs/client/src/streaming.ts b/libs/client/src/streaming.ts index e4808bc..bf32a95 100644 --- a/libs/client/src/streaming.ts +++ b/libs/client/src/streaming.ts @@ -4,6 +4,7 @@ import { RequiredConfig } from "./config"; import { buildUrl, dispatchRequest } from "./request"; import { ApiError, defaultResponseHandler } from "./response"; import { type StorageClient } from "./storage"; +import { EndpointType, InputType, OutputType } from "./types/client"; export type StreamingConnectionMode = "client" | "server"; @@ -117,7 +118,7 @@ export class FalStream<Input, Output> { ); } this.signal.addEventListener("abort", () => { - resolve(this.currentData); + resolve(this.currentData ?? ({} as Output)); }); this.on("done", (data) => { this.streamClosed = true; @@ -365,10 +366,10 @@ export interface StreamingClient { * @param options the request options, including the input payload. * @returns the `FalStream` instance. */ - stream<Output = any, Input = Record<string, any>>( - endpointId: string, - options: StreamOptions<Input>, - ): Promise<FalStream<Input, Output>>; + stream<Id extends EndpointType>( + endpointId: Id, + options: StreamOptions<InputType<Id>>, + ): Promise<FalStream<InputType<Id>, OutputType<Id>>>; } type StreamingClientDependencies = { config: RequiredConfig; storage: StorageClient; }; export function createStreamingClient({ config, storage, }: StreamingClientDependencies): StreamingClient { return { - async stream<Output = any, Input = Record<string, any>>( - endpointId: string, - options: StreamOptions<Input>, + async stream<Id extends EndpointType>( + endpointId: Id, + options: StreamOptions<InputType<Id>>, ) { const input = options.input ?
await storage.transformInput(options.input) : undefined; - return new FalStream<Input, Output>(endpointId, config, { + return new FalStream<InputType<Id>, OutputType<Id>>(endpointId, config, { ...options, - input: input as Input, + input: input as InputType<Id>, }); }, }; diff --git a/libs/client/src/types/client.ts b/libs/client/src/types/client.ts new file mode 100644 index 0000000..5a66cc4 --- /dev/null +++ b/libs/client/src/types/client.ts @@ -0,0 +1,14 @@ +import { EndpointTypeMap } from "./endpoints"; + +// eslint-disable-next-line @typescript-eslint/ban-types +export type EndpointType = keyof EndpointTypeMap | (string & {}); + +// Get input type based on endpoint ID +export type InputType<T extends EndpointType> = T extends keyof EndpointTypeMap + ? EndpointTypeMap[T]["input"] + : Record<string, any>; + +// Get output type based on endpoint ID +export type OutputType<T extends EndpointType> = T extends keyof EndpointTypeMap + ? EndpointTypeMap[T]["output"] + : any; diff --git a/libs/client/src/types.ts b/libs/client/src/types/common.ts similarity index 100% rename from libs/client/src/types.ts rename to libs/client/src/types/common.ts diff --git a/libs/client/src/types/endpoints.ts b/libs/client/src/types/endpoints.ts new file mode 100644 index 0000000..576dcfb --- /dev/null +++ b/libs/client/src/types/endpoints.ts @@ -0,0 +1,12840 @@ +export type ImageSize = { + /** + * The width of the generated image. Default value: `512` + */ + width?: number; + /** + * The height of the generated image. Default value: `512` + */ + height?: number; +}; +export type Image = { + /** + * The URL where the file can be downloaded from. + */ + url: string; + /** + * The mime type of the file. + */ + content_type?: string | null; + /** + * The name of the file. It will be auto-generated if not provided. + */ + file_name?: string | null; + /** + * The size of the file in bytes. + */ + file_size?: number | null; + /** + * The width of the image in pixels. + */ + width?: number | null; + /** + * The height of the image in pixels. + */ + height?: number | null; +}; +export type File = { + /** + * The URL where the file can be downloaded from. + */ + url: string; + /** + * The mime type of the file. + */ + content_type?: string; + /** + * The name of the file. It will be auto-generated if not provided. + */ + file_name?: string; + /** + * The size of the file in bytes. + */ + file_size?: number; + /** + * File data + */ + file_data?: string; +}; +export type RGBColor = { + /** + * Red color value + */ + r?: number; + /** + * Green color value + */ + g?: number; + /** + * Blue color value + */ + b?: number; +}; +export type LoraWeight = { + /** + * URL or the path to the LoRA weights. Or HF model name. + */ + path: string; + /** + * The scale of the LoRA weight. This is used to scale the LoRA weight + * before merging it with the base model. Default value: `1` + */ + scale?: number; + /** + * If set to true, the embedding will be forced to be used. + */ + force?: boolean; +}; +export type IPAdapter = { + /** + * URL of the image to be used as the IP adapter. + */ + ip_adapter_image_url: string | Blob | File | Array<string | Blob | File>; + /** + * The mask to use for the IP adapter. When using a mask, the ip-adapter image size and the mask size must be the same + */ + ip_adapter_mask_url?: string | Blob | File; + /** + * URL or the path to the IP adapter weights. + */ + path: string; + /** + * Subfolder in the model directory where the IP adapter weights are stored. + */ + model_subfolder?: string; + /** + * Name of the weight file. + */ + weight_name?: string; + /** + * URL or the path to the InsightFace model weights.
+ */ + insight_face_model_path?: string; + /** + * The scale of the IP adapter weight. This is used to scale the IP adapter weight + * before merging it with the base model. Default value: `1` + */ + scale?: number; + /** + * The scale of the IP adapter weight. This is used to scale the IP adapter weight + * before merging it with the base model. + */ + scale_json?: Record; + /** + * The factor to apply to the unconditional noising of the IP adapter. + */ + unconditional_noising_factor?: number; + /** + * The value to set the image projection shortcut to. For FaceID plus V1 models, + * this should be set to False. For FaceID plus V2 models, this should be set to True. + * Default is True. Default value: `true` + */ + image_projection_shortcut?: boolean; +}; +export type ControlNetUnion = { + /** + * URL or the path to the control net weights. + */ + path: string; + /** + * optional URL to the controlnet config.json file. + */ + config_url?: string | Blob | File; + /** + * The optional variant if a Hugging Face repo key is used. + */ + variant?: string; + /** + * The control images and modes to use for the control net. + */ + controls: Array; +}; +export type ControlNet = { + /** + * URL or the path to the control net weights. + */ + path: string; + /** + * optional URL to the controlnet config.json file. + */ + config_url?: string | Blob | File; + /** + * The optional variant if a Hugging Face repo key is used. + */ + variant?: string; + /** + * URL of the image to be used as the control net. + */ + image_url: string | Blob | File; + /** + * The mask to use for the controlnet. When using a mask, the control image size and the mask size must be the same and divisible by 32. + */ + mask_url?: string | Blob | File; + /** + * The scale of the control net weight. This is used to scale the control net weight + * before merging it with the base model. Default value: `1` + */ + conditioning_scale?: number; + /** + * The percentage of the image to start applying the controlnet in terms of the total timesteps. + */ + start_percentage?: number; + /** + * The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` + */ + end_percentage?: number; + /** + * The index of the IP adapter to be applied to the controlnet. This is only needed for InstantID ControlNets. + */ + ip_adapter_index?: number; +}; +export type ControlNetUnionInput = { + /** + * URL of the image to be used as the control image. + */ + control_image_url: string | Blob | File; + /** + * URL of the mask for the control image. + */ + mask_image_url?: string | Blob | File; + /** + * Control Mode for Flux Controlnet Union. Supported values are: + * - canny: Uses the edges for guided generation. + * - tile: Uses the tiles for guided generation. + * - depth: Utilizes a grayscale depth map for guided generation. + * - blur: Adds a blur to the image. + * - pose: Uses the pose of the image for guided generation. + * - gray: Converts the image to grayscale. + * - low-quality: Converts the image to a low-quality image. + */ + control_mode: + | "canny" + | "tile" + | "depth" + | "blur" + | "pose" + | "gray" + | "low-quality"; + /** + * The scale of the control net weight. This is used to scale the control net weight + * before merging it with the base model. Default value: `1` + */ + conditioning_scale?: number; + /** + * Threshold for mask. Default value: `0.5` + */ + mask_threshold?: number; + /** + * The percentage of the image to start applying the controlnet in terms of the total timesteps. 
+ */ + start_percentage?: number; + /** + * The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` + */ + end_percentage?: number; +}; +export type Embedding = { + /** + * URL or the path to the embedding weights. + */ + path: string; + /** + * The list of tokens to use for the embedding. Default value: `,` + */ + tokens?: Array; +}; +export type InputV2 = { + /** + * URL of the image to remove background from + */ + image_url: string | Blob | File; + /** + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository but trained with 2K images. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Matting' model is a model trained specifically for matting images. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases. + * + * The corresponding models are as follows: + * - 'General Use (Light)': BiRefNet-DIS_ep580.pth + * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth + * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` + */ + model?: + | "General Use (Light)" + | "General Use (Light 2K)" + | "General Use (Heavy)" + | "Matting" + | "Portrait"; + /** + * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` + */ + operating_resolution?: "1024x1024" | "2048x2048"; + /** + * The format of the output image Default value: `"png"` + */ + output_format?: "webp" | "png"; + /** + * Whether to output the mask used to remove the background + */ + output_mask?: boolean; + /** + * Whether to refine the foreground using the estimated mask Default value: `true` + */ + refine_foreground?: boolean; +}; +export type FalInputDevInfo = { + /** + * The key of the parameter that the input is associated with. + */ + key: Array; + /** + * The class type of the input node. 
+ */ + class_type: string; +}; +export type WhisperChunk = { + /** + * Start and end timestamp of the chunk + */ + timestamp: Array; + /** + * Transcription of the chunk + */ + text: string; +}; +export type DiarizationSegment = { + /** + * Start and end timestamp of the segment + */ + timestamp: Array; + /** + * Speaker ID of the segment + */ + speaker: string; +}; +export type Frame = { + /** + * URL of the frame + */ + url: string; +}; +export type ImagePrompt = { + /** + * Default value: `"ImagePrompt"` + */ + type?: "ImagePrompt" | "PyraCanny" | "CPDS" | "FaceSwap"; + /** + * + */ + image_url?: string | Blob | File; + /** + * Default value: `0.5` + */ + stop_at?: number; + /** + * Default value: `1` + */ + weight?: number; +}; +export type ReferenceFace = { + /** + * URL of the reference face image + */ + image_url: string | Blob | File; +}; +export type MoondreamInputParam = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; + /** + * Prompt to be used for the image Default value: `"Describe this image."` + */ + prompt?: string; +}; +export type PolygonOutputWithLabels = { + /** + * Results from the model + */ + results: PolygonOutput; + /** + * Processed image + */ + image?: Image; +}; +export type OCRBoundingBoxSingle = { + /** + * X-coordinate of the top-left corner + */ + x: number; + /** + * Y-coordinate of the top-left corner + */ + y: number; + /** + * Width of the bounding box + */ + w: number; + /** + * Height of the bounding box + */ + h: number; + /** + * Label of the bounding box + */ + label: string; +}; +export type BoundingBox = { + /** + * X-coordinate of the top-left corner + */ + x: number; + /** + * Y-coordinate of the top-left corner + */ + y: number; + /** + * Width of the bounding box + */ + w: number; + /** + * Height of the bounding box + */ + h: number; + /** + * Label of the bounding box + */ + label: string; +}; +export type PolygonOutput = { + /** + * List of polygons + */ + polygons: Array; +}; +export type OCRBoundingBoxOutputWithLabels = { + /** + * Results from the model + */ + results: OCRBoundingBox; + /** + * Processed image + */ + image?: Image; +}; +export type BoundingBoxes = { + /** + * List of bounding boxes + */ + bboxes: Array; +}; +export type OCRBoundingBox = { + /** + * List of quadrilateral boxes + */ + quad_boxes: Array; +}; +export type BoundingBoxOutputWithLabels = { + /** + * Results from the model + */ + results: BoundingBoxes; + /** + * Processed image + */ + image?: Image; +}; +export type Polygon = { + /** + * List of points + */ + points: Array>; + /** + * Label of the polygon + */ + label: string; +}; +export type Region = { + /** + * X-coordinate of the top-left corner + */ + x1: number; + /** + * Y-coordinate of the top-left corner + */ + y1: number; + /** + * X-coordinate of the bottom-right corner + */ + x2: number; + /** + * Y-coordinate of the bottom-right corner + */ + y2: number; +}; +export type BoxPrompt = { + /** + * X Min Coordinate of the box + */ + x_min?: number; + /** + * Y Min Coordinate of the box + */ + y_min?: number; + /** + * X Max Coordinate of the prompt + */ + x_max?: number; + /** + * Y Max Coordinate of the prompt + */ + y_max?: number; +}; +export type PointPrompt = { + /** + * X Coordinate of the prompt Default value: `305` + */ + x?: number; + /** + * Y Coordinate of the prompt Default value: `350` + */ + y?: number; + /** + * Label of the prompt. 
1 for foreground, 0 for background Default value: `"1"` + */ + label?: "0" | "1"; +}; +export type FaceDetection = { + /** + * Bounding box of the face. + */ + bbox: Array<number>; + /** + * Keypoints of the face. + */ + kps?: Array<Array<number>>; + /** + * Keypoints of the face on the image. + */ + kps_image: Image; + /** + * Confidence score of the detection. + */ + det_score: number; + /** + * Embedding of the face. + */ + embedding_file: File; + /** + * Either M or F if available. + */ + sex?: string; +}; +export type Color = { + /** + * Red value Default value: `128` + */ + r?: number; + /** + * Green value Default value: `128` + */ + g?: number; + /** + * Blue value Default value: `128` + */ + b?: number; +}; +export type AudioFile = { + /** + * + */ + url: string; + /** + * Default value: `"audio/wav"` + */ + content_type?: string; + /** + * Default value: `"8535dd59e911496a947daa35c07e67a3_tmplkcy6tut.wav"` + */ + file_name?: string; + /** + * The size of the file in bytes. + */ + file_size?: number | null; +}; +export type FluxProV11UltraInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + */ + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; +export type FluxProV11UltraOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxLoraFastTrainingInput = { + /** + * URL to zip archive with images. Try to use at least 4 images; in general, the more the better. + * + * In addition to images the archive can contain text files with captions. Each text file should have the same name as the image file it corresponds to. + */ + images_data_url: string | Blob | File; + /** + * Trigger word to be used in the captions. If None, a trigger word will not be used. + * If no captions are provided, the trigger_word will be used instead of captions. If captions are provided, the trigger word will not be used. + */ + trigger_word?: string; + /** + * If True, segmentation masks will be used to weight the training loss. For people, a face mask is used if possible. Default value: `true` + */ + create_masks?: boolean; + /** + * Number of steps to train the LoRA on.
+ */ + steps?: number; + /** + * If True, the training will be for a style. This will deactivate segmentation, captioning and will use trigger word instead. Use the trigger word to specify the style. + */ + is_style?: boolean; + /** + * Specifies whether the input data is already in a processed format. When set to False (default), the system expects raw input where image files and their corresponding caption files share the same name (e.g., 'photo.jpg' and 'photo.txt'). Set to True if your data is already in a preprocessed format. + */ + is_input_format_already_preprocessed?: boolean; + /** + * The format of the archive. If not specified, the format will be inferred from the URL. + */ + data_archive_format?: string; +}; +export type FluxLoraFastTrainingOutput = { + /** + * URL to the trained diffusers lora weights. + */ + diffusers_lora_file: File; + /** + * URL to the training configuration file. + */ + config_file: File; +}; +export type RecraftV3Output = { + /** + * The ID of the created style, this ID can be used to reference the style in the future. + */ + style_id: string; +}; +export type RecraftV3Input = { + /** + * + */ + prompt: string; + /** + * Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The style of the generated images. Vector images cost 2X as much. Default value: `"realistic_image"` + */ + style?: + | "any" + | "realistic_image" + | "digital_illustration" + | "vector_illustration" + | "realistic_image/b_and_w" + | "realistic_image/hard_flash" + | "realistic_image/hdr" + | "realistic_image/natural_light" + | "realistic_image/studio_portrait" + | "realistic_image/enterprise" + | "realistic_image/motion_blur" + | "digital_illustration/pixel_art" + | "digital_illustration/hand_drawn" + | "digital_illustration/grain" + | "digital_illustration/infantile_sketch" + | "digital_illustration/2d_art_poster" + | "digital_illustration/handmade_3d" + | "digital_illustration/hand_drawn_outline" + | "digital_illustration/engraving_color" + | "digital_illustration/2d_art_poster_2" + | "vector_illustration/engraving" + | "vector_illustration/line_art" + | "vector_illustration/line_circuit" + | "vector_illustration/linocut"; + /** + * An array of preferable colors Default value: `` + */ + colors?: Array; + /** + * The ID of the custom style reference (optional) + */ + style_id?: string | null; +}; +export type MinimaxVideoImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * URL of the image to use as the first frame + */ + image_url: string | Blob | File; + /** + * Whether to use the model's prompt optimizer Default value: `true` + */ + prompt_optimizer?: boolean; +}; +export type MinimaxVideoImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type AuraFlowInput = { + /** + * The prompt to generate images from + */ + prompt: string; + /** + * The number of images to generate Default value: `1` + */ + num_images?: number; + /** + * The seed to use for generating images + */ + seed?: number; + /** + * Classifier free guidance scale Default value: `3.5` + */ + guidance_scale?: number; + /** + * The number of inference steps to take Default value: `50` + */ + num_inference_steps?: number; + /** + * Whether to perform prompt expansion (recommended) Default value: `true` + */ + expand_prompt?: boolean; +}; +export type AuraFlowOutput = { + /** + * The generated images + */ + images: Array; + /** + * The seed used to 
generate the images + */ + seed: number; + /** + * The expanded prompt + */ + prompt: string; +}; +export type FluxDevImageToImageInput = { + /** + * The URL of the image to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The strength of the initial image. Higher strength values are better for this model. Default value: `0.95` + */ + strength?: number; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `40` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FluxDevImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxDevInput = { + /** + * The URL of the image to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The strength of the initial image. Higher strength values are better for this model. Default value: `0.95` + */ + strength?: number; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `40` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. 
Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FluxDevOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxLoraOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxLoraInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; + /** + * URL of image to use for inpainting. or img2img + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask to area to Inpaint in. + */ + mask_url: string | Blob | File; +}; +export type FluxSchnellInput = { + /** + * The URL of the image to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The strength of the initial image. Higher strength values are better for this model. Default value: `0.95` + */ + strength?: number; + /** + * The size of the generated image. 
Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `40` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FluxSchnellOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxProV11Input = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + */ + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; +export type FluxProV11Output = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxProNewInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. 
Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + */ + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; +export type FluxProNewOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type OmnigenV1Output = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type OmnigenV1Input = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * URL of images to use while generating the image, Use <|image_1|> for the first image and so on. Default value: `` + */ + input_image_urls?: Array; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `50` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + */ + guidance_scale?: number; + /** + * The Image Guidance scale is a measure of how close you want + * the model to stick to your input image when looking for a related image to show you. Default value: `1.6` + */ + img_guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. 
Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type StableDiffusionV35LargeInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type StableDiffusionV35LargeOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type StableDiffusionV35MediumOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type StableDiffusionV35MediumInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `40` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type RecraftV3CreateStyleOutput = { + /** + * The ID of the created style, this ID can be used to reference the style in the future. + */ + style_id: string; +}; +export type RecraftV3CreateStyleInput = { + /** + * + */ + prompt: string; + /** + * Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The style of the generated images. Vector images cost 2X as much. Default value: `"realistic_image"` + */ + style?: + | "any" + | "realistic_image" + | "digital_illustration" + | "vector_illustration" + | "realistic_image/b_and_w" + | "realistic_image/hard_flash" + | "realistic_image/hdr" + | "realistic_image/natural_light" + | "realistic_image/studio_portrait" + | "realistic_image/enterprise" + | "realistic_image/motion_blur" + | "digital_illustration/pixel_art" + | "digital_illustration/hand_drawn" + | "digital_illustration/grain" + | "digital_illustration/infantile_sketch" + | "digital_illustration/2d_art_poster" + | "digital_illustration/handmade_3d" + | "digital_illustration/hand_drawn_outline" + | "digital_illustration/engraving_color" + | "digital_illustration/2d_art_poster_2" + | "vector_illustration/engraving" + | "vector_illustration/line_art" + | "vector_illustration/line_circuit" + | "vector_illustration/linocut"; + /** + * An array of preferable colors Default value: `` + */ + colors?: Array; + /** + * The ID of the custom style reference (optional) + */ + style_id?: string | null; +}; +export type FluxRealismOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxRealismInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. 
Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The strength of the model. Default value: `1` + */ + strength?: number; + /** + * The output image format. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type FluxLoraInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxLoraInpaintingInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; + /** + * URL of image to use for inpainting. 
or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxLoraImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxLoraImageToImageInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxGeneralInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image.
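+ * An illustrative sketch, assuming a `LoraWeight` shape with `path` and `scale` + * fields: `loras: [{ path: "https://example.com/my-lora.safetensors", scale: 1 }]`.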
Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array<ControlNet>; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array<ControlNetUnion>; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array<IPAdapter>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. + * If using XLabs IP-Adapter v1, this will be turned on. + */ + use_real_cfg?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * URL of Image for Reference-Only + */ + reference_image_url?: string | Blob | File; + /** + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + */ + reference_strength?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be started. + */ + reference_start?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + */ + reference_end?: number; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxGeneralOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxGeneralInpaintingInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform.
Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array<ControlNet>; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array<ControlNetUnion>; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array<IPAdapter>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. + * If using XLabs IP-Adapter v1, this will be turned on. + */ + use_real_cfg?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * URL of Image for Reference-Only + */ + reference_image_url?: string | Blob | File; + /** + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + */ + reference_strength?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be started. + */ + reference_start?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + */ + reference_end?: number; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxGeneralInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxGeneralImageToImageInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image.
+ */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array<ControlNet>; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array<ControlNetUnion>; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array<IPAdapter>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. + * If using XLabs IP-Adapter v1, this will be turned on. + */ + use_real_cfg?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * URL of Image for Reference-Only + */ + reference_image_url?: string | Blob | File; + /** + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + */ + reference_strength?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be started. + */ + reference_start?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + */ + reference_end?: number; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxGeneralImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts.
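+ * For example (illustrative), `output.has_nsfw_concepts.some(Boolean)` is true + * when any of the generated images was flagged.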
+ */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxGeneralDifferentialDiffusionInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array<ControlNet>; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array<ControlNetUnion>; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array<IPAdapter>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. + * If using XLabs IP-Adapter v1, this will be turned on. + */ + use_real_cfg?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * URL of Image for Reference-Only + */ + reference_image_url?: string | Blob | File; + /** + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + */ + reference_strength?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be started. + */ + reference_start?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + */ + reference_end?: number; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint. + */ + mask_url: string | Blob | File; +}; +export type FluxGeneralDifferentialDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image.
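+ * (An illustrative reproducibility pattern: feed this value back as `seed` in a + * follow-up request to regenerate the same image.)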
It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FluxGeneralRfInversionInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array<ControlNet>; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array<ControlNetUnion>; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array<IPAdapter>; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to true. + * If using XLabs IP-Adapter v1, this will be turned on. + */ + use_real_cfg?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * URL of Image for Reference-Only + */ + reference_image_url?: string | Blob | File; + /** + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + */ + reference_strength?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be started. + */ + reference_start?: number; + /** + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + */ + reference_end?: number; + /** + * URL of the image to use for inpainting or image-to-image. + */ + image_url: string | Blob | File; + /** + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The mask of the area to inpaint.
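+ * (Illustrative sketch; the mask convention is an assumption: pass a black-and-white + * image, e.g. `mask_url: "https://example.com/mask.png"`, where white marks the + * region to repaint.)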
+ */ + mask_url: string | Blob | File; +}; +export type FluxGeneralRfInversionOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type IclightV2Output = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type IclightV2Input = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * Negative Prompt for the image Default value: `""` + */ + negative_prompt?: string; + /** + * URL of image to be used for relighting + */ + image_url: string | Blob | File; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * Provide lighting conditions for the model Default value: `"None"` + */ + initial_latent?: "None" | "Left" | "Right" | "Top" | "Bottom"; + /** + * Use HR fix + */ + enable_hr_fix?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The classifier-free guidance (CFG) scale. Default value: `1` + */ + cfg?: number; + /** + * Strength for low-resolution pass. Default value: `0.98` + */ + lowres_denoise?: number; + /** + * Strength for high-resolution pass. Only used if enable_hr_fix is True. Default value: `0.95` + */ + highres_denoise?: number; + /** + * Default value: `0.5` + */ + hr_downscale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type FluxDifferentialDiffusionInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * URL of image to use as initial image. + */ + image_url: string | Blob | File; + /** + * URL of change map. + */ + change_map_image_url: string | Blob | File; + /** + * The strength to use for image-to-image.
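+ * (Illustrative: `strength: 0.6` keeps more of the source image than the + * default below.)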
1.0 completely remakes the image, while 0.0 preserves the original. Default value: `0.85` + */ + strength?: number; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FluxDifferentialDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type StableDiffusionV3MediumInput = { + /** + * The image URL to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The negative prompt to generate an image from. Default value: `""` + */ + negative_prompt?: string; + /** + * If set to true, prompt will be upsampled with more details. + */ + prompt_expansion?: boolean; + /** + * The size of the generated image. Defaults to the conditioning image's size. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The strength of the image-to-image transformation. Default value: `0.9` + */ + strength?: number; +}; +export type StableDiffusionV3MediumOutput = { + /** + * The generated image files info.
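+ * For example (illustrative), the first result is available as + * `output.images[0].url`.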
+ */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; + /** + * The number of images generated. + */ + num_images: number; +}; +export type StableDiffusionV3MediumImageToImageInput = { + /** + * The image URL to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The negative prompt to generate an image from. Default value: `""` + */ + negative_prompt?: string; + /** + * If set to true, prompt will be upsampled with more details. + */ + prompt_expansion?: boolean; + /** + * The size of the generated image. Defaults to the conditioning image's size. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The strength of the image-to-image transformation. Default value: `0.9` + */ + strength?: number; +}; +export type StableDiffusionV3MediumImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; + /** + * The number of images generated. + */ + num_images: number; +}; +export type FastSdxlOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastSdxlInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting.
+ */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request; it can be used with the response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type LoraInput = { + /** + * The method to use for the sigmas. If set to 'custom', the sigmas will be set based + * on the provided sigmas schedule in the `array` field. + * Defaults to 'default', which uses the scheduler's own sigmas. Default value: `"default"` + */ + method?: "default" | "array"; + /** + * Sigmas schedule to be used if 'custom' method is selected. Default value: `` + */ + array?: Array<number>; +}; +export type LoraOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The latents saved for debugging. + */ + debug_latents?: File; + /** + * The latents saved for debugging per pass. + */ + debug_per_pass_latents?: File; +}; +export type AuraSrInput = { + /** + * URL of the image to upscale.
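+ * Illustrative usage sketch (the endpoint id is an assumption): + * `await fal.subscribe("fal-ai/aura-sr", { input: { image_url } })`.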
+ */ + image_url: string | Blob | File; + /** + * Upscaling factor. More coming soon. Default value: `"4"` + */ + upscaling_factor?: "4"; + /** + * Whether to use overlapping tiles for upscaling. Setting this to true helps remove seams but doubles the inference time. + */ + overlapping_tiles?: boolean; + /** + * Checkpoint to use for upscaling. More coming soon. Default value: `"v1"` + */ + checkpoint?: "v1" | "v2"; +}; +export type AuraSrOutput = { + /** + * Upscaled image + */ + image: Image; + /** + * Timings for each step in the pipeline. + */ + timings: Record<string, number>; +}; +export type StableCascadeOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type StableCascadeInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Number of steps to run the first stage for. Default value: `20` + */ + first_stage_steps?: number; + /** + * Number of steps to run the second stage for. Default value: `10` + */ + second_stage_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. + */ + second_stage_guidance_scale?: number; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of Stable Cascade + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the image will be returned as a base64-encoded string.
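+ * (Illustrative trade-off: `sync_mode: true` returns the image inline at the + * cost of a slower response; leave it false to receive CDN URLs.)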
+ */ + sync_mode?: boolean; +}; +export type MinimaxVideoInput = { + /** + * + */ + prompt: string; + /** + * URL of the image to use as the first frame + */ + image_url: string | Blob | File; + /** + * Whether to use the model's prompt optimizer Default value: `true` + */ + prompt_optimizer?: boolean; +}; +export type MinimaxVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type HaiperVideoV2Input = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"4"` + */ + duration?: "4" | "6"; + /** + * Whether to use the model's prompt enhancer Default value: `true` + */ + prompt_enhancer?: boolean; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. + */ + seed?: number; + /** + * URL of the image to use as the first frame + */ + image_url: string | Blob | File; +}; +export type HaiperVideoV2Output = { + /** + * The generated video + */ + video: File; +}; +export type HaiperVideoV2ImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"4"` + */ + duration?: "4" | "6"; + /** + * Whether to use the model's prompt enhancer Default value: `true` + */ + prompt_enhancer?: boolean; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. + */ + seed?: number; + /** + * URL of the image to use as the first frame + */ + image_url: string | Blob | File; +}; +export type HaiperVideoV2ImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type MochiV1Output = { + /** + * The generated video + */ + video: File; +}; +export type MochiV1Input = { + /** + * The prompt to generate a video from. + */ + prompt: string; + /** + * The seed to use for generating the video. + */ + seed?: number; + /** + * Whether to enable prompt expansion. 
Default value: `true` + */ + enable_prompt_expansion?: boolean; +}; +export type LumaDreamMachineInput = { + /** + * + */ + prompt: string; + /** + * The aspect ratio of the generated video Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; + /** + * Whether the video should loop (end of video is blended with the beginning) + */ + loop?: boolean; +}; +export type LumaDreamMachineOutput = { + /** + * The generated video + */ + video: File; +}; +export type LumaDreamMachineImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * The aspect ratio of the generated video Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; + /** + * Whether the video should loop (end of video is blended with the beginning) + */ + loop?: boolean; +}; +export type LumaDreamMachineImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type KlingVideoV1StandardTextToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1StandardTextToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type KlingVideoV1StandardImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1StandardImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type KlingVideoV1ProTextToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1ProTextToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type KlingVideoV1ProImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1ProImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type Cogvideox5bOutput = { + /** + * The URL to the generated video + */ + video: File; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated video. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * The prompt used for generating the video. + */ + prompt: string; +}; +export type Cogvideox5bInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The size of the generated video. + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The negative prompt to generate video from. Default value: `""` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform.
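+ * (Illustrative: a lower value such as `num_inference_steps: 30` trades quality + * for speed.)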
Default value: `50` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` + */ + guidance_scale?: number; + /** + * Use RIFE for video interpolation Default value: `true` + */ + use_rife?: boolean; + /** + * The target FPS of the video Default value: `16` + */ + export_fps?: number; +}; +export type Cogvideox5bVideoToVideoOutput = { + /** + * The URL to the generated video + */ + video: File; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated video. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * The prompt used for generating the video. + */ + prompt: string; +}; +export type Cogvideox5bVideoToVideoInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The size of the generated video. + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The negative prompt to generate video from. Default value: `""` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. Default value: `50` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` + */ + guidance_scale?: number; + /** + * Use RIFE for video interpolation Default value: `true` + */ + use_rife?: boolean; + /** + * The target FPS of the video Default value: `16` + */ + export_fps?: number; +}; +export type Cogvideox5bImageToVideoOutput = { + /** + * The URL to the generated video + */ + video: File; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated video. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; + /** + * The prompt used for generating the video. + */ + prompt: string; +}; +export type Cogvideox5bImageToVideoInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The size of the generated video. + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The negative prompt to generate video from. Default value: `""` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. Default value: `50` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you.
Default value: `7` + */ + guidance_scale?: number; + /** + * Use RIFE for video interpolation Default value: `true` + */ + use_rife?: boolean; + /** + * The target FPS of the video Default value: `16` + */ + export_fps?: number; +}; +export type StableVideoInput = { + /** + * The prompt to use as a starting point for the generation. + */ + prompt: string; + /** + * The negative prompt to use as a starting point for the generation. Default value: `"unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label"` + */ + negative_prompt?: string; + /** + * The size of the generated video. Default value: `landscape_16_9` + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` + */ + motion_bucket_id?: number; + /** + * The conditioning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` + */ + cond_aug?: number; +}; +export type StableVideoOutput = { + /** + * Generated video + */ + video: File; + /** + * Seed for random number generator + */ + seed: number; +}; +export type FastSvdTextToVideoInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` + */ + motion_bucket_id?: number; + /** + * The conditioning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` + */ + cond_aug?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The number of steps to run the model for. The higher the number the better + * the quality and longer it will take to generate. Default value: `20` + */ + steps?: number; + /** + * Enabling [DeepCache](https://github.com/horseee/DeepCache) will make the execution + * faster, but might sometimes degrade overall quality. The higher the setting, the + * faster the execution will be, but the more quality might be lost. Default value: `"none"` + */ + deep_cache?: "none" | "minimum" | "medium" | "high"; + /** + * The FPS of the generated video. The higher the number, the faster the video will + * play. Total video length is 25 frames. Default value: `10` + */ + fps?: number; +}; +export type FastSvdTextToVideoOutput = { + /** + * The generated video file. + */ + video: File; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed.
+ */ + seed: number; +}; +export type FastSvdLcmInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` + */ + motion_bucket_id?: number; + /** + * The conditioning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` + */ + cond_aug?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The number of steps to run the model for. The higher the number the better + * the quality and longer it will take to generate. Default value: `4` + */ + steps?: number; + /** + * The FPS of the generated video. The higher the number, the faster the video will + * play. Total video length is 25 frames. Default value: `10` + */ + fps?: number; +}; +export type FastSvdLcmOutput = { + /** + * The generated video file. + */ + video: File; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; +}; +export type BirefnetInput = { + /** + * URL of the image to remove background from + */ + image_url: string | Blob | File; + /** + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases. + * + * The corresponding models are as follows: + * - 'General Use (Light)': BiRefNet-DIS_ep580.pth + * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth + * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` + */ + model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait"; + /** + * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` + */ + operating_resolution?: "1024x1024" | "2048x2048"; + /** + * The format of the output image Default value: `"png"` + */ + output_format?: "webp" | "png"; + /** + * Whether to output the mask used to remove the background + */ + output_mask?: boolean; + /** + * Whether to refine the foreground using the estimated mask Default value: `true` + */ + refine_foreground?: boolean; +}; +export type BirefnetOutput = { + /** + * Image with background removed + */ + image: Image; + /** + * Mask used to remove the background + */ + mask_image?: Image; +}; +export type BirefnetV2Input = { + /** + * URL of the image to remove background from + */ + image_url: string | Blob | File; + /** + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases.
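+ * (Illustrative choice: `model: "Portrait"` when cutting out people.)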
+ * + * The corresponding models are as follows: + * - 'General Use (Light)': BiRefNet-DIS_ep580.pth + * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth + * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` + */ + model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait"; + /** + * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` + */ + operating_resolution?: "1024x1024" | "2048x2048"; + /** + * The format of the output image Default value: `"png"` + */ + output_format?: "webp" | "png"; + /** + * Whether to output the mask used to remove the background + */ + output_mask?: boolean; + /** + * Whether to refine the foreground using the estimated mask Default value: `true` + */ + refine_foreground?: boolean; +}; +export type BirefnetV2Output = { + /** + * Image with background removed + */ + image: Image; + /** + * Mask used to remove the background + */ + mask_image?: Image; +}; +export type FastSvdLcmTextToVideoInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` + */ + motion_bucket_id?: number; + /** + * The conditioning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` + */ + cond_aug?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The number of steps to run the model for. The higher the number the better + * the quality and longer it will take to generate. Default value: `4` + */ + steps?: number; + /** + * The FPS of the generated video. The higher the number, the faster the video will + * play. Total video length is 25 frames. Default value: `10` + */ + fps?: number; +}; +export type FastSvdLcmTextToVideoOutput = { + /** + * The generated video file. + */ + video: File; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used when none was passed. + */ + seed: number; +}; +export type CreativeUpscalerInput = { + /** + * The type of model to use for the upscaling. Default is SD_1_5 Default value: `"SD_1_5"` + */ + model_type?: "SD_1_5" | "SDXL"; + /** + * The image to upscale. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. If no prompt is provided, BLIP2 will be used to generate one. + */ + prompt?: string; + /** + * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2` + */ + scale?: number; + /** + * How much the output can deviate from the original Default value: `0.5` + */ + creativity?: number; + /** + * How much detail to add Default value: `1` + */ + detail?: number; + /** + * How much to preserve the shape of the original image Default value: `0.25` + */ + shape_preservation?: number; + /** + * The suffix to add to the generated prompt. Not used for a custom prompt.
This is useful to add a common ending to all prompts such as 'high quality' etc. or embedding tokens. Default value: `" high quality, highly detailed, high resolution, sharp"` + */ + prompt_suffix?: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"` + */ + negative_prompt?: string; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `20` + */ + num_inference_steps?: number; + /** + * If set to true, the resulting image will be checked whether it includes any + * potentially unsafe content. If it does, it will be replaced with a black + * image. Default value: `true` + */ + enable_safety_checks?: boolean; + /** + * If set to true, the image will not be processed by the CCSR model before + * being processed by the creativity model. + */ + skip_ccsr?: boolean; + /** + * Allow for large uploads that could take a very long time. + */ + override_size_limits?: boolean; + /** + * The URL to the base model to use for the upscaling + */ + base_model_url?: string | Blob | File; + /** + * The URL to the additional LORA model to use for the upscaling. Default is None + */ + additional_lora_url?: string | Blob | File; + /** + * The scale of the additional LORA model to use for the upscaling. Default is 1.0. Default value: `1` + */ + additional_lora_scale?: number; + /** + * The URL to the additional embeddings to use for the upscaling. Default is None + */ + additional_embedding_url?: string | Blob | File; +}; +export type CreativeUpscalerOutput = { + /** + * The generated image file info. + */ + image: Image; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; +}; +export type ClarityUpscalerOutput = { + /** + * The URL of the generated image. + */ + image: Image; + /** + * The seed used to generate the image. + */ + seed: number; + /** + * The timings of the different steps in the workflow. + */ + timings: Record<string, number>; +}; +export type ClarityUpscalerInput = { + /** + * The URL of the image to upscale. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `"masterpiece, best quality, highres"` + */ + prompt?: string; + /** + * The upscale factor. Default value: `2` + */ + upscale_factor?: number; + /** + * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality:2)"` + */ + negative_prompt?: string; + /** + * The creativity of the model. The higher the creativity, the more the model will deviate from the prompt. + * Refers to the denoise strength of the sampling.
Default value: `0.35` + */ + creativity?: number; + /** + * The resemblance of the upscaled image to the original image. The higher the resemblance, the more the model will try to keep the original image. + * Refers to the strength of the ControlNet. Default value: `0.6` + */ + resemblance?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The number of inference steps to perform. Default value: `18` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type CcsrInput = { + /** + * The URL of the image to upscale. + */ + image_url: string | Blob | File; + /** + * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2` + */ + scale?: number; + /** + * If specified, a patch-based sampling strategy will be used for sampling. Default value: `"none"` + */ + tile_diffusion?: "none" | "mix" | "gaussian"; + /** + * Size of patch. Default value: `1024` + */ + tile_diffusion_size?: number; + /** + * Stride of sliding patch. Default value: `512` + */ + tile_diffusion_stride?: number; + /** + * If specified, a patch-based sampling strategy will be used for VAE decoding. + */ + tile_vae?: boolean; + /** + * Size of VAE patch. Default value: `226` + */ + tile_vae_decoder_size?: number; + /** + * Size of latent image. Default value: `1024` + */ + tile_vae_encoder_size?: number; + /** + * The number of steps to run the model for. The higher the number the better the quality and longer it will take to generate. Default value: `50` + */ + steps?: number; + /** + * The ending point of uniform sampling strategy. Default value: `0.6667` + */ + t_max?: number; + /** + * The starting point of uniform sampling strategy. Default value: `0.3333` + */ + t_min?: number; + /** + * Type of color correction for samples. Default value: `"adain"` + */ + color_fix_type?: "none" | "wavelet" | "adain"; + /** + * Seed for reproducibility. Different seeds will make slightly different results. + */ + seed?: number; +}; +export type CcsrOutput = { + /** + * The generated image file info. + */ + image: Image; + /** + * The seed used for the generation. + */ + seed: number; +}; +export type FastTurboDiffusionInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image.
Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastTurboDiffusionImageToImageInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. 
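+ * + * @example + * // Sketch only: pinning the seed makes reruns reproducible. Assumes a + * // configured client `fal` (see `createFalClient`); the endpoint id and + * // URL are illustrative assumptions, not part of this file. + * const { data } = await fal.subscribe("fal-ai/fast-turbo-diffusion/image-to-image", { + *   input: { prompt: "a red barn at dusk", image_url: "https://example.com/input.png", seed: 42 }, + * });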
+ */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastTurboDiffusionInpaintingInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. 
It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastLcmDiffusionInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLcmDiffusionImageToImageInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The prompt to use for generating the image. 
Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLcmDiffusionInpaintingInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type WhisperInput = { + /** + * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. + */ + audio_url: string | Blob | File; + /** + * Task to perform on the audio file. Either transcribe or translate. Default value: `"transcribe"` + */ + task?: "transcribe" | "translate"; + /** + * Language of the audio file. If set to null, the language will be + * automatically detected. Defaults to null. + * + * If translate is selected as the task, the audio will be translated to + * English, regardless of the language selected. 
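+ * + * @example + * // Sketch only (assumes a configured client `fal` via `createFalClient`; + * // the endpoint id and URL are illustrative assumptions): + * const { data } = await fal.subscribe("fal-ai/whisper", { + *   input: { audio_url: "https://example.com/audio.mp3", task: "transcribe", language: "en" }, + * });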
+ */ + language?: + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh"; + /** + * Whether to diarize the audio file. Defaults to false. + */ + diarize?: boolean; + /** + * Level of the chunks to return. Either segment or word. Default value: `"segment"` + */ + chunk_level?: "segment" | "word"; + /** + * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + */ + version?: "3"; + /** + * Default value: `64` + */ + batch_size?: number; + /** + * Prompt to use for generation. Defaults to an empty string. Default value: `""` + */ + prompt?: string; + /** + * Number of speakers in the audio file. Defaults to null. + * If not provided, the number of speakers will be automatically + * detected. + */ + num_speakers?: number; +}; +export type WhisperOutput = { + /** + * Transcription of the audio file + */ + text: string; + /** + * Timestamp chunks of the audio file + */ + chunks?: Array; + /** + * List of languages that the audio file is inferred to be. Defaults to null. + */ + inferred_languages: Array< + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh" + >; + /** + * Speaker diarization segments of the audio file. Only present if diarization is enabled. + */ + diarization_segments: Array; +}; +export type WizperInput = { + /** + * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. + */ + audio_url: string | Blob | File; + /** + * Task to perform on the audio file. Either transcribe or translate. Default value: `"transcribe"` + */ + task?: "transcribe" | "translate"; + /** + * Language of the audio file. + * If translate is selected as the task, the audio will be translated to + * English, regardless of the language selected. 
Default value: `"en"` + */ + language?: + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh"; + /** + * Level of the chunks to return. Default value: `"segment"` + */ + chunk_level?: "segment"; + /** + * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + */ + version?: "3"; +}; +export type WizperOutput = { + /** + * Transcription of the audio file + */ + text: string; + /** + * Timestamp chunks of the audio file + */ + chunks: Array; +}; +export type FastLightningSdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLightningSdxlInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"4"` + */ + num_inference_steps?: "1" | "2" | "4" | "8"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. 
Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type FastLightningSdxlImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLightningSdxlImageToImageInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"4"` + */ + num_inference_steps?: "1" | "2" | "4" | "8"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type FastLightningSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLightningSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `"4"` + */ + num_inference_steps?: "1" | "2" | "4" | "8"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type HyperSdxlInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlImageToImageOutput = { + /** + * The generated image files info. 
+ */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type HyperSdxlImageToImageInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type HyperSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type PlaygroundV25Output = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type PlaygroundV25Input = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + */ + guidance_scale?: number; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. 
+ */ + guidance_rescale?: number; +}; +export type PlaygroundV25ImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type PlaygroundV25ImageToImageInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + */ + guidance_scale?: number; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; +}; +export type PlaygroundV25InpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type PlaygroundV25InpaintingInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + */ + guidance_scale?: number; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; +}; +export type AmtInterpolationOutput = { + /** + * Generated video + */ + video: File; +}; +export type AmtInterpolationInput = { + /** + * Frames to interpolate + */ + frames: Array; + /** + * Output frames per second Default value: `24` + */ + output_fps?: number; + /** + * Number of recursive interpolation passes Default value: `4` + */ + recursive_interpolation_passes?: number; +}; +export type AmtInterpolationFrameInterpolationOutput = { + /** + * Generated video + */ + video: File; +}; +export type AmtInterpolationFrameInterpolationInput = { + /** + * Frames to interpolate + */ + frames: Array; + /** + * Output frames per second Default value: `24` + */ + output_fps?: number; + /** + * Number of recursive interpolation passes Default value: `4` + */ + recursive_interpolation_passes?: number; +}; +export type T2vTurboInput = { + /** + * The prompt to generate images from + */ + prompt: string; + /** + * The seed to use for the random number generator + */ + seed?: number | null; + /** + * The number of steps to sample Default value: `4` + */ + num_inference_steps?: number; + /** + * The guidance scale Default value: `7.5` + */ + guidance_scale?: number; + /** + * The number of frames to generate Default value: `16` + */ + num_frames?: number; + /** + * The FPS of the exported video Default value: `8` + */ + export_fps?: number; +}; +export type T2vTurboOutput = { + /** + * The URL to the generated video + */ + video: File; +}; +export type Sd15DepthControlnetOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type Sd15DepthControlnetInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. 
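+ * + * @example + * // Sketch only (assumes a configured client `fal`; the endpoint id and + * // URLs are illustrative assumptions). The depth map in `control_image_url` + * // steers the composition: + * const { data } = await fal.subscribe("fal-ai/sd15-depth-controlnet", { + *   input: { + *     prompt: "a cozy cabin in a snowy forest", + *     control_image_url: "https://example.com/depth.png", + *     image_url: "https://example.com/base.png", + *     mask_url: "https://example.com/mask.png", + *   }, + * });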
+ */ + prompt: string; + /** + * The URL of the control image. + */ + control_image_url: string | Blob | File; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it unset to automatically infer it from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type PhotomakerOutput = { + /** + * + */ + images: Array<Image>; + /** + * + */ + seed: number; +}; +export type PhotomakerInput = { + /** + * The URL of the image archive containing the images you want to use. + */ + image_archive_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The base pipeline to use for generating the image. Default value: `"photomaker"` + */ + base_pipeline?: "photomaker" | "photomaker-style"; + /** + * Optional initial image for img2img + */ + initial_image_url?: string | Blob | File; + /** + * How much noise to add to the latent image. 0 for no noise, 1 for maximum noise. Default value: `0.5` + */ + initial_image_strength?: number; + /** + * Default value: `"Photographic"` + */ + style?: + | "(No style)" + | "Cinematic" + | "Disney Character" + | "Digital Art" + | "Photographic" + | "Fantasy art" + | "Neonpunk" + | "Enhance" + | "Comic book" + | "Lowpoly" + | "Line art"; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution).
Default value: `""` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `50` + */ + num_inference_steps?: number; + /** + * Default value: `20` + */ + style_strength?: number; + /** + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` + */ + num_images?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type LcmInput = { + /** + * The model to use for generating the image. Default value: `"sdv1-5"` + */ + model?: "sdxl" | "sdv1-5"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The base image to use for guiding the image generation on image-to-image + * generations. If the either width or height of the image is larger than 1024 + * pixels, the image will be resized to 1024 pixels while keeping the aspect ratio. + */ + image_url?: string | Blob | File; + /** + * The mask to use for guiding the image generation on image + * inpainting. The model will focus on the mask area and try to fill it with + * the most relevant content. + * + * The mask must be a black and white image where the white area is the area + * that needs to be filled and the black area is the area that should be + * ignored. + * + * The mask must have the same dimensions as the image passed as `image_url`. + */ + mask_url?: string | Blob | File; + /** + * The strength of the image that is passed as `image_url`. The strength + * determines how much the generated image will be similar to the image passed as + * `image_url`. The higher the strength the more model gets "creative" and + * generates an image that's different from the initial image. A strength of 1.0 + * means that the initial image is more or less ignored and the model will try to + * generate an image that's as close as possible to the prompt. Default value: `0.8` + */ + strength?: number; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `4` + */ + num_inference_steps?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. 
+ * + * If not provided: + * - For text-to-image generations, the default size is 512x512. + * - For image-to-image generations, the default size is the same as the input image. + * - For inpainting generations, the default size is the same as the input image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. The function will return a list of images + * with the same prompt and negative prompt but different seeds. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the resulting image will be checked whether it includes any + * potentially unsafe content. If it does, it will be replaced with a black + * image. Default value: `true` + */ + enable_safety_checks?: boolean; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * If set to true, the inpainting pipeline will only inpaint the provided mask + * area. Only effective for inpainting pipelines. + */ + inpaint_mask_only?: boolean; + /** + * If set to true, the inpainting pipeline will use controlnet inpainting. + * Only effective for inpainting pipelines. + */ + controlnet_inpaint?: boolean; + /** + * The url of the lora server to use for image generation. + */ + lora_url?: string | Blob | File; + /** + * The scale of the lora server to use for image generation. Default value: `1` + */ + lora_scale?: number; +}; +export type LcmOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Number of inference steps used to generate the image. It will be the same value of the one passed in the + * input or the default one in case none was passed. Default value: `4` + */ + num_inference_steps?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * A list of booleans indicating whether the generated image contains any + * potentially unsafe content. If the safety check is disabled, this field + * will be false for all images. + */ + nsfw_content_detected: Array; +}; +export type LcmSd15I2iInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The image to use as a base. + */ + image_url: string | Blob | File; + /** + * The strength of the image. Default value: `0.8` + */ + strength?: number; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time.
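With `LcmInput` and `LcmOutput` both defined above, a short sketch of calling the endpoint follows. The id `fal-ai/lcm` is an assumption from the type names; note how `request_id` is echoed back and how the safety flags line up with the images:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/lcm" is an assumed endpoint id for LcmInput/LcmOutput.
const { data } = await fal.subscribe("fal-ai/lcm", {
  input: {
    model: "sdv1-5",
    prompt: "a watercolor lighthouse at dawn",
    num_inference_steps: 4, // LCM needs very few steps (default 4)
    guidance_scale: 1,
    request_id: "lcm-demo-1", // echoed back in LcmOutput.request_id
  },
});
// Per LcmOutput, images flagged by the safety check come back blacked out.
data.images.forEach((image: { url: string }, i: number) => {
  if (!data.nsfw_content_detected[i]) console.log(image.url);
});
```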
+ */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `4` + */ + num_inference_steps?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. The function will return a list of images + * with the same prompt and negative prompt but different seeds. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the resulting image will be checked whether it includes any + * potentially unsafe content. If it does, it will be replaced with a black + * image. Default value: `true` + */ + enable_safety_checks?: boolean; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type LcmSd15I2iOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Number of inference steps used to generate the image. It will be the same value of the one passed in the + * input or the default one in case none was passed. Default value: `4` + */ + num_inference_steps?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * A list of booleans indicating whether the generated image contains any + * potentially unsafe content. If the safety check is disabled, this field + * will have a false for each generated image. + */ + nsfw_content_detected: Array; +}; +export type FooocusInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The style to use. 
Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` + */ + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" + | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + | "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | "Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral Aura" + | "Avant Garde" + | 
"Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | "Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; + /** + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. 
You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * + */ + image_prompt_1: ImagePrompt; + /** + * + */ + image_prompt_2?: ImagePrompt; + /** + * + */ + image_prompt_3?: ImagePrompt; + /** + * + */ + image_prompt_4?: ImagePrompt; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + */ + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; + /** + * Describe what you want to inpaint. Default value: `""` + */ + inpaint_additional_prompt?: string; + /** + * The directions to outpaint. Default value: `` + */ + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + /** + * Mixing Image Prompt and Inpaint + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * The image to upscale or vary. + */ + uov_image_url?: string | Blob | File; + /** + * The method to use for upscaling or varying. Default value: `"Disabled"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; + /** + * Mixing Image Prompt and Vary/Upscale + */ + mixing_image_prompt_and_vary_upscale?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusOutput = { + /** + * The generated image file info. + */ + images: Array; + /** + * The time taken for the generation process. + */ + timings: Record; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type AnimatediffV2vInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. 
This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7` + */ + guidance_scale?: number; + /** + * Base model to use for animation generation. Default value: `"cardosAnimev20"` + */ + base_model?: "darkSushiMixMix_colorful" | "cardosAnimev20"; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * Select every Nth frame from the video. + * This can be used to reduce the number of frames to process, which can reduce the time and the cost. + * However, it can also reduce the quality of the final video. Default value: `2` + */ + select_every_nth_frame?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type AnimatediffV2vOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; + /** + * + */ + timings: Record; +}; +export type AnimatediffV2vTurboInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7` + */ + guidance_scale?: number; + /** + * Base model to use for animation generation. Default value: `"cardosAnimev20"` + */ + base_model?: "darkSushiMixMix_colorful" | "cardosAnimev20"; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * Select every Nth frame from the video. + * This can be used to reduce the number of frames to process, which can reduce the time and the cost. + * However, it can also reduce the quality of the final video. Default value: `2` + */ + select_every_nth_frame?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type AnimatediffV2vTurboOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; + /** + * + */ + timings: Record; +}; +export type FastAnimatediffTextToVideoInput = { + /** + * URL of the video. 
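The AnimateDiff video-to-video pair above is a longer-running job, so it suits the client's progress callbacks. A sketch, assuming the endpoint id `fal-ai/animatediff-v2v` (inferred from the type names):

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/animatediff-v2v" is an assumed endpoint id for
// AnimatediffV2vInput/AnimatediffV2vOutput above.
const { data } = await fal.subscribe("fal-ai/animatediff-v2v", {
  input: {
    video_url: "https://example.com/clip.mp4",
    prompt: "anime style, cherry blossoms, soft lighting",
    base_model: "cardosAnimev20",
    select_every_nth_frame: 2, // trade quality for speed and cost
  },
  logs: true,
  onQueueUpdate: (update) => {
    if (update.status === "IN_PROGRESS") console.log("still rendering...");
  },
});
console.log(data.video.url, data.seed); // AnimatediffV2vOutput
```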
+ */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type FastAnimatediffTextToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffVideoToVideoInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type FastAnimatediffVideoToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTurboTextToVideoInput = { + /** + * URL of the video. 
+ */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type FastAnimatediffTurboTextToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTurboVideoToVideoInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type FastAnimatediffTurboVideoToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type IllusionDiffusionOutput = { + /** + * The generated image file info. 
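For the turbo variants just defined, the `motions` and `fps` knobs are the interesting part. A sketch, with the endpoint id inferred from the `FastAnimatediffTurboVideoToVideo*` type names and therefore possibly inexact:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: the endpoint id below is an assumption from the type names.
const { data } = await fal.subscribe(
  "fal-ai/fast-animatediff/turbo/video-to-video",
  {
    input: {
      video_url: "https://example.com/clip.mp4",
      prompt: "cinematic, golden hour",
      first_n_seconds: 3,
      num_inference_steps: 8, // 4-12 recommended for turbo mode
      motions: ["zoom-in", "pan-left"],
      fps: 8,
    },
  },
);
console.log(data.video.url);
```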
+ */ + image: Image; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; +}; +export type IllusionDiffusionInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The scale of the ControlNet. Default value: `1` + */ + controlnet_conditioning_scale?: number; + /** + * + */ + control_guidance_start?: number; + /** + * Default value: `1` + */ + control_guidance_end?: number; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed?: number; + /** + * Scheduler / sampler to use for the image denoising process. Default value: `"Euler"` + */ + scheduler?: "DPM++ Karras SDE" | "Euler"; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `40` + */ + num_inference_steps?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; +}; +export type ImageutilsDepthInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type ImageutilsDepthOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type ImageutilsRembgInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type ImageutilsRembgOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type EsrganOutput = { + /** + * Upscaled image + */ + image: Image; +}; +export type EsrganInput = { + /** + * Url to input image + */ + image_url: string | Blob | File; + /** + * Rescaling factor Default value: `2` + */ + scale?: number; + /** + * Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200 + */ + tile?: number; + /** + * Upscaling a face + */ + face?: boolean; + /** + * Model to use for upscaling Default value: `"RealESRGAN_x4plus"` + */ + model?: + | "RealESRGAN_x4plus" + | "RealESRGAN_x2plus" + | "RealESRGAN_x4plus_anime_6B" + | "RealESRGAN_x4_v3" + | "RealESRGAN_x4_wdn_v3" + | "RealESRGAN_x4_anime_v3"; + /** + * Output image format (png or jpeg) Default value: `"png"` + */ + output_format?: "png" | "jpeg"; +}; +export type ControlnetsdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * Seed of the generated Image. 
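The ESRGAN pair above is self-contained enough for a one-call sketch; `fal-ai/esrgan` is an assumed endpoint id. The `tile` field is the relevant escape hatch when upscales run out of GPU memory:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/esrgan" is an assumed endpoint id for EsrganInput/EsrganOutput.
const { data } = await fal.subscribe("fal-ai/esrgan", {
  input: {
    image_url: "https://example.com/low-res.png",
    scale: 2,
    tile: 400, // set a tile size if the default (no tiling) exhausts GPU memory
    model: "RealESRGAN_x4plus",
    output_format: "png",
  },
});
console.log(data.image.url); // EsrganOutput.image
```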
It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; +}; +export type ControlnetsdxlInput = { + /** + * Url to input image + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The scale of the ControlNet. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `50` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type FastSdxlControlnetCannyOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastSdxlControlnetCannyInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The URL of the control image. + */ + control_image_url: string | Blob | File; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. 
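The `Controlnetsdxl` pair above also works with the client's queue API when you would rather submit and poll than hold a subscription open. A sketch, assuming the endpoint id `fal-ai/controlnetsdxl`; the submit/status/result cycle itself is the client's standard queue flow:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/controlnetsdxl" is an assumed endpoint id.
const { request_id } = await fal.queue.submit("fal-ai/controlnetsdxl", {
  input: {
    image_url: "https://example.com/edges.png",
    prompt: "a futuristic concept car, studio shot",
    controlnet_conditioning_scale: 0.5,
  },
});

// ...later, poll and fetch the result:
const status = await fal.queue.status("fal-ai/controlnetsdxl", {
  requestId: request_id,
});
if (status.status === "COMPLETED") {
  const { data } = await fal.queue.result("fal-ai/controlnetsdxl", {
    requestId: request_id,
  });
  console.log(data.images, data.seed); // ControlnetsdxlOutput
}
```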
This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastSdxlControlnetCannyImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastSdxlControlnetCannyImageToImageInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The URL of the control image. + */ + control_image_url: string | Blob | File; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastSdxlControlnetCannyInpaintingOutput = { + /** + * The generated image files info. 
+ */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastSdxlControlnetCannyInpaintingInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The URL of the control image. + */ + control_image_url: string | Blob | File; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type InpaintInput = { + /** + * URL or HuggingFace ID of the base model to generate the image. + */ + model_name: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Input image for img2img or inpaint mode + */ + image_url: string | Blob | File; + /** + * Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted. 
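The inpainting variant above combines three image inputs: the canny control image, the base image, and the mask. A sketch, with the `/inpainting` endpoint id inferred from the `FastSdxlControlnetCannyInpainting*` type names:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: the endpoint id is an assumption from the type names above.
const { data } = await fal.subscribe(
  "fal-ai/fast-sdxl-controlnet-canny/inpainting",
  {
    input: {
      prompt: "a marble statue in a sunlit courtyard",
      control_image_url: "https://example.com/canny-edges.png",
      image_url: "https://example.com/base.png",
      mask_url: "https://example.com/mask.png", // white = area to repaint
      strength: 0.95,
    },
  },
);
console.log(data.images, data.has_nsfw_concepts);
```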
+ */ + mask_url: string | Blob | File; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `30` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type InpaintOutput = { + /** + * The generated image files info. + */ + image: Image; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; +}; +export type AnimatediffSparsectrlLcmInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to specify what you don't want. Default value: `""` + */ + negative_prompt?: string; + /** + * The type of controlnet to use for generating the video. The controlnet determines how the video will be animated. Default value: `"scribble"` + */ + controlnet_type?: "scribble" | "rgb"; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps to generate your final result which can increase the amount of detail in your image. Default value: `4` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable + * Diffusion will output the same image every time. + */ + seed?: number; + /** + * The URL of the first keyframe to use for the generation. + */ + keyframe_0_image_url?: string | Blob | File; + /** + * The frame index of the first keyframe to use for the generation. + */ + keyframe_0_index?: number; + /** + * The URL of the second keyframe to use for the generation. + */ + keyframe_1_image_url?: string | Blob | File; + /** + * The frame index of the second keyframe to use for the generation. + */ + keyframe_1_index?: number; + /** + * The URL of the third keyframe to use for the generation. + */ + keyframe_2_image_url?: string | Blob | File; + /** + * The frame index of the third keyframe to use for the generation. + */ + keyframe_2_index?: number; +}; +export type AnimatediffSparsectrlLcmOutput = { + /** + * Generated video file. + */ + video: File; + /** + * The seed used to generate the video. + */ + seed: number; +}; +export type PulidInput = { + /** + * List of reference faces, ideally 4 images. 
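The SparseCtrl LCM type above anchors specific frames with `keyframe_N_image_url` / `keyframe_N_index` pairs. A sketch of that keyframe pattern, assuming the endpoint id `fal-ai/animatediff-sparsectrl-lcm`:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/animatediff-sparsectrl-lcm" is an assumed endpoint id.
const { data } = await fal.subscribe("fal-ai/animatediff-sparsectrl-lcm", {
  input: {
    prompt: "a paper boat drifting down a stream",
    controlnet_type: "scribble",
    keyframe_0_image_url: "https://example.com/frame-start.png",
    keyframe_0_index: 0, // pin the first keyframe to frame 0
    keyframe_1_image_url: "https://example.com/frame-end.png",
    keyframe_1_index: 15, // and the second further into the clip
  },
});
console.log(data.video.url, data.seed); // AnimatediffSparsectrlLcmOutput
```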
+ */ + reference_images: Array; + /** + * Prompt to generate the face from + */ + prompt: string; + /** + * Negative prompt to generate the face from Default value: `"flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, low resolution, partially rendered objects, deformed or partially rendered eyes, deformed, deformed eyeballs, cross-eyed,blurry"` + */ + negative_prompt?: string; + /** + * Number of images to generate Default value: `1` + */ + num_images?: number; + /** + * Guidance scale Default value: `1.2` + */ + guidance_scale?: number; + /** + * Number of steps to take Default value: `4` + */ + num_inference_steps?: number; + /** + * Random seed for reproducibility + */ + seed?: number; + /** + * Size of the generated image Default value: `[object Object]` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * ID scale Default value: `0.8` + */ + id_scale?: number; + /** + * Mode of generation Default value: `"fidelity"` + */ + mode?: "fidelity" | "extreme style"; + /** + * if you want to mix two ID image, please turn this on, otherwise, turn this off + */ + id_mix?: boolean; +}; +export type PulidOutput = { + /** + * List of generated images + */ + images: Array; + /** + * Random seed used for reproducibility + */ + seed: number; +}; +export type IpAdapterFaceIdInput = { + /** + * The model type to use. 1_5 is the default and is recommended for most use cases. Default value: `"1_5-v1"` + */ + model_type?: + | "1_5-v1" + | "1_5-v1-plus" + | "1_5-v2-plus" + | "SDXL-v1" + | "SDXL-v2-plus" + | "1_5-auraface-v1"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * An image of a face to match. If an image with a size of 640x640 is not provided, it will be scaled and cropped to that size. + */ + face_image_url?: string | Blob | File; + /** + * URL to zip archive with images of faces. The images embedding will be averaged to + * create a more accurate face id. + */ + face_images_data_url?: string | Blob | File; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"` + */ + negative_prompt?: string; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `50` + */ + num_inference_steps?: number; + /** + * The number of samples for face id. The more samples the better the image will + * be but it will also take longer to generate. Default is 4. Default value: `4` + */ + num_samples?: number; + /** + * The width of the generated image. Default value: `512` + */ + width?: number; + /** + * The height of the generated image. 
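A PuLID sketch follows, assuming the endpoint id `fal-ai/pulid`. Note that `reference_images` appears above as a bare `Array` (its type parameter seems to have been stripped from this hunk), so the `image_url` object shape used here is a guess, not something the patch confirms:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/pulid" is an assumed endpoint id, and the
// reference_images element shape below is a guess (the generated hunk
// shows only `Array` without its type parameter).
const { data } = await fal.subscribe("fal-ai/pulid", {
  input: {
    reference_images: [
      // Ideally four shots of the same face, per the docs above.
      { image_url: "https://example.com/face-1.jpg" },
      { image_url: "https://example.com/face-2.jpg" },
    ],
    prompt: "portrait, watercolor illustration",
    mode: "fidelity",
    id_scale: 0.8,
  },
});
console.log(data.images); // PulidOutput
```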
Default value: `512` + */ + height?: number; + /** + * The size of the face detection model. The higher the number the more accurate + * the detection will be but it will also take longer to run. The higher the number the more + * likely it will fail to find a face as well. Lower it if you are having trouble + * finding a face in the image. Default value: `640` + */ + face_id_det_size?: number; + /** + * The URL to the base 1.5 model. Default is SG161222/Realistic_Vision_V4.0_noVAE Default value: `"SG161222/Realistic_Vision_V4.0_noVAE"` + */ + base_1_5_model_repo?: string; + /** + * The URL to the base SDXL model. Default is SG161222/RealVisXL_V3.0 Default value: `"SG161222/RealVisXL_V3.0"` + */ + base_sdxl_model_repo?: string; +}; +export type IpAdapterFaceIdOutput = { + /** + * The generated image file info. + */ + image: Image; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; +}; +export type ImageutilsMarigoldDepthInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type ImageutilsMarigoldDepthOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type StableAudioInput = { + /** + * The prompt to generate audio from + */ + prompt: string; + /** + * The start point of the audio clip to generate + */ + seconds_start?: number; + /** + * The duration of the audio clip to generate Default value: `30` + */ + seconds_total?: number; + /** + * The number of steps to denoise the audio for Default value: `100` + */ + steps?: number; +}; +export type StableAudioOutput = { + /** + * The generated audio clip + */ + audio_file: File; +}; +export type DiffusionEdgeInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type DiffusionEdgeOutput = { + /** + * The generated image file info. + */ + image: Image; +}; +export type TriposrOutput = { + /** + * Generated 3D object file. + */ + model_mesh: File; + /** + * Inference timings. + */ + timings: Record; + /** + * Directory containing textures for the remeshed model. + */ + remeshing_dir?: File; +}; +export type TriposrInput = { + /** + * Path for the image file to be processed. + */ + image_url: string | Blob | File; + /** + * Output format for the 3D model. Default value: `"glb"` + */ + output_format?: "glb" | "obj"; + /** + * Whether to remove the background from the input image. Default value: `true` + */ + do_remove_background?: boolean; + /** + * Ratio of the foreground image to the original image. Default value: `0.9` + */ + foreground_ratio?: number; + /** + * Resolution of the marching cubes. Above 512 is not recommended. Default value: `256` + */ + mc_resolution?: number; +}; +export type FooocusUpscaleOrVaryInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The style to use.
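The TripoSR pair defined above maps an input image to a 3D mesh, which makes for a compact sketch; `fal-ai/triposr` is an assumed endpoint id:

```ts
import { fal } from "@fal-ai/client";

// Sketch only: "fal-ai/triposr" is an assumed endpoint id for TriposrInput/TriposrOutput.
const { data } = await fal.subscribe("fal-ai/triposr", {
  input: {
    image_url: "https://example.com/chair.png",
    output_format: "glb",
    do_remove_background: true,
    mc_resolution: 256, // above 512 is not recommended per the docs above
  },
});
console.log(data.model_mesh.url); // generated 3D object file
```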
Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` + */ + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" + | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + | "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | "Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral Aura" + | "Avant Garde" + | 
"Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | "Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; + /** + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. 
You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request. Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array<LoraWeight>; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models; + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * + */ + image_prompt_1: ImagePrompt; + /** + * + */ + image_prompt_2?: ImagePrompt; + /** + * + */ + image_prompt_3?: ImagePrompt; + /** + * + */ + image_prompt_4?: ImagePrompt; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + */ + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; + /** + * Describe what you want to inpaint. Default value: `""` + */ + inpaint_additional_prompt?: string; + /** + * The directions to outpaint. Default value: `` + */ + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + /** + * Mixing Image Prompt and Inpaint + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * The image to upscale or vary. + */ + uov_image_url?: string | Blob | File; + /** + * The method to use for upscaling or varying. Default value: `"Disabled"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; + /** + * Mixing Image Prompt and Vary/Upscale + */ + mixing_image_prompt_and_vary_upscale?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusUpscaleOrVaryOutput = { + /** + * The generated image file info. + */ + images: Array<Image>; + /** + * The time taken for the generation process. + */ + timings: Record<string, number>; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; +}; +export type FooocusImagePromptInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g.
moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The style to use. Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` + */ + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" + | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + | "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | 
"Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral Aura" + | "Avant Garde" + | "Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | "Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; + /** + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. 
Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request. Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array<LoraWeight>; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models; + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * + */ + image_prompt_1: ImagePrompt; + /** + * + */ + image_prompt_2?: ImagePrompt; + /** + * + */ + image_prompt_3?: ImagePrompt; + /** + * + */ + image_prompt_4?: ImagePrompt; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + */ + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; + /** + * Describe what you want to inpaint. Default value: `""` + */ + inpaint_additional_prompt?: string; + /** + * The directions to outpaint. Default value: `` + */ + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + /** + * Mixing Image Prompt and Inpaint + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * The image to upscale or vary. + */ + uov_image_url?: string | Blob | File; + /** + * The method to use for upscaling or varying. Default value: `"Disabled"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; + /** + * Mixing Image Prompt and Vary/Upscale + */ + mixing_image_prompt_and_vary_upscale?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusImagePromptOutput = { + /** + * The generated image file info. + */ + images: Array<Image>; + /** + * The time taken for the generation process. + */ + timings: Record<string, number>; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; +}; +export type FooocusInpaintInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; + /** + * The negative prompt to use. 
Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The style to use. Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` + */ + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" + | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + 
| "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | "Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral Aura" + | "Avant Garde" + | "Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | "Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; + /** + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request. Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array<LoraWeight>; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models; + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * + */ + image_prompt_1: ImagePrompt; + /** + * + */ + image_prompt_2?: ImagePrompt; + /** + * + */ + image_prompt_3?: ImagePrompt; + /** + * + */ + image_prompt_4?: ImagePrompt; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + */ + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; + /** + * Describe what you want to inpaint. Default value: `""` + */ + inpaint_additional_prompt?: string; + /** + * The directions to outpaint. Default value: `` + */ + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + /** + * Mixing Image Prompt and Inpaint + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * The image to upscale or vary. + */ + uov_image_url?: string | Blob | File; + /** + * The method to use for upscaling or varying. Default value: `"Disabled"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; + /** + * Mixing Image Prompt and Vary/Upscale + */ + mixing_image_prompt_and_vary_upscale?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusInpaintOutput = { + /** + * The generated image file info. + */ + images: Array<Image>; + /** + * The time taken for the generation process. + */ + timings: Record<string, number>; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; +}; +export type RetoucherInput = { + /** + * The URL of the image to be retouched. 
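+// Illustrative sketch (not generator output): calling a Fooocus inpaint
+// endpoint with the FooocusInpaintInput/FooocusInpaintOutput types above.
+// The endpoint id "fal-ai/fooocus/inpaint" and the configured `fal` client
+// (see the demo apps in this PR) are assumptions for the example.
+//
+//   const { data } = await fal.subscribe("fal-ai/fooocus/inpaint", {
+//     input: {
+//       prompt: "a leather armchair",
+//       inpaint_image_url: "https://example.com/room.png",
+//       mask_image_url: "https://example.com/room-mask.png",
+//       image_prompt_1: {} as ImagePrompt, // required by the type
+//     },
+//   });
+//   // data.images[0].url, data.timings, data.has_nsfw_concepts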
+ */ + image_url: string | Blob | File; + /** + * Seed for reproducibility. Different seeds will produce slightly different results. + */ + seed?: number; +}; +export type RetoucherOutput = { + /** + * The generated image file info. + */ + image: Image; + /** + * The seed used for the generation. + */ + seed: number; +}; +export type AnyLlmOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; + /** + * Error message if an error occurred + */ + error?: string; +}; +export type AnyLlmInput = { + /** + * Name of the model to use. Premium models are charged at 3x the rate of standard models; they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "openai/gpt-4o"; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; +}; +export type AnyLlmVisionOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; + /** + * Error message if an error occurred + */ + error?: string; +}; +export type AnyLlmVisionInput = { + /** + * Name of the model to use. Premium models are charged at 3x the rate of standard models; they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "openai/gpt-4o"; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; +}; +export type Llavav1513bInput = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * Maximum number of tokens to generate. Default value: `64` + */ + max_tokens?: number; + /** + * Temperature for sampling. Default value: `0.2` + */ + temperature?: number; + /** + * Top P for sampling. Default value: `1` + */ + top_p?: number; +}; +export type Llavav1513bOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; +}; +export type LlavaNextInput = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * Maximum number of tokens to generate. Default value: `64` + */ + max_tokens?: number; + /** + * Temperature for sampling. Default value: `0.2` + */ + temperature?: number; + /** + * Top P for sampling. Default value: `1` + */ + top_p?: number; +}; +export type LlavaNextOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; +}; +export type ImageutilsNsfwInput = { + /** + * Input image URL. 
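+// Illustrative sketch (not generator output): a vision-LLM call shaped by
+// AnyLlmVisionInput above. The endpoint id "fal-ai/any-llm/vision" is an
+// assumption; `fal` is a configured client as in the demo apps in this PR.
+//
+//   const { data } = await fal.subscribe("fal-ai/any-llm/vision", {
+//     input: {
+//       model: "google/gemini-flash-1.5",
+//       prompt: "Describe this image in one sentence.",
+//       image_url: "https://example.com/photo.jpg",
+//     },
+//   });
+//   // data.output is the generated text; data.error is set on failure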
+ */ + image_url: string | Blob | File; +}; +export type ImageutilsNsfwOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type FastFooocusSdxlOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastFooocusSdxlInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it unset to automatically infer it from the prompt image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `8` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. Default value: `true` + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` + */ + enable_refiner?: boolean; +}; +export type FastFooocusSdxlImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. 
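+// Illustrative sketch (not generator output): an image-to-image request with
+// FastFooocusSdxlInput above. The endpoint id "fal-ai/fast-fooocus-sdxl" is
+// an assumption for the example.
+//
+//   const { data } = await fal.subscribe("fal-ai/fast-fooocus-sdxl", {
+//     input: {
+//       image_url: "https://example.com/sketch.png",
+//       prompt: "an isometric city at night",
+//       num_inference_steps: 8, // the documented default above
+//       strength: 0.95,
+//     },
+//   });
+//   // data.seed can be passed back as input.seed to reproduce the image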
+ */ + prompt: string; +}; +export type FastFooocusSdxlImageToImageInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it unset to automatically infer it from the prompt image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `8` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. Default value: `true` + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` + */ + enable_refiner?: boolean; +}; +export type FaceToStickerInput = { + /** + * URL of the image. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result, which can increase the amount of detail in your image. Default value: `20` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + */ + guidance_scale?: number; + /** + * The strength of the instant ID. Default value: `0.7` + */ + instant_id_strength?: number; + /** + * The weight of the IP adapter. 
Default value: `0.2` + */ + ip_adapter_weight?: number; + /** + * The amount of noise to add to the IP adapter. Default value: `0.5` + */ + ip_adapter_noise?: number; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * Whether to upscale the image 2x. + */ + upscale?: boolean; + /** + * The number of steps to use for upscaling. Only used if `upscale` is `true`. Default value: `10` + */ + upscale_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FaceToStickerOutput = { + /** + * The generated images. + */ + images: Array<Image>; + /** + * The generated face sticker image. + */ + sticker_image: Image; + /** + * The generated face sticker image with the background removed. + */ + sticker_image_background_removed: Image; + /** + * Seed used during the inference. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + * The key is the image type and the value is a boolean. + */ + has_nsfw_concepts: any; +}; +export type MoondreamBatchedInput = { + /** + * Model ID to use for inference. Default value: `"vikhyatk/moondream2"` + */ + model_id?: "vikhyatk/moondream2" | "fal-ai/moondream2-docci"; + /** + * List of input prompts and image URLs + */ + inputs: Array<{ prompt: string; image_url: string | Blob | File }>; + /** + * Maximum number of new tokens to generate. Default value: `64` + */ + max_tokens?: number; + /** + * Temperature for sampling. Default value: `0.2` + */ + temperature?: number; + /** + * Top P for sampling. Default value: `1` + */ + top_p?: number; + /** + * Repetition penalty for sampling. Default value: `1` + */ + repetition_penalty?: number; +}; +export type MoondreamBatchedOutput = { + /** + * List of generated outputs + */ + outputs: Array<string>; + /** + * Whether the output is partial + */ + partial?: boolean; + /** + * Timings for different parts of the process + */ + timings: Record<string, number>; + /** + * Filenames of the images processed + */ + filenames?: Array<string>; +}; +export type SadtalkerInput = { + /** + * URL of the source image + */ + source_image_url: string | Blob | File; + /** + * URL of the driven audio + */ + driven_audio_url: string | Blob | File; + /** + * URL of the reference video + */ + reference_pose_video_url: string | Blob | File; + /** + * The style of the pose + */ + pose_style?: number; + /** + * The resolution of the face model. Default value: `"256"` + */ + face_model_resolution?: "256" | "512"; + /** + * The scale of the expression. Default value: `1` + */ + expression_scale?: number; + /** + * The type of face enhancer to use + */ + face_enhancer?: "gfpgan"; + /** + * Whether to use still mode. Less head motion; works with preprocess `full`. + */ + still_mode?: boolean; + /** + * The type of preprocessing to use. Default value: `"crop"` + */ + preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; +}; +export type SadtalkerOutput = { + /** + * URL of the generated video + */ + video: File; +}; +export type MusetalkInput = { + /** + * URL of the source video + */ + source_video_url: string | Blob | File; + /** + * URL of the audio + */ + audio_url: string | Blob | File; +}; +export type MusetalkOutput = { + /** + * The generated video file. 
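+// Illustrative sketch (not generator output): a SadTalker lip-sync request
+// with SadtalkerInput above. The endpoint id "fal-ai/sadtalker" is an
+// assumption; note that reference_pose_video_url is declared as required.
+//
+//   const { data } = await fal.subscribe("fal-ai/sadtalker", {
+//     input: {
+//       source_image_url: "https://example.com/face.png",
+//       driven_audio_url: "https://example.com/speech.wav",
+//       reference_pose_video_url: "https://example.com/pose.mp4",
+//       still_mode: true, // less head motion; pairs with preprocess "full"
+//       preprocess: "full",
+//     },
+//   });
+//   // data.video holds the generated video file info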
+ */ + video: File; +}; +export type SadtalkerReferenceInput = { + /** + * URL of the source image + */ + source_image_url: string | Blob | File; + /** + * URL of the driven audio + */ + driven_audio_url: string | Blob | File; + /** + * URL of the reference video + */ + reference_pose_video_url: string | Blob | File; + /** + * The style of the pose + */ + pose_style?: number; + /** + * The resolution of the face model. Default value: `"256"` + */ + face_model_resolution?: "256" | "512"; + /** + * The scale of the expression. Default value: `1` + */ + expression_scale?: number; + /** + * The type of face enhancer to use + */ + face_enhancer?: "gfpgan"; + /** + * Whether to use still mode. Less head motion; works with preprocess `full`. + */ + still_mode?: boolean; + /** + * The type of preprocessing to use. Default value: `"crop"` + */ + preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; +}; +export type SadtalkerReferenceOutput = { + /** + * URL of the generated video + */ + video: File; +}; +export type LayerDiffusionInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; + /** + * The prompt to use for generating the negative image. Be as descriptive as possible for best results. Default value: `"text, watermark"` + */ + negative_prompt?: string; + /** + * The guidance scale for the model. Default value: `8` + */ + guidance_scale?: number; + /** + * The number of inference steps for the model. Default value: `20` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type LayerDiffusionOutput = { + /** + * The URL of the generated image. + */ + image: Image; + /** + * The seed used to generate the image. + */ + seed: number; +}; +export type StableDiffusionV15Input = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. 
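+// Illustrative sketch (not generator output): generating an image with
+// LayerDiffusionInput above. The endpoint id "fal-ai/layer-diffusion" is an
+// assumption for the example.
+//
+//   const { data } = await fal.subscribe("fal-ai/layer-diffusion", {
+//     input: {
+//       prompt: "a glass teapot, product shot",
+//       guidance_scale: 8,
+//       num_inference_steps: 20,
+//     },
+//   });
+//   // data.image is the result; pass data.seed back to reproduce it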
This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type StableDiffusionV15Output = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type LoraImageToImageInput = { + /** + * The method to use for the sigmas. If set to 'array', the sigmas will be set based + * on the provided sigmas schedule in the `array` field. + * Defaults to 'default', which means the scheduler's own sigmas are used. Default value: `"default"` + */ + method?: "default" | "array"; + /** + * Sigmas schedule to be used if the 'array' method is selected. Default value: `` + */ + array?: Array<number>; +}; +export type LoraImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The latents saved for debugging. + */ + debug_latents?: File; + /** + * The latents saved for debugging per pass. + */ + debug_per_pass_latents?: File; +}; +export type FastSdxlImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastSdxlImageToImageInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
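+// Illustrative sketch (not generator output): the "custom sigmas" shape of
+// LoraImageToImageInput above. With method "array", the scheduler's sigmas
+// are replaced by the provided values (the numbers below are only an example).
+//
+//   const sigmasInput: LoraImageToImageInput = {
+//     method: "array",
+//     array: [14.61, 6.47, 3.07, 1.56, 0.8, 0.41, 0.18, 0.0],
+//   };
+//   // method: "default" (the default) ignores `array` and keeps the
+//   // scheduler's own sigma schedule.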
Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, which can be used with the response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, which can be used with the response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type LoraInpaintInput = { + /** + * The method to use for the sigmas. If set to 'array', the sigmas will be set based + * on the provided sigmas schedule in the `array` field. + * Defaults to 'default', which means the scheduler's own sigmas are used. Default value: `"default"` + */ + method?: "default" | "array"; + /** + * Sigmas schedule to be used if the 'array' method is selected. Default value: `` + */ + array?: Array<number>; +}; +export type LoraInpaintOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The latents saved for debugging. + */ + debug_latents?: File; + /** + * The latents saved for debugging per pass. + */ + debug_per_pass_latents?: File; +}; +export type PixartSigmaInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The style to apply to the image. Default value: `"(No style)"` + */ + style?: + | "(No style)" + | "Cinematic" + | "Photographic" + | "Anime" + | "Manga" + | "Digital Art" + | "Pixel art" + | "Fantasy art" + | "Neonpunk" + | "3D Model"; + /** + * The size of the generated image. 
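+// Illustrative sketch (not generator output): an inpainting request with
+// FastSdxlInpaintingInput above. The endpoint id "fal-ai/fast-sdxl/inpainting"
+// is an assumption for the example.
+//
+//   const { data } = await fal.subscribe("fal-ai/fast-sdxl/inpainting", {
+//     input: {
+//       image_url: "https://example.com/photo.png",
+//       mask_url: "https://example.com/mask.png", // the inpainting mask
+//       prompt: "a wooden bench in a park",
+//       strength: 0.95,
+//     },
+//   });
+//   // data.images, data.seed, data.has_nsfw_concepts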
Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The scheduler to use for the model. Default value: `"DPM-SOLVER"` + */ + scheduler?: "DPM-SOLVER" | "SA-SOLVER"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; +}; +export type PixartSigmaOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * The timings of the different steps of the generation process. + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type DreamshaperOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type DreamshaperInput = { + /** + * The Dreamshaper model to use. + */ + model_name?: + | "Lykon/dreamshaper-xl-1-0" + | "Lykon/dreamshaper-xl-v2-turbo" + | "Lykon/dreamshaper-8"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Default value: `[object Object]` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. 
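+// Illustrative sketch (not generator output): a text-to-image request with
+// PixartSigmaInput above. The endpoint id "fal-ai/pixart-sigma" is an
+// assumption for the example.
+//
+//   const { data } = await fal.subscribe("fal-ai/pixart-sigma", {
+//     input: {
+//       prompt: "a watercolor fox in a snowy forest",
+//       style: "Fantasy art",
+//       scheduler: "SA-SOLVER",
+//       image_size: "landscape_4_3",
+//     },
+//   });
+//   // data.images[0].url, data.timings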
Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * Determines how much the generated image resembles the initial image. Default value: `0.95` + */ + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; +}; +export type RealisticVisionOutput = { + /** + * The generated image files info. + */ + images: Array<Image>; + /** + * + */ + timings: Record<string, number>; + /** + * Seed of the generated image. It will be the same value as the one passed in the + * input, or the randomly generated seed that was used if none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array<boolean>; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type RealisticVisionInput = { + /** + * The Realistic Vision model to use. + */ + model_name?: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Default value: `[object Object]` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array<LoraWeight>; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array<Embedding>; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. 
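+// Illustrative sketch (not generator output): selecting a Dreamshaper model
+// via DreamshaperInput above. The endpoint id "fal-ai/dreamshaper" is an
+// assumption; note that image_url and mask_url are declared as required on
+// this type.
+//
+//   const { data } = await fal.subscribe("fal-ai/dreamshaper", {
+//     input: {
+//       model_name: "Lykon/dreamshaper-xl-v2-turbo",
+//       prompt: "portrait of an astronaut, studio lighting",
+//       image_url: "https://example.com/base.png",
+//       mask_url: "https://example.com/mask.png",
+//     },
+//   });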
+ */ + seed?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; +}; +export type LightningModelsInput = { + /** + * The Lightning model to use. + */ + model_name?: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Default value: `[object Object]` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `5` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + */ + guidance_scale?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * Scheduler / sampler to use for the image denoising process. + */ + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "DPM++ SDE" + | "DPM++ SDE Karras" + | "KDPM 2A" + | "Euler" + | "Euler (trailing timesteps)" + | "Euler A" + | "LCM" + | "EDMDPMSolverMultistepScheduler" + | "TCDScheduler"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The format of the generated image. 
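+// ---------------------------------------------------------------------------
+// Usage sketch (illustrative, not part of the generated schema): the
+// Input/Output pairs in this file are meant to be picked up by the client so
+// that `input` is checked and `data` is inferred from the endpoint id. Client
+// setup mirrors the demo apps in this repo; the credentials option and all
+// field values below are assumptions for the example.
+//
+//   import { createFalClient } from "@fal-ai/client";
+//
+//   const fal = createFalClient({ credentials: process.env.FAL_KEY });
+//   const { data } = await fal.subscribe("fal-ai/dreamshaper", {
+//     input: {
+//       prompt: "a cinematic portrait, 85mm, golden hour",
+//       image_url: "https://example.com/source.png", // required by DreamshaperInput
+//       mask_url: "https://example.com/mask.png", // required by DreamshaperInput
+//       strength: 0.8,
+//     },
+//   });
+//   // `data` is a DreamshaperOutput: data.images[0].url, data.seed, ...
+// ---------------------------------------------------------------------------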
Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; +}; +export type LightningModelsOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type OmniZeroOutput = { + /** + * The generated image. + */ + image: Image; +}; +export type OmniZeroInput = { + /** + * Prompt to guide the image generation. + */ + prompt: string; + /** + * Negative prompt to guide the image generation. Default value: `""` + */ + negative_prompt?: string; + /** + * Input image url. + */ + image_url: string | Blob | File; + /** + * Composition image url. + */ + composition_image_url: string | Blob | File; + /** + * Style image url. + */ + style_image_url: string | Blob | File; + /** + * Identity image url. + */ + identity_image_url: string | Blob | File; + /** + * Image strength. Default value: `0.75` + */ + image_strength?: number; + /** + * Composition strength. Default value: `1` + */ + composition_strength?: number; + /** + * Depth strength. Default value: `0.5` + */ + depth_strength?: number; + /** + * Style strength. Default value: `1` + */ + style_strength?: number; + /** + * Face strength. Default value: `1` + */ + face_strength?: number; + /** + * Identity strength. Default value: `1` + */ + identity_strength?: number; + /** + * Guidance scale. Default value: `5` + */ + guidance_scale?: number; + /** + * Seed. Default value: `42` + */ + seed?: number; + /** + * Number of images. Default value: `1` + */ + number_of_images?: number; +}; +export type CatVtonOutput = { + /** + * The output image. + */ + image: Image; +}; +export type CatVtonInput = { + /** + * Url for the human image. + */ + human_image_url: string | Blob | File; + /** + * Url to the garment image. + */ + garment_image_url: string | Blob | File; + /** + * Type of the Cloth to be tried on. + * + * Options: + * upper: Upper body cloth + * lower: Lower body cloth + * overall: Full body cloth + * inner: Inner cloth, like T-shirt inside a jacket + * outer: Outer cloth, like a jacket over a T-shirt + */ + cloth_type: "upper" | "lower" | "overall" | "inner" | "outer"; + /** + * The size of the generated image. Default value: `portrait_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `30` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
Default value: `2.5` + */ + guidance_scale?: number; + /** + * The same seed and the same input given to the same version of the model + * will output the same image every time. + */ + seed?: number; +}; +export type DwposeOutput = { + /** + * The predicted pose image + */ + image: Image; +}; +export type DwposeInput = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; +}; +export type StableCascadeSoteDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type StableCascadeSoteDiffusionInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Number of steps to run the first stage for. Default value: `20` + */ + first_stage_steps?: number; + /** + * Number of steps to run the second stage for. Default value: `10` + */ + second_stage_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. + */ + second_stage_guidance_scale?: number; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of Stable Cascade + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the image will be returned as base64 encoded string. + */ + sync_mode?: boolean; +}; +export type Florence2LargeCaptionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeCaptionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeDetailedCaptionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeDetailedCaptionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeMoreDetailedCaptionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeMoreDetailedCaptionInput = { + /** + * The URL of the image to be processed. 
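+// Sketch for the Florence-2 task types above (values are illustrative; note
+// that the generated schema marks `text_input` as required even for plain
+// captioning, so an empty string is passed here):
+//
+//   const { data } = await fal.subscribe("fal-ai/florence-2-large/caption", {
+//     input: { image_url: "https://example.com/photo.jpg", text_input: "" },
+//   });
+//   // data.results holds the caption text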
+ */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeObjectDetectionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeObjectDetectionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeDenseRegionCaptionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeDenseRegionCaptionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeRegionProposalOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeRegionProposalInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeCaptionToPhraseGroundingOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeCaptionToPhraseGroundingInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeReferringExpressionSegmentationOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeReferringExpressionSegmentationInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeRegionToSegmentationOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeRegionToSegmentationInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeOpenVocabularyDetectionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeOpenVocabularyDetectionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeRegionToCategoryOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeRegionToCategoryInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeRegionToDescriptionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeRegionToDescriptionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeOcrOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeOcrInput = { + /** + * The URL of the image to be processed. 
+ */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeOcrWithRegionOutput = { + /** + * Results from the model + */ + results: string; +}; +export type Florence2LargeOcrWithRegionInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Era3dOutput = { + /** + * Images with background removed + */ + images: Array; + /** + * Normal images with background removed + */ + normal_images: Array; + /** + * Seed used for random number generation + */ + seed: number; +}; +export type Era3dInput = { + /** + * URL of the image to remove background from + */ + image_url: string | Blob | File; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + cfg?: number; + /** + * Number of steps to run the model for Default value: `40` + */ + steps?: number; + /** + * Size of the image to crop to Default value: `400` + */ + crop_size?: number; + /** + * Seed for random number generation Default value: `-1` + */ + seed?: number; + /** + * Background removal Default value: `true` + */ + background_removal?: boolean; +}; +export type LivePortraitOutput = { + /** + * The generated video file. + */ + video: File; +}; +export type LivePortraitInput = { + /** + * URL of the image to be animated + */ + image_url: string | Blob | File; + /** + * Amount to blink the eyes + */ + blink?: number; + /** + * Amount to raise or lower eyebrows + */ + eyebrow?: number; + /** + * Amount to wink + */ + wink?: number; + /** + * Amount to move pupils horizontally + */ + pupil_x?: number; + /** + * Amount to move pupils vertically + */ + pupil_y?: number; + /** + * Amount to open mouth in 'aaa' shape + */ + aaa?: number; + /** + * Amount to shape mouth in 'eee' position + */ + eee?: number; + /** + * Amount to shape mouth in 'woo' position + */ + woo?: number; + /** + * Amount to smile + */ + smile?: number; + /** + * Amount to rotate the face in pitch + */ + rotate_pitch?: number; + /** + * Amount to rotate the face in yaw + */ + rotate_yaw?: number; + /** + * Amount to rotate the face in roll + */ + rotate_roll?: number; + /** + * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` + */ + flag_pasteback?: boolean; + /** + * Whether to crop the source portrait to the face-cropping space. Default value: `true` + */ + flag_do_crop?: boolean; + /** + * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` + */ + flag_do_rot?: boolean; + /** + * Size of the output image. Default value: `512` + */ + dsize?: number; + /** + * Scaling factor for the face crop. Default value: `2.3` + */ + scale?: number; + /** + * Horizontal offset ratio for face crop. + */ + vx_ratio?: number; + /** + * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` + */ + vy_ratio?: number; + /** + * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. 
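+// Sketch for the LivePortrait types above: the expression and pose fields are
+// plain numeric offsets applied to the source face. Values are illustrative
+// assumptions, reusing the `fal` client from the earlier sketch:
+//
+//   const { data } = await fal.subscribe("fal-ai/live-portrait", {
+//     input: {
+//       image_url: "https://example.com/face.jpg",
+//       blink: 0.5,
+//       smile: 0.8,
+//       rotate_yaw: -8,
+//     },
+//   });
+//   // data.video is the rendered animation (declared as File above)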
+ * The safety checker will process the input image + */ + enable_safety_checker?: boolean; + /** + * Output format Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type LivePortraitImageOutput = { + /** + * The generated video file. + */ + video: File; +}; +export type LivePortraitImageInput = { + /** + * URL of the image to be animated + */ + image_url: string | Blob | File; + /** + * Amount to blink the eyes + */ + blink?: number; + /** + * Amount to raise or lower eyebrows + */ + eyebrow?: number; + /** + * Amount to wink + */ + wink?: number; + /** + * Amount to move pupils horizontally + */ + pupil_x?: number; + /** + * Amount to move pupils vertically + */ + pupil_y?: number; + /** + * Amount to open mouth in 'aaa' shape + */ + aaa?: number; + /** + * Amount to shape mouth in 'eee' position + */ + eee?: number; + /** + * Amount to shape mouth in 'woo' position + */ + woo?: number; + /** + * Amount to smile + */ + smile?: number; + /** + * Amount to rotate the face in pitch + */ + rotate_pitch?: number; + /** + * Amount to rotate the face in yaw + */ + rotate_yaw?: number; + /** + * Amount to rotate the face in roll + */ + rotate_roll?: number; + /** + * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` + */ + flag_pasteback?: boolean; + /** + * Whether to crop the source portrait to the face-cropping space. Default value: `true` + */ + flag_do_crop?: boolean; + /** + * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` + */ + flag_do_rot?: boolean; + /** + * Size of the output image. Default value: `512` + */ + dsize?: number; + /** + * Scaling factor for the face crop. Default value: `2.3` + */ + scale?: number; + /** + * Horizontal offset ratio for face crop. + */ + vx_ratio?: number; + /** + * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` + */ + vy_ratio?: number; + /** + * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. + * The safety checker will process the input image + */ + enable_safety_checker?: boolean; + /** + * Output format Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; +}; +export type MusePoseInput = { + /** + * URL of the image to animate. + */ + image_url: string | Blob | File; + /** + * The URL of the video to drive the animation + */ + video_url: string | Blob | File; + /** + * The resolution to use for the pose detection. Default value: `512` + */ + dwpose_detection_resolution?: number; + /** + * The resolution to use for the image during pose calculation. Default value: `720` + */ + dwpose_image_resolution?: number; + /** + * The frame to align the pose to. + */ + dwpose_align_frame?: number; + /** + * The width of the output video. Default value: `748` + */ + width?: number; + /** + * The height of the output video. Default value: `748` + */ + height?: number; + /** + * The length of the output video. Default value: `300` + */ + length?: number; + /** + * The video slice frame number Default value: `48` + */ + slice?: number; + /** + * The video slice overlap frame number Default value: `4` + */ + overlap?: number; + /** + * Classifier free guidance Default value: `3.5` + */ + cfg?: number; + /** + * The seed to use for the random number generator. 
+ */ + seed?: number; + /** + * DDIM sampling steps Default value: `20` + */ + steps?: number; + /** + * The frames per second of the output video. + */ + fps?: number; + /** + * Number of input frames to skip. Skipping 1 effectively reduces the fps in half. Default value: `1` + */ + skip?: number; +}; +export type MusePoseOutput = { + /** + * The generated video with the lip sync. + */ + video: File; +}; +export type KolorsInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible + * for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small + * details (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show + * you. Default value: `5` + */ + guidance_scale?: number; + /** + * The number of inference steps to perform. Default value: `50` + */ + num_inference_steps?: number; + /** + * Seed + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and + * uploaded before returning the response. This will increase the latency of + * the function but it allows you to get the image directly in the response + * without going through the CDN. + */ + sync_mode?: boolean; + /** + * Enable safety checker. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The scheduler to use for the model. Default value: `"EulerDiscreteScheduler"` + */ + scheduler?: + | "EulerDiscreteScheduler" + | "EulerAncestralDiscreteScheduler" + | "DPMSolverMultistepScheduler" + | "DPMSolverMultistepScheduler_SDE_karras" + | "UniPCMultistepScheduler" + | "DEISMultistepScheduler"; +}; +export type KolorsOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * The timings of the different steps of the generation process. + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in + * the input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type SdxlControlnetUnionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type SdxlControlnetUnionInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The scale of the controlnet conditioning. 
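+// Sketch for the Kolors types above, showing a typed scheduler choice. The
+// ImageSize object form is assumed to be { width, height } as defined earlier
+// in this file; all values are illustrative:
+//
+//   const { data } = await fal.subscribe("fal-ai/kolors", {
+//     input: {
+//       prompt: "ink-wash painting of mountains in mist",
+//       scheduler: "EulerDiscreteScheduler",
+//       image_size: "landscape_4_3", // or e.g. { width: 1024, height: 576 }
+//       num_images: 2,
+//     },
+//   });
+//   // data.images has `num_images` entries; data.seed echoes the seed used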
Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * The URL of the control image. + */ + openpose_image_url?: string | Blob | File; + /** + * Whether to preprocess the openpose image. Default value: `true` + */ + openpose_preprocess?: boolean; + /** + * The URL of the control image. + */ + depth_image_url?: string | Blob | File; + /** + * Whether to preprocess the depth image. Default value: `true` + */ + depth_preprocess?: boolean; + /** + * The URL of the control image. + */ + teed_image_url?: string | Blob | File; + /** + * Whether to preprocess the teed image. Default value: `true` + */ + teed_preprocess?: boolean; + /** + * The URL of the control image. + */ + canny_image_url?: string | Blob | File; + /** + * Whether to preprocess the canny image. Default value: `true` + */ + canny_preprocess?: boolean; + /** + * The URL of the control image. + */ + normal_image_url?: string | Blob | File; + /** + * Whether to preprocess the normal image. Default value: `true` + */ + normal_preprocess?: boolean; + /** + * The URL of the control image. + */ + segmentation_image_url?: string | Blob | File; + /** + * Whether to preprocess the segmentation image. 
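+// Sketch for the ControlNet Union types above: each control signal has its
+// own optional `*_image_url` + `*_preprocess` pair, which suggests several
+// controls can be combined in a single request (illustrative values):
+//
+//   const { data } = await fal.subscribe("fal-ai/sdxl-controlnet-union", {
+//     input: {
+//       prompt: "an isometric voxel city at night",
+//       depth_image_url: "https://example.com/depth.png",
+//       canny_image_url: "https://example.com/edges.png",
+//       canny_preprocess: false, // edges were extracted beforehand
+//       controlnet_conditioning_scale: 0.6,
+//     },
+//   });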
Default value: `true` + */ + segmentation_preprocess?: boolean; +}; +export type SdxlControlnetUnionImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type SdxlControlnetUnionImageToImageInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * The URL of the control image. + */ + openpose_image_url?: string | Blob | File; + /** + * Whether to preprocess the openpose image. Default value: `true` + */ + openpose_preprocess?: boolean; + /** + * The URL of the control image. + */ + depth_image_url?: string | Blob | File; + /** + * Whether to preprocess the depth image. Default value: `true` + */ + depth_preprocess?: boolean; + /** + * The URL of the control image. 
+ */ + teed_image_url?: string | Blob | File; + /** + * Whether to preprocess the teed image. Default value: `true` + */ + teed_preprocess?: boolean; + /** + * The URL of the control image. + */ + canny_image_url?: string | Blob | File; + /** + * Whether to preprocess the canny image. Default value: `true` + */ + canny_preprocess?: boolean; + /** + * The URL of the control image. + */ + normal_image_url?: string | Blob | File; + /** + * Whether to preprocess the normal image. Default value: `true` + */ + normal_preprocess?: boolean; + /** + * The URL of the control image. + */ + segmentation_image_url?: string | Blob | File; + /** + * Whether to preprocess the segmentation image. Default value: `true` + */ + segmentation_preprocess?: boolean; +}; +export type SdxlControlnetUnionInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: Record; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type SdxlControlnetUnionInpaintingInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. 
+ */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * The URL of the control image. + */ + openpose_image_url?: string | Blob | File; + /** + * Whether to preprocess the openpose image. Default value: `true` + */ + openpose_preprocess?: boolean; + /** + * The URL of the control image. + */ + depth_image_url?: string | Blob | File; + /** + * Whether to preprocess the depth image. Default value: `true` + */ + depth_preprocess?: boolean; + /** + * The URL of the control image. + */ + teed_image_url?: string | Blob | File; + /** + * Whether to preprocess the teed image. Default value: `true` + */ + teed_preprocess?: boolean; + /** + * The URL of the control image. + */ + canny_image_url?: string | Blob | File; + /** + * Whether to preprocess the canny image. Default value: `true` + */ + canny_preprocess?: boolean; + /** + * The URL of the control image. + */ + normal_image_url?: string | Blob | File; + /** + * Whether to preprocess the normal image. Default value: `true` + */ + normal_preprocess?: boolean; + /** + * The URL of the control image. + */ + segmentation_image_url?: string | Blob | File; + /** + * Whether to preprocess the segmentation image. Default value: `true` + */ + segmentation_preprocess?: boolean; +}; +export type Sam2ImageInput = { + /** + * The URL of the video to be segmented. + */ + video_url: string | Blob | File; + /** + * List of prompts to segment the video Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type Sam2ImageOutput = { + /** + * Segmented image. + */ + image: Image; +}; +export type Sam2VideoInput = { + /** + * The URL of the video to be segmented. + */ + video_url: string | Blob | File; + /** + * List of prompts to segment the video Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type Sam2VideoOutput = { + /** + * Segmented image. + */ + image: Image; +}; +export type ImageutilsSamInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type ImageutilsSamOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type MiniCpmInput = { + /** + * URL of the video to be analyzed + */ + video_url: string | Blob | File; + /** + * Prompt to be used for the video description + */ + prompt: string; +}; +export type MiniCpmOutput = { + /** + * Response from the model + */ + output: string; +}; +export type MiniCpmVideoInput = { + /** + * URL of the video to be analyzed + */ + video_url: string | Blob | File; + /** + * Prompt to be used for the video description + */ + prompt: string; +}; +export type MiniCpmVideoOutput = { + /** + * Response from the model + */ + output: string; +}; +export type ControlnextInput = { + /** + * URL of the reference image. + */ + image_url: string | Blob | File; + /** + * URL of the input video. + */ + video_url: string | Blob | File; + /** + * Height of the output video. Default value: `1024` + */ + height?: number; + /** + * Width of the output video. Default value: `576` + */ + width?: number; + /** + * Guidance scale for the diffusion process. Default value: `3` + */ + guidance_scale?: number; + /** + * Number of inference steps. 
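+// Sketch for the MiniCPM video types above (illustrative values):
+//
+//   const { data } = await fal.subscribe("fal-ai/mini-cpm/video", {
+//     input: {
+//       video_url: "https://example.com/clip.mp4",
+//       prompt: "Describe what happens in this clip.",
+//     },
+//   });
+//   // data.output is the model's text answer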
Default value: `25` + */ + num_inference_steps?: number; + /** + * Maximum number of frames to process. Default value: `240` + */ + max_frame_num?: number; + /** + * Number of frames to process in each batch. Default value: `24` + */ + batch_frames?: number; + /** + * Number of overlapping frames between batches. Default value: `6` + */ + overlap?: number; + /** + * Stride for sampling frames from the input video. Default value: `2` + */ + sample_stride?: number; + /** + * Chunk size for decoding frames. Default value: `2` + */ + decode_chunk_size?: number; + /** + * Motion bucket ID for the pipeline. Default value: `127` + */ + motion_bucket_id?: number; + /** + * Frames per second for the output video. Default value: `7` + */ + fps?: number; + /** + * Condition scale for ControlNeXt. Default value: `1` + */ + controlnext_cond_scale?: number; +}; +export type ControlnextOutput = { + /** + * The generated video. + */ + video: File; +}; +export type WorkflowutilsCannyOutput = { + /** + * The edge map. + */ + image: Image; +}; +export type WorkflowutilsCannyInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; + /** + * Threshold for the edge map. Default value: `0.5` + */ + threshold?: number; + /** + * Size of the detection. Default value: `640` + */ + det_size_width?: number; + /** + * Size of the detection. Default value: `640` + */ + det_size_height?: number; + /** + * Maximum number of faces to detect. Default value: `1` + */ + max_face_num?: number; + /** + * URL of the model weights. Default value: `"buffalo_l"` + */ + model_url?: string | Blob | File; + /** + * Sorting of the faces. Default value: `"size"` + */ + sorting?: string; + /** + * Whether to run in sync mode. Default value: `true` + */ + sync_mode?: boolean; +}; +export type ImagePreprocessorsDepthAnythingV2Output = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsDepthAnythingV2Input = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsHedOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsHedInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsLineartOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsLineartInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsMidasOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsMidasInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether 
to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsMlsdOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsMlsdInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsPidiOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsPidiInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsSamOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsSamInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsScribbleOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsScribbleInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsTeedOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsTeedInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type ImagePreprocessorsZoeOutput = { + /** + * Image with lines detected using the MLSD detector + */ + image: Image; +}; +export type ImagePreprocessorsZoeInput = { + /** + * URL of the image to process + */ + image_url: string | Blob | File; + /** + * The model to use for the Scribble detector Default value: `"HED"` + */ + model?: "HED" | "PiDi"; + /** + * Whether to use the safe version of the Scribble detector + */ + safe?: boolean; +}; +export type F5TtsOutput = { + /** + * The audio file containing the generated speech. + */ + audio_url: AudioFile; +}; +export type F5TtsInput = { + /** + * The text to be converted to speech. + */ + gen_text: string; + /** + * The URL of the reference audio file. + */ + ref_audio_url: string | Blob | File; + /** + * The reference text to be used for TTS. If not provided, an ASR (Automatic Speech Recognition) model will be used to generate the reference text. Default value: `""` + */ + ref_text?: string; + /** + * The name of the model to be used for TTS. + */ + model_type: "F5-TTS" | "E2-TTS"; + /** + * Whether to remove the silence from the audio file. 
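+// Sketch for the F5-TTS types above. Per the doc comments, omitting
+// `ref_text` makes the service transcribe the reference audio with an ASR
+// model; values here are illustrative:
+//
+//   const { data } = await fal.subscribe("fal-ai/f5-tts", {
+//     input: {
+//       gen_text: "Hello from a cloned voice.",
+//       ref_audio_url: "https://example.com/reference.wav",
+//       model_type: "F5-TTS",
+//     },
+//   });
+//   // data.audio_url describes the synthesized speech (AudioFile)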
Default value: `true` + */ + remove_silence?: boolean; +}; +export type EndpointTypeMap = { + "fal-ai/flux-pro/v1.1-ultra": { + input: FluxProV11UltraInput; + output: FluxProV11UltraOutput; + }; + "fal-ai/flux-lora-fast-training": { + input: FluxLoraFastTrainingInput; + output: FluxLoraFastTrainingOutput; + }; + "fal-ai/recraft-v3": { + input: RecraftV3Input; + output: RecraftV3Output; + }; + "fal-ai/minimax-video/image-to-video": { + input: MinimaxVideoImageToVideoInput; + output: MinimaxVideoImageToVideoOutput; + }; + "fal-ai/aura-flow": { + input: AuraFlowInput; + output: AuraFlowOutput; + }; + "fal-ai/flux/dev/image-to-image": { + input: FluxDevImageToImageInput; + output: FluxDevImageToImageOutput; + }; + "fal-ai/flux/dev": { + input: FluxDevInput; + output: FluxDevOutput; + }; + "fal-ai/flux-lora": { + input: FluxLoraInput; + output: FluxLoraOutput; + }; + "fal-ai/flux/schnell": { + input: FluxSchnellInput; + output: FluxSchnellOutput; + }; + "fal-ai/flux-pro/v1.1": { + input: FluxProV11Input; + output: FluxProV11Output; + }; + "fal-ai/flux-pro/new": { + input: FluxProNewInput; + output: FluxProNewOutput; + }; + "fal-ai/omnigen-v1": { + input: OmnigenV1Input; + output: OmnigenV1Output; + }; + "fal-ai/stable-diffusion-v35-large": { + input: StableDiffusionV35LargeInput; + output: StableDiffusionV35LargeOutput; + }; + "fal-ai/stable-diffusion-v35-medium": { + input: StableDiffusionV35MediumInput; + output: StableDiffusionV35MediumOutput; + }; + "fal-ai/recraft-v3/create-style": { + input: RecraftV3CreateStyleInput; + output: RecraftV3CreateStyleOutput; + }; + "fal-ai/flux-realism": { + input: FluxRealismInput; + output: FluxRealismOutput; + }; + "fal-ai/flux-lora/inpainting": { + input: FluxLoraInpaintingInput; + output: FluxLoraInpaintingOutput; + }; + "fal-ai/flux-lora/image-to-image": { + input: FluxLoraImageToImageInput; + output: FluxLoraImageToImageOutput; + }; + "fal-ai/flux-general": { + input: FluxGeneralInput; + output: FluxGeneralOutput; + }; + "fal-ai/flux-general/inpainting": { + input: FluxGeneralInpaintingInput; + output: FluxGeneralInpaintingOutput; + }; + "fal-ai/flux-general/image-to-image": { + input: FluxGeneralImageToImageInput; + output: FluxGeneralImageToImageOutput; + }; + "fal-ai/flux-general/differential-diffusion": { + input: FluxGeneralDifferentialDiffusionInput; + output: FluxGeneralDifferentialDiffusionOutput; + }; + "fal-ai/flux-general/rf-inversion": { + input: FluxGeneralRfInversionInput; + output: FluxGeneralRfInversionOutput; + }; + "fal-ai/iclight-v2": { + input: IclightV2Input; + output: IclightV2Output; + }; + "fal-ai/flux-differential-diffusion": { + input: FluxDifferentialDiffusionInput; + output: FluxDifferentialDiffusionOutput; + }; + "fal-ai/stable-diffusion-v3-medium": { + input: StableDiffusionV3MediumInput; + output: StableDiffusionV3MediumOutput; + }; + "fal-ai/stable-diffusion-v3-medium/image-to-image": { + input: StableDiffusionV3MediumImageToImageInput; + output: StableDiffusionV3MediumImageToImageOutput; + }; + "fal-ai/fast-sdxl": { + input: FastSdxlInput; + output: FastSdxlOutput; + }; + "fal-ai/lora": { + input: LoraInput; + output: LoraOutput; + }; + "fal-ai/aura-sr": { + input: AuraSrInput; + output: AuraSrOutput; + }; + "fal-ai/stable-cascade": { + input: StableCascadeInput; + output: StableCascadeOutput; + }; + "fal-ai/minimax-video": { + input: MinimaxVideoInput; + output: MinimaxVideoOutput; + }; + "fal-ai/haiper-video-v2": { + input: HaiperVideoV2Input; + output: HaiperVideoV2Output; + }; + 
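+// This endpoint-id -> { input, output } map is what enables id-based type
+// inference. A types-only sketch of a helper built on it (names are
+// illustrative; the real wiring lives in the client itself). The
+// "@fal-ai/client/endpoints" path is the alias this patch adds to
+// tsconfig.base.json:
+//
+//   import type { EndpointTypeMap } from "@fal-ai/client/endpoints";
+//
+//   type EndpointId = keyof EndpointTypeMap;
+//   type InputOf<E extends EndpointId> = EndpointTypeMap[E]["input"];
+//   type OutputOf<E extends EndpointId> = EndpointTypeMap[E]["output"];
+//
+//   declare function run<E extends EndpointId>(
+//     endpointId: E,
+//     input: InputOf<E>,
+//   ): Promise<OutputOf<E>>;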
"fal-ai/haiper-video-v2/image-to-video": { + input: HaiperVideoV2ImageToVideoInput; + output: HaiperVideoV2ImageToVideoOutput; + }; + "fal-ai/mochi-v1": { + input: MochiV1Input; + output: MochiV1Output; + }; + "fal-ai/luma-dream-machine": { + input: LumaDreamMachineInput; + output: LumaDreamMachineOutput; + }; + "fal-ai/luma-dream-machine/image-to-video": { + input: LumaDreamMachineImageToVideoInput; + output: LumaDreamMachineImageToVideoOutput; + }; + "fal-ai/kling-video/v1/standard/text-to-video": { + input: KlingVideoV1StandardTextToVideoInput; + output: KlingVideoV1StandardTextToVideoOutput; + }; + "fal-ai/kling-video/v1/standard/image-to-video": { + input: KlingVideoV1StandardImageToVideoInput; + output: KlingVideoV1StandardImageToVideoOutput; + }; + "fal-ai/kling-video/v1/pro/text-to-video": { + input: KlingVideoV1ProTextToVideoInput; + output: KlingVideoV1ProTextToVideoOutput; + }; + "fal-ai/kling-video/v1/pro/image-to-video": { + input: KlingVideoV1ProImageToVideoInput; + output: KlingVideoV1ProImageToVideoOutput; + }; + "fal-ai/cogvideox-5b": { + input: Cogvideox5bInput; + output: Cogvideox5bOutput; + }; + "fal-ai/cogvideox-5b/video-to-video": { + input: Cogvideox5bVideoToVideoInput; + output: Cogvideox5bVideoToVideoOutput; + }; + "fal-ai/cogvideox-5b/image-to-video": { + input: Cogvideox5bImageToVideoInput; + output: Cogvideox5bImageToVideoOutput; + }; + "fal-ai/stable-video": { + input: StableVideoInput; + output: StableVideoOutput; + }; + "fal-ai/fast-svd/text-to-video": { + input: FastSvdTextToVideoInput; + output: FastSvdTextToVideoOutput; + }; + "fal-ai/fast-svd-lcm": { + input: FastSvdLcmInput; + output: FastSvdLcmOutput; + }; + "fal-ai/birefnet": { + input: BirefnetInput; + output: BirefnetOutput; + }; + "fal-ai/birefnet/v2": { + input: BirefnetV2Input; + output: BirefnetV2Output; + }; + "fal-ai/fast-svd-lcm/text-to-video": { + input: FastSvdLcmTextToVideoInput; + output: FastSvdLcmTextToVideoOutput; + }; + "fal-ai/creative-upscaler": { + input: CreativeUpscalerInput; + output: CreativeUpscalerOutput; + }; + "fal-ai/clarity-upscaler": { + input: ClarityUpscalerInput; + output: ClarityUpscalerOutput; + }; + "fal-ai/ccsr": { + input: CcsrInput; + output: CcsrOutput; + }; + "fal-ai/fast-turbo-diffusion": { + input: FastTurboDiffusionInput; + output: FastTurboDiffusionOutput; + }; + "fal-ai/fast-turbo-diffusion/image-to-image": { + input: FastTurboDiffusionImageToImageInput; + output: FastTurboDiffusionImageToImageOutput; + }; + "fal-ai/fast-turbo-diffusion/inpainting": { + input: FastTurboDiffusionInpaintingInput; + output: FastTurboDiffusionInpaintingOutput; + }; + "fal-ai/fast-lcm-diffusion": { + input: FastLcmDiffusionInput; + output: FastLcmDiffusionOutput; + }; + "fal-ai/fast-lcm-diffusion/image-to-image": { + input: FastLcmDiffusionImageToImageInput; + output: FastLcmDiffusionImageToImageOutput; + }; + "fal-ai/fast-lcm-diffusion/inpainting": { + input: FastLcmDiffusionInpaintingInput; + output: FastLcmDiffusionInpaintingOutput; + }; + "fal-ai/whisper": { + input: WhisperInput; + output: WhisperOutput; + }; + "fal-ai/wizper": { + input: WizperInput; + output: WizperOutput; + }; + "fal-ai/fast-lightning-sdxl": { + input: FastLightningSdxlInput; + output: FastLightningSdxlOutput; + }; + "fal-ai/fast-lightning-sdxl/image-to-image": { + input: FastLightningSdxlImageToImageInput; + output: FastLightningSdxlImageToImageOutput; + }; + "fal-ai/fast-lightning-sdxl/inpainting": { + input: FastLightningSdxlInpaintingInput; + output: FastLightningSdxlInpaintingOutput; + }; + 
"fal-ai/hyper-sdxl": { + input: HyperSdxlInput; + output: HyperSdxlOutput; + }; + "fal-ai/hyper-sdxl/image-to-image": { + input: HyperSdxlImageToImageInput; + output: HyperSdxlImageToImageOutput; + }; + "fal-ai/hyper-sdxl/inpainting": { + input: HyperSdxlInpaintingInput; + output: HyperSdxlInpaintingOutput; + }; + "fal-ai/playground-v25": { + input: PlaygroundV25Input; + output: PlaygroundV25Output; + }; + "fal-ai/playground-v25/image-to-image": { + input: PlaygroundV25ImageToImageInput; + output: PlaygroundV25ImageToImageOutput; + }; + "fal-ai/playground-v25/inpainting": { + input: PlaygroundV25InpaintingInput; + output: PlaygroundV25InpaintingOutput; + }; + "fal-ai/amt-interpolation": { + input: AmtInterpolationInput; + output: AmtInterpolationOutput; + }; + "fal-ai/amt-interpolation/frame-interpolation": { + input: AmtInterpolationFrameInterpolationInput; + output: AmtInterpolationFrameInterpolationOutput; + }; + "fal-ai/t2v-turbo": { + input: T2vTurboInput; + output: T2vTurboOutput; + }; + "fal-ai/sd15-depth-controlnet": { + input: Sd15DepthControlnetInput; + output: Sd15DepthControlnetOutput; + }; + "fal-ai/photomaker": { + input: PhotomakerInput; + output: PhotomakerOutput; + }; + "fal-ai/lcm": { + input: LcmInput; + output: LcmOutput; + }; + "fal-ai/lcm-sd15-i2i": { + input: LcmSd15I2iInput; + output: LcmSd15I2iOutput; + }; + "fal-ai/fooocus": { + input: FooocusInput; + output: FooocusOutput; + }; + "fal-ai/animatediff-v2v": { + input: AnimatediffV2vInput; + output: AnimatediffV2vOutput; + }; + "fal-ai/animatediff-v2v/turbo": { + input: AnimatediffV2vTurboInput; + output: AnimatediffV2vTurboOutput; + }; + "fal-ai/fast-animatediff/text-to-video": { + input: FastAnimatediffTextToVideoInput; + output: FastAnimatediffTextToVideoOutput; + }; + "fal-ai/fast-animatediff/video-to-video": { + input: FastAnimatediffVideoToVideoInput; + output: FastAnimatediffVideoToVideoOutput; + }; + "fal-ai/fast-animatediff/turbo/text-to-video": { + input: FastAnimatediffTurboTextToVideoInput; + output: FastAnimatediffTurboTextToVideoOutput; + }; + "fal-ai/fast-animatediff/turbo/video-to-video": { + input: FastAnimatediffTurboVideoToVideoInput; + output: FastAnimatediffTurboVideoToVideoOutput; + }; + "fal-ai/illusion-diffusion": { + input: IllusionDiffusionInput; + output: IllusionDiffusionOutput; + }; + "fal-ai/imageutils/depth": { + input: ImageutilsDepthInput; + output: ImageutilsDepthOutput; + }; + "fal-ai/imageutils/rembg": { + input: ImageutilsRembgInput; + output: ImageutilsRembgOutput; + }; + "fal-ai/esrgan": { + input: EsrganInput; + output: EsrganOutput; + }; + "fal-ai/controlnetsdxl": { + input: ControlnetsdxlInput; + output: ControlnetsdxlOutput; + }; + "fal-ai/fast-sdxl-controlnet-canny": { + input: FastSdxlControlnetCannyInput; + output: FastSdxlControlnetCannyOutput; + }; + "fal-ai/fast-sdxl-controlnet-canny/image-to-image": { + input: FastSdxlControlnetCannyImageToImageInput; + output: FastSdxlControlnetCannyImageToImageOutput; + }; + "fal-ai/fast-sdxl-controlnet-canny/inpainting": { + input: FastSdxlControlnetCannyInpaintingInput; + output: FastSdxlControlnetCannyInpaintingOutput; + }; + "fal-ai/inpaint": { + input: InpaintInput; + output: InpaintOutput; + }; + "fal-ai/animatediff-sparsectrl-lcm": { + input: AnimatediffSparsectrlLcmInput; + output: AnimatediffSparsectrlLcmOutput; + }; + "fal-ai/pulid": { + input: PulidInput; + output: PulidOutput; + }; + "fal-ai/ip-adapter-face-id": { + input: IpAdapterFaceIdInput; + output: IpAdapterFaceIdOutput; + }; + "fal-ai/imageutils/marigold-depth": 
{ + input: ImageutilsMarigoldDepthInput; + output: ImageutilsMarigoldDepthOutput; + }; + "fal-ai/stable-audio": { + input: StableAudioInput; + output: StableAudioOutput; + }; + "fal-ai/diffusion-edge": { + input: DiffusionEdgeInput; + output: DiffusionEdgeOutput; + }; + "fal-ai/triposr": { + input: TriposrInput; + output: TriposrOutput; + }; + "fal-ai/fooocus/upscale-or-vary": { + input: FooocusUpscaleOrVaryInput; + output: FooocusUpscaleOrVaryOutput; + }; + "fal-ai/fooocus/image-prompt": { + input: FooocusImagePromptInput; + output: FooocusImagePromptOutput; + }; + "fal-ai/fooocus/inpaint": { + input: FooocusInpaintInput; + output: FooocusInpaintOutput; + }; + "fal-ai/retoucher": { + input: RetoucherInput; + output: RetoucherOutput; + }; + "fal-ai/any-llm": { + input: AnyLlmInput; + output: AnyLlmOutput; + }; + "fal-ai/any-llm/vision": { + input: AnyLlmVisionInput; + output: AnyLlmVisionOutput; + }; + "fal-ai/llavav15-13b": { + input: Llavav1513bInput; + output: Llavav1513bOutput; + }; + "fal-ai/llava-next": { + input: LlavaNextInput; + output: LlavaNextOutput; + }; + "fal-ai/imageutils/nsfw": { + input: ImageutilsNsfwInput; + output: ImageutilsNsfwOutput; + }; + "fal-ai/fast-fooocus-sdxl": { + input: FastFooocusSdxlInput; + output: FastFooocusSdxlOutput; + }; + "fal-ai/fast-fooocus-sdxl/image-to-image": { + input: FastFooocusSdxlImageToImageInput; + output: FastFooocusSdxlImageToImageOutput; + }; + "fal-ai/face-to-sticker": { + input: FaceToStickerInput; + output: FaceToStickerOutput; + }; + "fal-ai/moondream/batched": { + input: MoondreamBatchedInput; + output: MoondreamBatchedOutput; + }; + "fal-ai/sadtalker": { + input: SadtalkerInput; + output: SadtalkerOutput; + }; + "fal-ai/musetalk": { + input: MusetalkInput; + output: MusetalkOutput; + }; + "fal-ai/sadtalker/reference": { + input: SadtalkerReferenceInput; + output: SadtalkerReferenceOutput; + }; + "fal-ai/layer-diffusion": { + input: LayerDiffusionInput; + output: LayerDiffusionOutput; + }; + "fal-ai/stable-diffusion-v15": { + input: StableDiffusionV15Input; + output: StableDiffusionV15Output; + }; + "fal-ai/lora/image-to-image": { + input: LoraImageToImageInput; + output: LoraImageToImageOutput; + }; + "fal-ai/fast-sdxl/image-to-image": { + input: FastSdxlImageToImageInput; + output: FastSdxlImageToImageOutput; + }; + "fal-ai/fast-sdxl/inpainting": { + input: FastSdxlInpaintingInput; + output: FastSdxlInpaintingOutput; + }; + "fal-ai/lora/inpaint": { + input: LoraInpaintInput; + output: LoraInpaintOutput; + }; + "fal-ai/pixart-sigma": { + input: PixartSigmaInput; + output: PixartSigmaOutput; + }; + "fal-ai/dreamshaper": { + input: DreamshaperInput; + output: DreamshaperOutput; + }; + "fal-ai/realistic-vision": { + input: RealisticVisionInput; + output: RealisticVisionOutput; + }; + "fal-ai/lightning-models": { + input: LightningModelsInput; + output: LightningModelsOutput; + }; + "fal-ai/omni-zero": { + input: OmniZeroInput; + output: OmniZeroOutput; + }; + "fal-ai/cat-vton": { + input: CatVtonInput; + output: CatVtonOutput; + }; + "fal-ai/dwpose": { + input: DwposeInput; + output: DwposeOutput; + }; + "fal-ai/stable-cascade/sote-diffusion": { + input: StableCascadeSoteDiffusionInput; + output: StableCascadeSoteDiffusionOutput; + }; + "fal-ai/florence-2-large/caption": { + input: Florence2LargeCaptionInput; + output: Florence2LargeCaptionOutput; + }; + "fal-ai/florence-2-large/detailed-caption": { + input: Florence2LargeDetailedCaptionInput; + output: Florence2LargeDetailedCaptionOutput; + }; + 
"fal-ai/florence-2-large/more-detailed-caption": { + input: Florence2LargeMoreDetailedCaptionInput; + output: Florence2LargeMoreDetailedCaptionOutput; + }; + "fal-ai/florence-2-large/object-detection": { + input: Florence2LargeObjectDetectionInput; + output: Florence2LargeObjectDetectionOutput; + }; + "fal-ai/florence-2-large/dense-region-caption": { + input: Florence2LargeDenseRegionCaptionInput; + output: Florence2LargeDenseRegionCaptionOutput; + }; + "fal-ai/florence-2-large/region-proposal": { + input: Florence2LargeRegionProposalInput; + output: Florence2LargeRegionProposalOutput; + }; + "fal-ai/florence-2-large/caption-to-phrase-grounding": { + input: Florence2LargeCaptionToPhraseGroundingInput; + output: Florence2LargeCaptionToPhraseGroundingOutput; + }; + "fal-ai/florence-2-large/referring-expression-segmentation": { + input: Florence2LargeReferringExpressionSegmentationInput; + output: Florence2LargeReferringExpressionSegmentationOutput; + }; + "fal-ai/florence-2-large/region-to-segmentation": { + input: Florence2LargeRegionToSegmentationInput; + output: Florence2LargeRegionToSegmentationOutput; + }; + "fal-ai/florence-2-large/open-vocabulary-detection": { + input: Florence2LargeOpenVocabularyDetectionInput; + output: Florence2LargeOpenVocabularyDetectionOutput; + }; + "fal-ai/florence-2-large/region-to-category": { + input: Florence2LargeRegionToCategoryInput; + output: Florence2LargeRegionToCategoryOutput; + }; + "fal-ai/florence-2-large/region-to-description": { + input: Florence2LargeRegionToDescriptionInput; + output: Florence2LargeRegionToDescriptionOutput; + }; + "fal-ai/florence-2-large/ocr": { + input: Florence2LargeOcrInput; + output: Florence2LargeOcrOutput; + }; + "fal-ai/florence-2-large/ocr-with-region": { + input: Florence2LargeOcrWithRegionInput; + output: Florence2LargeOcrWithRegionOutput; + }; + "fal-ai/era-3d": { + input: Era3dInput; + output: Era3dOutput; + }; + "fal-ai/live-portrait": { + input: LivePortraitInput; + output: LivePortraitOutput; + }; + "fal-ai/live-portrait/image": { + input: LivePortraitImageInput; + output: LivePortraitImageOutput; + }; + "fal-ai/muse-pose": { + input: MusePoseInput; + output: MusePoseOutput; + }; + "fal-ai/kolors": { + input: KolorsInput; + output: KolorsOutput; + }; + "fal-ai/sdxl-controlnet-union": { + input: SdxlControlnetUnionInput; + output: SdxlControlnetUnionOutput; + }; + "fal-ai/sdxl-controlnet-union/image-to-image": { + input: SdxlControlnetUnionImageToImageInput; + output: SdxlControlnetUnionImageToImageOutput; + }; + "fal-ai/sdxl-controlnet-union/inpainting": { + input: SdxlControlnetUnionInpaintingInput; + output: SdxlControlnetUnionInpaintingOutput; + }; + "fal-ai/sam2/image": { + input: Sam2ImageInput; + output: Sam2ImageOutput; + }; + "fal-ai/sam2/video": { + input: Sam2VideoInput; + output: Sam2VideoOutput; + }; + "fal-ai/imageutils/sam": { + input: ImageutilsSamInput; + output: ImageutilsSamOutput; + }; + "fal-ai/mini-cpm": { + input: MiniCpmInput; + output: MiniCpmOutput; + }; + "fal-ai/mini-cpm/video": { + input: MiniCpmVideoInput; + output: MiniCpmVideoOutput; + }; + "fal-ai/controlnext": { + input: ControlnextInput; + output: ControlnextOutput; + }; + "fal-ai/workflowutils/canny": { + input: WorkflowutilsCannyInput; + output: WorkflowutilsCannyOutput; + }; + "fal-ai/image-preprocessors/depth-anything/v2": { + input: ImagePreprocessorsDepthAnythingV2Input; + output: ImagePreprocessorsDepthAnythingV2Output; + }; + "fal-ai/image-preprocessors/hed": { + input: ImagePreprocessorsHedInput; + output: 
ImagePreprocessorsHedOutput; + }; + "fal-ai/image-preprocessors/lineart": { + input: ImagePreprocessorsLineartInput; + output: ImagePreprocessorsLineartOutput; + }; + "fal-ai/image-preprocessors/midas": { + input: ImagePreprocessorsMidasInput; + output: ImagePreprocessorsMidasOutput; + }; + "fal-ai/image-preprocessors/mlsd": { + input: ImagePreprocessorsMlsdInput; + output: ImagePreprocessorsMlsdOutput; + }; + "fal-ai/image-preprocessors/pidi": { + input: ImagePreprocessorsPidiInput; + output: ImagePreprocessorsPidiOutput; + }; + "fal-ai/image-preprocessors/sam": { + input: ImagePreprocessorsSamInput; + output: ImagePreprocessorsSamOutput; + }; + "fal-ai/image-preprocessors/scribble": { + input: ImagePreprocessorsScribbleInput; + output: ImagePreprocessorsScribbleOutput; + }; + "fal-ai/image-preprocessors/teed": { + input: ImagePreprocessorsTeedInput; + output: ImagePreprocessorsTeedOutput; + }; + "fal-ai/image-preprocessors/zoe": { + input: ImagePreprocessorsZoeInput; + output: ImagePreprocessorsZoeOutput; + }; + "fal-ai/f5-tts": { + input: F5TtsInput; + output: F5TtsOutput; + }; +}; diff --git a/tsconfig.base.json b/tsconfig.base.json index 67d0d9a..4bdeeee 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -16,6 +16,7 @@ "baseUrl": ".", "paths": { "@fal-ai/client": ["libs/client/src/index.ts"], + "@fal-ai/client/endpoints": ["libs/client/src/types/endpoints.ts"], "@fal-ai/create-app": ["libs/create-app/src/index.ts"], "@fal-ai/server-proxy": ["libs/proxy/src/index.ts"], "@fal-ai/server-proxy/express": ["libs/proxy/src/express.ts"], diff --git a/typedoc.json b/typedoc.json index ff12bec..fea30c7 100644 --- a/typedoc.json +++ b/typedoc.json @@ -2,7 +2,7 @@ "$schema": "https://typedoc.org/schema.json", "out": "docs/reference", "entryPoints": ["./libs/client/src/index.ts"], - "exclude": ["./src/__tests__/**", "*.spec.ts"], + "exclude": ["./src/__tests__/**", "*.spec.ts", "./src/types/endpoints.ts"], "excludeExternals": true, "excludeInternal": false, "includeVersion": true, From 2a0a47a5624c2e41ad1b346bf3060e7b2bf5a33d Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Tue, 12 Nov 2024 00:32:02 -0800 Subject: [PATCH 2/6] fix(client): endpoint generation --- libs/client/package.json | 2 +- libs/client/src/types/endpoints.ts | 17145 ++++++++++++++++++--------- 2 files changed, 11572 insertions(+), 5575 deletions(-) diff --git a/libs/client/package.json b/libs/client/package.json index b4c7b65..9bed0ba 100644 --- a/libs/client/package.json +++ b/libs/client/package.json @@ -1,7 +1,7 @@ { "name": "@fal-ai/client", "description": "The fal.ai client for JavaScript and TypeScript", - "version": "1.1.0-alpha.0", + "version": "1.1.0-alpha.2", "license": "MIT", "repository": { "type": "git", diff --git a/libs/client/src/types/endpoints.ts b/libs/client/src/types/endpoints.ts index 576dcfb..407e571 100644 --- a/libs/client/src/types/endpoints.ts +++ b/libs/client/src/types/endpoints.ts @@ -70,6 +70,20 @@ export type RGBColor = { */ b?: number; }; +export type TextToVideoRequest = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; export type LoraWeight = { /** * URL or the path to the LoRA weights. Or HF model name. 
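The `@fal-ai/client/endpoints` path added to tsconfig.base.json in the first patch resolves the generated file inside the workspace, while the typedoc exclusion keeps its thousands of declarations out of the rendered API reference. A minimal sketch of consuming it, assuming the generated module exports the endpoint map under a name like `EndpointTypeMap` (the export name is an assumption; the endpoint ID is taken from the map above):

    import type { EndpointTypeMap } from "@fal-ai/client/endpoints";

    // Index the map by endpoint ID to recover that endpoint's payload types.
    type FooocusEndpoint = EndpointTypeMap["fal-ai/fooocus"];
    type FooocusIn = FooocusEndpoint["input"];   // resolves to FooocusInput
    type FooocusOut = FooocusEndpoint["output"]; // resolves to FooocusOutput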
@@ -188,60 +202,33 @@ export type ControlNet = { */ ip_adapter_index?: number; }; -export type ControlNetUnionInput = { - /** - * URL of the image to be used as the control image. - */ - control_image_url: string | Blob | File; - /** - * URL of the mask for the control image. - */ - mask_image_url?: string | Blob | File; - /** - * Control Mode for Flux Controlnet Union. Supported values are: - * - canny: Uses the edges for guided generation. - * - tile: Uses the tiles for guided generation. - * - depth: Utilizes a grayscale depth map for guided generation. - * - blur: Adds a blur to the image. - * - pose: Uses the pose of the image for guided generation. - * - gray: Converts the image to grayscale. - * - low-quality: Converts the image to a low-quality image. - */ - control_mode: - | "canny" - | "tile" - | "depth" - | "blur" - | "pose" - | "gray" - | "low-quality"; +export type Embedding = { /** - * The scale of the control net weight. This is used to scale the control net weight - * before merging it with the base model. Default value: `1` + * URL or the path to the embedding weights. */ - conditioning_scale?: number; + path: string; /** - * Threshold for mask. Default value: `0.5` + * The list of tokens to use for the embedding. Default value: `,` */ - mask_threshold?: number; + tokens?: Array; +}; +export type ImageToVideoRequest = { /** - * The percentage of the image to start applying the controlnet in terms of the total timesteps. + * */ - start_percentage?: number; + prompt: string; /** - * The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` + * */ - end_percentage?: number; -}; -export type Embedding = { + image_url: string | Blob | File; /** - * URL or the path to the embedding weights. + * The duration of the generated video in seconds Default value: `"5"` */ - path: string; + duration?: "5" | "10"; /** - * The list of tokens to use for the embedding. Default value: `,` + * The aspect ratio of the generated video frame Default value: `"16:9"` */ - tokens?: Array; + aspect_ratio?: "16:9" | "9:16" | "1:1"; }; export type InputV2 = { /** @@ -409,12 +396,6 @@ export type BoundingBox = { */ label: string; }; -export type PolygonOutput = { - /** - * List of polygons - */ - polygons: Array; -}; export type OCRBoundingBoxOutputWithLabels = { /** * Results from the model @@ -451,7 +432,7 @@ export type Polygon = { /** * List of points */ - points: Array>; + points: Array; /** * Label of the polygon */ @@ -565,7 +546,7 @@ export type AudioFile = { */ file_size?: number | null; }; -export type FluxProV11UltraInput = { +export type FluxProTextToImageInput = { /** * The prompt to generate an image from. */ @@ -581,6 +562,40 @@ export type FluxProV11UltraInput = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. 
This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + */ + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; +export type FluxProV11UltraInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. @@ -604,6 +619,14 @@ export type FluxProV11UltraInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The aspect ratio of the generated image. Default value: `"16:9"` + */ + aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "9:21"; + /** + * Generate less processed, more natural-looking images. + */ + raw?: boolean; }; export type FluxProV11UltraOutput = { /** @@ -613,7 +636,7 @@ export type FluxProV11UltraOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -628,6 +651,46 @@ export type FluxProV11UltraOutput = { */ prompt: string; }; +export type FluxProPlusTextToImageInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. Default value: `landscape_4_3` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + */ + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; export type FluxLoraFastTrainingInput = { /** * URL to zip archive with images. Try to use at least 4 images in general the more the better. @@ -673,9 +736,43 @@ export type FluxLoraFastTrainingOutput = { }; export type RecraftV3Output = { /** - * The ID of the created style, this ID can be used to reference the style in the future. + * */ - style_id: string; + images: Array; +}; +export type StyleReferenceInput = { + /** + * URL to zip archive with images, use PNG format. Maximum 5 images are allowed. + */ + images_data_url: string | Blob | File; + /** + * The base style of the generated images, this topic is covered above. 
Default value: `"digital_illustration"` + */ + base_style?: + | "any" + | "realistic_image" + | "digital_illustration" + | "vector_illustration" + | "realistic_image/b_and_w" + | "realistic_image/hard_flash" + | "realistic_image/hdr" + | "realistic_image/natural_light" + | "realistic_image/studio_portrait" + | "realistic_image/enterprise" + | "realistic_image/motion_blur" + | "digital_illustration/pixel_art" + | "digital_illustration/hand_drawn" + | "digital_illustration/grain" + | "digital_illustration/infantile_sketch" + | "digital_illustration/2d_art_poster" + | "digital_illustration/handmade_3d" + | "digital_illustration/hand_drawn_outline" + | "digital_illustration/engraving_color" + | "digital_illustration/2d_art_poster_2" + | "vector_illustration/engraving" + | "vector_illustration/line_art" + | "vector_illustration/line_circuit" + | "vector_illustration/linocut"; }; export type RecraftV3Input = { /** @@ -730,6 +827,18 @@ export type RecraftV3Input = { */ style_id?: string | null; }; +export type StyleReferenceOutput = { + /** + * The ID of the created style, this ID can be used to reference the style in the future. + */ + style_id: string; +}; +export type MinimaxVideoImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; export type MinimaxVideoImageToVideoInput = { /** * @@ -744,12 +853,6 @@ export type MinimaxVideoImageToVideoInput = { */ prompt_optimizer?: boolean; }; -export type MinimaxVideoImageToVideoOutput = { - /** - * The generated video - */ - video: File; -}; export type AuraFlowInput = { /** * The prompt to generate images from @@ -790,19 +893,51 @@ export type AuraFlowOutput = { */ prompt: string; }; -export type FluxDevImageToImageInput = { - /** - * The URL of the image to generate an image from. - */ - image_url: string | Blob | File; +export type SchnellTextToImageInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * The strength of the initial image. Higher strength values are better for this model. Default value: `0.95` + * The size of the generated image. Default value: `landscape_4_3` */ - strength?: number; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `4` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type DevTextToImageInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; /** * The size of the generated image. Default value: `landscape_4_3` */ @@ -815,7 +950,7 @@ export type FluxDevImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `40` + * The number of inference steps to perform. 
Default value: `28` */ num_inference_steps?: number; /** @@ -851,7 +986,7 @@ export type FluxDevImageToImageOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -866,7 +1001,7 @@ export type FluxDevImageToImageOutput = { */ prompt: string; }; -export type FluxDevInput = { +export type FluxDevImageToImageInput = { /** * The URL of the image to generate an image from. */ @@ -919,59 +1054,13 @@ export type FluxDevInput = { */ enable_safety_checker?: boolean; }; -export type FluxDevOutput = { +export type FluxDevInput = { /** - * The generated image files info. + * The prompt to generate an image from. */ - images: Array; + prompt: string; /** - * - */ - timings: Record; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed: number; - /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; - /** - * The prompt used for generating the image. - */ - prompt: string; -}; -export type FluxLoraOutput = { - /** - * The generated image files info. - */ - images: Array; - /** - * - */ - timings: Record; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed: number; - /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; - /** - * The prompt used for generating the image. - */ - prompt: string; -}; -export type FluxLoraInput = { - /** - * The prompt to generate an image from. - */ - prompt: string; - /** - * The size of the generated image. + * The size of the generated image. Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -990,11 +1079,6 @@ export type FluxLoraInput = { * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` @@ -1014,24 +1098,31 @@ export type FluxLoraInput = { * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; +}; +export type FluxDevOutput = { /** - * The format of the generated image. Default value: `"jpeg"` + * The generated image files info. */ - output_format?: "jpeg" | "png"; + images: Array; /** - * URL of image to use for inpainting. or img2img + * */ - image_url: string | Blob | File; + timings: any; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - strength?: number; + seed: number; /** - * The mask to area to Inpaint in. + * Whether the generated images contain NSFW concepts. */ - mask_url: string | Blob | File; + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. 
+ */ + prompt: string; }; -export type FluxSchnellInput = { +export type DevImageToImageInput = { /** * The URL of the image to generate an image from. */ @@ -1084,7 +1175,7 @@ export type FluxSchnellInput = { */ enable_safety_checker?: boolean; }; -export type FluxSchnellOutput = { +export type FluxLoraOutput = { /** * The generated image files info. */ @@ -1092,7 +1183,7 @@ export type FluxSchnellOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -1107,7 +1198,7 @@ export type FluxSchnellOutput = { */ prompt: string; }; -export type FluxProV11Input = { +export type FluxLoraInput = { /** * The prompt to generate an image from. */ @@ -1123,11 +1214,25 @@ export type FluxProV11Input = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -1143,416 +1248,361 @@ export type FluxProV11Input = { */ enable_safety_checker?: boolean; /** - * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + * The format of the generated image. Default value: `"jpeg"` */ - safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + output_format?: "jpeg" | "png"; }; -export type FluxProV11Output = { +export type ImageToImageInput = { /** - * The generated image files info. + * URL or HuggingFace ID of the base model to generate the image. */ - images: Array; + model_name: string; /** - * + * URL or HuggingFace ID of the custom U-Net model to use for the image generation. */ - timings: Record; + unet_name?: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The variant of the model to use for huggingface models, e.g. 'fp16'. */ - seed: number; + variant?: string; /** - * Whether the generated images contain NSFW concepts. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - has_nsfw_concepts: Array; + prompt: string; /** - * The prompt used for generating the image. + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - prompt: string; -}; -export type FluxProNewInput = { + negative_prompt?: string; /** - * The prompt to generate an image from. + * If set to true, the prompt weighting syntax will be used. + * Additionally, this will lift the 77 token limit by averaging embeddings. 
*/ - prompt: string; + prompt_weighting?: boolean; /** - * The size of the generated image. Default value: `landscape_4_3` + * URL of image to use for image to image/inpainting. */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + image_url?: string | Blob | File; /** - * The same seed and the same prompt given to the same version of the model - * will output the same image every time. + * The amount of noise to add to noise image for image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5` */ - seed?: number; + noise_strength?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - sync_mode?: boolean; + loras?: Array; /** - * The number of images to generate. Default value: `1` + * The embeddings to use for the image generation. Only a single embedding is supported at the moment. + * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: `` */ - num_images?: number; + embeddings?: Array; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The control nets to use for the image generation. You can use any number of control nets + * and they will be applied to the image at the specified timesteps. Default value: `` */ - enable_safety_checker?: boolean; + controlnets?: Array; /** - * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` + * If set to true, the controlnet will be applied to only the conditional predictions. */ - safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; -}; -export type FluxProNewOutput = { + controlnet_guess_mode?: boolean; /** - * The generated image files info. + * The IP adapter to use for the image generation. Default value: `` */ - images: Array; + ip_adapter?: Array; /** - * + * The path to the image encoder model to use for the image generation. */ - timings: Record; + image_encoder_path?: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The subfolder of the image encoder model to use for the image generation. */ - seed: number; + image_encoder_subfolder?: string; /** - * Whether the generated images contain NSFW concepts. + * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ - has_nsfw_concepts: Array; + image_encoder_weight_name?: string; /** - * The prompt used for generating the image. + * The URL of the IC Light model to use for the image generation. */ - prompt: string; -}; -export type OmnigenV1Output = { + ic_light_model_url?: string | Blob | File; /** - * The generated image files info. + * The URL of the IC Light model background image to use for the image generation. + * Make sure to use a background compatible with the model. 
*/ - images: Array; + ic_light_model_background_image_url?: string | Blob | File; /** - * + * The URL of the IC Light model image to use for the image generation. */ - timings: Record; + ic_light_image_url?: string | Blob | File; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - seed: number; + seed?: number; /** - * Whether the generated images contain NSFW concepts. + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `30` */ - has_nsfw_concepts: Array; + num_inference_steps?: number; /** - * The prompt used for generating the image. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - prompt: string; -}; -export type OmnigenV1Input = { + guidance_scale?: number; /** - * The prompt to generate an image from. + * Skips part of the image generation process, leading to slightly different results. + * This means the image renders faster, too. */ - prompt: string; + clip_skip?: number; /** - * URL of images to use while generating the image, Use <|image_1|> for the first image and so on. Default value: `` + * Scheduler / sampler to use for the image denoising process. */ - input_image_urls?: Array; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "Euler" + | "Euler A" + | "Euler (trailing timesteps)" + | "LCM" + | "LCM (trailing timesteps)" + | "DDIM" + | "TCD"; /** - * The size of the generated image. Default value: `square_hd` + * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. + * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. + * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. Default value: `[object Object]` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + timesteps?: TimestepsInput; /** - * The number of inference steps to perform. Default value: `50` + * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. + * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. + * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. Default value: `[object Object]` */ - num_inference_steps?: number; + sigmas?: SigmasInput; /** - * The same seed and the same prompt given to the same version of the model - * will output the same image every time. + * The format of the generated image. 
Default value: `"png"` */ - seed?: number; + image_format?: "jpeg" | "png"; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` */ - guidance_scale?: number; + num_images?: number; /** - * The Image Guidance scale is a measure of how close you want - * the model to stick to your input image when looking for a related image to show you. Default value: `1.6` + * If set to true, the safety checker will be enabled. */ - img_guidance_scale?: number; + enable_safety_checker?: boolean; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The size of the tiles to be used for the image generation. Default value: `4096` */ - sync_mode?: boolean; + tile_width?: number; /** - * The number of images to generate. Default value: `1` + * The size of the tiles to be used for the image generation. Default value: `4096` */ - num_images?: number; + tile_height?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - enable_safety_checker?: boolean; + tile_stride_width?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - output_format?: "jpeg" | "png"; + tile_stride_height?: number; + /** + * The eta value to be used for the image generation. + */ + eta?: number; + /** + * If set to true, the latents will be saved for debugging. + */ + debug_latents?: boolean; + /** + * If set to true, the latents will be saved for debugging per pass. + */ + debug_per_pass_latents?: boolean; }; -export type StableDiffusionV35LargeInput = { +export type InpaintInput = { /** - * The prompt to generate an image from. + * URL or HuggingFace ID of the base model to generate the image. + */ + model_name: string; + /** + * URL or HuggingFace ID of the custom U-Net model to use for the image generation. + */ + unet_name?: string; + /** + * The variant of the model to use for huggingface models, e.g. 'fp16'. + */ + variant?: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want + * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `landscape_4_3` + * If set to true, the prompt weighting syntax will be used. + * Additionally, this will lift the 77 token limit by averaging embeddings. */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The number of inference steps to perform. 
Default value: `28` - */ - num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of the model - * will output the same image every time. - */ - seed?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - guidance_scale?: number; - /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; + prompt_weighting?: boolean; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * URL of image to use for image to image/inpainting. */ - enable_safety_checker?: boolean; + image_url?: string | Blob | File; /** - * The format of the generated image. Default value: `"jpeg"` + * URL of black-and-white image to use as mask during inpainting. */ - output_format?: "jpeg" | "png"; -}; -export type StableDiffusionV35LargeOutput = { + mask_url?: string | Blob | File; /** - * The generated image files info. + * The amount of noise to add to noise image for image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5` */ - images: Array; + noise_strength?: number; /** - * + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - timings: Record; + loras?: Array; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The embeddings to use for the image generation. Only a single embedding is supported at the moment. + * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: `` */ - seed: number; + embeddings?: Array; /** - * Whether the generated images contain NSFW concepts. + * The control nets to use for the image generation. You can use any number of control nets + * and they will be applied to the image at the specified timesteps. Default value: `` */ - has_nsfw_concepts: Array; + controlnets?: Array; /** - * The prompt used for generating the image. + * If set to true, the controlnet will be applied to only the conditional predictions. */ - prompt: string; -}; -export type StableDiffusionV35MediumOutput = { + controlnet_guess_mode?: boolean; /** - * The generated image files info. + * The IP adapter to use for the image generation. Default value: `` */ - images: Array; + ip_adapter?: Array; /** - * + * The path to the image encoder model to use for the image generation. */ - timings: Record; + image_encoder_path?: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The subfolder of the image encoder model to use for the image generation. */ - seed: number; + image_encoder_subfolder?: string; /** - * Whether the generated images contain NSFW concepts. + * The weight name of the image encoder model to use for the image generation. 
Default value: `"pytorch_model.bin"` */ - has_nsfw_concepts: Array; + image_encoder_weight_name?: string; /** - * The prompt used for generating the image. + * The URL of the IC Light model to use for the image generation. */ - prompt: string; -}; -export type StableDiffusionV35MediumInput = { + ic_light_model_url?: string | Blob | File; /** - * The prompt to generate an image from. + * The URL of the IC Light model background image to use for the image generation. + * Make sure to use a background compatible with the model. */ - prompt: string; + ic_light_model_background_image_url?: string | Blob | File; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The URL of the IC Light model image to use for the image generation. */ - negative_prompt?: string; + ic_light_image_url?: string | Blob | File; /** - * The size of the generated image. Default value: `landscape_4_3` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + seed?: number; /** - * The number of inference steps to perform. Default value: `40` + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `30` */ num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of the model - * will output the same image every time. - */ - seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` + * Skips part of the image generation process, leading to slightly different results. + * This means the image renders faster, too. */ - num_images?: number; + clip_skip?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Scheduler / sampler to use for the image denoising process. */ - enable_safety_checker?: boolean; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "Euler" + | "Euler A" + | "Euler (trailing timesteps)" + | "LCM" + | "LCM (trailing timesteps)" + | "DDIM" + | "TCD"; /** - * The format of the generated image. Default value: `"jpeg"` + * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. + * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. + * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. 
Cannot be set if `sigmas` is set. Default value: `[object Object]` */ - output_format?: "jpeg" | "png"; -}; -export type RecraftV3CreateStyleOutput = { + timesteps?: TimestepsInput; /** - * The ID of the created style, this ID can be used to reference the style in the future. + * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. + * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. + * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. Default value: `[object Object]` */ - style_id: string; -}; -export type RecraftV3CreateStyleInput = { + sigmas?: SigmasInput; /** - * + * The format of the generated image. Default value: `"png"` */ - prompt: string; + image_format?: "jpeg" | "png"; /** - * Default value: `square_hd` + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + num_images?: number; /** - * The style of the generated images. Vector images cost 2X as much. Default value: `"realistic_image"` + * If set to true, the safety checker will be enabled. */ - style?: - | "any" - | "realistic_image" - | "digital_illustration" - | "vector_illustration" - | "realistic_image/b_and_w" - | "realistic_image/hard_flash" - | "realistic_image/hdr" - | "realistic_image/natural_light" - | "realistic_image/studio_portrait" - | "realistic_image/enterprise" - | "realistic_image/motion_blur" - | "digital_illustration/pixel_art" - | "digital_illustration/hand_drawn" - | "digital_illustration/grain" - | "digital_illustration/infantile_sketch" - | "digital_illustration/2d_art_poster" - | "digital_illustration/handmade_3d" - | "digital_illustration/hand_drawn_outline" - | "digital_illustration/engraving_color" - | "digital_illustration/2d_art_poster_2" - | "vector_illustration/engraving" - | "vector_illustration/line_art" - | "vector_illustration/line_circuit" - | "vector_illustration/linocut"; + enable_safety_checker?: boolean; /** - * An array of preferable colors Default value: `` + * The size of the tiles to be used for the image generation. Default value: `4096` */ - colors?: Array; + tile_width?: number; /** - * The ID of the custom style reference (optional) + * The size of the tiles to be used for the image generation. Default value: `4096` */ - style_id?: string | null; -}; -export type FluxRealismOutput = { + tile_height?: number; /** - * The generated image files info. + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - images: Array; + tile_stride_width?: number; /** - * + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - timings: Record; + tile_stride_height?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The eta value to be used for the image generation. */ - seed: number; + eta?: number; /** - * Whether the generated images contain NSFW concepts. + * If set to true, the latents will be saved for debugging. 
*/ - has_nsfw_concepts: Array; + debug_latents?: boolean; /** - * The prompt used for generating the image. + * If set to true, the latents will be saved for debugging per pass. */ - prompt: string; + debug_per_pass_latents?: boolean; }; -export type FluxRealismInput = { +export type FluxSchnellInput = { /** * The prompt to generate an image from. */ @@ -1569,7 +1619,7 @@ export type FluxRealismInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `28` + * The number of inference steps to perform. Default value: `4` */ num_inference_steps?: number; /** @@ -1577,11 +1627,6 @@ export type FluxRealismInput = { * will output the same image every time. */ seed?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -1596,16 +1641,8 @@ export type FluxRealismInput = { * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; - /** - * The strength of the model. Default value: `1` - */ - strength?: number; - /** - * The output image format. Default value: `"jpeg"` - */ - output_format?: "jpeg" | "png"; }; -export type FluxLoraInpaintingOutput = { +export type FluxSchnellOutput = { /** * The generated image files info. */ @@ -1613,7 +1650,7 @@ export type FluxLoraInpaintingOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -1628,41 +1665,16 @@ export type FluxLoraInpaintingOutput = { */ prompt: string; }; -export type FluxLoraInpaintingInput = { +export type FluxProUltraTextToImageInput = { /** * The prompt to generate an image from. */ prompt: string; - /** - * The size of the generated image. - */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `28` - */ - num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -1678,23 +1690,19 @@ export type FluxLoraInpaintingInput = { */ enable_safety_checker?: boolean; /** - * The format of the generated image. Default value: `"jpeg"` - */ - output_format?: "jpeg" | "png"; - /** - * URL of image to use for inpainting. or img2img + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. 
Default value: `"2"` */ - image_url: string | Blob | File; + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * The aspect ratio of the generated image. Default value: `"16:9"` */ - strength?: number; + aspect_ratio?: "21:9" | "16:9" | "4:3" | "1:1" | "3:4" | "9:16" | "9:21"; /** - * The mask to area to Inpaint in. + * Generate less processed, more natural-looking images. */ - mask_url: string | Blob | File; + raw?: boolean; }; -export type FluxLoraImageToImageOutput = { +export type FluxProV11Output = { /** * The generated image files info. */ @@ -1702,7 +1710,7 @@ export type FluxLoraImageToImageOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -1717,13 +1725,13 @@ export type FluxLoraImageToImageOutput = { */ prompt: string; }; -export type FluxLoraImageToImageInput = { +export type FluxProV11Input = { /** * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated image. + * The size of the generated image. Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -1733,31 +1741,17 @@ export type FluxLoraImageToImageInput = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `28` - */ - num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - loras?: Array; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - guidance_scale?: number; - /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; + sync_mode?: boolean; /** * The number of images to generate. Default value: `1` */ @@ -1767,29 +1761,17 @@ export type FluxLoraImageToImageInput = { */ enable_safety_checker?: boolean; /** - * The format of the generated image. Default value: `"jpeg"` - */ - output_format?: "jpeg" | "png"; - /** - * URL of image to use for inpainting. or img2img - */ - image_url: string | Blob | File; - /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` - */ - strength?: number; - /** - * The mask to area to Inpaint in. + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. 
Default value: `"2"` */ - mask_url: string | Blob | File; + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; }; -export type FluxGeneralInput = { +export type FluxProNewInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated image. + * The size of the generated image. Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -1808,38 +1790,11 @@ export type FluxGeneralInput = { * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; - /** - * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnets?: Array; - /** - * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnet_unions?: Array; - /** - * IP-Adapter to use for image generation. Default value: `` - */ - ip_adapters?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - real_cfg_scale?: number; - /** - * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. - * If using XLabs IP-Adapter v1, this will be turned on!. - */ - use_real_cfg?: boolean; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -1851,39 +1806,34 @@ export type FluxGeneralInput = { */ num_images?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` - */ - enable_safety_checker?: boolean; - /** - * URL of Image for Reference-Only - */ - reference_image_url?: string | Blob | File; - /** - * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ - reference_strength?: number; + safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; +}; +export type FluxProNewOutput = { /** - * The percentage of the total timesteps when the reference guidance is to bestarted. + * The generated image files info. */ - reference_start?: number; + images: Array; /** - * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + * */ - reference_end?: number; + timings: any; /** - * URL of image to use for inpainting. or img2img + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - image_url: string | Blob | File; + seed: number; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * Whether the generated images contain NSFW concepts. */ - strength?: number; + has_nsfw_concepts: Array; /** - * The mask to area to Inpaint in. 
+ * The prompt used for generating the image. */ - mask_url: string | Blob | File; + prompt: string; }; -export type FluxGeneralOutput = { +export type OmnigenV1Output = { /** * The generated image files info. */ @@ -1891,7 +1841,7 @@ export type FluxGeneralOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -1906,13 +1856,17 @@ export type FluxGeneralOutput = { */ prompt: string; }; -export type FluxGeneralInpaintingInput = { +export type OmnigenV1Input = { /** * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated image. + * URL of images to use while generating the image, Use <|image_1|> for the first image and so on. Default value: `` + */ + input_image_urls?: Array; + /** + * The size of the generated image. Default value: `square_hd` */ image_size?: | ImageSize @@ -1923,7 +1877,7 @@ export type FluxGeneralInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `28` + * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** @@ -1931,38 +1885,16 @@ export type FluxGeneralInpaintingInput = { * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; - /** - * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnets?: Array; - /** - * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnet_unions?: Array; - /** - * IP-Adapter to use for image generation. Default value: `` - */ - ip_adapters?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - real_cfg_scale?: number; - /** - * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. - * If using XLabs IP-Adapter v1, this will be turned on!. + * The Image Guidance scale is a measure of how close you want + * the model to stick to your input image when looking for a related image to show you. Default value: `1.6` */ - use_real_cfg?: boolean; + img_guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -1978,35 +1910,70 @@ export type FluxGeneralInpaintingInput = { */ enable_safety_checker?: boolean; /** - * URL of Image for Reference-Only + * The format of the generated image. Default value: `"jpeg"` */ - reference_image_url?: string | Blob | File; + output_format?: "jpeg" | "png"; +}; +export type TextToImageTurboInput = { /** - * Strength of reference_only generation. Only used if a reference image is provided. 
Default value: `0.65` + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` */ - reference_strength?: number; + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; /** - * The percentage of the total timesteps when the reference guidance is to bestarted. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - reference_start?: number; + prompt: string; /** - * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - reference_end?: number; + negative_prompt?: string; /** - * URL of image to use for inpainting. or img2img + * The size of the generated image. Default value: `square` */ - image_url: string | Blob | File; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * The number of inference steps to perform. Default value: `2` */ - strength?: number; + num_inference_steps?: number; /** - * The mask to area to Inpaint in. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - mask_url: string | Blob | File; + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; }; -export type FluxGeneralInpaintingOutput = { +export type StableDiffusionV35LargeOutput = { /** * The generated image files info. */ @@ -2014,7 +1981,7 @@ export type FluxGeneralInpaintingOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -2029,13 +1996,19 @@ export type FluxGeneralInpaintingOutput = { */ prompt: string; }; -export type FluxGeneralImageToImageInput = { +export type StableDiffusionV35LargeInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated image. + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
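// --- Usage sketch (editor's addition): OmniGen reference images -------------
// OmnigenV1Input takes reference images via `input_image_urls`; the prompt
// refers to them positionally as <|image_1|>, <|image_2|>, and so on. The
// endpoint id "fal-ai/omnigen-v1" is assumed for illustration.
import { createFalClient } from "@fal-ai/client";

export async function omnigenExample() {
  const fal = createFalClient({ credentials: process.env.FAL_KEY });
  const input: OmnigenV1Input = {
    prompt: "The woman from <|image_1|> is waving at the camera",
    input_image_urls: ["https://example.com/reference.png"],
    num_inference_steps: 50, // the documented default
  };
  const { data } = await fal.subscribe("fal-ai/omnigen-v1", { input });
  console.log((data as OmnigenV1Output).images[0].url);
}
// ---------------------------------------------------------------------------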
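// --- Usage sketch (editor's addition): turbo presets -------------------------
// Turbo checkpoints trade fidelity for latency: very few inference steps and a
// guidance scale near 1, matching the defaults documented on
// TextToImageTurboInput above. Field values are illustrative only.
const turboInput: TextToImageTurboInput = {
  model_name: "stabilityai/sdxl-turbo",
  prompt: "macro photo of a dewdrop on a fern leaf",
  num_inference_steps: 2, // documented default
  guidance_scale: 1, // documented default; higher values defeat the speed gain
};
// ---------------------------------------------------------------------------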
Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -2054,38 +2027,11 @@ export type FluxGeneralImageToImageInput = { * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; - /** - * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnets?: Array; - /** - * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnet_unions?: Array; - /** - * IP-Adapter to use for image generation. Default value: `` - */ - ip_adapters?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - real_cfg_scale?: number; - /** - * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. - * If using XLabs IP-Adapter v1, this will be turned on!. - */ - use_real_cfg?: boolean; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -2101,64 +2047,46 @@ export type FluxGeneralImageToImageInput = { */ enable_safety_checker?: boolean; /** - * URL of Image for Reference-Only + * The format of the generated image. Default value: `"jpeg"` */ - reference_image_url?: string | Blob | File; + output_format?: "jpeg" | "png"; +}; +export type StableDiffusionV35MediumOutput = { /** - * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + * The generated image files info. */ - reference_strength?: number; + images: Array; /** - * The percentage of the total timesteps when the reference guidance is to bestarted. + * */ - reference_start?: number; + timings: any; /** - * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - reference_end?: number; + seed: number; /** - * URL of image to use for inpainting. or img2img + * Whether the generated images contain NSFW concepts. */ - image_url: string | Blob | File; - /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` - */ - strength?: number; - /** - * The mask to area to Inpaint in. - */ - mask_url: string | Blob | File; -}; -export type FluxGeneralImageToImageOutput = { - /** - * The generated image files info. - */ - images: Array; - /** - * - */ - timings: Record; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed: number; - /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; + has_nsfw_concepts: Array; /** * The prompt used for generating the image. 
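// --- Usage sketch (editor's addition): queue instead of subscribe -----------
// Heavier models such as SD 3.5 Large suit the queue API. The endpoint id
// "fal-ai/stable-diffusion-v35-large" is assumed for illustration; a real app
// would poll fal.queue.status before fetching the result.
import { createFalClient } from "@fal-ai/client";

export async function sd35LargeExample() {
  const fal = createFalClient({ credentials: process.env.FAL_KEY });
  const input: StableDiffusionV35LargeInput = {
    prompt: "isometric cutaway of a space station, highly detailed",
    negative_prompt: "blurry, low resolution",
    num_inference_steps: 40, // documented default
    guidance_scale: 4.5, // documented default
  };
  const { request_id } = await fal.queue.submit(
    "fal-ai/stable-diffusion-v35-large",
    { input },
  );
  // Once the request completes, fetch the stored result.
  const { data } = await fal.queue.result("fal-ai/stable-diffusion-v35-large", {
    requestId: request_id,
  });
  console.log((data as StableDiffusionV35LargeOutput).images[0].url);
}
// ---------------------------------------------------------------------------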
*/ prompt: string; }; -export type FluxGeneralDifferentialDiffusionInput = { +export type StableDiffusionV35MediumInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated image. + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -2169,7 +2097,7 @@ export type FluxGeneralDifferentialDiffusionInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `28` + * The number of inference steps to perform. Default value: `40` */ num_inference_steps?: number; /** @@ -2177,38 +2105,11 @@ export type FluxGeneralDifferentialDiffusionInput = { * will output the same image every time. */ seed?: number; - /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` - */ - loras?: Array; - /** - * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnets?: Array; - /** - * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` - */ - controlnet_unions?: Array; - /** - * IP-Adapter to use for image generation. Default value: `` - */ - ip_adapters?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` */ guidance_scale?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` - */ - real_cfg_scale?: number; - /** - * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. - * If using XLabs IP-Adapter v1, this will be turned on!. - */ - use_real_cfg?: boolean; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -2224,181 +2125,234 @@ export type FluxGeneralDifferentialDiffusionInput = { */ enable_safety_checker?: boolean; /** - * URL of Image for Reference-Only + * The format of the generated image. Default value: `"jpeg"` */ - reference_image_url?: string | Blob | File; + output_format?: "jpeg" | "png"; +}; +export type TextToImageOutput = { /** - * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + * */ - reference_strength?: number; + images: Array; +}; +export type RecraftV3CreateStyleInput = { /** - * The percentage of the total timesteps when the reference guidance is to bestarted. + * URL to zip archive with images, use PNG format. Maximum 5 images are allowed. */ - reference_start?: number; + images_data_url: string | Blob | File; /** - * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + * The base style of the generated images, this topic is covered above. 
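// --- Usage sketch (editor's addition): SD 3.5 Medium defaults ----------------
// StableDiffusionV35MediumInput documents its own defaults (40 steps, guidance
// 4.5) in the comments above; the field values below are illustrative only.
const sd35MediumInput: StableDiffusionV35MediumInput = {
  prompt: "watercolor painting of a harbor at dusk",
  negative_prompt: "oversaturated, deformed",
  image_size: "landscape_4_3", // documented default
  guidance_scale: 4.5, // documented default
  output_format: "jpeg", // documented default
};
// ---------------------------------------------------------------------------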
Default value: `"digital_illustration"` */ - reference_end?: number; + base_style?: + | "any" + | "realistic_image" + | "digital_illustration" + | "vector_illustration" + | "realistic_image/b_and_w" + | "realistic_image/hard_flash" + | "realistic_image/hdr" + | "realistic_image/natural_light" + | "realistic_image/studio_portrait" + | "realistic_image/enterprise" + | "realistic_image/motion_blur" + | "digital_illustration/pixel_art" + | "digital_illustration/hand_drawn" + | "digital_illustration/grain" + | "digital_illustration/infantile_sketch" + | "digital_illustration/2d_art_poster" + | "digital_illustration/handmade_3d" + | "digital_illustration/hand_drawn_outline" + | "digital_illustration/engraving_color" + | "digital_illustration/2d_art_poster_2" + | "vector_illustration/engraving" + | "vector_illustration/line_art" + | "vector_illustration/line_circuit" + | "vector_illustration/linocut"; +}; +export type TextToImageInput = { /** - * URL of image to use for inpainting. or img2img + * URL or HuggingFace ID of the base model to generate the image. */ - image_url: string | Blob | File; + model_name: string; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * URL or HuggingFace ID of the custom U-Net model to use for the image generation. */ - strength?: number; + unet_name?: string; /** - * The mask to area to Inpaint in. + * The variant of the model to use for huggingface models, e.g. 'fp16'. */ - mask_url: string | Blob | File; -}; -export type FluxGeneralDifferentialDiffusionOutput = { + variant?: string; /** - * The generated image files info. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - images: Array; + prompt: string; /** - * + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - timings: Record; + negative_prompt?: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * If set to true, the prompt weighting syntax will be used. + * Additionally, this will lift the 77 token limit by averaging embeddings. */ - seed: number; + prompt_weighting?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - has_nsfw_concepts: Array; + loras?: Array; /** - * The prompt used for generating the image. + * The embeddings to use for the image generation. Only a single embedding is supported at the moment. + * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: `` */ - prompt: string; -}; -export type FluxGeneralRfInversionInput = { + embeddings?: Array; /** - * The prompt to generate an image from. + * The control nets to use for the image generation. You can use any number of control nets + * and they will be applied to the image at the specified timesteps. Default value: `` */ - prompt: string; + controlnets?: Array; /** - * The size of the generated image. + * If set to true, the controlnet will be applied to only the conditional predictions. 
*/ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + controlnet_guess_mode?: boolean; /** - * The number of inference steps to perform. Default value: `28` + * The IP adapter to use for the image generation. Default value: `` */ - num_inference_steps?: number; + ip_adapter?: Array; /** - * The same seed and the same prompt given to the same version of the model - * will output the same image every time. + * The path to the image encoder model to use for the image generation. */ - seed?: number; + image_encoder_path?: string; /** - * The LoRAs to use for the image generation. You can use any number of LoRAs - * and they will be merged together to generate the final image. Default value: `` + * The subfolder of the image encoder model to use for the image generation. */ - loras?: Array; + image_encoder_subfolder?: string; /** - * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ - controlnets?: Array; + image_encoder_weight_name?: string; /** - * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + * The URL of the IC Light model to use for the image generation. */ - controlnet_unions?: Array; + ic_light_model_url?: string | Blob | File; /** - * IP-Adapter to use for image generation. Default value: `` + * The URL of the IC Light model background image to use for the image generation. + * Make sure to use a background compatible with the model. */ - ip_adapters?: Array; + ic_light_model_background_image_url?: string | Blob | File; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + * The URL of the IC Light model image to use for the image generation. */ - guidance_scale?: number; + ic_light_image_url?: string | Blob | File; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - real_cfg_scale?: number; + seed?: number; /** - * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. - * If using XLabs IP-Adapter v1, this will be turned on!. + * The size of the generated image. You can choose between some presets or custom height and width + * that **must be multiples of 8**. Default value: `square_hd` */ - use_real_cfg?: boolean; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. 
Default value: `30` */ - sync_mode?: boolean; + num_inference_steps?: number; /** - * The number of images to generate. Default value: `1` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - num_images?: number; + guidance_scale?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Skips part of the image generation process, leading to slightly different results. + * This means the image renders faster, too. */ - enable_safety_checker?: boolean; + clip_skip?: number; /** - * URL of Image for Reference-Only + * Scheduler / sampler to use for the image denoising process. */ - reference_image_url?: string | Blob | File; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "Euler" + | "Euler A" + | "Euler (trailing timesteps)" + | "LCM" + | "LCM (trailing timesteps)" + | "DDIM" + | "TCD"; /** - * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` + * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. + * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. + * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. Default value: `[object Object]` */ - reference_strength?: number; + timesteps?: TimestepsInput; /** - * The percentage of the total timesteps when the reference guidance is to bestarted. + * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. + * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. + * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. Default value: `[object Object]` */ - reference_start?: number; + sigmas?: SigmasInput; /** - * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` + * The format of the generated image. Default value: `"png"` */ - reference_end?: number; + image_format?: "jpeg" | "png"; /** - * URL of image to use for inpainting. or img2img + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` */ - image_url: string | Blob | File; + num_images?: number; /** - * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` + * If set to true, the safety checker will be enabled. */ - strength?: number; + enable_safety_checker?: boolean; /** - * The mask to area to Inpaint in. + * The size of the tiles to be used for the image generation. Default value: `4096` */ - mask_url: string | Blob | File; -}; -export type FluxGeneralRfInversionOutput = { + tile_width?: number; /** - * The generated image files info. + * The size of the tiles to be used for the image generation. 
Default value: `4096` */ - images: Array; + tile_height?: number; /** - * + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - timings: Record; + tile_stride_width?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - seed: number; + tile_stride_height?: number; /** - * Whether the generated images contain NSFW concepts. + * The eta value to be used for the image generation. */ - has_nsfw_concepts: Array; + eta?: number; /** - * The prompt used for generating the image. + * If set to true, the latents will be saved for debugging. */ - prompt: string; + debug_latents?: boolean; + /** + * If set to true, the latents will be saved for debugging per pass. + */ + debug_per_pass_latents?: boolean; }; -export type IclightV2Output = { +export type RecraftV3CreateStyleOutput = { + /** + * The ID of the created style, this ID can be used to reference the style in the future. + */ + style_id: string; +}; +export type FluxRealismOutput = { /** * The generated image files info. */ @@ -2406,7 +2360,7 @@ export type IclightV2Output = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -2421,21 +2375,13 @@ export type IclightV2Output = { */ prompt: string; }; -export type IclightV2Input = { +export type FluxRealismInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * Negative Prompt for the image Default value: `""` - */ - negative_prompt?: string; - /** - * URL of image to be used for relighting - */ - image_url: string | Blob | File; - /** - * The size of the generated image. + * The size of the generated image. Default value: `landscape_4_3` */ image_size?: | ImageSize @@ -2455,13 +2401,10 @@ export type IclightV2Input = { */ seed?: number; /** - * Provide lighting conditions for the model Default value: `"None"` - */ - initial_latent?: "None" | "Left" | "Right" | "Top" | "Bottom"; - /** - * Use HR fix + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - enable_hr_fix?: boolean; + guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -2473,52 +2416,57 @@ export type IclightV2Input = { */ num_images?: number; /** - * The number of images to generate. Default value: `1` + * If set to true, the safety checker will be enabled. Default value: `true` */ - cfg?: number; + enable_safety_checker?: boolean; /** - * Strength for low-resolution pass. Default value: `0.98` + * The strength of the model. Default value: `1` */ - lowres_denoise?: number; + strength?: number; /** - * Strength for high-resolution pass. Only used if enable_hr_fix is True. Default value: `0.95` + * The output image format. Default value: `"jpeg"` */ - highres_denoise?: number; + output_format?: "jpeg" | "png"; +}; +export type FluxLoraInpaintingOutput = { /** - * Default value: `0.5` + * The generated image files info. 
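// --- Usage sketch (editor's addition): TextToImageInput with a custom model -
// TextToImageInput drives arbitrary diffusion checkpoints plus optional LoRAs,
// embeddings, schedulers and tiled decoding. The LoRA entry shape
// ({ path, scale }) is an assumption for illustration; the endpoint that
// consumes this type is not named in the generated docs above.
const customModelInput: TextToImageInput = {
  model_name: "stabilityai/stable-diffusion-xl-base-1.0",
  prompt: "portrait of a knight in ornate armor, dramatic rim light",
  loras: [{ path: "https://example.com/my-lora.safetensors", scale: 0.8 }],
  scheduler: "DPM++ 2M Karras",
  num_inference_steps: 30, // documented default
  guidance_scale: 7.5, // documented default
  image_format: "png", // documented default
};
// ---------------------------------------------------------------------------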
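// --- Usage sketch (editor's addition): creating a Recraft style --------------
// RecraftV3CreateStyleInput takes a zip of up to five PNG references and
// returns a reusable style_id. The endpoint id "fal-ai/recraft-v3/create-style"
// is assumed for illustration.
import { createFalClient } from "@fal-ai/client";

export async function createRecraftStyle() {
  const fal = createFalClient({ credentials: process.env.FAL_KEY });
  const input: RecraftV3CreateStyleInput = {
    images_data_url: "https://example.com/style-references.zip",
    base_style: "digital_illustration", // documented default
  };
  const { data } = await fal.subscribe("fal-ai/recraft-v3/create-style", {
    input,
  });
  // The returned style_id can be referenced by later Recraft generations.
  return (data as RecraftV3CreateStyleOutput).style_id;
}
// ---------------------------------------------------------------------------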
    */
-  hr_downscale?: number;
   images: Array<Image>;
   /**
-   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
-   * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
+   *
    */
-  guidance_scale?: number;
+  timings: any;
   /**
-   * If set to true, the safety checker will be enabled. Default value: `true`
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
    */
-  enable_safety_checker?: boolean;
+  seed: number;
   /**
-   * The format of the generated image. Default value: `"jpeg"`
+   * Whether the generated images contain NSFW concepts.
    */
-  output_format?: "jpeg" | "png";
-};
-export type FluxDifferentialDiffusionInput = {
+  has_nsfw_concepts: Array<boolean>;
   /**
-   * The prompt to generate an image from.
+   * The prompt used for generating the image.
    */
   prompt: string;
+};
+export type FluxLoraInpaintingInput = {
   /**
-   * URL of image to use as initial image.
-   */
-  image_url: string | Blob | File;
-  /**
-   * URL of change map.
+   * The prompt to generate an image from.
    */
-  change_map_image_url: string | Blob | File;
+  prompt: string;
   /**
-   * The strength to use for image-to-image. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85`
+   * The size of the generated image.
    */
-  strength?: number;
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
   /**
    * The number of inference steps to perform. Default value: `28`
    */
   num_inference_steps?: number;
   /**
    * The same seed and the same prompt given to the same version of the model
    * will output the same image every time.
    */
   seed?: number;
+  /**
+   * The LoRAs to use for the image generation. You can use any number of LoRAs
+   * and they will be merged together to generate the final image. Default value: ``
+   */
+  loras?: Array<LoraWeight>;
   /**
    * The CFG (Classifier Free Guidance) scale is a measure of how close you want
    * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5`
    */
   guidance_scale?: number;
   /**
    * If set to true, the function will wait for the image to be generated and uploaded
    * before returning the response. This will increase the latency of the function but
    * it allows you to get the image directly in the response without going through the CDN.
    */
   sync_mode?: boolean;
   /**
    * The number of images to generate. Default value: `1`
    */
   num_images?: number;
   /**
    * If set to true, the safety checker will be enabled. Default value: `true`
    */
   enable_safety_checker?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  output_format?: "jpeg" | "png";
+  /**
+   * URL of image to use for inpainting or img2img
+   */
+  image_url: string | Blob | File;
+  /**
+   * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 completely remakes the image while 0.0 preserves the original. Default value: `0.85`
+   */
+  strength?: number;
+  /**
+   * The mask of the area to inpaint.
+   */
+  mask_url: string | Blob | File;
 };
-export type FluxDifferentialDiffusionOutput = {
+export type FluxLoraImageToImageOutput = {
   /**
    * The generated image files info.
    */
   images: Array<Image>;
   /**
    *
    */
-  timings: Record<string, number>;
+  timings: any;
   /**
    * Seed of the generated Image. It will be the same value of the one passed in the
    * input or the randomly generated that was used in case none was passed.
    */
   seed: number;
   /**
    * Whether the generated images contain NSFW concepts.
    */
   has_nsfw_concepts: Array<boolean>;
   /**
    * The prompt used for generating the image.
    */
   prompt: string;
 };
-export type StableDiffusionV3MediumInput = {
-  /**
-   * The image URL to generate an image from.
-   */
-  image_url: string | Blob | File;
+export type FluxLoraImageToImageInput = {
   /**
    * The prompt to generate an image from.
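// --- Usage sketch (editor's addition): LoRA inpainting ------------------------
// FluxLoraInpaintingInput pairs the image to edit with a mask selecting the
// region to repaint (mask semantics assumed: white = inpaint). The endpoint id
// "fal-ai/flux-lora/inpainting" and the LoRA entry shape ({ path, scale }) are
// assumptions for illustration.
const inpaintInput: FluxLoraInpaintingInput = {
  prompt: "a wicker basket full of oranges",
  image_url: "https://example.com/table.png",
  mask_url: "https://example.com/table-mask.png",
  loras: [{ path: "https://example.com/still-life-lora.safetensors", scale: 1 }],
  strength: 0.85, // documented default: 1.0 remakes the area, 0.0 preserves it
};
// -----------------------------------------------------------------------------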
*/ prompt: string; /** - * The negative prompt to generate an image from. Default value: `""` - */ - negative_prompt?: string; - /** - * If set to true, prompt will be upsampled with more details. - */ - prompt_expansion?: boolean; - /** - * The size of the generated image. Defaults to the conditioning image's size. + * The size of the generated image. */ image_size?: | ImageSize @@ -2604,13 +2561,18 @@ export type StableDiffusionV3MediumInput = { */ num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** @@ -2628,56 +2590,25 @@ export type StableDiffusionV3MediumInput = { */ enable_safety_checker?: boolean; /** - * The strength of the image-to-image transformation. Default value: `0.9` - */ - strength?: number; -}; -export type StableDiffusionV3MediumOutput = { - /** - * The generated image files info. - */ - images: Array; - /** - * - */ - timings: Record; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed: number; - /** - * Whether the generated images contain NSFW concepts. + * The format of the generated image. Default value: `"jpeg"` */ - has_nsfw_concepts: Array; + output_format?: "jpeg" | "png"; /** - * The prompt used for generating the image. + * URL of image to use for inpainting. or img2img */ - prompt: string; + image_url: string | Blob | File; /** - * The number of images generated. + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - num_images: number; + strength?: number; }; -export type StableDiffusionV3MediumImageToImageInput = { - /** - * The image URL to generate an image from. - */ - image_url: string | Blob | File; +export type DifferentialDiffusionInput = { /** * The prompt to generate an image from. */ prompt: string; /** - * The negative prompt to generate an image from. Default value: `""` - */ - negative_prompt?: string; - /** - * If set to true, prompt will be upsampled with more details. - */ - prompt_expansion?: boolean; - /** - * The size of the generated image. Defaults to the conditioning image's size. + * The size of the generated image. */ image_size?: | ImageSize @@ -2692,15 +2623,42 @@ export type StableDiffusionV3MediumImageToImageInput = { */ num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. 
Default value: `` + */ + loras?: Array; + /** + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnets?: Array; + /** + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` + */ + controlnet_unions?: Array; + /** + * IP-Adapter to use for image generation. Default value: `` + */ + ip_adapters?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` + */ + real_cfg_scale?: number; + /** + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. + * If using XLabs IP-Adapter v1, this will be turned on!. + */ + use_real_cfg?: boolean; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -2716,108 +2674,77 @@ export type StableDiffusionV3MediumImageToImageInput = { */ enable_safety_checker?: boolean; /** - * The strength of the image-to-image transformation. Default value: `0.9` + * URL of Image for Reference-Only */ - strength?: number; -}; -export type StableDiffusionV3MediumImageToImageOutput = { + reference_image_url?: string | Blob | File; /** - * The generated image files info. + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ - images: Array; + reference_strength?: number; /** - * + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - timings: Record; + reference_start?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - seed: number; + reference_end?: number; /** - * Whether the generated images contain NSFW concepts. + * URL of image to use as initial image. */ - has_nsfw_concepts: Array; + image_url: string | Blob | File; /** - * The prompt used for generating the image. + * URL of change map. */ - prompt: string; + change_map_image_url: string | Blob | File; /** - * The number of images generated. + * The strength to use for differential diffusion. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - num_images: number; + strength?: number; }; -export type FastSdxlOutput = { +export type RFInversionInput = { /** - * The generated image files info. + * The prompt to edit the image with */ - images: Array; + prompt: string; /** - * + * The size of the generated image. */ - timings: Record; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The number of inference steps to perform. 
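// --- Usage sketch (editor's addition): differential diffusion -----------------
// DifferentialDiffusionInput drives edits with a per-region change map instead
// of a binary mask. How map intensity is interpreted is not documented above;
// treating brighter regions as more editable is an assumption here, as is the
// endpoint id "fal-ai/flux-general/differential-diffusion".
const diffDiffusionInput: DifferentialDiffusionInput = {
  prompt: "the same scene at golden hour",
  image_url: "https://example.com/scene.png",
  change_map_image_url: "https://example.com/scene-change-map.png",
  strength: 0.85, // documented default
};
// -----------------------------------------------------------------------------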
Default value: `28` */ - seed: number; + num_inference_steps?: number; /** - * Whether the generated images contain NSFW concepts. + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. */ - has_nsfw_concepts: Array; + seed?: number; /** - * The prompt used for generating the image. + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - prompt: string; -}; -export type FastSdxlInput = { + loras?: Array; /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; - /** - * The URL of the mask to use for inpainting. - */ - mask_url: string | Blob | File; - /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. - */ - prompt: string; - /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` - */ - negative_prompt?: string; - /** - * The size of the generated image. Default value: `square_hd` + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + controlnets?: Array; /** - * The number of inference steps to perform. Default value: `25` + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - num_inference_steps?: number; + controlnet_unions?: Array; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; - /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. - */ - seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -2828,100 +2755,55 @@ export type FastSdxlInput = { * The number of images to generate. Default value: `1` */ num_images?: number; - /** - * The list of LoRA weights to use. Default value: `` - */ - loras?: Array; - /** - * The list of embeddings to use. Default value: `` - */ - embeddings?: Array; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` - */ - safety_checker_version?: "v1" | "v2"; - /** - * If set to true, the prompt will be expanded with additional prompts. - */ - expand_prompt?: boolean; - /** - * The format of the generated image. Default value: `"jpeg"` - */ - format?: "jpeg" | "png"; - /** - * An id bound to a request, can be used with response to identify the request - * itself. 
Default value: `""` - */ - request_id?: string; -}; -export type LoraInput = { - /** - * The method to use for the sigmas. If set to 'custom', the sigmas will be set based - * on the provided sigmas schedule in the `array` field. - * Defaults to 'default' which means the scheduler will use the sigmas of the scheduler. Default value: `"default"` - */ - method?: "default" | "array"; - /** - * Sigmas schedule to be used if 'custom' method is selected. Default value: `` - */ - array?: Array; -}; -export type LoraOutput = { - /** - * The generated image files info. - */ - images: Array; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * URL of Image for Reference-Only */ - seed: number; + reference_image_url?: string | Blob | File; /** - * Whether the generated images contain NSFW concepts. + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ - has_nsfw_concepts: Array; + reference_strength?: number; /** - * The latents saved for debugging. + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - debug_latents?: File; + reference_start?: number; /** - * The latents saved for debugging per pass. + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - debug_per_pass_latents?: File; -}; -export type AuraSrInput = { + reference_end?: number; /** - * URL of the image to upscale. + * URL of image to be edited */ image_url: string | Blob | File; /** - * Upscaling factor. More coming soon. Default value: `"4"` + * The controller guidance (gamma) used in the creation of structured noise. Default value: `0.6` */ - upscaling_factor?: "4"; + controller_guidance_forward?: number; /** - * Whether to use overlapping tiles for upscaling. Setting this to true helps remove seams but doubles the inference time. + * The controller guidance (eta) used in the denoising process.Using values closer to 1 will result in an image closer to input. Default value: `0.75` */ - overlapping_tiles?: boolean; + controller_guidance_reverse?: number; /** - * Checkpoint to use for upscaling. More coming soon. Default value: `"v1"` + * Timestep to start guidance during reverse process. */ - checkpoint?: "v1" | "v2"; -}; -export type AuraSrOutput = { + reverse_guidance_start?: number; /** - * Upscaled image + * Timestep to stop guidance during reverse process. Default value: `8` */ - image: Image; + reverse_guidance_end?: number; /** - * Timings for each step in the pipeline. + * Scheduler for applying reverse guidance. Default value: `"constant"` */ - timings: Record; + reverse_guidance_schedule?: + | "constant" + | "linear_increase" + | "linear_decrease"; }; -export type StableCascadeOutput = { +export type FluxGeneralOutput = { /** * The generated image files info. */ @@ -2929,7 +2811,7 @@ export type StableCascadeOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -2944,37 +2826,13 @@ export type StableCascadeOutput = { */ prompt: string; }; -export type StableCascadeInput = { +export type FluxGeneralInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to generate an image from. */ prompt: string; /** - * The negative prompt to use. 
Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` - */ - negative_prompt?: string; - /** - * Number of steps to run the first stage for. Default value: `20` - */ - first_stage_steps?: number; - /** - * Number of steps to run the second stage for. Default value: `10` - */ - second_stage_steps?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4` - */ - guidance_scale?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. - */ - second_stage_guidance_scale?: number; - /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated image. */ image_size?: | ImageSize @@ -2985,269 +2843,277 @@ export type StableCascadeInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The same seed and the same prompt given to the same version of Stable Cascade - * will output the same image every time. + * The number of inference steps to perform. Default value: `28` */ - seed?: number; + num_inference_steps?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. */ - enable_safety_checker?: boolean; + seed?: number; /** - * The number of images to generate. Default value: `1` + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - num_images?: number; + loras?: Array; /** - * If set to true, the image will be returned as base64 encoded string. + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - sync_mode?: boolean; -}; -export type MinimaxVideoInput = { + controlnets?: Array; /** - * + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - prompt: string; + controlnet_unions?: Array; /** - * URL of the image to use as the first frame + * IP-Adapter to use for image generation. Default value: `` */ - image_url: string | Blob | File; + ip_adapters?: Array; /** - * Whether to use the model's prompt optimizer Default value: `true` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - prompt_optimizer?: boolean; -}; -export type MinimaxVideoOutput = { + guidance_scale?: number; /** - * The generated video + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - video: File; -}; -export type HaiperVideoV2Input = { + real_cfg_scale?: number; /** - * + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. + * If using XLabs IP-Adapter v1, this will be turned on!. 
*/ - prompt: string; + use_real_cfg?: boolean; /** - * The duration of the generated video in seconds Default value: `"4"` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - duration?: "4" | "6"; + sync_mode?: boolean; /** - * Whether to use the model's prompt enhancer Default value: `true` + * The number of images to generate. Default value: `1` */ - prompt_enhancer?: boolean; + num_images?: number; /** - * The same seed and the same prompt given to the same version of the model - * will output the same video every time. + * If set to true, the safety checker will be enabled. Default value: `true` */ - seed?: number; + enable_safety_checker?: boolean; /** - * URL of the image to use as the first frame + * URL of Image for Reference-Only */ - image_url: string | Blob | File; -}; -export type HaiperVideoV2Output = { + reference_image_url?: string | Blob | File; /** - * The generated video + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ - video: File; -}; -export type HaiperVideoV2ImageToVideoInput = { + reference_strength?: number; /** - * + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - prompt: string; + reference_start?: number; /** - * The duration of the generated video in seconds Default value: `"4"` + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - duration?: "4" | "6"; + reference_end?: number; +}; +export type ControlNetUnionInput = { /** - * Whether to use the model's prompt enhancer Default value: `true` + * URL of the image to be used as the control image. */ - prompt_enhancer?: boolean; + control_image_url: string | Blob | File; /** - * The same seed and the same prompt given to the same version of the model - * will output the same video every time. + * URL of the mask for the control image. */ - seed?: number; + mask_image_url?: string | Blob | File; /** - * URL of the image to use as the first frame + * Control Mode for Flux Controlnet Union. Supported values are: + * - canny: Uses the edges for guided generation. + * - tile: Uses the tiles for guided generation. + * - depth: Utilizes a grayscale depth map for guided generation. + * - blur: Adds a blur to the image. + * - pose: Uses the pose of the image for guided generation. + * - gray: Converts the image to grayscale. + * - low-quality: Converts the image to a low-quality image. */ - image_url: string | Blob | File; -}; -export type HaiperVideoV2ImageToVideoOutput = { - /** - * The generated video - */ - video: File; -}; -export type MochiV1Output = { + control_mode: + | "canny" + | "tile" + | "depth" + | "blur" + | "pose" + | "gray" + | "low-quality"; /** - * The generated video + * The scale of the control net weight. This is used to scale the control net weight + * before merging it with the base model. Default value: `1` */ - video: File; -}; -export type MochiV1Input = { + conditioning_scale?: number; /** - * The prompt to generate a video from. + * Threshold for mask. Default value: `0.5` */ - prompt: string; + mask_threshold?: number; /** - * The seed to use for generating the video. + * The percentage of the image to start applying the controlnet in terms of the total timesteps. 
*/ - seed?: number; + start_percentage?: number; /** - * Whether to enable prompt expansion. Default value: `true` + * The percentage of the image to end applying the controlnet in terms of the total timesteps. Default value: `1` */ - enable_prompt_expansion?: boolean; + end_percentage?: number; }; -export type LumaDreamMachineInput = { +export type FluxGeneralInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; /** * */ - prompt: string; + timings: any; /** - * The aspect ratio of the generated video Default value: `"16:9"` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; + seed: number; /** - * Whether the video should loop (end of video is blended with the beginning) + * Whether the generated images contain NSFW concepts. */ - loop?: boolean; -}; -export type LumaDreamMachineOutput = { + has_nsfw_concepts: Array; /** - * The generated video + * The prompt used for generating the image. */ - video: File; + prompt: string; }; -export type LumaDreamMachineImageToVideoInput = { +export type FluxGeneralInpaintingInput = { /** - * + * The prompt to generate an image from. */ prompt: string; /** - * The aspect ratio of the generated video Default value: `"16:9"` + * The size of the generated image. */ - aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Whether the video should loop (end of video is blended with the beginning) + * The number of inference steps to perform. Default value: `28` */ - loop?: boolean; -}; -export type LumaDreamMachineImageToVideoOutput = { + num_inference_steps?: number; /** - * The generated video + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. */ - video: File; -}; -export type KlingVideoV1StandardTextToVideoInput = { + seed?: number; /** - * + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - prompt: string; + loras?: Array; /** - * The duration of the generated video in seconds Default value: `"5"` + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - duration?: "5" | "10"; + controlnets?: Array; /** - * The aspect ratio of the generated video frame Default value: `"16:9"` + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - aspect_ratio?: "16:9" | "9:16" | "1:1"; -}; -export type KlingVideoV1StandardTextToVideoOutput = { + controlnet_unions?: Array; /** - * The generated video + * IP-Adapter to use for image generation. Default value: `` */ - video: File; -}; -export type KlingVideoV1StandardImageToVideoInput = { + ip_adapters?: Array; /** - * + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
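// --- Usage sketch (editor's addition): ControlNet Union with FluxGeneral -----
// ControlNetUnionInput (defined above) slots into FluxGeneralInput via the
// `controlnet_unions` field; a single union model covers the canny, tile,
// depth, blur, pose, gray and low-quality modes. The endpoint id
// "fal-ai/flux-general" and the array element type are assumptions for
// illustration.
const controlledInput: FluxGeneralInput = {
  prompt: "a cozy reading nook, warm lamplight",
  controlnet_unions: [
    {
      control_image_url: "https://example.com/depth-map.png",
      control_mode: "depth",
      conditioning_scale: 1, // documented default
      start_percentage: 0,
      end_percentage: 1, // documented default
    },
  ],
};
// -----------------------------------------------------------------------------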
Default value: `3.5` */ - prompt: string; + guidance_scale?: number; /** - * The duration of the generated video in seconds Default value: `"5"` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - duration?: "5" | "10"; + real_cfg_scale?: number; /** - * The aspect ratio of the generated video frame Default value: `"16:9"` + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. + * If using XLabs IP-Adapter v1, this will be turned on!. */ - aspect_ratio?: "16:9" | "9:16" | "1:1"; -}; -export type KlingVideoV1StandardImageToVideoOutput = { + use_real_cfg?: boolean; /** - * The generated video + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - video: File; -}; -export type KlingVideoV1ProTextToVideoInput = { + sync_mode?: boolean; /** - * + * The number of images to generate. Default value: `1` */ - prompt: string; + num_images?: number; /** - * The duration of the generated video in seconds Default value: `"5"` + * If set to true, the safety checker will be enabled. Default value: `true` */ - duration?: "5" | "10"; + enable_safety_checker?: boolean; /** - * The aspect ratio of the generated video frame Default value: `"16:9"` + * URL of Image for Reference-Only */ - aspect_ratio?: "16:9" | "9:16" | "1:1"; -}; -export type KlingVideoV1ProTextToVideoOutput = { + reference_image_url?: string | Blob | File; /** - * The generated video + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ - video: File; -}; -export type KlingVideoV1ProImageToVideoInput = { + reference_strength?: number; /** - * + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - prompt: string; + reference_start?: number; /** - * The duration of the generated video in seconds Default value: `"5"` + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - duration?: "5" | "10"; + reference_end?: number; /** - * The aspect ratio of the generated video frame Default value: `"16:9"` + * URL of image to use for inpainting. or img2img */ - aspect_ratio?: "16:9" | "9:16" | "1:1"; -}; -export type KlingVideoV1ProImageToVideoOutput = { + image_url: string | Blob | File; /** - * The generated video + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - video: File; + strength?: number; + /** + * The mask to area to Inpaint in. + */ + mask_url: string | Blob | File; }; -export type Cogvideox5bOutput = { +export type FluxGeneralImageToImageOutput = { /** - * The URL to the generated video + * The generated image files info. */ - video: File; + images: Array; /** * */ - timings: Record; + timings: any; /** - * Seed of the generated video. It will be the same value of the one passed in the + * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * The prompt used for generating the video. + * Whether the generated images contain NSFW concepts. 
+ */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. */ prompt: string; }; -export type Cogvideox5bInput = { +export type FluxGeneralImageToImageInput = { /** - * The prompt to generate the video from. + * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated video. Default value: `[object Object]` + * The size of the generated image. */ - video_size?: + image_size?: | ImageSize | "square_hd" | "square" @@ -3256,122 +3122,94 @@ export type Cogvideox5bInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The negative prompt to generate video from Default value: `""` - */ - negative_prompt?: string; - /** - * The number of inference steps to perform. Default value: `50` + * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model - * will output the same video every time. + * will output the same image every time. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related video to show you. Default value: `7` + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - guidance_scale?: number; + loras?: Array; /** - * Use RIFE for video interpolation Default value: `true` + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - use_rife?: boolean; + controlnets?: Array; /** - * The target FPS of the video Default value: `16` + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - export_fps?: number; -}; -export type Cogvideox5bVideoToVideoOutput = { + controlnet_unions?: Array; /** - * The URL to the generated video + * IP-Adapter to use for image generation. Default value: `` */ - video: File; + ip_adapters?: Array; /** - * + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - timings: Record; + guidance_scale?: number; /** - * Seed of the generated video. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - seed: number; + real_cfg_scale?: number; /** - * The prompt used for generating the video. + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. + * If using XLabs IP-Adapter v1, this will be turned on!. */ - prompt: string; -}; -export type Cogvideox5bVideoToVideoInput = { + use_real_cfg?: boolean; /** - * The prompt to generate the video from. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - prompt: string; + sync_mode?: boolean; /** - * The size of the generated video. Default value: `[object Object]` + * The number of images to generate. 
Default value: `1` */ - video_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + num_images?: number; /** - * The negative prompt to generate video from Default value: `""` + * If set to true, the safety checker will be enabled. Default value: `true` */ - negative_prompt?: string; - /** - * The number of inference steps to perform. Default value: `50` - */ - num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of the model - * will output the same video every time. - */ - seed?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related video to show you. Default value: `7` - */ - guidance_scale?: number; + enable_safety_checker?: boolean; /** - * Use RIFE for video interpolation Default value: `true` + * URL of Image for Reference-Only */ - use_rife?: boolean; + reference_image_url?: string | Blob | File; /** - * The target FPS of the video Default value: `16` + * Strength of reference_only generation. Only used if a reference image is provided. Default value: `0.65` */ - export_fps?: number; -}; -export type Cogvideox5bImageToVideoOutput = { + reference_strength?: number; /** - * The URL to the generated video + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - video: File; + reference_start?: number; /** - * + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - timings: Record; + reference_end?: number; /** - * Seed of the generated video. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * URL of image to use for inpainting. or img2img */ - seed: number; + image_url: string | Blob | File; /** - * The prompt used for generating the video. + * The strength to use for inpainting/image-to-image. Only used if the image_url is provided. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - prompt: string; + strength?: number; }; -export type Cogvideox5bImageToVideoInput = { +export type FluxGeneralDifferentialDiffusionInput = { /** - * The prompt to generate the video from. + * The prompt to generate an image from. */ prompt: string; /** - * The size of the generated video. Default value: `[object Object]` + * The size of the generated image. */ - video_size?: + image_size?: | ImageSize | "square_hd" | "square" @@ -3380,554 +3218,535 @@ export type Cogvideox5bImageToVideoInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The negative prompt to generate video from Default value: `""` - */ - negative_prompt?: string; - /** - * The number of inference steps to perform. Default value: `50` + * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of the model - * will output the same video every time. + * will output the same image every time. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related video to show you. Default value: `7` + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. 
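// A minimal usage sketch for the FluxGeneralImageToImage types above. The
// endpoint id "fal-ai/flux-general/image-to-image" and the re-export of the
// generated types from the package root are assumptions, not part of this patch.
import { createFalClient, Result } from "@fal-ai/client";
import type {
  FluxGeneralImageToImageInput,
  FluxGeneralImageToImageOutput,
} from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: FluxGeneralImageToImageInput = {
  prompt: "a watercolor skyline at dusk",
  image_url: "https://example.com/source.png", // hypothetical source image
  strength: 0.85, // 1.0 completely remakes the image, 0.0 preserves it
};
const { data }: Result<FluxGeneralImageToImageOutput> = await fal.subscribe(
  "fal-ai/flux-general/image-to-image",
  { input },
);
console.log(data.images[0], data.seed);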
Default value: `` */ - guidance_scale?: number; + loras?: Array; /** - * Use RIFE for video interpolation Default value: `true` + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - use_rife?: boolean; + controlnets?: Array; /** - * The target FPS of the video Default value: `16` + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - export_fps?: number; -}; -export type StableVideoInput = { + controlnet_unions?: Array; /** - * The prompt to use as a starting point for the generation. + * IP-Adapter to use for image generation. Default value: `` */ - prompt: string; + ip_adapters?: Array; /** - * The negative prompt to use as a starting point for the generation. Default value: `"unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label"` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - negative_prompt?: string; + guidance_scale?: number; /** - * The size of the generated video. Default value: `landscape_16_9` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - video_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + real_cfg_scale?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Uses classical CFG as in SD1.5, SDXL, etc. Increases generation times and price when set to be true. + * If using XLabs IP-Adapter v1, this will be turned on!. */ - seed?: number; + use_real_cfg?: boolean; /** - * The motion bucket id determines the motion of the generated video. The - * higher the number, the more motion there will be. Default value: `127` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - motion_bucket_id?: number; + sync_mode?: boolean; /** - * The conditoning augmentation determines the amount of noise that will be - * added to the conditioning frame. The higher the number, the more noise - * there will be, and the less the video will look like the initial image. - * Increase it for more motion. Default value: `0.02` + * The number of images to generate. Default value: `1` */ - cond_aug?: number; -}; -export type StableVideoOutput = { + num_images?: number; /** - * Generated video + * If set to true, the safety checker will be enabled. Default value: `true` */ - video: File; + enable_safety_checker?: boolean; /** - * Seed for random number generator + * URL of Image for Reference-Only */ - seed: number; -}; -export type FastSvdTextToVideoInput = { + reference_image_url?: string | Blob | File; /** - * The URL of the image to use as a starting point for the generation. + * Strength of reference_only generation. Only used if a reference image is provided. 
Default value: `0.65` */ - image_url: string | Blob | File; + reference_strength?: number; /** - * The motion bucket id determines the motion of the generated video. The - * higher the number, the more motion there will be. Default value: `127` + * The percentage of the total timesteps when the reference guidance is to bestarted. */ - motion_bucket_id?: number; + reference_start?: number; /** - * The conditoning augmentation determines the amount of noise that will be - * added to the conditioning frame. The higher the number, the more noise - * there will be, and the less the video will look like the initial image. - * Increase it for more motion. Default value: `0.02` + * The percentage of the total timesteps when the reference guidance is to be ended. Default value: `1` */ - cond_aug?: number; + reference_end?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * URL of image to use as initial image. */ - seed?: number; + image_url: string | Blob | File; /** - * The number of steps to run the model for. The higher the number the better - * the quality and longer it will take to generate. Default value: `20` + * URL of change map. */ - steps?: number; + change_map_image_url: string | Blob | File; /** - * Enabling [DeepCache](https://github.com/horseee/DeepCache) will make the execution - * faster, but might sometimes degrade overall quality. The higher the setting, the - * faster the execution will be, but the more quality might be lost. Default value: `"none"` + * The strength to use for differential diffusion. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - deep_cache?: "none" | "minimum" | "medium" | "high"; + strength?: number; +}; +export type FluxGeneralDifferentialDiffusionOutput = { /** - * The FPS of the generated video. The higher the number, the faster the video will - * play. Total video length is 25 frames. Default value: `10` + * The generated image files info. */ - fps?: number; -}; -export type FastSvdTextToVideoOutput = { + images: Array; /** - * The generated video file. + * */ - video: File; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; }; -export type FastSvdLcmInput = { +export type FluxGeneralRfInversionInput = { /** - * The URL of the image to use as a starting point for the generation. + * The prompt to edit the image with */ - image_url: string | Blob | File; + prompt: string; /** - * The motion bucket id determines the motion of the generated video. The - * higher the number, the more motion there will be. Default value: `127` + * The size of the generated image. */ - motion_bucket_id?: number; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The conditoning augmentation determines the amount of noise that will be - * added to the conditioning frame. The higher the number, the more noise - * there will be, and the less the video will look like the initial image. - * Increase it for more motion. Default value: `0.02` + * The number of inference steps to perform. 
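// Sketch for the FluxGeneralDifferentialDiffusion types above: a grayscale
// change map weights how strongly each region is re-generated. The endpoint id
// "fal-ai/flux-general/differential-diffusion" and the white-changes/black-keeps
// mask convention are assumptions.
import { createFalClient } from "@fal-ai/client";
import type { FluxGeneralDifferentialDiffusionInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: FluxGeneralDifferentialDiffusionInput = {
  prompt: "replace the sky with a dramatic sunset",
  image_url: "https://example.com/photo.png", // hypothetical initial image
  change_map_image_url: "https://example.com/map.png", // hypothetical change map
  strength: 0.85,
};
const { data } = await fal.subscribe(
  "fal-ai/flux-general/differential-diffusion",
  { input },
);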
Default value: `28` */ - cond_aug?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** - * The number of steps to run the model for. The higher the number the better - * the quality and longer it will take to generate. Default value: `4` + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - steps?: number; + loras?: Array; /** - * The FPS of the generated video. The higher the number, the faster the video will - * play. Total video length is 25 frames. Default value: `10` + * The controlnets to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - fps?: number; -}; -export type FastSvdLcmOutput = { + controlnets?: Array; /** - * The generated video file. + * The controlnet unions to use for the image generation. Only one controlnet is supported at the moment. Default value: `` */ - video: File; + controlnet_unions?: Array; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - seed: number; -}; -export type BirefnetInput = { + guidance_scale?: number; /** - * URL of the image to remove background from + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - image_url: string | Blob | File; + sync_mode?: boolean; /** - * Model to use for background removal. - * The 'General Use (Light)' model is the original model used in the BiRefNet repository. - * The 'General Use (Heavy)' model is a slower but more accurate model. - * The 'Portrait' model is a model trained specifically for portrait images. - * The 'General Use (Light)' model is recommended for most use cases. - * - * The corresponding models are as follows: - * - 'General Use (Light)': BiRefNet-DIS_ep580.pth - * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth - * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` + * The number of images to generate. Default value: `1` */ - model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait"; + num_images?: number; /** - * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` + * If set to true, the safety checker will be enabled. Default value: `true` */ - operating_resolution?: "1024x1024" | "2048x2048"; + enable_safety_checker?: boolean; /** - * The format of the output image Default value: `"png"` + * URL of Image for Reference-Only */ - output_format?: "webp" | "png"; + reference_image_url?: string | Blob | File; /** - * Whether to output the mask used to remove the background - */ - output_mask?: boolean; - /** - * Whether to refine the foreground using the estimated mask Default value: `true` + * Strength of reference_only generation. 
Only used if a reference image is provided. Default value: `0.65`
    */
-  refine_foreground?: boolean;
-};
-export type BirefnetOutput = {
+  reference_strength?: number;
   /**
-   * Image with background removed
+   * The percentage of the total timesteps at which the reference guidance is to be started.
    */
-  image: Image;
+  reference_start?: number;
   /**
-   * Mask used to remove the background
+   * The percentage of the total timesteps at which the reference guidance is to be ended. Default value: `1`
    */
-  mask_image?: Image;
-};
-export type BirefnetV2Input = {
+  reference_end?: number;
   /**
-   * URL of the image to remove background from
+   * URL of the image to be edited
    */
   image_url: string | Blob | File;
   /**
-   * Model to use for background removal.
-   * The 'General Use (Light)' model is the original model used in the BiRefNet repository.
-   * The 'General Use (Heavy)' model is a slower but more accurate model.
-   * The 'Portrait' model is a model trained specifically for portrait images.
-   * The 'General Use (Light)' model is recommended for most use cases.
-   *
-   * The corresponding models are as follows:
-   * - 'General Use (Light)': BiRefNet-DIS_ep580.pth
-   * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth
-   * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"`
-   */
-  model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait";
-  /**
-   * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"`
-   */
-  operating_resolution?: "1024x1024" | "2048x2048";
-  /**
-   * The format of the output image Default value: `"png"`
+   * The controller guidance (gamma) used in the creation of structured noise. Default value: `0.6`
    */
-  output_format?: "webp" | "png";
+  controller_guidance_forward?: number;
   /**
-   * Whether to output the mask used to remove the background
+   * The controller guidance (eta) used in the denoising process. Using values closer to 1 will result in an image closer to the input. Default value: `0.75`
    */
-  output_mask?: boolean;
+  controller_guidance_reverse?: number;
   /**
-   * Whether to refine the foreground using the estimated mask Default value: `true`
+   * Timestep at which to start guidance during the reverse process.
    */
-  refine_foreground?: boolean;
-};
-export type BirefnetV2Output = {
+  reverse_guidance_start?: number;
   /**
-   * Image with background removed
+   * Timestep at which to stop guidance during the reverse process. Default value: `8`
    */
-  image: Image;
+  reverse_guidance_end?: number;
   /**
-   * Mask used to remove the background
+   * Scheduler for applying reverse guidance. Default value: `"constant"`
    */
-  mask_image?: Image;
+  reverse_guidance_schedule?:
+    | "constant"
+    | "linear_increase"
+    | "linear_decrease";
 };
-export type FastSvdLcmTextToVideoInput = {
+export type FluxGeneralRfInversionOutput = {
   /**
-   * The URL of the image to use as a starting point for the generation.
+   * The generated image files info.
    */
-  image_url: string | Blob | File;
+  images: Array<Image>;
   /**
-   * The motion bucket id determines the motion of the generated video. The
-   * higher the number, the more motion there will be. Default value: `127`
+   *
    */
-  motion_bucket_id?: number;
+  timings: any;
   /**
-   * The conditoning augmentation determines the amount of noise that will be
-   * added to the conditioning frame. The higher the number, the more noise
-   * there will be, and the less the video will look like the initial image.
-   * Increase it for more motion.
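// Sketch for the FluxGeneralRfInversion input type above, using the queue API
// rather than subscribe. The endpoint id "fal-ai/flux-general/rf-inversion" is
// an assumption; queue.submit/queue.result are the client's standard queue calls.
import { createFalClient } from "@fal-ai/client";
import type { FluxGeneralRfInversionInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: FluxGeneralRfInversionInput = {
  prompt: "the same portrait, but smiling",
  image_url: "https://example.com/portrait.png", // hypothetical image to edit
  controller_guidance_forward: 0.6,
  controller_guidance_reverse: 0.75, // closer to 1 stays closer to the input
  reverse_guidance_schedule: "constant",
};
const { request_id } = await fal.queue.submit(
  "fal-ai/flux-general/rf-inversion",
  { input },
);
const result = await fal.queue.result("fal-ai/flux-general/rf-inversion", {
  requestId: request_id,
});
console.log(result.data);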
Default value: `0.02` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - cond_aug?: number; + seed: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Whether the generated images contain NSFW concepts. */ - seed?: number; + has_nsfw_concepts: Array; /** - * The number of steps to run the model for. The higher the number the better - * the quality and longer it will take to generate. Default value: `4` + * The prompt used for generating the image. */ - steps?: number; + prompt: string; +}; +export type IclightV2Output = { /** - * The FPS of the generated video. The higher the number, the faster the video will - * play. Total video length is 25 frames. Default value: `10` + * The generated image files info. */ - fps?: number; -}; -export type FastSvdLcmTextToVideoOutput = { + images: Array; /** - * The generated video file. + * */ - video: File; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; -}; -export type CreativeUpscalerInput = { - /** - * The type of model to use for the upscaling. Default is SD_1_5 Default value: `"SD_1_5"` - */ - model_type?: "SD_1_5" | "SDXL"; /** - * The image to upscale. - */ - image_url: string | Blob | File; - /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. If no prompt is provide BLIP2 will be used to generate a prompt. + * Whether the generated images contain NSFW concepts. */ - prompt?: string; + has_nsfw_concepts: Array; /** - * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2` + * The prompt used for generating the image. */ - scale?: number; + prompt: string; +}; +export type IclightV2Input = { /** - * How much the output can deviate from the original Default value: `0.5` + * The prompt to generate an image from. */ - creativity?: number; + prompt: string; /** - * How much detail to add Default value: `1` + * Negative Prompt for the image Default value: `""` */ - detail?: number; + negative_prompt?: string; /** - * How much to preserve the shape of the original image Default value: `0.25` + * URL of image to be used for relighting */ - shape_preservation?: number; + image_url: string | Blob | File; /** - * The suffix to add to the generated prompt. Not used for a custom prompt. This is useful to add a common ending to all prompts such as 'high quality' etc or embedding tokens. Default value: `" high quality, highly detailed, high resolution, sharp"` + * The size of the generated image. */ - prompt_suffix?: string; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"` + * The number of inference steps to perform. 
Default value: `28` */ - negative_prompt?: string; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same prompt given to the same version of the model * will output the same image every time. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * Provide lighting conditions for the model Default value: `"None"` */ - guidance_scale?: number; + initial_latent?: "None" | "Left" | "Right" | "Top" | "Bottom"; /** - * The number of inference steps to use for generating the image. The more steps - * the better the image will be but it will also take longer to generate. Default value: `20` + * Use HR fix */ - num_inference_steps?: number; + enable_hr_fix?: boolean; /** - * If set to true, the resulting image will be checked whether it includes any - * potentially unsafe content. If it does, it will be replaced with a black - * image. Default value: `true` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - enable_safety_checks?: boolean; + sync_mode?: boolean; /** - * If set to true, the image will not be processed by the CCSR model before - * being processed by the creativity model. + * The number of images to generate. Default value: `1` */ - skip_ccsr?: boolean; + num_images?: number; /** - * Allow for large uploads that could take a very long time. + * The number of images to generate. Default value: `1` */ - override_size_limits?: boolean; + cfg?: number; /** - * The URL to the base model to use for the upscaling + * Strength for low-resolution pass. Default value: `0.98` */ - base_model_url?: string | Blob | File; + lowres_denoise?: number; /** - * The URL to the additional LORA model to use for the upscaling. Default is None + * Strength for high-resolution pass. Only used if enable_hr_fix is True. Default value: `0.95` */ - additional_lora_url?: string | Blob | File; + highres_denoise?: number; /** - * The scale of the additional LORA model to use for the upscaling. Default is 1.0 Default value: `1` + * Default value: `0.5` */ - additional_lora_scale?: number; + hr_downscale?: number; /** - * The URL to the additional embeddings to use for the upscaling. Default is None + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ - additional_embedding_url?: string | Blob | File; -}; -export type CreativeUpscalerOutput = { + guidance_scale?: number; /** - * The generated image file info. + * If set to true, the safety checker will be enabled. Default value: `true` */ - image: Image; + enable_safety_checker?: boolean; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The format of the generated image. Default value: `"jpeg"` */ - seed: number; + output_format?: "jpeg" | "png"; }; -export type ClarityUpscalerOutput = { - /** - * The URL of the generated image. - */ - image: Image; - /** - * The seed used to generate the image. 
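// Sketch for the IclightV2 types above (relighting a subject image). The
// endpoint id "fal-ai/iclight-v2" is an assumption.
import { createFalClient } from "@fal-ai/client";
import type { IclightV2Input } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: IclightV2Input = {
  prompt: "soft window light from the left",
  image_url: "https://example.com/product.png", // hypothetical subject image
  initial_latent: "Left", // lighting-direction hint
  enable_hr_fix: true,
  output_format: "png",
};
const { data } = await fal.subscribe("fal-ai/iclight-v2", { input });
console.log(data.images[0]);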
- */ - seed: number; +export type FluxDifferentialDiffusionInput = { /** - * The timings of the different steps in the workflow. + * The prompt to generate an image from. */ - timings: Record; -}; -export type ClarityUpscalerInput = { + prompt: string; /** - * The URL of the image to upscale. + * URL of image to use as initial image. */ image_url: string | Blob | File; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `"masterpiece, best quality, highres"` - */ - prompt?: string; - /** - * The upscale factor Default value: `2` + * URL of change map. */ - upscale_factor?: number; + change_map_image_url: string | Blob | File; /** - * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality:2)"` + * The strength to use for image-to-image. 1.0 is completely remakes the image while 0.0 preserves the original. Default value: `0.85` */ - negative_prompt?: string; + strength?: number; /** - * The creativity of the model. The higher the creativity, the more the model will deviate from the prompt. - * Refers to the denoise strength of the sampling. Default value: `0.35` + * The number of inference steps to perform. Default value: `28` */ - creativity?: number; + num_inference_steps?: number; /** - * The resemblance of the upscaled image to the original image. The higher the resemblance, the more the model will try to keep the original image. - * Refers to the strength of the ControlNet. Default value: `0.6` + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. */ - resemblance?: number; + seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3.5` */ guidance_scale?: number; /** - * The number of inference steps to perform. Default value: `18` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - num_inference_steps?: number; + sync_mode?: boolean; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The number of images to generate. Default value: `1` */ - seed?: number; + num_images?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; }; -export type CcsrInput = { +export type FluxDifferentialDiffusionOutput = { /** - * The text prompt you would like to convert to speech. + * The generated image files info. */ - image_url: string | Blob | File; + images: Array; /** - * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2` + * */ - scale?: number; + timings: any; /** - * If specified, a patch-based sampling strategy will be used for sampling. Default value: `"none"` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. 
*/ - tile_diffusion?: "none" | "mix" | "gaussian"; + seed: number; /** - * Size of patch. Default value: `1024` + * Whether the generated images contain NSFW concepts. */ - tile_diffusion_size?: number; + has_nsfw_concepts: Array; /** - * Stride of sliding patch. Default value: `512` + * The prompt used for generating the image. */ - tile_diffusion_stride?: number; + prompt: string; +}; +export type StableDiffusionV3MediumInput = { /** - * If specified, a patch-based sampling strategy will be used for VAE decoding. + * The prompt to generate an image from. */ - tile_vae?: boolean; + prompt: string; /** - * Size of VAE patch. Default value: `226` + * The negative prompt to generate an image from. Default value: `""` */ - tile_vae_decoder_size?: number; + negative_prompt?: string; /** - * Size of latent image Default value: `1024` + * If set to true, prompt will be upsampled with more details. */ - tile_vae_encoder_size?: number; + prompt_expansion?: boolean; /** - * The number of steps to run the model for. The higher the number the better the quality and longer it will take to generate. Default value: `50` + * The size of the generated image. Default value: `square_hd` */ - steps?: number; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The ending point of uniform sampling strategy. Default value: `0.6667` + * The number of inference steps to perform. Default value: `28` */ - t_max?: number; + num_inference_steps?: number; /** - * The starting point of uniform sampling strategy. Default value: `0.3333` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - t_min?: number; + seed?: number; /** - * Type of color correction for samples. Default value: `"adain"` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ - color_fix_type?: "none" | "wavelet" | "adain"; + guidance_scale?: number; /** - * Seed for reproducibility. Different seeds will make slightly different results. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - seed?: number; + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; }; -export type CcsrOutput = { +export type StableDiffusionV3MediumOutput = { /** - * The generated image file info. + * The generated image files info. */ - image: Image; + images: Array; /** - * The seed used for the generation. + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; + /** + * The number of images generated. + */ + num_images: number; }; -export type FastTurboDiffusionInput = { +export type StableDiffusionV3MediumImageToImageOutput = { /** - * The name of the model to use. 
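// Sketch for the StableDiffusionV3Medium types above, using the blocking run()
// call instead of the queue. The endpoint id "fal-ai/stable-diffusion-v3-medium"
// is an assumption.
import { createFalClient } from "@fal-ai/client";
import type { StableDiffusionV3MediumInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: StableDiffusionV3MediumInput = {
  prompt: "macro photo of a dew-covered leaf",
  prompt_expansion: true, // let the model upsample the prompt with more details
  image_size: "landscape_16_9",
  guidance_scale: 5,
};
const { data } = await fal.run("fal-ai/stable-diffusion-v3-medium", { input });
console.log(data.images[0], data.num_images);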
Default value: `"stabilityai/sdxl-turbo"` + * The generated image files info. */ - model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + images: Array; /** - * The URL of the image to use as a starting point for the generation. + * */ - image_url: string | Blob | File; + timings: any; /** - * The URL of the mask to use for inpainting. + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - mask_url: string | Blob | File; + seed: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. */ prompt: string; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The number of images generated. + */ + num_images: number; +}; +export type StableDiffusionV3MediumImageToImageInput = { + /** + * The image URL to generate an image from. + */ + image_url: string | Blob | File; + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The negative prompt to generate an image from. Default value: `""` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square` + * If set to true, prompt will be upsampled with more details. + */ + prompt_expansion?: boolean; + /** + * The size of the generated image. Defaults to the conditioning image's size. */ image_size?: | ImageSize @@ -3938,27 +3757,23 @@ export type FastTurboDiffusionInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `2` + * The number of inference steps to perform. Default value: `28` */ num_inference_steps?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1` - */ - guidance_scale?: number; - /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` + */ + guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; /** @@ -3966,15 +3781,15 @@ export type FastTurboDiffusionInput = { */ num_images?: number; /** - * If set to true, the safety checker will be enabled. + * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** - * If set to true, the prompt will be expanded with additional prompts. + * The strength of the image-to-image transformation. 
Default value: `0.9` */ - expand_prompt?: boolean; + strength?: number; }; -export type FastTurboDiffusionOutput = { +export type FastSdxlOutput = { /** * The generated image files info. */ @@ -3982,7 +3797,7 @@ export type FastTurboDiffusionOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -3992,32 +3807,24 @@ export type FastTurboDiffusionOutput = { * Whether the generated images contain NSFW concepts. */ has_nsfw_concepts: Array; -}; -export type FastTurboDiffusionImageToImageInput = { - /** - * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` - */ - model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; - /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; /** - * The URL of the mask to use for inpainting. + * The prompt used for generating the image. */ - mask_url: string | Blob | File; + prompt: string; +}; +export type FastSdxlInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use.Use it to address details that you don't want + * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square` + * The size of the generated image. Default value: `square_hd` */ image_size?: | ImageSize @@ -4028,27 +3835,23 @@ export type FastTurboDiffusionImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `2` + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1` - */ - guidance_scale?: number; - /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; /** @@ -4056,38 +3859,36 @@ export type FastTurboDiffusionImageToImageInput = { */ num_images?: number; /** - * If set to true, the safety checker will be enabled. + * The list of LoRA weights to use. Default value: `` */ - enable_safety_checker?: boolean; + loras?: Array; /** - * If set to true, the prompt will be expanded with additional prompts. + * The list of embeddings to use. 
Default value: `` */ - expand_prompt?: boolean; -}; -export type FastTurboDiffusionImageToImageOutput = { + embeddings?: Array; /** - * The generated image files info. + * If set to true, the safety checker will be enabled. Default value: `true` */ - images: Array; + enable_safety_checker?: boolean; /** - * + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - timings: Record; + safety_checker_version?: "v1" | "v2"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * If set to true, the prompt will be expanded with additional prompts. */ - seed: number; + expand_prompt?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The format of the generated image. Default value: `"jpeg"` */ - has_nsfw_concepts: Array; -}; -export type FastTurboDiffusionInpaintingInput = { + format?: "jpeg" | "png"; /** - * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + request_id?: string; +}; +export type InpaintingInput = { /** * The URL of the image to use as a starting point for the generation. */ @@ -4107,7 +3908,7 @@ export type FastTurboDiffusionInpaintingInput = { */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square` + * The size of the generated image. Default value: `square_hd` */ image_size?: | ImageSize @@ -4118,12 +3919,12 @@ export type FastTurboDiffusionInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `2` + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** @@ -4138,7 +3939,7 @@ export type FastTurboDiffusionInpaintingInput = { /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * it allows you to get the image directly in the response without going through the CDN. */ sync_mode?: boolean; /** @@ -4146,153 +3947,154 @@ export type FastTurboDiffusionInpaintingInput = { */ num_images?: number; /** - * If set to true, the safety checker will be enabled. + * The list of LoRA weights to use. Default value: `` */ - enable_safety_checker?: boolean; + loras?: Array; /** - * If set to true, the prompt will be expanded with additional prompts. + * The list of embeddings to use. Default value: `` */ - expand_prompt?: boolean; -}; -export type FastTurboDiffusionInpaintingOutput = { + embeddings?: Array; /** - * The generated image files info. + * If set to true, the safety checker will be enabled. Default value: `true` */ - images: Array; + enable_safety_checker?: boolean; /** - * + * The version of the safety checker to use. v1 is the default CompVis safety checker. 
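// Sketch for the FastSdxl types above. The endpoint id "fal-ai/fast-sdxl" is an
// assumption, as is the `{ path, scale }` shape of a LoRA weight entry (the
// element type of `loras` is defined elsewhere in this file).
import { createFalClient } from "@fal-ai/client";
import type { FastSdxlInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: FastSdxlInput = {
  prompt: "isometric voxel diorama of a harbor town",
  loras: [{ path: "https://example.com/voxel-style.safetensors", scale: 0.8 }], // hypothetical LoRA
  safety_checker_version: "v2",
  format: "png",
};
const { data } = await fal.subscribe("fal-ai/fast-sdxl", { input });
console.log(data.images[0], data.has_nsfw_concepts);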
v2 uses a custom ViT model. Default value: `"v1"` */ - timings: Record; + safety_checker_version?: "v1" | "v2"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * If set to true, the prompt will be expanded with additional prompts. */ - seed: number; + expand_prompt?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The format of the generated image. Default value: `"jpeg"` */ - has_nsfw_concepts: Array; + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; }; -export type FastLcmDiffusionInput = { +export type TimestepsInput = { /** - * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + * The method to use for the timesteps. If set to 'array', the timesteps will be set based + * on the provided timesteps schedule in the `array` field. + * Defaults to 'default' which means the scheduler will use the `num_inference_steps` parameter. Default value: `"default"` */ - model_name?: - | "stabilityai/stable-diffusion-xl-base-1.0" - | "runwayml/stable-diffusion-v1-5"; + method?: "default" | "array"; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Timesteps schedule to be used if 'custom' method is selected. Default value: `` */ - prompt: string; + array?: Array; +}; +export type LoraOutput = { /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The generated image files info. */ - negative_prompt?: string; + images: Array; /** - * The size of the generated image. Default value: `square_hd` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + seed: number; /** - * The number of inference steps to perform. Default value: `6` + * Whether the generated images contain NSFW concepts. */ - num_inference_steps?: number; + has_nsfw_concepts: Array; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The latents saved for debugging. */ - seed?: number; + debug_latents?: File; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + * The latents saved for debugging per pass. */ - guidance_scale?: number; + debug_per_pass_latents?: File; +}; +export type LoraInput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * URL or HuggingFace ID of the base model to generate the image. */ - sync_mode?: boolean; + model_name: string; /** - * The number of images to generate. Default value: `1` + * URL or HuggingFace ID of the custom U-Net model to use for the image generation. 
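// Sketch for the InpaintingInput type above: the masked area is repainted from
// the prompt. Pairing it with "fal-ai/fast-sdxl/inpainting" is an assumption,
// as is the white-repaint/black-keep mask convention (not stated in this file).
import { createFalClient } from "@fal-ai/client";
import type { InpaintingInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: InpaintingInput = {
  prompt: "a red vintage car parked on the street",
  image_url: "https://example.com/street.png", // hypothetical base image
  mask_url: "https://example.com/car-mask.png", // hypothetical mask
  strength: 0.85,
};
const { data } = await fal.subscribe("fal-ai/fast-sdxl/inpainting", { input });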
*/ - num_images?: number; + unet_name?: string; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The variant of the model to use for huggingface models, e.g. 'fp16'. */ - enable_safety_checker?: boolean; + variant?: string; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - safety_checker_version?: "v1" | "v2"; + prompt: string; /** - * If set to true, the prompt will be expanded with additional prompts. + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - expand_prompt?: boolean; + negative_prompt?: string; /** - * The format of the generated image. Default value: `"jpeg"` + * If set to true, the prompt weighting syntax will be used. + * Additionally, this will lift the 77 token limit by averaging embeddings. */ - format?: "jpeg" | "png"; + prompt_weighting?: boolean; /** - * The rescale factor for the CFG. + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` */ - guidance_rescale?: number; + loras?: Array; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * The embeddings to use for the image generation. Only a single embedding is supported at the moment. + * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: `` */ - request_id?: string; -}; -export type FastLcmDiffusionOutput = { + embeddings?: Array; /** - * The generated image files info. + * The control nets to use for the image generation. You can use any number of control nets + * and they will be applied to the image at the specified timesteps. Default value: `` */ - images: Array; + controlnets?: Array; /** - * + * If set to true, the controlnet will be applied to only the conditional predictions. */ - timings: Record; + controlnet_guess_mode?: boolean; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The IP adapter to use for the image generation. Default value: `` */ - seed: number; + ip_adapter?: Array; /** - * Whether the generated images contain NSFW concepts. + * The path to the image encoder model to use for the image generation. */ - has_nsfw_concepts: Array; + image_encoder_path?: string; /** - * The prompt used for generating the image. + * The subfolder of the image encoder model to use for the image generation. */ - prompt: string; -}; -export type FastLcmDiffusionImageToImageInput = { + image_encoder_subfolder?: string; /** - * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` */ - model_name?: - | "stabilityai/stable-diffusion-xl-base-1.0" - | "runwayml/stable-diffusion-v1-5"; + image_encoder_weight_name?: string; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The URL of the IC Light model to use for the image generation. 
*/ - prompt: string; + ic_light_model_url?: string | Blob | File; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The URL of the IC Light model background image to use for the image generation. + * Make sure to use a background compatible with the model. */ - negative_prompt?: string; + ic_light_model_background_image_url?: string | Blob | File; /** - * The size of the generated image. Default value: `square_hd` + * The URL of the IC Light model image to use for the image generation. + */ + ic_light_image_url?: string | Blob | File; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The size of the generated image. You can choose between some presets or custom height and width + * that **must be multiples of 8**. Default value: `square_hd` */ image_size?: | ImageSize @@ -4303,56 +4105,130 @@ export type FastLcmDiffusionImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `6` + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `30` */ num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. - */ - seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * Skips part of the image generation process, leading to slightly different results. + * This means the image renders faster, too. */ - sync_mode?: boolean; + clip_skip?: number; /** - * The number of images to generate. Default value: `1` + * Scheduler / sampler to use for the image denoising process. + */ + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "Euler" + | "Euler A" + | "Euler (trailing timesteps)" + | "LCM" + | "LCM (trailing timesteps)" + | "DDIM" + | "TCD"; + /** + * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. + * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. + * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. Default value: `[object Object]` + */ + timesteps?: TimestepsInput; + /** + * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. 
+ * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. + * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. Default value: `[object Object]` + */ + sigmas?: SigmasInput; + /** + * The format of the generated image. Default value: `"png"` + */ + image_format?: "jpeg" | "png"; + /** + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` */ num_images?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * The size of the tiles to be used for the image generation. Default value: `4096` */ - safety_checker_version?: "v1" | "v2"; + tile_width?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * The size of the tiles to be used for the image generation. Default value: `4096` */ - expand_prompt?: boolean; + tile_height?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - format?: "jpeg" | "png"; + tile_stride_width?: number; /** - * The rescale factor for the CFG. + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - guidance_rescale?: number; + tile_stride_height?: number; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * The eta value to be used for the image generation. */ - request_id?: string; + eta?: number; + /** + * If set to true, the latents will be saved for debugging. + */ + debug_latents?: boolean; + /** + * If set to true, the latents will be saved for debugging per pass. + */ + debug_per_pass_latents?: boolean; }; -export type FastLcmDiffusionImageToImageOutput = { +export type SigmasInput = { + /** + * The method to use for the sigmas. If set to 'custom', the sigmas will be set based + * on the provided sigmas schedule in the `array` field. + * Defaults to 'default' which means the scheduler will use the sigmas of the scheduler. Default value: `"default"` + */ + method?: "default" | "array"; + /** + * Sigmas schedule to be used if 'custom' method is selected. Default value: `` + */ + array?: Array; +}; +export type AuraSrInput = { + /** + * URL of the image to upscale. + */ + image_url: string | Blob | File; + /** + * Upscaling factor. More coming soon. Default value: `"4"` + */ + upscaling_factor?: "4"; + /** + * Whether to use overlapping tiles for upscaling. Setting this to true helps remove seams but doubles the inference time. + */ + overlapping_tiles?: boolean; + /** + * Checkpoint to use for upscaling. More coming soon. Default value: `"v1"` + */ + checkpoint?: "v1" | "v2"; +}; +export type AuraSrOutput = { + /** + * Upscaled image + */ + image: Image; + /** + * Timings for each step in the pipeline. + */ + timings: any; +}; +export type StableCascadeOutput = { /** * The generated image files info. */ @@ -4360,7 +4236,7 @@ export type FastLcmDiffusionImageToImageOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. 
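// Sketch for the LoraInput/TimestepsInput types above: overriding the denoising
// schedule with an explicit timestep array. The endpoint id "fal-ai/lora" and
// the example schedule values are assumptions; per the docs above, a custom
// array causes `num_inference_steps` to be ignored.
import { createFalClient } from "@fal-ai/client";
import type { LoraInput } from "@fal-ai/client";

const fal = createFalClient({ credentials: process.env.FAL_KEY });

const input: LoraInput = {
  model_name: "stabilityai/stable-diffusion-xl-base-1.0",
  prompt: "a lighthouse in a storm, oil painting",
  scheduler: "DPM++ 2M Karras",
  timesteps: { method: "array", array: [999, 893, 746, 557, 324, 93] }, // hypothetical schedule
};
const { data } = await fal.subscribe("fal-ai/lora", { input });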
It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -4375,13 +4251,7 @@ export type FastLcmDiffusionImageToImageOutput = { */ prompt: string; }; -export type FastLcmDiffusionInpaintingInput = { - /** - * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` - */ - model_name?: - | "stabilityai/stable-diffusion-xl-base-1.0" - | "runwayml/stable-diffusion-v1-5"; +export type SoteDiffusionInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -4393,7 +4263,25 @@ export type FastLcmDiffusionInpaintingInput = { */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square_hd` + * Number of steps to run the first stage for. Default value: `25` + */ + first_stage_steps?: number; + /** + * Number of steps to run the second stage for. Default value: `10` + */ + second_stage_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `8` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + */ + second_stage_guidance_scale?: number; + /** + * The size of the generated image. Default value: `[object Object]` */ image_size?: | ImageSize @@ -4404,677 +4292,353 @@ export type FastLcmDiffusionInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `6` - */ - num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same prompt given to the same version of Stable Cascade * will output the same image every time. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + * If set to false, the safety checker will be disabled. Default value: `true` */ - guidance_scale?: number; + enable_safety_checker?: boolean; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the image will be returned as base64 encoded string. */ sync_mode?: boolean; +}; +export type StableCascadeInput = { /** - * The number of images to generate. Default value: `1` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - num_images?: number; + prompt: string; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - enable_safety_checker?: boolean; + negative_prompt?: string; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. 
Default value: `"v1"` + * Number of steps to run the first stage for. Default value: `20` */ - safety_checker_version?: "v1" | "v2"; + first_stage_steps?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * Number of steps to run the second stage for. Default value: `10` */ - expand_prompt?: boolean; + second_stage_steps?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ - format?: "jpeg" | "png"; + guidance_scale?: number; /** - * The rescale factor for the CFG. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. */ - guidance_rescale?: number; + second_stage_guidance_scale?: number; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * The size of the generated image. Default value: `square_hd` */ - request_id?: string; -}; -export type FastLcmDiffusionInpaintingOutput = { + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The generated image files info. + * The same seed and the same prompt given to the same version of Stable Cascade + * will output the same image every time. */ - images: Array; + seed?: number; /** - * + * If set to false, the safety checker will be disabled. Default value: `true` */ - timings: Record; + enable_safety_checker?: boolean; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The number of images to generate. Default value: `1` */ - seed: number; + num_images?: number; /** - * Whether the generated images contain NSFW concepts. + * If set to true, the image will be returned as base64 encoded string. */ - has_nsfw_concepts: Array; + sync_mode?: boolean; +}; +export type MinimaxVideoInput = { /** - * The prompt used for generating the image. + * */ prompt: string; -}; -export type WhisperInput = { /** - * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. + * Whether to use the model's prompt optimizer Default value: `true` */ - audio_url: string | Blob | File; + prompt_optimizer?: boolean; +}; +export type MinimaxVideoOutput = { /** - * Task to perform on the audio file. Either transcribe or translate. Default value: `"transcribe"` + * The generated video */ - task?: "transcribe" | "translate"; + video: File; +}; +export type HaiperVideoV2Input = { /** - * Language of the audio file. If set to null, the language will be - * automatically detected. Defaults to null. * - * If translate is selected as the task, the audio will be translated to - * English, regardless of the language selected. 
*/ - language?: - | "af" - | "am" - | "ar" - | "as" - | "az" - | "ba" - | "be" - | "bg" - | "bn" - | "bo" - | "br" - | "bs" - | "ca" - | "cs" - | "cy" - | "da" - | "de" - | "el" - | "en" - | "es" - | "et" - | "eu" - | "fa" - | "fi" - | "fo" - | "fr" - | "gl" - | "gu" - | "ha" - | "haw" - | "he" - | "hi" - | "hr" - | "ht" - | "hu" - | "hy" - | "id" - | "is" - | "it" - | "ja" - | "jw" - | "ka" - | "kk" - | "km" - | "kn" - | "ko" - | "la" - | "lb" - | "ln" - | "lo" - | "lt" - | "lv" - | "mg" - | "mi" - | "mk" - | "ml" - | "mn" - | "mr" - | "ms" - | "mt" - | "my" - | "ne" - | "nl" - | "nn" - | "no" - | "oc" - | "pa" - | "pl" - | "ps" - | "pt" - | "ro" - | "ru" - | "sa" - | "sd" - | "si" - | "sk" - | "sl" - | "sn" - | "so" - | "sq" - | "sr" - | "su" - | "sv" - | "sw" - | "ta" - | "te" - | "tg" - | "th" - | "tk" - | "tl" - | "tr" - | "tt" - | "uk" - | "ur" - | "uz" - | "vi" - | "yi" - | "yo" - | "yue" - | "zh"; + prompt: string; /** - * Whether to diarize the audio file. Defaults to false. + * The duration of the generated video in seconds Default value: `"4"` */ - diarize?: boolean; + duration?: "4" | "6"; /** - * Level of the chunks to return. Either segment or word. Default value: `"segment"` + * Whether to use the model's prompt enhancer Default value: `true` */ - chunk_level?: "segment" | "word"; + prompt_enhancer?: boolean; /** - * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ - version?: "3"; + seed?: number; +}; +export type I2VOutput = { /** - * Default value: `64` + * The generated video */ - batch_size?: number; + video: File; +}; +export type HaiperVideoV2Output = { /** - * Prompt to use for generation. Defaults to an empty string. Default value: `""` + * The generated video */ - prompt?: string; + video: File; +}; +export type HaiperVideoV2ImageToVideoOutput = { /** - * Number of speakers in the audio file. Defaults to null. - * If not provided, the number of speakers will be automatically - * detected. + * The generated video */ - num_speakers?: number; + video: File; }; -export type WhisperOutput = { +export type VideoOutput = { /** - * Transcription of the audio file + * The generated video */ - text: string; + video: File; +}; +export type HaiperVideoV2ImageToVideoInput = { /** - * Timestamp chunks of the audio file + * */ - chunks?: Array; + prompt: string; /** - * List of languages that the audio file is inferred to be. Defaults to null. 
+ * The duration of the generated video in seconds Default value: `"4"` */ - inferred_languages: Array< - | "af" - | "am" - | "ar" - | "as" - | "az" - | "ba" - | "be" - | "bg" - | "bn" - | "bo" - | "br" - | "bs" - | "ca" - | "cs" - | "cy" - | "da" - | "de" - | "el" - | "en" - | "es" - | "et" - | "eu" - | "fa" - | "fi" - | "fo" - | "fr" - | "gl" - | "gu" - | "ha" - | "haw" - | "he" - | "hi" - | "hr" - | "ht" - | "hu" - | "hy" - | "id" - | "is" - | "it" - | "ja" - | "jw" - | "ka" - | "kk" - | "km" - | "kn" - | "ko" - | "la" - | "lb" - | "ln" - | "lo" - | "lt" - | "lv" - | "mg" - | "mi" - | "mk" - | "ml" - | "mn" - | "mr" - | "ms" - | "mt" - | "my" - | "ne" - | "nl" - | "nn" - | "no" - | "oc" - | "pa" - | "pl" - | "ps" - | "pt" - | "ro" - | "ru" - | "sa" - | "sd" - | "si" - | "sk" - | "sl" - | "sn" - | "so" - | "sq" - | "sr" - | "su" - | "sv" - | "sw" - | "ta" - | "te" - | "tg" - | "th" - | "tk" - | "tl" - | "tr" - | "tt" - | "uk" - | "ur" - | "uz" - | "vi" - | "yi" - | "yo" - | "yue" - | "zh" - >; - /** - * Speaker diarization segments of the audio file. Only present if diarization is enabled. - */ - diarization_segments: Array; -}; -export type WizperInput = { + duration?: "4" | "6"; /** - * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. + * Whether to use the model's prompt enhancer Default value: `true` */ - audio_url: string | Blob | File; + prompt_enhancer?: boolean; /** - * Task to perform on the audio file. Either transcribe or translate. Default value: `"transcribe"` + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ - task?: "transcribe" | "translate"; + seed?: number; /** - * Language of the audio file. - * If translate is selected as the task, the audio will be translated to - * English, regardless of the language selected. Default value: `"en"` + * URL of the image to use as the first frame */ - language?: - | "af" - | "am" - | "ar" - | "as" - | "az" - | "ba" - | "be" - | "bg" - | "bn" - | "bo" - | "br" - | "bs" - | "ca" - | "cs" - | "cy" - | "da" - | "de" - | "el" - | "en" - | "es" - | "et" - | "eu" - | "fa" - | "fi" - | "fo" - | "fr" - | "gl" - | "gu" - | "ha" - | "haw" - | "he" - | "hi" - | "hr" - | "ht" - | "hu" - | "hy" - | "id" - | "is" - | "it" - | "ja" - | "jw" - | "ka" - | "kk" - | "km" - | "kn" - | "ko" - | "la" - | "lb" - | "ln" - | "lo" - | "lt" - | "lv" - | "mg" - | "mi" - | "mk" - | "ml" - | "mn" - | "mr" - | "ms" - | "mt" - | "my" - | "ne" - | "nl" - | "nn" - | "no" - | "oc" - | "pa" - | "pl" - | "ps" - | "pt" - | "ro" - | "ru" - | "sa" - | "sd" - | "si" - | "sk" - | "sl" - | "sn" - | "so" - | "sq" - | "sr" - | "su" - | "sv" - | "sw" - | "ta" - | "te" - | "tg" - | "th" - | "tk" - | "tl" - | "tr" - | "tt" - | "uk" - | "ur" - | "uz" - | "vi" - | "yi" - | "yo" - | "yue" - | "zh"; + image_url: string | Blob | File; +}; +export type MochiV1Output = { /** - * Level of the chunks to return. Default value: `"segment"` + * The generated video */ - chunk_level?: "segment"; + video: File; +}; +export type MochiV1Input = { /** - * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + * The prompt to generate a video from. */ - version?: "3"; -}; -export type WizperOutput = { + prompt: string; /** - * Transcription of the audio file + * The seed to use for generating the video. */ - text: string; + seed?: number; /** - * Timestamp chunks of the audio file + * Whether to enable prompt expansion. 
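A sketch of the Haiper image-to-video input defined above; note that duration is a string enum, not a number (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { HaiperVideoV2ImageToVideoInput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: HaiperVideoV2ImageToVideoInput = {
  prompt: "The camera slowly pans across a foggy mountain lake",
  image_url: "https://example.com/first-frame.jpg",
  duration: "6", // "4" | "6"; defaults to "4"
  seed: 42, // same seed + prompt + model version reproduces the video
};

await fal.subscribe("fal-ai/haiper-video-v2/image-to-video", { input }); // endpoint id assumed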
Default value: `true` */ - chunks: Array; + enable_prompt_expansion?: boolean; }; -export type FastLightningSdxlOutput = { +export type LumaDreamMachineInput = { /** - * The generated image files info. + * */ - images: Array; + prompt: string; /** - * + * The aspect ratio of the generated video Default value: `"16:9"` */ - timings: Record; + aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Whether the video should loop (end of video is blended with the beginning) */ - seed: number; + loop?: boolean; +}; +export type LumaDreamMachineOutput = { /** - * Whether the generated images contain NSFW concepts. + * The generated video */ - has_nsfw_concepts: Array; + video: File; +}; +export type LumaDreamMachineImageToVideoInput = { /** - * The prompt used for generating the image. + * */ prompt: string; -}; -export type FastLightningSdxlInput = { /** - * The URL of the image to use as a starting point for the generation. + * */ image_url: string | Blob | File; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. - */ - prompt: string; - /** - * The size of the generated image. Default value: `square_hd` - */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `"4"` + * An image to blend the end of the video with */ - num_inference_steps?: "1" | "2" | "4" | "8"; - /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; + end_image_url?: string | Blob | File; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The aspect ratio of the generated video Default value: `"16:9"` */ - seed?: number; + aspect_ratio?: "16:9" | "9:16" | "4:3" | "3:4" | "21:9" | "9:21"; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Whether the video should loop (end of video is blended with the beginning) */ - sync_mode?: boolean; + loop?: boolean; +}; +export type LumaDreamMachineImageToVideoOutput = { /** - * The number of images to generate. Default value: `1` + * The generated video */ - num_images?: number; + video: File; +}; +export type T2VOutput = { /** - * The list of embeddings to use. Default value: `` + * The generated video */ - embeddings?: Array; + video: File; +}; +export type KlingVideoV1StandardTextToVideoInput = { /** - * If set to true, the safety checker will be enabled. + * */ - enable_safety_checker?: boolean; + prompt: string; /** - * If set to true, the prompt will be expanded with additional prompts. + * The duration of the generated video in seconds Default value: `"5"` */ - expand_prompt?: boolean; + duration?: "5" | "10"; /** - * The format of the generated image. 
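The Luma Dream Machine image-to-video input above pairs a start frame with an optional end frame to blend into; a sketch (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { LumaDreamMachineImageToVideoInput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: LumaDreamMachineImageToVideoInput = {
  prompt: "A paper boat drifting down a rain-soaked street",
  image_url: "https://example.com/start.jpg",
  end_image_url: "https://example.com/end.jpg", // optional: blend the ending into this frame
  aspect_ratio: "16:9",
  loop: true, // blend the end of the video back into the beginning
};

await fal.subscribe("fal-ai/luma-dream-machine/image-to-video", { input }); // endpoint id assumed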
Default value: `"jpeg"` + * The aspect ratio of the generated video frame Default value: `"16:9"` */ - format?: "jpeg" | "png"; + aspect_ratio?: "16:9" | "9:16" | "1:1"; }; -export type FastLightningSdxlImageToImageOutput = { +export type KlingVideoV1StandardTextToVideoOutput = { /** - * The generated image files info. + * The generated video */ - images: Array; + video: File; +}; +export type KlingVideoV1StandardImageToVideoInput = { /** * */ - timings: Record; + prompt: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * */ - seed: number; + image_url: string | Blob | File; /** - * Whether the generated images contain NSFW concepts. + * The duration of the generated video in seconds Default value: `"5"` */ - has_nsfw_concepts: Array; + duration?: "5" | "10"; /** - * The prompt used for generating the image. + * The aspect ratio of the generated video frame Default value: `"16:9"` */ - prompt: string; + aspect_ratio?: "16:9" | "9:16" | "1:1"; }; -export type FastLightningSdxlImageToImageInput = { +export type KlingVideoV1StandardImageToVideoOutput = { /** - * The URL of the image to use as a starting point for the generation. + * The generated video */ - image_url: string | Blob | File; + video: File; +}; +export type KlingVideoV1ProTextToVideoInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * */ prompt: string; /** - * The size of the generated image. Default value: `square_hd` - */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `"4"` - */ - num_inference_steps?: "1" | "2" | "4" | "8"; - /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The duration of the generated video in seconds Default value: `"5"` */ - strength?: number; + duration?: "5" | "10"; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The aspect ratio of the generated video frame Default value: `"16:9"` */ - seed?: number; + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1ProTextToVideoOutput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The generated video */ - sync_mode?: boolean; + video: File; +}; +export type KlingVideoV1ProImageToVideoInput = { /** - * The number of images to generate. Default value: `1` + * */ - num_images?: number; + prompt: string; /** - * The list of embeddings to use. Default value: `` + * */ - embeddings?: Array; + image_url: string | Blob | File; /** - * If set to true, the safety checker will be enabled. + * The duration of the generated video in seconds Default value: `"5"` */ - enable_safety_checker?: boolean; + duration?: "5" | "10"; /** - * If set to true, the prompt will be expanded with additional prompts. + * The aspect ratio of the generated video frame Default value: `"16:9"` */ - expand_prompt?: boolean; + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV1ProImageToVideoOutput = { /** - * The format of the generated image. 
Default value: `"jpeg"` + * The generated video */ - format?: "jpeg" | "png"; + video: File; }; -export type FastLightningSdxlInpaintingOutput = { +export type Cogvideox5bOutput = { /** - * The generated image files info. + * The URL to the generated video */ - images: Array; + video: File; /** * */ - timings: Record; + timings: any; /** - * Seed of the generated Image. It will be the same value of the one passed in the + * Seed of the generated video. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; - /** - * The prompt used for generating the image. + * The prompt used for generating the video. */ prompt: string; }; -export type FastLightningSdxlInpaintingInput = { +export type ImageToVideoInput = { /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; - /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to generate the video from. */ prompt: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `[object Object]` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5083,85 +4647,96 @@ export type FastLightningSdxlInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `"4"` + * The negative prompt to generate video from Default value: `""` */ - num_inference_steps?: "1" | "2" | "4" | "8"; + negative_prompt?: string; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of inference steps to perform. Default value: `50` */ - strength?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ seed?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ - sync_mode?: boolean; + guidance_scale?: number; /** - * The number of images to generate. Default value: `1` + * Use RIFE for video interpolation Default value: `true` */ - num_images?: number; + use_rife?: boolean; /** - * The list of embeddings to use. Default value: `` + * The target FPS of the video Default value: `16` */ - embeddings?: Array; + export_fps?: number; /** - * If set to true, the safety checker will be enabled. + * The URL to the image to generate the video from. */ - enable_safety_checker?: boolean; + image_url: string | Blob | File; +}; +export type VideoToVideoInput = { /** - * If set to true, the prompt will be expanded with additional prompts. + * The prompt to generate the video from. */ - expand_prompt?: boolean; + prompt: string; /** - * The format of the generated image. Default value: `"jpeg"` + * The size of the generated video. 
Default value: `[object Object]` */ - format?: "jpeg" | "png"; -}; -export type HyperSdxlOutput = { + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The generated image files info. + * The negative prompt to generate video from Default value: `""` */ - images: Array; + negative_prompt?: string; /** - * + * The number of inference steps to perform. Default value: `50` */ - timings: Record; + num_inference_steps?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ - seed: number; + seed?: number; /** - * Whether the generated images contain NSFW concepts. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ - has_nsfw_concepts: Array; + guidance_scale?: number; /** - * The prompt used for generating the image. + * Use RIFE for video interpolation Default value: `true` */ - prompt: string; -}; -export type HyperSdxlInput = { + use_rife?: boolean; /** - * The URL of the image to use as a starting point for the generation. + * The target FPS of the video Default value: `16` */ - image_url: string | Blob | File; + export_fps?: number; /** - * The URL of the mask to use for inpainting. + * The video to generate the video from. */ - mask_url: string | Blob | File; + video_url: string | Blob | File; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The strength to use for Video to Video. 1.0 completely remakes the video while 0.0 preserves the original. Default value: `0.8` */ - prompt: string; + strength?: number; +}; +export type Cogvideox5bInput = { /** - * The size of the generated image. Default value: `square_hd` + * The prompt to generate the video from. */ - image_size?: + prompt: string; + /** + * The size of the generated video. Default value: `[object Object]` + */ + video_size?: | ImageSize | "square_hd" | "square" @@ -5170,85 +4745,60 @@ export type HyperSdxlInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `"1"` + * The negative prompt to generate video from Default value: `""` */ - num_inference_steps?: "1" | "2" | "4"; + negative_prompt?: string; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of inference steps to perform. Default value: `50` */ - strength?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ seed?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; - /** - * The list of embeddings to use. 
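ImageToVideoInput above is the CogVideoX-flavored image-to-video shape; a sketch showing the interpolation and FPS knobs at their documented defaults (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { ImageToVideoInput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: ImageToVideoInput = {
  prompt: "Waves crashing against a lighthouse at dusk",
  image_url: "https://example.com/lighthouse.jpg",
  num_inference_steps: 50, // default
  guidance_scale: 7, // default
  use_rife: true, // RIFE frame interpolation, on by default
  export_fps: 16, // default target FPS
};

await fal.subscribe("fal-ai/cogvideox-5b/image-to-video", { input }); // endpoint id assumed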
Default value: `` - */ - embeddings?: Array; - /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ - enable_safety_checker?: boolean; + guidance_scale?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * Use RIFE for video interpolation Default value: `true` */ - expand_prompt?: boolean; + use_rife?: boolean; /** - * The format of the generated image. Default value: `"jpeg"` + * The target FPS of the video Default value: `16` */ - format?: "jpeg" | "png"; + export_fps?: number; }; -export type HyperSdxlImageToImageOutput = { +export type Cogvideox5bVideoToVideoOutput = { /** - * The generated image files info. + * The URL to the generated video */ - images: Array; + video: File; /** * */ - timings: Record; + timings: any; /** - * Seed of the generated Image. It will be the same value of the one passed in the + * Seed of the generated video. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; - /** - * The prompt used for generating the image. + * The prompt used for generating the video. */ prompt: string; }; -export type HyperSdxlImageToImageInput = { - /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; - /** - * The URL of the mask to use for inpainting. - */ - mask_url: string | Blob | File; +export type Cogvideox5bVideoToVideoInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to generate the video from. */ prompt: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `[object Object]` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5257,85 +4807,49 @@ export type HyperSdxlImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `"1"` + * The negative prompt to generate video from Default value: `""` */ - num_inference_steps?: "1" | "2" | "4"; + negative_prompt?: string; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of inference steps to perform. Default value: `50` */ - strength?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ seed?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; - /** - * The list of embeddings to use. Default value: `` - */ - embeddings?: Array; - /** - * If set to true, the safety checker will be enabled. 
Default value: `true` - */ - enable_safety_checker?: boolean; - /** - * If set to true, the prompt will be expanded with additional prompts. - */ - expand_prompt?: boolean; - /** - * The format of the generated image. Default value: `"jpeg"` - */ - format?: "jpeg" | "png"; -}; -export type HyperSdxlInpaintingOutput = { - /** - * The generated image files info. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ - images: Array; + guidance_scale?: number; /** - * + * Use RIFE for video interpolation Default value: `true` */ - timings: Record; + use_rife?: boolean; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The target FPS of the video Default value: `16` */ - seed: number; + export_fps?: number; /** - * Whether the generated images contain NSFW concepts. + * The video to generate the video from. */ - has_nsfw_concepts: Array; + video_url: string | Blob | File; /** - * The prompt used for generating the image. + * The strength to use for Video to Video. 1.0 completely remakes the video while 0.0 preserves the original. Default value: `0.8` */ - prompt: string; + strength?: number; }; -export type HyperSdxlInpaintingInput = { - /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; - /** - * The URL of the mask to use for inpainting. - */ - mask_url: string | Blob | File; +export type BaseInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to generate the video from. */ prompt: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `[object Object]` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5344,83 +4858,60 @@ export type HyperSdxlInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `"1"` + * The negative prompt to generate video from Default value: `""` */ - num_inference_steps?: "1" | "2" | "4"; + negative_prompt?: string; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of inference steps to perform. Default value: `50` */ - strength?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ seed?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; - /** - * The list of embeddings to use. Default value: `` - */ - embeddings?: Array; - /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related video to show you. 
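For the video-to-video variant above, strength is the key dial: 1.0 completely remakes the source video, 0.0 preserves it. A sketch (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { Cogvideox5bVideoToVideoInput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: Cogvideox5bVideoToVideoInput = {
  prompt: "The same scene, restyled as a watercolor painting",
  video_url: "https://example.com/source.mp4",
  strength: 0.8, // default; lower values stay closer to the source video
};

await fal.subscribe("fal-ai/cogvideox-5b/video-to-video", { input }); // endpoint id assumed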
Default value: `7` */ - enable_safety_checker?: boolean; + guidance_scale?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * Use RIFE for video interpolation Default value: `true` */ - expand_prompt?: boolean; + use_rife?: boolean; /** - * The format of the generated image. Default value: `"jpeg"` + * The target FPS of the video Default value: `16` */ - format?: "jpeg" | "png"; + export_fps?: number; }; -export type PlaygroundV25Output = { +export type Cogvideox5bImageToVideoOutput = { /** - * The generated image files info. + * The URL to the generated video */ - images: Array; + video: File; /** * */ - timings: Record; + timings: any; /** - * Seed of the generated Image. It will be the same value of the one passed in the + * Seed of the generated video. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * Whether the generated images contain NSFW concepts. - */ - has_nsfw_concepts: Array; - /** - * The prompt used for generating the image. + * The prompt used for generating the video. */ prompt: string; }; -export type PlaygroundV25Input = { +export type Cogvideox5bImageToVideoInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to generate the video from. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` - */ - negative_prompt?: string; - /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `[object Object]` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5429,86 +4920,76 @@ export type PlaygroundV25Input = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `25` + * The negative prompt to generate video from Default value: `""` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. Default value: `50` */ num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The same seed and the same prompt given to the same version of the model + * will output the same video every time. */ seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + * the model to stick to your prompt when looking for a related video to show you. Default value: `7` */ guidance_scale?: number; /** - * The number of images to generate. Default value: `1` + * Use RIFE for video interpolation Default value: `true` */ - num_images?: number; + use_rife?: boolean; /** - * The list of embeddings to use. Default value: `` + * The target FPS of the video Default value: `16` */ - embeddings?: Array; + export_fps?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The URL to the image to generate the video from. */ - enable_safety_checker?: boolean; + image_url: string | Blob | File; +}; +export type StableVideoInput = { /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. 
Default value: `"v1"` + * The URL of the image to use as a starting point for the generation. */ - safety_checker_version?: "v1" | "v2"; + image_url: string | Blob | File; /** - * If set to true, the prompt will be expanded with additional prompts. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - expand_prompt?: boolean; + seed?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` */ - format?: "jpeg" | "png"; + motion_bucket_id?: number; /** - * The rescale factor for the CFG. - */ - guidance_rescale?: number; -}; -export type PlaygroundV25ImageToImageOutput = { - /** - * The generated image files info. - */ - images: Array; - /** - * - */ - timings: Record; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed: number; - /** - * Whether the generated images contain NSFW concepts. + * The conditoning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` */ - has_nsfw_concepts: Array; + cond_aug?: number; /** - * The prompt used for generating the image. + * The frames per second of the generated video. Default value: `25` */ - prompt: string; + fps?: number; }; -export type PlaygroundV25ImageToImageInput = { +export type TextInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt to use as a starting point for the generation. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The negative prompt to use as a starting point for the generation. Default value: `"unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label"` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `landscape_16_9` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5516,87 +4997,80 @@ export type PlaygroundV25ImageToImageInput = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `25` - */ - num_inference_steps?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3` - */ - guidance_scale?: number; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; - /** - * The list of embeddings to use. Default value: `` - */ - embeddings?: Array; - /** - * If set to true, the safety checker will be enabled. 
Default value: `true`
+ * The motion bucket id determines the motion of the generated video. The
+ * higher the number, the more motion there will be. Default value: `127`
 */
- enable_safety_checker?: boolean;
+ motion_bucket_id?: number;
 /**
- * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+ * The conditioning augmentation determines the amount of noise that will be
+ * added to the conditioning frame. The higher the number, the more noise
+ * there will be, and the less the video will look like the initial image.
+ * Increase it for more motion. Default value: `0.02`
 */
- safety_checker_version?: "v1" | "v2";
+ cond_aug?: number;
+};
+export type StableVideoOutput = {
 /**
- * If set to true, the prompt will be expanded with additional prompts.
+ * Generated video
 */
- expand_prompt?: boolean;
+ video: File;
 /**
- * The format of the generated image. Default value: `"jpeg"`
+ * Seed for random number generator
 */
- format?: "jpeg" | "png";
+ seed: number;
+};
+export type FastSvdTextToVideoInput = {
 /**
- * The rescale factor for the CFG.
+ * The prompt to use as a starting point for the generation.
 */
- guidance_rescale?: number;
-};
-export type PlaygroundV25InpaintingOutput = {
+ prompt: string;
 /**
- * The generated image files info.
+ * The motion bucket id determines the motion of the generated video. The
+ * higher the number, the more motion there will be. Default value: `127`
 */
- images: Array;
+ motion_bucket_id?: number;
 /**
- *
+ * The conditioning augmentation determines the amount of noise that will be
+ * added to the conditioning frame. The higher the number, the more noise
+ * there will be, and the less the video will look like the initial image.
+ * Increase it for more motion. Default value: `0.02`
 */
- timings: Record;
+ cond_aug?: number;
 /**
- * Seed of the generated Image. It will be the same value of the one passed in the
- * input or the randomly generated that was used in case none was passed.
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
 */
- seed: number;
+ seed?: number;
 /**
- * Whether the generated images contain NSFW concepts.
+ * The number of steps to run the model for. The higher the number the better
+ * the quality and longer it will take to generate. Default value: `20`
 */
- has_nsfw_concepts: Array;
+ steps?: number;
 /**
- * The prompt used for generating the image.
+ * Enabling [DeepCache](https://github.com/horseee/DeepCache) will make the execution
+ * faster, but might sometimes degrade overall quality. The higher the setting, the
+ * faster the execution will be, but the more quality might be lost. Default value: `"none"`
 */
- prompt: string;
-};
-export type PlaygroundV25InpaintingInput = {
+ deep_cache?: "none" | "minimum" | "medium" | "high";
 /**
- * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ * The FPS of the generated video. The higher the number, the faster the video will
+ * play. Total video length is 25 frames. Default value: `10`
 */
- prompt: string;
+ fps?: number;
 /**
- * The negative prompt to use. Use it to address details that you don't want
- * in the image. This could be colors, objects, scenery and even the small details
- * (e.g. moustache, blurry, low resolution). Default value: `""`
+ * The negative prompt to use as a starting point for the generation.
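StableVideoInput/StableVideoOutput above are the Stable Video Diffusion image-to-video pair, with motion_bucket_id and cond_aug as the motion dials. A sketch (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { StableVideoInput, StableVideoOutput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: StableVideoInput = {
  image_url: "https://example.com/still.jpg",
  motion_bucket_id: 180, // above the default 127 for more motion
  cond_aug: 0.05, // more conditioning noise also increases motion
  fps: 25, // default
};

const { data } = await fal.subscribe("fal-ai/stable-video", { input }); // endpoint id assumed
console.log((data as StableVideoOutput).seed);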
Default value: `"unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label"` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated video. Default value: `landscape_16_9` */ - image_size?: + video_size?: | ImageSize | "square_hd" | "square" @@ -5604,340 +5078,363 @@ export type PlaygroundV25InpaintingInput = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; +}; +export type FastSvdTextToVideoOutput = { /** - * The number of inference steps to perform. Default value: `25` + * The generated video file. */ - num_inference_steps?: number; + video: File; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - seed?: number; + seed: number; +}; +export type FastSVDImageInput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + * The URL of the image to use as a starting point for the generation. */ - guidance_scale?: number; + image_url: string | Blob | File; /** - * The number of images to generate. Default value: `1` + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. Default value: `127` */ - num_images?: number; + motion_bucket_id?: number; /** - * The list of embeddings to use. Default value: `` + * The conditoning augmentation determines the amount of noise that will be + * added to the conditioning frame. The higher the number, the more noise + * there will be, and the less the video will look like the initial image. + * Increase it for more motion. Default value: `0.02` */ - embeddings?: Array; + cond_aug?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - enable_safety_checker?: boolean; + seed?: number; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * The number of steps to run the model for. The higher the number the better + * the quality and longer it will take to generate. Default value: `4` */ - safety_checker_version?: "v1" | "v2"; + steps?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * The FPS of the generated video. The higher the number, the faster the video will + * play. Total video length is 25 frames. Default value: `10` */ - expand_prompt?: boolean; + fps?: number; +}; +export type FastSVDTextInput = { /** - * The format of the generated image. Default value: `"jpeg"` + * The prompt to use as a starting point for the generation. */ - format?: "jpeg" | "png"; + prompt: string; /** - * The rescale factor for the CFG. + * The motion bucket id determines the motion of the generated video. The + * higher the number, the more motion there will be. 
Default value: `127`
 */
- guidance_rescale?: number;
-};
-export type AmtInterpolationOutput = {
+ motion_bucket_id?: number;
 /**
- * Generated video
+ * The conditioning augmentation determines the amount of noise that will be
+ * added to the conditioning frame. The higher the number, the more noise
+ * there will be, and the less the video will look like the initial image.
+ * Increase it for more motion. Default value: `0.02`
 */
- video: File;
-};
-export type AmtInterpolationInput = {
+ cond_aug?: number;
 /**
- * Frames to interpolate
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
 */
- frames: Array;
+ seed?: number;
 /**
- * Output frames per second Default value: `24`
+ * The number of steps to run the model for. The higher the number the better
+ * the quality and longer it will take to generate. Default value: `4`
 */
- output_fps?: number;
+ steps?: number;
 /**
- * Number of recursive interpolation passes Default value: `4`
+ * The FPS of the generated video. The higher the number, the faster the video will
+ * play. Total video length is 25 frames. Default value: `10`
 */
- recursive_interpolation_passes?: number;
-};
-export type AmtInterpolationFrameInterpolationOutput = {
+ fps?: number;
 /**
- * Generated video
+ * The size of the generated video. Default value: `landscape_16_9`
 */
- video: File;
+ video_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
 };
-export type AmtInterpolationFrameInterpolationInput = {
- /**
- * Frames to interpolate
- */
- frames: Array;
+export type FastSvdLcmOutput = {
 /**
- * Output frames per second Default value: `24`
+ * The generated video file.
 */
- output_fps?: number;
+ video: File;
 /**
- * Number of recursive interpolation passes Default value: `4`
+ * Seed of the generated Image. It will be the same value of the one passed in the
+ * input or the randomly generated that was used in case none was passed.
 */
- recursive_interpolation_passes?: number;
+ seed: number;
 };
-export type T2vTurboInput = {
+export type FastSvdLcmInput = {
 /**
- * The prompt to generate images from
+ * The URL of the image to use as a starting point for the generation.
 */
- prompt: string;
+ image_url: string | Blob | File;
 /**
- * The seed to use for the random number generator
+ * The motion bucket id determines the motion of the generated video. The
+ * higher the number, the more motion there will be. Default value: `127`
 */
- seed?: number | null;
+ motion_bucket_id?: number;
 /**
- * The number of steps to sample Default value: `4`
+ * The conditioning augmentation determines the amount of noise that will be
+ * added to the conditioning frame. The higher the number, the more noise
+ * there will be, and the less the video will look like the initial image.
+ * Increase it for more motion. Default value: `0.02`
 */
- num_inference_steps?: number;
+ cond_aug?: number;
 /**
- * The guidance scale Default value: `7.5`
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
 */
- guidance_scale?: number;
+ seed?: number;
 /**
- * The number of frames to generate Default value: `16`
+ * The number of steps to run the model for. The higher the number the better
+ * the quality and longer it will take to generate.
Default value: `4` */ - num_frames?: number; + steps?: number; /** - * The FPS of the exported video Default value: `8` + * The FPS of the generated video. The higher the number, the faster the video will + * play. Total video length is 25 frames. Default value: `10` */ - export_fps?: number; + fps?: number; }; -export type T2vTurboOutput = { +export type BirefnetInput = { /** - * The URL to the generated video + * URL of the image to remove background from */ - video: File; -}; -export type Sd15DepthControlnetOutput = { + image_url: string | Blob | File; /** - * The generated image files info. + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases. + * + * The corresponding models are as follows: + * - 'General Use (Light)': BiRefNet-DIS_ep580.pth + * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth + * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` */ - images: Array; + model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait"; /** - * + * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` */ - timings: Record; + operating_resolution?: "1024x1024" | "2048x2048"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The format of the output image Default value: `"png"` */ - seed: number; + output_format?: "webp" | "png"; /** - * Whether the generated images contain NSFW concepts. + * Whether to output the mask used to remove the background */ - has_nsfw_concepts: Array; + output_mask?: boolean; /** - * The prompt used for generating the image. + * Whether to refine the foreground using the estimated mask Default value: `true` */ - prompt: string; + refine_foreground?: boolean; }; -export type Sd15DepthControlnetInput = { +export type BirefnetOutput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Image with background removed */ - prompt: string; + image: Image; /** - * The URL of the control image. + * Mask used to remove the background */ - control_image_url: string | Blob | File; + mask_image?: Image; +}; +export type Input = { /** - * The scale of the controlnet conditioning. Default value: `0.5` + * URL of the image to remove background from */ - controlnet_conditioning_scale?: number; + image_url: string | Blob | File; /** - * The URL of the image to use as a starting point for the generation. + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases. 
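A sketch for the BiRefNet background-removal types above (endpoint id assumed); output_mask adds the binary mask to the response:

import { createFalClient } from "@fal-ai/client";
import type { BirefnetInput, BirefnetOutput } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: BirefnetInput = {
  image_url: "https://example.com/portrait.jpg",
  model: "Portrait", // "General Use (Light)" is the default and usually enough
  operating_resolution: "2048x2048", // higher is more accurate for high-res inputs
  output_mask: true,
};

const { data } = await fal.subscribe("fal-ai/birefnet", { input }); // endpoint id assumed
const { image, mask_image } = data as BirefnetOutput; // mask_image present only when requested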
+ * + * The corresponding models are as follows: + * - 'General Use (Light)': BiRefNet-DIS_ep580.pth + * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth + * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"` */ - image_url: string | Blob | File; + model?: "General Use (Light)" | "General Use (Heavy)" | "Portrait"; /** - * The URL of the mask to use for inpainting. + * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"` */ - mask_url: string | Blob | File; + operating_resolution?: "1024x1024" | "2048x2048"; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The format of the output image Default value: `"png"` */ - negative_prompt?: string; + output_format?: "webp" | "png"; /** - * The size of the generated image. Leave it none to automatically infer from the control image. + * Whether to output the mask used to remove the background */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + output_mask?: boolean; /** - * The number of inference steps to perform. Default value: `35` + * Whether to refine the foreground using the estimated mask Default value: `true` */ - num_inference_steps?: number; + refine_foreground?: boolean; +}; +export type BirefnetV2Output = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * Image with background removed */ - guidance_scale?: number; + image: Image; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * Mask used to remove the background */ - strength?: number; + mask_image?: Image; +}; +export type BirefnetV2Input = { /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * URL of the image to remove background from */ - seed?: number; + image_url: string | Blob | File; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Model to use for background removal. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository. + * The 'General Use (Light)' model is the original model used in the BiRefNet repository but trained with 2K images. + * The 'General Use (Heavy)' model is a slower but more accurate model. + * The 'Matting' model is a model trained specifically for matting images. + * The 'Portrait' model is a model trained specifically for portrait images. + * The 'General Use (Light)' model is recommended for most use cases. 
+ *
+ * The corresponding models are as follows:
+ * - 'General Use (Light)': BiRefNet-DIS_ep580.pth
+ * - 'General Use (Heavy)': BiRefNet-massive-epoch_240.pth
+ * - 'Portrait': BiRefNet-portrait-TR_P3M_10k-epoch_120.pth Default value: `"General Use (Light)"`
 */
- sync_mode?: boolean;
+ model?:
+ | "General Use (Light)"
+ | "General Use (Light 2K)"
+ | "General Use (Heavy)"
+ | "Matting"
+ | "Portrait";
 /**
- * The number of images to generate. Default value: `1`
+ * The resolution to operate on. The higher the resolution, the more accurate the output will be for high res input images. Default value: `"1024x1024"`
 */
- num_images?: number;
+ operating_resolution?: "1024x1024" | "2048x2048";
 /**
- * The list of LoRA weights to use. Default value: ``
+ * The format of the output image Default value: `"png"`
 */
- loras?: Array;
+ output_format?: "webp" | "png";
 /**
- * If set to true, the safety checker will be enabled.
+ * Whether to output the mask used to remove the background
 */
- enable_safety_checker?: boolean;
+ output_mask?: boolean;
 /**
- * If set to true, the prompt will be expanded with additional prompts.
+ * Whether to refine the foreground using the estimated mask Default value: `true`
 */
- expand_prompt?: boolean;
+ refine_foreground?: boolean;
 };
-export type PhotomakerOutput = {
+export type FastSvdLcmTextToVideoInput = {
 /**
- *
+ * The prompt to use as a starting point for the generation.
 */
- images: Array;
+ prompt: string;
 /**
- *
+ * The motion bucket id determines the motion of the generated video. The
+ * higher the number, the more motion there will be. Default value: `127`
 */
- seed: number;
-};
-export type PhotomakerInput = {
+ motion_bucket_id?: number;
 /**
- * The URL of the image archive containing the images you want to use.
+ * The conditioning augmentation determines the amount of noise that will be
+ * added to the conditioning frame. The higher the number, the more noise
+ * there will be, and the less the video will look like the initial image.
+ * Increase it for more motion. Default value: `0.02`
 */
- image_archive_url: string | Blob | File;
+ cond_aug?: number;
 /**
- * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
 */
- prompt: string;
+ seed?: number;
 /**
- * The base pipeline to use for generating the image. Default value: `"photomaker"`
+ * The number of steps to run the model for. The higher the number the better
+ * the quality and longer it will take to generate. Default value: `4`
 */
- base_pipeline?: "photomaker" | "photomaker-style";
+ steps?: number;
 /**
- * Optional initial image for img2img
+ * The FPS of the generated video. The higher the number, the faster the video will
+ * play. Total video length is 25 frames. Default value: `10`
 */
- initial_image_url?: string | Blob | File;
+ fps?: number;
 /**
- * How much noise to add to the latent image. O for no noise, 1 for maximum noise. Default value: `0.5`
+ * The size of the generated video. Default value: `landscape_16_9`
 */
- initial_image_strength?: number;
+ video_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
+};
+export type FastSvdLcmTextToVideoOutput = {
 /**
- * Default value: `"Photographic"`
+ * The generated video file.
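BirefnetV2Input above extends the v1 enum with 'General Use (Light 2K)' and 'Matting'; a sketch (endpoint id assumed):

import { createFalClient } from "@fal-ai/client";
import type { BirefnetV2Input } from "@fal-ai/client"; // assumed re-export

const fal = createFalClient();

const input: BirefnetV2Input = {
  image_url: "https://example.com/product.png",
  model: "Matting", // v2-only option, trained specifically for matting
  output_format: "webp",
  refine_foreground: true, // default: refine the foreground using the estimated mask
};

await fal.subscribe("fal-ai/birefnet/v2", { input }); // endpoint id assumed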
 */
- style?:
- | "(No style)"
- | "Cinematic"
- | "Disney Character"
- | "Digital Art"
- | "Photographic"
- | "Fantasy art"
- | "Neonpunk"
- | "Enhance"
- | "Comic book"
- | "Lowpoly"
- | "Line art";
+ video: File;
 /**
- * The negative prompt to use.Use it to address details that you don't want
- * in the image. This could be colors, objects, scenery and even the small details
- * (e.g. moustache, blurry, low resolution). Default value: `""`
+ * Seed of the generated Image. It will be the same value of the one passed in the
+ * input or the randomly generated that was used in case none was passed.
 */
- negative_prompt?: string;
+ seed: number;
+};
+export type CreativeUpscalerInput = {
 /**
- * Increasing the amount of steps tells Stable Diffusion that it should take more steps
- * to generate your final result which can increase the amount of detail in your image. Default value: `50`
+ * The type of model to use for the upscaling. Default is SD_1_5 Default value: `"SD_1_5"`
 */
- num_inference_steps?: number;
+ model_type?: "SD_1_5" | "SDXL";
 /**
- * Default value: `20`
+ * The image to upscale.
 */
- style_strength?: number;
+ image_url: string | Blob | File;
 /**
- * Number of images to generate in one request. Note that the higher the batch size,
- * the longer it will take to generate the images. Default value: `1`
+ * The prompt to use for generating the image. Be as descriptive as possible for best results. If no prompt is provided BLIP2 will be used to generate a prompt.
 */
- num_images?: number;
+ prompt?: string;
 /**
- * The CFG (Classifier Free Guidance) scale is a measure of how close you want
- * the model to stick to your prompt when looking for a related image to show you. Default value: `5`
+ * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2`
 */
- guidance_scale?: number;
+ scale?: number;
 /**
- * The same seed and the same prompt given to the same version of Stable Diffusion
- * will output the same image every time.
+ * How much the output can deviate from the original Default value: `0.5`
 */
- seed?: number;
-};
-export type LcmInput = {
- /**
- * The model to use for generating the image. Default value: `"sdv1-5"`
- */
- model?: "sdxl" | "sdv1-5";
- /**
- * The prompt to use for generating the image. Be as descriptive as possible for best results.
- */
- prompt: string;
+ creativity?: number;
 /**
- * The base image to use for guiding the image generation on image-to-image
- * generations. If the either width or height of the image is larger than 1024
- * pixels, the image will be resized to 1024 pixels while keeping the aspect ratio.
+ * How much detail to add Default value: `1`
 */
- image_url?: string | Blob | File;
+ detail?: number;
 /**
- * The mask to use for guiding the image generation on image
- * inpainting. The model will focus on the mask area and try to fill it with
- * the most relevant content.
- *
- * The mask must be a black and white image where the white area is the area
- * that needs to be filled and the black area is the area that should be
- * ignored.
- *
- * The mask must have the same dimensions as the image passed as `image_url`.
+ * How much to preserve the shape of the original image Default value: `0.25`
 */
- mask_url?: string | Blob | File;
+ shape_preservation?: number;
 /**
- * The strength of the image that is passed as `image_url`. The strength
- * determines how much the generated image will be similar to the image passed as
- * `image_url`.
The higher the strength the more model gets "creative" and - * generates an image that's different from the initial image. A strength of 1.0 - * means that the initial image is more or less ignored and the model will try to - * generate an image that's as close as possible to the prompt. Default value: `0.8` + * The suffix to add to the generated prompt. Not used for a custom prompt. This is useful to add a common ending to all prompts such as 'high quality' etc or embedding tokens. Default value: `" high quality, highly detailed, high resolution, sharp"` */ - strength?: number; + prompt_suffix?: string; /** * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"` */ negative_prompt?: string; /** @@ -5947,42 +5444,14 @@ export type LcmInput = { seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** * The number of inference steps to use for generating the image. The more steps - * the better the image will be but it will also take longer to generate. Default value: `4` + * the better the image will be but it will also take longer to generate. Default value: `20` */ num_inference_steps?: number; - /** - * The size of the generated image. You can choose between some presets or - * custom height and width that **must be multiples of 8**. - * - * If not provided: - * - For text-to-image generations, the default size is 512x512. - * - For image-to-image generations, the default size is the same as the input image. - * - For inpainting generations, the default size is the same as the input image. - */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. The function will return a list of images - * with the same prompt and negative prompt but different seeds. Default value: `1` - */ - num_images?: number; /** * If set to true, the resulting image will be checked whether it includes any * potentially unsafe content. If it does, it will be replaced with a black @@ -5990,578 +5459,2562 @@ export type LcmInput = { */ enable_safety_checks?: boolean; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * If set to true, the image will not be processed by the CCSR model before + * being processed by the creativity model. */ - request_id?: string; + skip_ccsr?: boolean; /** - * If set to true, the inpainting pipeline will only inpaint the provided mask - * area. Only effective for inpainting pipelines. + * Allow for large uploads that could take a very long time. 
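+ *
+ * Usage sketch for this input type (illustrative; the endpoint id
+ * "fal-ai/creative-upscaler" is an assumption inferred from the type name):
+ *
+ *   import { fal } from "@fal-ai/client";
+ *   const { data } = await fal.subscribe("fal-ai/creative-upscaler", {
+ *     input: { image_url: "https://example.com/low-res.png", creativity: 0.5 },
+ *   });
+ *   // `data` follows CreativeUpscalerOutput: data.image.url, data.seed
+ *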
*/ - inpaint_mask_only?: boolean; + override_size_limits?: boolean; /** - * If set to true, the inpainting pipeline will use controlnet inpainting. - * Only effective for inpainting pipelines. + * The URL to the base model to use for the upscaling */ - controlnet_inpaint?: boolean; + base_model_url?: string | Blob | File; /** - * The url of the lora server to use for image generation. + * The URL to the additional LORA model to use for the upscaling. Default is None */ - lora_url?: string | Blob | File; + additional_lora_url?: string | Blob | File; /** - * The scale of the lora server to use for image generation. Default value: `1` + * The scale of the additional LORA model to use for the upscaling. Default is 1.0 Default value: `1` */ - lora_scale?: number; -}; -export type LcmOutput = { + additional_lora_scale?: number; /** - * The generated image files info. + * The URL to the additional embeddings to use for the upscaling. Default is None */ - images: Array; + additional_embedding_url?: string | Blob | File; +}; +export type CreativeUpscalerOutput = { /** - * + * The generated image file info. */ - timings: Record; + image: Image; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; +}; +export type ClarityUpscalerOutput = { /** - * Number of inference steps used to generate the image. It will be the same value of the one passed in the - * input or the default one in case none was passed. Default value: `4` + * The URL of the generated image. */ - num_inference_steps?: number; + image: Image; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * The seed used to generate the image. */ - request_id?: string; + seed: number; /** - * A list of booleans indicating whether the generated image contains any - * potentially unsafe content. If the safety check is disabled, this field - * will all will be false. + * The timings of the different steps in the workflow. */ - nsfw_content_detected: Array; + timings: any; }; -export type LcmSd15I2iInput = { +export type ComfyInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * */ - prompt: string; + prompt: any; /** - * The image to use as a base. + * + */ + extra_data?: any; + /** + * Disable saving prompt metadata in files. + */ + disable_metadata?: boolean; +}; +export type ClarityUpscalerInput = { + /** + * The URL of the image to upscale. */ image_url: string | Blob | File; /** - * The strength of the image. Default value: `0.8` + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `"masterpiece, best quality, highres"` */ - strength?: number; + prompt?: string; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The upscale factor Default value: `2` + */ + upscale_factor?: number; + /** + * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality:2)"` */ negative_prompt?: string; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The creativity of the model. 
The higher the creativity, the more the model will deviate from the prompt.
+ * Refers to the denoise strength of the sampling. Default value: `0.35`
 */
- seed?: number;
+ creativity?: number;
+ /**
+ * The resemblance of the upscaled image to the original image. The higher the resemblance, the more the model will try to keep the original image.
+ * Refers to the strength of the ControlNet. Default value: `0.6`
+ */
+ resemblance?: number;
 /**
 * The CFG (Classifier Free Guidance) scale is a measure of how close you want
- * the model to stick to your prompt when looking for a related image to show you. Default value: `1`
+ * the model to stick to your prompt when looking for a related image to show you. Default value: `4`
 */
 guidance_scale?: number;
 /**
- * The number of inference steps to use for generating the image. The more steps
- * the better the image will be but it will also take longer to generate. Default value: `4`
+ * The number of inference steps to perform. Default value: `18`
 */
 num_inference_steps?: number;
 /**
- * If set to true, the function will wait for the image to be generated and uploaded
- * before returning the response. This will increase the latency of the function but
- * it allows you to get the image directly in the response without going through the CDN.
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
 */
- sync_mode?: boolean;
+ seed?: number;
 /**
- * The number of images to generate. The function will return a list of images
- * with the same prompt and negative prompt but different seeds. Default value: `1`
+ * If set to false, the safety checker will be disabled. Default value: `true`
 */
- num_images?: number;
+ enable_safety_checker?: boolean;
+};
+export type CcsrInput = {
 /**
- * If set to true, the resulting image will be checked whether it includes any
- * potentially unsafe content. If it does, it will be replaced with a black
- * image. Default value: `true`
+ * The URL of the image to upscale.
 */
- enable_safety_checks?: boolean;
+ image_url: string | Blob | File;
 /**
- * An id bound to a request, can be used with response to identify the request
- * itself. Default value: `""`
+ * The scale of the output image. The higher the scale, the bigger the output image will be. Default value: `2`
 */
- request_id?: string;
-};
-export type LcmSd15I2iOutput = {
+ scale?: number;
 /**
- * The generated image files info.
+ * If specified, a patch-based sampling strategy will be used for sampling. Default value: `"none"`
 */
- images: Array;
+ tile_diffusion?: "none" | "mix" | "gaussian";
 /**
- *
+ * Size of patch. Default value: `1024`
 */
- timings: Record;
+ tile_diffusion_size?: number;
 /**
- * Seed of the generated Image. It will be the same value of the one passed in the
- * input or the randomly generated that was used in case none was passed.
+ * Stride of sliding patch. Default value: `512`
 */
- seed: number;
+ tile_diffusion_stride?: number;
 /**
- * Number of inference steps used to generate the image. It will be the same value of the one passed in the
- * input or the default one in case none was passed. Default value: `4`
+ * If specified, a patch-based sampling strategy will be used for VAE decoding.
 */
- num_inference_steps?: number;
+ tile_vae?: boolean;
 /**
- * An id bound to a request, can be used with response to identify the request
- * itself. Default value: `""`
+ * Size of VAE patch.
Default value: `226` */ - request_id?: string; + tile_vae_decoder_size?: number; /** - * A list of booleans indicating whether the generated image contains any - * potentially unsafe content. If the safety check is disabled, this field - * will have a false for each generated image. + * Size of latent image Default value: `1024` */ - nsfw_content_detected: Array; + tile_vae_encoder_size?: number; + /** + * The number of steps to run the model for. The higher the number the better the quality and longer it will take to generate. Default value: `50` + */ + steps?: number; + /** + * The ending point of uniform sampling strategy. Default value: `0.6667` + */ + t_max?: number; + /** + * The starting point of uniform sampling strategy. Default value: `0.3333` + */ + t_min?: number; + /** + * Type of color correction for samples. Default value: `"adain"` + */ + color_fix_type?: "none" | "wavelet" | "adain"; + /** + * Seed for reproducibility. Different seeds will make slightly different results. + */ + seed?: number; }; -export type FooocusInput = { +export type CcsrOutput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + * The generated image file info. */ - prompt?: string; + image: Image; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The seed used for the generation. */ - negative_prompt?: string; + seed: number; +}; +export type FastTurboDiffusionInput = { /** - * The style to use. Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` */ - styles?: Array< - | "Fooocus V2" - | "Fooocus Enhance" - | "Fooocus Sharp" - | "Fooocus Semi Realistic" - | "Fooocus Masterpiece" - | "Fooocus Photograph" - | "Fooocus Negative" - | "Fooocus Cinematic" - | "SAI 3D Model" - | "SAI Analog Film" - | "SAI Anime" - | "SAI Cinematic" - | "SAI Comic Book" - | "SAI Craft Clay" - | "SAI Digital Art" - | "SAI Enhance" - | "SAI Fantasy Art" - | "SAI Isometric" - | "SAI Line Art" - | "SAI Lowpoly" - | "SAI Neonpunk" - | "SAI Origami" - | "SAI Photographic" - | "SAI Pixel Art" - | "SAI Texture" - | "MRE Cinematic Dynamic" - | "MRE Spontaneous Picture" - | "MRE Artistic Vision" - | "MRE Dark Dream" - | "MRE Gloomy Art" - | "MRE Bad Dream" - | "MRE Underground" - | "MRE Surreal Painting" - | "MRE Dynamic Illustration" - | "MRE Undead Art" - | "MRE Elemental Art" - | "MRE Space Art" - | "MRE Ancient Illustration" - | "MRE Brave Art" - | "MRE Heroic Fantasy" - | "MRE Dark Cyberpunk" - | "MRE Lyrical Geometry" - | "MRE Sumi E Symbolic" - | "MRE Sumi E Detailed" - | "MRE Manga" - | "MRE Anime" - | "MRE Comic" - | "Ads Advertising" - | "Ads Automotive" - | "Ads Corporate" - | "Ads Fashion Editorial" - | "Ads Food Photography" - | "Ads Gourmet Food Photography" - | "Ads Luxury" - | "Ads Real Estate" - | "Ads Retail" - | "Artstyle Abstract" - | "Artstyle Abstract Expressionism" - | "Artstyle Art Deco" - | "Artstyle Art Nouveau" - | "Artstyle Constructivist" - | "Artstyle Cubist" - | "Artstyle Expressionist" - | "Artstyle Graffiti" - | "Artstyle Hyperrealism" - | "Artstyle Impressionist" - | "Artstyle Pointillism" - | "Artstyle Pop Art" - | "Artstyle Psychedelic" - | "Artstyle Renaissance" - | "Artstyle Steampunk" - | "Artstyle Surrealist" - | "Artstyle Typography" - | "Artstyle Watercolor" 
- | "Futuristic Biomechanical" - | "Futuristic Biomechanical Cyberpunk" - | "Futuristic Cybernetic" - | "Futuristic Cybernetic Robot" - | "Futuristic Cyberpunk Cityscape" - | "Futuristic Futuristic" - | "Futuristic Retro Cyberpunk" - | "Futuristic Retro Futurism" - | "Futuristic Sci Fi" - | "Futuristic Vaporwave" - | "Game Bubble Bobble" - | "Game Cyberpunk Game" - | "Game Fighting Game" - | "Game Gta" - | "Game Mario" - | "Game Minecraft" - | "Game Pokemon" - | "Game Retro Arcade" - | "Game Retro Game" - | "Game Rpg Fantasy Game" - | "Game Strategy Game" - | "Game Streetfighter" - | "Game Zelda" - | "Misc Architectural" - | "Misc Disco" - | "Misc Dreamscape" - | "Misc Dystopian" - | "Misc Fairy Tale" - | "Misc Gothic" - | "Misc Grunge" - | "Misc Horror" - | "Misc Kawaii" - | "Misc Lovecraftian" - | "Misc Macabre" - | "Misc Manga" - | "Misc Metropolis" - | "Misc Minimalist" - | "Misc Monochrome" - | "Misc Nautical" - | "Misc Space" - | "Misc Stained Glass" - | "Misc Techwear Fashion" - | "Misc Tribal" - | "Misc Zentangle" - | "Papercraft Collage" - | "Papercraft Flat Papercut" - | "Papercraft Kirigami" - | "Papercraft Paper Mache" - | "Papercraft Paper Quilling" - | "Papercraft Papercut Collage" - | "Papercraft Papercut Shadow Box" - | "Papercraft Stacked Papercut" - | "Papercraft Thick Layered Papercut" - | "Photo Alien" - | "Photo Film Noir" - | "Photo Glamour" - | "Photo Hdr" - | "Photo Iphone Photographic" - | "Photo Long Exposure" - | "Photo Neon Noir" - | "Photo Silhouette" - | "Photo Tilt Shift" - | "Cinematic Diva" - | "Abstract Expressionism" - | "Academia" - | "Action Figure" - | "Adorable 3D Character" - | "Adorable Kawaii" - | "Art Deco" - | "Art Nouveau" - | "Astral Aura" - | "Avant Garde" - | "Baroque" - | "Bauhaus Style Poster" - | "Blueprint Schematic Drawing" - | "Caricature" - | "Cel Shaded Art" - | "Character Design Sheet" - | "Classicism Art" - | "Color Field Painting" - | "Colored Pencil Art" - | "Conceptual Art" - | "Constructivism" - | "Cubism" - | "Dadaism" - | "Dark Fantasy" - | "Dark Moody Atmosphere" - | "Dmt Art Style" - | "Doodle Art" - | "Double Exposure" - | "Dripping Paint Splatter Art" - | "Expressionism" - | "Faded Polaroid Photo" - | "Fauvism" - | "Flat 2d Art" - | "Fortnite Art Style" - | "Futurism" - | "Glitchcore" - | "Glo Fi" - | "Googie Art Style" - | "Graffiti Art" - | "Harlem Renaissance Art" - | "High Fashion" - | "Idyllic" - | "Impressionism" - | "Infographic Drawing" - | "Ink Dripping Drawing" - | "Japanese Ink Drawing" - | "Knolling Photography" - | "Light Cheery Atmosphere" - | "Logo Design" - | "Luxurious Elegance" - | "Macro Photography" - | "Mandola Art" - | "Marker Drawing" - | "Medievalism" - | "Minimalism" - | "Neo Baroque" - | "Neo Byzantine" - | "Neo Futurism" - | "Neo Impressionism" - | "Neo Rococo" - | "Neoclassicism" - | "Op Art" - | "Ornate And Intricate" - | "Pencil Sketch Drawing" - | "Pop Art 2" - | "Rococo" - | "Silhouette Art" - | "Simple Vector Art" - | "Sketchup" - | "Steampunk 2" - | "Surrealism" - | "Suprematism" - | "Terragen" - | "Tranquil Relaxing Atmosphere" - | "Sticker Designs" - | "Vibrant Rim Light" - | "Volumetric Lighting" - | "Watercolor 2" - | "Whimsical And Playful" - | "Mk Chromolithography" - | "Mk Cross Processing Print" - | "Mk Dufaycolor Photograph" - | "Mk Herbarium" - | "Mk Punk Collage" - | "Mk Mosaic" - | "Mk Van Gogh" - | "Mk Coloring Book" - | "Mk Singer Sargent" - | "Mk Pollock" - | "Mk Basquiat" - | "Mk Andy Warhol" - | "Mk Halftone Print" - | "Mk Gond Painting" - | "Mk Albumen Print" - | "Mk 
Aquatint Print" - | "Mk Anthotype Print" - | "Mk Inuit Carving" - | "Mk Bromoil Print" - | "Mk Calotype Print" - | "Mk Color Sketchnote" - | "Mk Cibulak Porcelain" - | "Mk Alcohol Ink Art" - | "Mk One Line Art" - | "Mk Blacklight Paint" - | "Mk Carnival Glass" - | "Mk Cyanotype Print" - | "Mk Cross Stitching" - | "Mk Encaustic Paint" - | "Mk Embroidery" - | "Mk Gyotaku" - | "Mk Luminogram" - | "Mk Lite Brite Art" - | "Mk Mokume Gane" - | "Pebble Art" - | "Mk Palekh" - | "Mk Suminagashi" - | "Mk Scrimshaw" - | "Mk Shibori" - | "Mk Vitreous Enamel" - | "Mk Ukiyo E" - | "Mk Vintage Airline Poster" - | "Mk Vintage Travel Poster" - | "Mk Bauhaus Style" - | "Mk Afrofuturism" - | "Mk Atompunk" - | "Mk Constructivism" - | "Mk Chicano Art" - | "Mk De Stijl" - | "Mk Dayak Art" - | "Mk Fayum Portrait" - | "Mk Illuminated Manuscript" - | "Mk Kalighat Painting" - | "Mk Madhubani Painting" - | "Mk Pictorialism" - | "Mk Pichwai Painting" - | "Mk Patachitra Painting" - | "Mk Samoan Art Inspired" - | "Mk Tlingit Art" - | "Mk Adnate Style" - | "Mk Ron English Style" - | "Mk Shepard Fairey Style" + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type ImageToImageTurboInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). 
Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type InpaintingTurboInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. 
Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastTurboDiffusionImageToImageInput = { + /** + * The name of the model to use. Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FastTurboDiffusionInpaintingInput = { + /** + * The name of the model to use. 
Default value: `"stabilityai/sdxl-turbo"` + */ + model_name?: "stabilityai/sdxl-turbo" | "stabilityai/sd-turbo"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `2` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type FastTurboDiffusionInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type ImageToImageLCMInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. 
Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type InpaintingLCMInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
Default value: `1.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLcmDiffusionInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. 
This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionImageToImageInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. 
Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type TextToImageLCMInput = { + /** + * The name of the model to use. Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionInpaintingInput = { + /** + * The name of the model to use. 
Default value: `"stabilityai/stable-diffusion-xl-base-1.0"` + */ + model_name?: + | "stabilityai/stable-diffusion-xl-base-1.0" + | "runwayml/stable-diffusion-v1-5"; + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `6` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. Default value: `true` + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastLcmDiffusionInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type WhisperInput = { + /** + * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. + */ + audio_url: string | Blob | File; + /** + * Task to perform on the audio file. Either transcribe or translate. 
Default value: `"transcribe"` + */ + task?: "transcribe" | "translate"; + /** + * Language of the audio file. If set to null, the language will be + * automatically detected. Defaults to null. + * + * If translate is selected as the task, the audio will be translated to + * English, regardless of the language selected. + */ + language?: + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh"; + /** + * Whether to diarize the audio file. Defaults to false. + */ + diarize?: boolean; + /** + * Level of the chunks to return. Either segment or word. Default value: `"segment"` + */ + chunk_level?: "segment" | "word"; + /** + * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + */ + version?: "3"; + /** + * Default value: `64` + */ + batch_size?: number; + /** + * Prompt to use for generation. Defaults to an empty string. Default value: `""` + */ + prompt?: string; + /** + * Number of speakers in the audio file. Defaults to null. + * If not provided, the number of speakers will be automatically + * detected. + */ + num_speakers?: number; +}; +export type WhisperOutput = { + /** + * Transcription of the audio file + */ + text: string; + /** + * Timestamp chunks of the audio file + */ + chunks?: Array; + /** + * List of languages that the audio file is inferred to be. Defaults to null. + */ + inferred_languages: Array< + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh" >; /** - * You can choose Speed or Quality Default value: `"Extreme Speed"` + * Speaker diarization segments of the audio file. Only present if diarization is enabled. + */ + diarization_segments: Array; +}; +export type WizperInput = { + /** + * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm. 
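+ *
+ * Transcription usage sketch (illustrative; the endpoint id "fal-ai/wizper" is
+ * an assumption inferred from the type name, and the Whisper endpoint above is
+ * called the same way with a WhisperInput):
+ *
+ *   import { fal } from "@fal-ai/client";
+ *   const { data } = await fal.subscribe("fal-ai/wizper", {
+ *     input: { audio_url: "https://example.com/recording.mp3", task: "transcribe" },
+ *   });
+ *   // `data` follows WizperOutput: data.text plus per-segment data.chunks
+ *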
+ */ + audio_url: string | Blob | File; + /** + * Task to perform on the audio file. Either transcribe or translate. Default value: `"transcribe"` + */ + task?: "transcribe" | "translate"; + /** + * Language of the audio file. + * If translate is selected as the task, the audio will be translated to + * English, regardless of the language selected. Default value: `"en"` + */ + language?: + | "af" + | "am" + | "ar" + | "as" + | "az" + | "ba" + | "be" + | "bg" + | "bn" + | "bo" + | "br" + | "bs" + | "ca" + | "cs" + | "cy" + | "da" + | "de" + | "el" + | "en" + | "es" + | "et" + | "eu" + | "fa" + | "fi" + | "fo" + | "fr" + | "gl" + | "gu" + | "ha" + | "haw" + | "he" + | "hi" + | "hr" + | "ht" + | "hu" + | "hy" + | "id" + | "is" + | "it" + | "ja" + | "jw" + | "ka" + | "kk" + | "km" + | "kn" + | "ko" + | "la" + | "lb" + | "ln" + | "lo" + | "lt" + | "lv" + | "mg" + | "mi" + | "mk" + | "ml" + | "mn" + | "mr" + | "ms" + | "mt" + | "my" + | "ne" + | "nl" + | "nn" + | "no" + | "oc" + | "pa" + | "pl" + | "ps" + | "pt" + | "ro" + | "ru" + | "sa" + | "sd" + | "si" + | "sk" + | "sl" + | "sn" + | "so" + | "sq" + | "sr" + | "su" + | "sv" + | "sw" + | "ta" + | "te" + | "tg" + | "th" + | "tk" + | "tl" + | "tr" + | "tt" + | "uk" + | "ur" + | "uz" + | "vi" + | "yi" + | "yo" + | "yue" + | "zh"; + /** + * Level of the chunks to return. Default value: `"segment"` + */ + chunk_level?: "segment"; + /** + * Version of the model to use. All of the models are the Whisper large variant. Default value: `"3"` + */ + version?: "3"; +}; +export type WizperOutput = { + /** + * Transcription of the audio file + */ + text: string; + /** + * Timestamp chunks of the audio file + */ + chunks: Array; +}; +export type FastLightningSdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLightningSdxlInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"4"` + */ + num_inference_steps?: "1" | "2" | "4" | "8"; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. 
+ */
+ expand_prompt?: boolean;
+ /**
+ * The format of the generated image. Default value: `"jpeg"`
+ */
+ format?: "jpeg" | "png";
+};
+export type InpaintingLightningInput = {
+ /**
+ * The URL of the image to use as a starting point for the generation.
+ */
+ image_url: string | Blob | File;
+ /**
+ * The URL of the mask to use for inpainting.
+ */
+ mask_url: string | Blob | File;
+ /**
+ * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ */
+ prompt: string;
+ /**
+ * The size of the generated image. Default value: `square_hd`
+ */
+ image_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
+ /**
+ * The number of inference steps to perform. Default value: `"4"`
+ */
+ num_inference_steps?: "1" | "2" | "4" | "8";
+ /**
+ * Determines how much the generated image resembles the initial image. Default value: `0.95`
+ */
+ strength?: number;
+ /**
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
+ */
+ seed?: number;
+ /**
+ * If set to true, the function will wait for the image to be generated and uploaded
+ * before returning the response. This will increase the latency of the function but
+ * it allows you to get the image directly in the response without going through the CDN.
+ */
+ sync_mode?: boolean;
+ /**
+ * The number of images to generate. Default value: `1`
+ */
+ num_images?: number;
+ /**
+ * The list of embeddings to use. Default value: ``
+ */
+ embeddings?: Array<Embedding>;
+ /**
+ * If set to true, the safety checker will be enabled.
+ */
+ enable_safety_checker?: boolean;
+ /**
+ * If set to true, the prompt will be expanded with additional prompts.
+ */
+ expand_prompt?: boolean;
+ /**
+ * The format of the generated image. Default value: `"jpeg"`
+ */
+ format?: "jpeg" | "png";
+};
+export type ImageToImageLightningInput = {
+ /**
+ * The URL of the image to use as a starting point for the generation.
+ */
+ image_url: string | Blob | File;
+ /**
+ * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ */
+ prompt: string;
+ /**
+ * The size of the generated image. Default value: `square_hd`
+ */
+ image_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
+ /**
+ * The number of inference steps to perform. Default value: `"4"`
+ */
+ num_inference_steps?: "1" | "2" | "4" | "8";
+ /**
+ * Determines how much the generated image resembles the initial image. Default value: `0.95`
+ */
+ strength?: number;
+ /**
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
+ */
+ seed?: number;
+ /**
+ * If set to true, the function will wait for the image to be generated and uploaded
+ * before returning the response. This will increase the latency of the function but
+ * it allows you to get the image directly in the response without going through the CDN.
+ */
+ sync_mode?: boolean;
+ /**
+ * The number of images to generate. Default value: `1`
+ */
+ num_images?: number;
+ /**
+ * The list of embeddings to use. Default value: ``
+ */
+ embeddings?: Array<Embedding>;
+ /**
+ * If set to true, the safety checker will be enabled.
+ */
+ enable_safety_checker?: boolean;
+ /**
+ * If set to true, the prompt will be expanded with additional prompts.
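+ *
+ * Illustrative sketch, not part of the generated schema: the matching
+ * inpainting variant above takes an additional `mask_url`. The endpoint
+ * ID `fal-ai/fast-lightning-sdxl/inpainting` is an assumption.
+ *
+ * ```ts
+ * import { fal } from "@fal-ai/client";
+ *
+ * const { data } = await fal.subscribe("fal-ai/fast-lightning-sdxl/inpainting", {
+ *   input: {
+ *     image_url: "https://example.com/room.png",
+ *     mask_url: "https://example.com/room-mask.png", // white = area to repaint
+ *     prompt: "a mid-century armchair",
+ *     strength: 0.85,
+ *   },
+ * });
+ * ```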
+ */
+ expand_prompt?: boolean;
+ /**
+ * The format of the generated image. Default value: `"jpeg"`
+ */
+ format?: "jpeg" | "png";
+};
+export type FastLightningSdxlImageToImageOutput = {
+ /**
+ * The generated image files info.
+ */
+ images: Array<Image>;
+ /**
+ *
+ */
+ timings: any;
+ /**
+ * Seed of the generated Image. It will be the same value of the one passed in the
+ * input or the randomly generated that was used in case none was passed.
+ */
+ seed: number;
+ /**
+ * Whether the generated images contain NSFW concepts.
+ */
+ has_nsfw_concepts: Array<boolean>;
+ /**
+ * The prompt used for generating the image.
+ */
+ prompt: string;
+};
+export type TextToImageLightningInput = {
+ /**
+ * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ */
+ prompt: string;
+ /**
+ * The size of the generated image. Default value: `square_hd`
+ */
+ image_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
+ /**
+ * The number of inference steps to perform. Default value: `"4"`
+ */
+ num_inference_steps?: "1" | "2" | "4" | "8";
+ /**
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
+ */
+ seed?: number;
+ /**
+ * If set to true, the function will wait for the image to be generated and uploaded
+ * before returning the response. This will increase the latency of the function but
+ * it allows you to get the image directly in the response without going through the CDN.
+ */
+ sync_mode?: boolean;
+ /**
+ * The number of images to generate. Default value: `1`
+ */
+ num_images?: number;
+ /**
+ * The list of embeddings to use. Default value: ``
+ */
+ embeddings?: Array<Embedding>;
+ /**
+ * If set to true, the safety checker will be enabled.
+ */
+ enable_safety_checker?: boolean;
+ /**
+ * If set to true, the prompt will be expanded with additional prompts.
+ */
+ expand_prompt?: boolean;
+ /**
+ * The format of the generated image. Default value: `"jpeg"`
+ */
+ format?: "jpeg" | "png";
+};
+export type FastLightningSdxlImageToImageInput = {
+ /**
+ * The URL of the image to use as a starting point for the generation.
+ */
+ image_url: string | Blob | File;
+ /**
+ * The prompt to use for generating the image. Be as descriptive as possible for best results.
+ */
+ prompt: string;
+ /**
+ * The size of the generated image. Default value: `square_hd`
+ */
+ image_size?:
+ | ImageSize
+ | "square_hd"
+ | "square"
+ | "portrait_4_3"
+ | "portrait_16_9"
+ | "landscape_4_3"
+ | "landscape_16_9";
+ /**
+ * The number of inference steps to perform. Default value: `"4"`
+ */
+ num_inference_steps?: "1" | "2" | "4" | "8";
+ /**
+ * Determines how much the generated image resembles the initial image. Default value: `0.95`
+ */
+ strength?: number;
+ /**
+ * The same seed and the same prompt given to the same version of Stable Diffusion
+ * will output the same image every time.
+ */
+ seed?: number;
+ /**
+ * If set to true, the function will wait for the image to be generated and uploaded
+ * before returning the response. This will increase the latency of the function but
+ * it allows you to get the image directly in the response without going through the CDN.
+ */
+ sync_mode?: boolean;
+ /**
+ * The number of images to generate. Default value: `1`
+ */
+ num_images?: number;
+ /**
+ * The list of embeddings to use. Default value: ``
+ */
+ embeddings?: Array<Embedding>;
+ /**
+ * If set to true, the safety checker will be enabled.
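+ *
+ * Illustrative sketch, not part of the generated schema: these endpoint
+ * types can annotate payloads directly, assuming they are re-exported
+ * from the package root; the endpoint ID is likewise an assumption.
+ *
+ * ```ts
+ * import { fal, type FastLightningSdxlImageToImageInput } from "@fal-ai/client";
+ *
+ * const input: FastLightningSdxlImageToImageInput = {
+ *   image_url: "https://example.com/sketch.png",
+ *   prompt: "a watercolor landscape",
+ *   strength: 0.6,
+ * };
+ * const { data } = await fal.subscribe(
+ *   "fal-ai/fast-lightning-sdxl/image-to-image",
+ *   { input },
+ * );
+ * ```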
+ */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type FastLightningSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastLightningSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"4"` + */ + num_inference_steps?: "1" | "2" | "4" | "8"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type HyperSdxlInput = { + /** + * + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type ImageToImageHyperInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type InpaintingHyperInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlImageToImageOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type TextToImageHyperInput = { + /** + * + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` */ - performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + * The number of inference steps to perform. Default value: `"1"` */ - guidance_scale?: number; + num_inference_steps?: "1" | "2" | "4"; /** - * The sharpness of the generated image. Use it to control how sharp the generated - * image should be. Higher value means image and texture are sharper. Default value: `2` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - sharpness?: number; + seed?: number; /** - * The size of the generated image. You can choose between some presets or - * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - aspect_ratio?: string; + sync_mode?: boolean; /** - * Number of images to generate in one request Default value: `1` + * The number of images to generate. Default value: `1` */ num_images?: number; /** - * The LoRAs to use for the image generation. You can use up to 5 LoRAs - * and they will be merged together to generate the final image. Default value: `[object Object]` + * The list of embeddings to use. 
Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlImageToImageInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type HyperSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type HyperSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. 
Default value: `"1"` + */ + num_inference_steps?: "1" | "2" | "4"; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; +}; +export type PlaygroundV25Output = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type InpaintingPlaygroundv25Input = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` */ - loras?: Array; + strength?: number; /** - * Refiner (SDXL or SD 1.5) Default value: `"None"` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + seed?: number; /** - * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models - * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + * The number of images to generate. 
Default value: `1` */ - refiner_switch?: number; + num_images?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * The list of embeddings to use. Default value: `` */ - output_format?: "png" | "jpeg" | "webp"; + embeddings?: Array; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * If set to true, the safety checker will be enabled. Default value: `true` */ - sync_mode?: boolean; + enable_safety_checker?: boolean; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed?: number; + safety_checker_version?: "v1" | "v2"; /** - * + * If set to true, the prompt will be expanded with additional prompts. */ - image_prompt_1: ImagePrompt; + expand_prompt?: boolean; /** - * + * The format of the generated image. Default value: `"jpeg"` */ - image_prompt_2?: ImagePrompt; + format?: "jpeg" | "png"; /** - * + * The rescale factor for the CFG. */ - image_prompt_3?: ImagePrompt; + guidance_rescale?: number; +}; +export type ImageToImagePlaygroundv25Input = { /** - * + * The URL of the image to use as a starting point for the generation. */ - image_prompt_4?: ImagePrompt; + image_url: string | Blob | File; /** - * The image to use as a reference for inpainting. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - inpaint_image_url?: string | Blob | File; + prompt: string; /** - * The image to use as a mask for the generated image. + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - mask_image_url?: string | Blob | File; + negative_prompt?: string; /** - * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + * The size of the generated image. Default value: `square_hd` */ - inpaint_mode?: - | "Inpaint or Outpaint (default)" - | "Improve Detail (face, hand, eyes, etc.)" - | "Modify Content (add objects, change background, etc.)"; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Describe what you want to inpaint. Default value: `""` + * The number of inference steps to perform. Default value: `25` */ - inpaint_additional_prompt?: string; + num_inference_steps?: number; /** - * The directions to outpaint. Default value: `` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ - outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + guidance_scale?: number; /** - * Mixing Image Prompt and Inpaint + * determines how much the generated image resembles the initial image Default value: `0.95` */ - mixing_image_prompt_and_inpaint?: boolean; + strength?: number; /** - * The image to upscale or vary. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. 
*/ - uov_image_url?: string | Blob | File; + seed?: number; /** - * The method to use for upscaling or varying. Default value: `"Disabled"` + * The number of images to generate. Default value: `1` */ - uov_method?: - | "Disabled" - | "Vary (Subtle)" - | "Vary (Strong)" - | "Upscale (1.5x)" - | "Upscale (2x)" - | "Upscale (Fast 2x)"; + num_images?: number; /** - * Mixing Image Prompt and Vary/Upscale + * The list of embeddings to use. Default value: `` */ - mixing_image_prompt_and_vary_upscale?: boolean; + embeddings?: Array; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; -}; -export type FooocusOutput = { /** - * The generated image file info. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - images: Array; + safety_checker_version?: "v1" | "v2"; /** - * The time taken for the generation process. + * If set to true, the prompt will be expanded with additional prompts. */ - timings: Record; + expand_prompt?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The format of the generated image. Default value: `"jpeg"` */ - has_nsfw_concepts: Array; -}; -export type AnimatediffV2vInput = { + format?: "jpeg" | "png"; /** - * URL of the video. + * The rescale factor for the CFG. */ - video_url: string | Blob | File; + guidance_rescale?: number; +}; +export type PlaygroundV25Input = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -6569,121 +8022,160 @@ export type AnimatediffV2vInput = { /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `25` + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** - * Base model to use for animation generation. Default value: `"cardosAnimev20"` + * The number of images to generate. Default value: `1` */ - base_model?: "darkSushiMixMix_colorful" | "cardosAnimev20"; + num_images?: number; /** - * The list of LoRA weights to use. Default value: `` + * The list of embeddings to use. Default value: `` */ - loras?: Array; + embeddings?: Array; /** - * Select every Nth frame from the video. 
- * This can be used to reduce the number of frames to process, which can reduce the time and the cost. - * However, it can also reduce the quality of the final video. Default value: `2` + * If set to true, the safety checker will be enabled. Default value: `true` */ - select_every_nth_frame?: number; + enable_safety_checker?: boolean; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed?: number; + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; }; -export type AnimatediffV2vOutput = { +export type PlaygroundV25ImageToImageOutput = { /** - * Generated video file. + * The generated image files info. */ - video: File; + images: Array; /** - * Seed used for generating the video. + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. */ - timings: Record; + prompt: string; }; -export type AnimatediffV2vTurboInput = { +export type PlaygroundV25ImageToImageInput = { /** - * URL of the video. + * The URL of the image to use as a starting point for the generation. */ - video_url: string | Blob | File; + image_url: string | Blob | File; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want + * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `25` + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7` + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** - * Base model to use for animation generation. 
Default value: `"cardosAnimev20"` + * determines how much the generated image resembles the initial image Default value: `0.95` */ - base_model?: "darkSushiMixMix_colorful" | "cardosAnimev20"; + strength?: number; /** - * The list of LoRA weights to use. Default value: `` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - loras?: Array; + seed?: number; /** - * Select every Nth frame from the video. - * This can be used to reduce the number of frames to process, which can reduce the time and the cost. - * However, it can also reduce the quality of the final video. Default value: `2` + * The number of images to generate. Default value: `1` */ - select_every_nth_frame?: number; + num_images?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The list of embeddings to use. Default value: `` */ - seed?: number; -}; -export type AnimatediffV2vTurboOutput = { + embeddings?: Array; /** - * Generated video file. + * If set to true, the safety checker will be enabled. Default value: `true` */ - video: File; + enable_safety_checker?: boolean; /** - * Seed used for generating the video. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed: number; + safety_checker_version?: "v1" | "v2"; /** - * + * If set to true, the prompt will be expanded with additional prompts. */ - timings: Record; -}; -export type FastAnimatediffTextToVideoInput = { + expand_prompt?: boolean; /** - * URL of the video. + * The format of the generated image. Default value: `"jpeg"` */ - video_url: string | Blob | File; + format?: "jpeg" | "png"; /** - * The first N number of seconds of video to animate. Default value: `3` + * The rescale factor for the CFG. */ - first_n_seconds?: number; + guidance_rescale?: number; +}; +export type TextToImagePlaygroundv25Input = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -6691,278 +8183,311 @@ export type FastAnimatediffTextToVideoInput = { /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** - * The strength of the input video in the final output. Default value: `0.7` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - strength?: number; + seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. 
Default value: `1` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The number of images to generate. Default value: `1` */ - seed?: number; + num_images?: number; /** - * Number of frames per second to extract from the video. Default value: `8` + * The list of embeddings to use. Default value: `` */ - fps?: number; + embeddings?: Array; /** - * The motions to apply to the video. + * If set to true, the safety checker will be enabled. Default value: `true` */ - motions?: Array< - "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" - >; + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; }; -export type FastAnimatediffTextToVideoOutput = { +export type PlaygroundV25InpaintingOutput = { /** - * Generated video file. + * The generated image files info. */ - video: File; + images: Array; /** - * Seed used for generating the video. + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; }; -export type FastAnimatediffVideoToVideoInput = { +export type PlaygroundV25InpaintingInput = { /** - * URL of the video. + * The URL of the image to use as a starting point for the generation. */ - video_url: string | Blob | File; + image_url: string | Blob | File; /** - * The first N number of seconds of video to animate. Default value: `3` + * The URL of the mask to use for inpainting. */ - first_n_seconds?: number; + mask_url: string | Blob | File; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want + * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + * The size of the generated image. Default value: `square_hd` */ - num_inference_steps?: number; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The strength of the input video in the final output. Default value: `0.7` + * The number of inference steps to perform. 
Default value: `25` */ - strength?: number; + num_inference_steps?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `3` */ guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** - * Number of frames per second to extract from the video. Default value: `8` + * The number of images to generate. Default value: `1` */ - fps?: number; + num_images?: number; /** - * The motions to apply to the video. + * The list of embeddings to use. Default value: `` */ - motions?: Array< - "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" - >; -}; -export type FastAnimatediffVideoToVideoOutput = { + embeddings?: Array; /** - * Generated video file. + * If set to true, the safety checker will be enabled. Default value: `true` */ - video: File; + enable_safety_checker?: boolean; /** - * Seed used for generating the video. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed: number; -}; -export type FastAnimatediffTurboTextToVideoInput = { + safety_checker_version?: "v1" | "v2"; /** - * URL of the video. + * If set to true, the prompt will be expanded with additional prompts. */ - video_url: string | Blob | File; + expand_prompt?: boolean; /** - * The first N number of seconds of video to animate. Default value: `3` + * The format of the generated image. Default value: `"jpeg"` */ - first_n_seconds?: number; + format?: "jpeg" | "png"; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The rescale factor for the CFG. */ - prompt: string; + guidance_rescale?: number; +}; +export type AmtInterpolationOutput = { /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * Generated video */ - negative_prompt?: string; + video: File; +}; +export type AmtInterpolationInput = { /** - * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + * URL of the video to be processed */ - num_inference_steps?: number; + video_url: string | Blob | File; /** - * The strength of the input video in the final output. Default value: `0.7` + * Output frames per second Default value: `24` */ - strength?: number; + output_fps?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * Number of recursive interpolation passes Default value: `2` */ - guidance_scale?: number; + recursive_interpolation_passes?: number; +}; +export type AMTFrameInterpolationInput = { /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. 
+ * Frames to interpolate */ - seed?: number; + frames: Array; /** - * Number of frames per second to extract from the video. Default value: `8` + * Output frames per second Default value: `24` */ - fps?: number; + output_fps?: number; /** - * The motions to apply to the video. + * Number of recursive interpolation passes Default value: `4` */ - motions?: Array< - "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" - >; + recursive_interpolation_passes?: number; }; -export type FastAnimatediffTurboTextToVideoOutput = { +export type AmtInterpolationFrameInterpolationOutput = { /** - * Generated video file. + * Generated video */ video: File; - /** - * Seed used for generating the video. - */ - seed: number; }; -export type FastAnimatediffTurboVideoToVideoInput = { +export type AMTInterpolationInput = { /** - * URL of the video. + * URL of the video to be processed */ video_url: string | Blob | File; /** - * The first N number of seconds of video to animate. Default value: `3` + * Output frames per second Default value: `24` */ - first_n_seconds?: number; + output_fps?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Number of recursive interpolation passes Default value: `2` */ - prompt: string; + recursive_interpolation_passes?: number; +}; +export type AmtInterpolationFrameInterpolationInput = { /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + * Frames to interpolate */ - negative_prompt?: string; + frames: Array; /** - * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + * Output frames per second Default value: `24` */ - num_inference_steps?: number; + output_fps?: number; /** - * The strength of the input video in the final output. Default value: `0.7` + * Number of recursive interpolation passes Default value: `4` */ - strength?: number; + recursive_interpolation_passes?: number; +}; +export type T2vTurboInput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * The prompt to generate images from */ - guidance_scale?: number; + prompt: string; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The seed to use for the random number generator */ - seed?: number; + seed?: number | null; /** - * Number of frames per second to extract from the video. Default value: `8` + * The number of steps to sample Default value: `4` */ - fps?: number; + num_inference_steps?: number; /** - * The motions to apply to the video. + * The guidance scale Default value: `7.5` */ - motions?: Array< - "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" - >; + guidance_scale?: number; + /** + * The number of frames to generate Default value: `16` + */ + num_frames?: number; + /** + * The FPS of the exported video Default value: `8` + */ + export_fps?: number; }; -export type FastAnimatediffTurboVideoToVideoOutput = { +export type T2vTurboOutput = { /** - * Generated video file. 
+ * The URL to the generated video */ video: File; +}; +export type Sd15DepthControlnetOutput = { /** - * Seed used for generating the video. + * The generated image files info. */ - seed: number; -}; -export type IllusionDiffusionOutput = { + images: Array; /** - * The generated image file info. + * */ - image: Image; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; -}; -export type IllusionDiffusionInput = { /** - * Input image url. + * Whether the generated images contain NSFW concepts. */ - image_url: string | Blob | File; + has_nsfw_concepts: Array; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The prompt used for generating the image. */ prompt: string; +}; +export type ImageToImageControlNetInput = { /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - negative_prompt?: string; + prompt: string; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * The URL of the control image. */ - guidance_scale?: number; + control_image_url: string | Blob | File; /** - * The scale of the ControlNet. Default value: `1` + * The scale of the controlnet conditioning. Default value: `0.5` */ controlnet_conditioning_scale?: number; /** - * - */ - control_guidance_start?: number; - /** - * Default value: `1` - */ - control_guidance_end?: number; - /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. - */ - seed?: number; - /** - * Scheduler / sampler to use for the image denoising process. Default value: `"Euler"` + * The URL of the image to use as a starting point for the generation. */ - scheduler?: "DPM++ Karras SDE" | "Euler"; + image_url: string | Blob | File; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `40` + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - num_inference_steps?: number; + negative_prompt?: string; /** - * The size of the generated image. You can choose between some presets or - * custom height and width that **must be multiples of 8**. Default value: `square_hd` + * The size of the generated image. Leave it none to automatically infer from the control image. */ image_size?: | ImageSize @@ -6972,102 +8497,79 @@ export type IllusionDiffusionInput = { | "portrait_16_9" | "landscape_4_3" | "landscape_16_9"; -}; -export type ImageutilsDepthInput = { /** - * Input image url. - */ - image_url: string | Blob | File; -}; -export type ImageutilsDepthOutput = { - /** - * Combined image of all detected masks - */ - image?: Image; -}; -export type ImageutilsRembgInput = { - /** - * Input image url. 
- */ - image_url: string | Blob | File; -}; -export type ImageutilsRembgOutput = { - /** - * Combined image of all detected masks - */ - image?: Image; -}; -export type EsrganOutput = { - /** - * Upscaled image + * The number of inference steps to perform. Default value: `25` */ - image: Image; -}; -export type EsrganInput = { + num_inference_steps?: number; /** - * Url to input image + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - image_url: string | Blob | File; + guidance_scale?: number; /** - * Rescaling factor Default value: `2` + * determines how much the generated image resembles the initial image Default value: `0.95` */ - scale?: number; + strength?: number; /** - * Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200 + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - tile?: number; + seed?: number; /** - * Upscaling a face + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - face?: boolean; + sync_mode?: boolean; /** - * Model to use for upscaling Default value: `"RealESRGAN_x4plus"` + * The number of images to generate. Default value: `1` */ - model?: - | "RealESRGAN_x4plus" - | "RealESRGAN_x2plus" - | "RealESRGAN_x4plus_anime_6B" - | "RealESRGAN_x4_v3" - | "RealESRGAN_x4_wdn_v3" - | "RealESRGAN_x4_anime_v3"; + num_images?: number; /** - * Output image format (png or jpeg) Default value: `"png"` + * The list of LoRA weights to use. Default value: `` */ - output_format?: "png" | "jpeg"; -}; -export type ControlnetsdxlOutput = { + loras?: Array; /** - * The generated image files info. + * If set to true, the safety checker will be enabled. */ - images: Array; + enable_safety_checker?: boolean; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * If set to true, the prompt will be expanded with additional prompts. */ - seed: number; + expand_prompt?: boolean; }; -export type ControlnetsdxlInput = { - /** - * Url to input image - */ - image_url: string | Blob | File; +export type Sd15DepthControlnetInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The scale of the ControlNet. Default value: `0.5` + * The URL of the control image. + */ + control_image_url: string | Blob | File; + /** + * The scale of the controlnet conditioning. Default value: `0.5` */ controlnet_conditioning_scale?: number; /** - * The negative prompt to use.Use it to address details that you don't want + * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `50` + * The size of the generated image. 
Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; /** @@ -7075,27 +8577,39 @@ export type ControlnetsdxlInput = { * will output the same image every time. */ seed?: number; -}; -export type FastSdxlControlnetCannyOutput = { /** - * The generated image files info. + * If set to true, DeepCache will be enabled. TBD */ - images: Array; + enable_deep_cache?: boolean; /** - * + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - timings: Record; + guidance_scale?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - seed: number; + sync_mode?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The number of images to generate. Default value: `1` */ - has_nsfw_concepts: Array; + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; }; -export type FastSdxlControlnetCannyInput = { +export type InpaintingControlNetInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -7170,50 +8684,123 @@ export type FastSdxlControlnetCannyInput = { */ enable_safety_checker?: boolean; /** - * If set to true, the prompt will be expanded with additional prompts. + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; +}; +export type PhotomakerOutput = { + /** + * + */ + images: Array; + /** + * + */ + seed: number; +}; +export type PhotomakerInput = { + /** + * The URL of the image archive containing the images you want to use. + */ + image_archive_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The base pipeline to use for generating the image. Default value: `"photomaker"` + */ + base_pipeline?: "photomaker" | "photomaker-style"; + /** + * Optional initial image for img2img + */ + initial_image_url?: string | Blob | File; + /** + * How much noise to add to the latent image. O for no noise, 1 for maximum noise. Default value: `0.5` + */ + initial_image_strength?: number; + /** + * Default value: `"Photographic"` + */ + style?: + | "(No style)" + | "Cinematic" + | "Disney Character" + | "Digital Art" + | "Photographic" + | "Fantasy art" + | "Neonpunk" + | "Enhance" + | "Comic book" + | "Lowpoly" + | "Line art"; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). 
Default value: `""` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `50` */ - expand_prompt?: boolean; -}; -export type FastSdxlControlnetCannyImageToImageOutput = { + num_inference_steps?: number; /** - * The generated image files info. + * Default value: `20` */ - images: Array; + style_strength?: number; /** - * + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` */ - timings: Record; + num_images?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ - seed: number; + guidance_scale?: number; /** - * Whether the generated images contain NSFW concepts. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - has_nsfw_concepts: Array; + seed?: number; }; -export type FastSdxlControlnetCannyImageToImageInput = { +export type LcmInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The model to use for generating the image. Default value: `"sdv1-5"` */ - prompt: string; + model?: "sdxl" | "sdv1-5"; /** - * The URL of the control image. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - control_image_url: string | Blob | File; + prompt: string; /** - * The scale of the controlnet conditioning. Default value: `0.5` + * The base image to use for guiding the image generation on image-to-image + * generations. If the either width or height of the image is larger than 1024 + * pixels, the image will be resized to 1024 pixels while keeping the aspect ratio. */ - controlnet_conditioning_scale?: number; + image_url?: string | Blob | File; /** - * The URL of the image to use as a starting point for the generation. + * The mask to use for guiding the image generation on image + * inpainting. The model will focus on the mask area and try to fill it with + * the most relevant content. + * + * The mask must be a black and white image where the white area is the area + * that needs to be filled and the black area is the area that should be + * ignored. + * + * The mask must have the same dimensions as the image passed as `image_url`. */ - image_url: string | Blob | File; + mask_url?: string | Blob | File; /** - * The URL of the mask to use for inpainting. + * The strength of the image that is passed as `image_url`. The strength + * determines how much the generated image will be similar to the image passed as + * `image_url`. The higher the strength the more model gets "creative" and + * generates an image that's different from the initial image. A strength of 1.0 + * means that the initial image is more or less ignored and the model will try to + * generate an image that's as close as possible to the prompt. Default value: `0.8` */ - mask_url: string | Blob | File; + strength?: number; /** * The negative prompt to use.Use it to address details that you don't want * in the image. 
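// ---------------------------------------------------------------------------
// Illustrative aside: PhotomakerInput above expects a zip archive of reference
// photos plus a prompt. A hedged usage sketch; the endpoint ID
// "fal-ai/photomaker" and all argument values are assumptions for the example.
// ---------------------------------------------------------------------------
import { createFalClient } from "@fal-ai/client";

const fal = createFalClient();

async function photomakerExample() {
  const { data } = await fal.subscribe("fal-ai/photomaker", {
    input: {
      image_archive_url: "https://example.com/reference-faces.zip", // required archive of images
      prompt: "portrait photo of a person in an office",
      style: "Photographic", // default style preset
      num_images: 1,
    },
  });
  console.log(data.images, data.seed); // PhotomakerOutput fields
}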
This could be colors, objects, scenery and even the small details @@ -7221,7 +8808,28 @@ export type FastSdxlControlnetCannyImageToImageInput = { */ negative_prompt?: string; /** - * The size of the generated image. Leave it none to automatically infer from the control image. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `4` + */ + num_inference_steps?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. + * + * If not provided: + * - For text-to-image generations, the default size is 512x512. + * - For image-to-image generations, the default size is the same as the input image. + * - For inpainting generations, the default size is the same as the input image. */ image_size?: | ImageSize @@ -7232,47 +8840,47 @@ export type FastSdxlControlnetCannyImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `25` - */ - num_inference_steps?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - guidance_scale?: number; + sync_mode?: boolean; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of images to generate. The function will return a list of images + * with the same prompt and negative prompt but different seeds. Default value: `1` */ - strength?: number; + num_images?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * If set to true, the resulting image will be checked whether it includes any + * potentially unsafe content. If it does, it will be replaced with a black + * image. Default value: `true` */ - seed?: number; + enable_safety_checks?: boolean; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - sync_mode?: boolean; + request_id?: string; /** - * The number of images to generate. Default value: `1` + * If set to true, the inpainting pipeline will only inpaint the provided mask + * area. Only effective for inpainting pipelines. */ - num_images?: number; + inpaint_mask_only?: boolean; /** - * The list of LoRA weights to use. Default value: `` + * If set to true, the inpainting pipeline will use controlnet inpainting. 
+ * Only effective for inpainting pipelines. */ - loras?: Array<LoraWeight>; + controlnet_inpaint?: boolean; /** - * If set to true, the safety checker will be enabled. + * The url of the lora server to use for image generation. */ - enable_safety_checker?: boolean; + lora_url?: string | Blob | File; /** - * If set to true, the prompt will be expanded with additional prompts. + * The scale of the lora server to use for image generation. Default value: `1` */ - expand_prompt?: boolean; + lora_scale?: number; }; -export type FastSdxlControlnetCannyInpaintingOutput = { +export type LcmOutput = { /** * The generated image files info. */ @@ -7280,38 +8888,42 @@ export type FastSdxlControlnetCannyInpaintingInput = { images: Array<Image>; /** * */ - timings: Record<string, number>; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. */ seed: number; /** - * Whether the generated images contain NSFW concepts. + * Number of inference steps used to generate the image. It will be the same value of the one passed in the + * input or the default one in case none was passed. Default value: `4` */ - has_nsfw_concepts: Array<boolean>; -}; -export type FastSdxlControlnetCannyInpaintingInput = { + num_inference_steps?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - prompt: string; + request_id?: string; /** - * The URL of the control image. + * A list of booleans indicating whether the generated image contains any + * potentially unsafe content. If the safety check is disabled, this field + * will all be false. */ - control_image_url: string | Blob | File; + nsfw_content_detected: Array<boolean>; +}; +export type LcmSd15I2iInput = { /** - * The scale of the controlnet conditioning. Default value: `0.5` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - controlnet_conditioning_scale?: number; + prompt: string; /** - * The URL of the image to use as a starting point for the generation. + * The image to use as a base. */ image_url: string | Blob | File; /** - * The URL of the mask to use for inpainting. + * The strength of the image. Default value: `0.8` */ - mask_url: string | Blob | File; + strength?: number; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). */ negative_prompt?: string; /** - * The size of the generated image. Leave it none to automatically infer from the control image. - */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The number of inference steps to perform. Default value: `25` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - num_inference_steps?: number; + seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you.
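// ---------------------------------------------------------------------------
// Illustrative aside: LcmSd15I2iInput above drives a latent-consistency
// image-to-image call that completes in very few steps. A hedged sketch; the
// endpoint ID "fal-ai/lcm-sd15-i2i" is inferred from the type name and is an
// assumption, as are the argument values.
// ---------------------------------------------------------------------------
import { createFalClient } from "@fal-ai/client";

const fal = createFalClient();

async function lcmImageToImageExample() {
  const { data } = await fal.subscribe("fal-ai/lcm-sd15-i2i", {
    input: {
      prompt: "an oil painting of a lighthouse at dusk",
      image_url: "https://example.com/base.png", // the image to use as a base
      strength: 0.8, // default strength
      num_inference_steps: 4, // LCM needs very few steps (default 4)
      guidance_scale: 1, // default CFG scale for LCM
    },
  });
  // Output carries the generated images plus per-image safety flags.
  console.log(data.images, data.nsfw_content_detected);
}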
Default value: `1` */ guidance_scale?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The number of inference steps to use for generating the image. The more steps + * the better the image will be but it will also take longer to generate. Default value: `4` */ - seed?: number; + num_inference_steps?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -7354,31 +8952,58 @@ export type FastSdxlControlnetCannyInpaintingInput = { */ sync_mode?: boolean; /** - * The number of images to generate. Default value: `1` + * The number of images to generate. The function will return a list of images + * with the same prompt and negative prompt but different seeds. Default value: `1` */ num_images?: number; /** - * The list of LoRA weights to use. Default value: `` + * If set to true, the resulting image will be checked whether it includes any + * potentially unsafe content. If it does, it will be replaced with a black + * image. Default value: `true` */ - loras?: Array<LoraWeight>; + enable_safety_checks?: boolean; /** - * If set to true, the safety checker will be enabled. + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - enable_safety_checker?: boolean; + request_id?: string; +}; +export type LcmSd15I2iOutput = { /** - * If set to true, the prompt will be expanded with additional prompts. + * The generated image files info. */ - expand_prompt?: boolean; -}; -export type InpaintInput = { + images: Array<Image>; /** - * URL or HuggingFace ID of the base model to generate the image. + * */ - model_name: string; + timings: any; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - prompt: string; + seed: number; + /** + * Number of inference steps used to generate the image. It will be the same value of the one passed in the + * input or the default one in case none was passed. Default value: `4` + */ + num_inference_steps?: number; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; + /** + * A list of booleans indicating whether the generated image contains any + * potentially unsafe content. If the safety check is disabled, this field + * will contain `false` for each generated image. + */ + nsfw_content_detected: Array<boolean>; +}; +export type FooocusInpaintInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + */ + prompt?: string; /** * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * Input image for img2img or inpaint mode - */ - image_url: string | Blob | File; - /** - * Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted. + * The style to use.
Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` */ - mask_url: string | Blob | File; + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" + | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + | "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | "Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral 
Aura" + | "Avant Garde" + | "Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | "Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `30` + * You can choose Speed or Quality Default value: `"Extreme Speed"` */ - num_inference_steps?: number; + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. 
Default value: `7.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ guidance_scale?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` */ - seed?: number; -}; -export type InpaintOutput = { + sharpness?: number; /** - * The generated image files info. + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` */ - image: Image; + aspect_ratio?: string; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Number of images to generate in one request Default value: `1` */ - seed: number; -}; -export type AnimatediffSparsectrlLcmInput = { + num_images?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` */ - prompt: string; + loras?: Array; /** - * The negative prompt to use. Use it to specify what you don't want. Default value: `""` + * Refiner (SDXL or SD 1.5) Default value: `"None"` */ - negative_prompt?: string; + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** - * The type of controlnet to use for generating the video. The controlnet determines how the video will be animated. Default value: `"scribble"` + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ - controlnet_type?: "scribble" | "rgb"; + refiner_switch?: number; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps to generate your final result which can increase the amount of detail in your image. Default value: `4` + * The format of the generated image. Default value: `"jpeg"` */ - num_inference_steps?: number; + output_format?: "png" | "jpeg" | "webp"; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - guidance_scale?: number; + sync_mode?: boolean; /** - * The same seed and the same prompt given to the same version of Stable - * Diffusion will output the same image every time. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ seed?: number; /** - * The URL of the first keyframe to use for the generation. - */ - keyframe_0_image_url?: string | Blob | File; - /** - * The frame index of the first keyframe to use for the generation. - */ - keyframe_0_index?: number; - /** - * The URL of the second keyframe to use for the generation. 
- */ - keyframe_1_image_url?: string | Blob | File; - /** - * The frame index of the second keyframe to use for the generation. + * The image to use as a reference for inpainting. */ - keyframe_1_index?: number; + inpaint_image_url: string | Blob | File; /** - * The URL of the third keyframe to use for the generation. + * The image to use as a mask for the generated image. */ - keyframe_2_image_url?: string | Blob | File; + mask_image_url?: string | Blob | File; /** - * The frame index of the third keyframe to use for the generation. + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` */ - keyframe_2_index?: number; -}; -export type AnimatediffSparsectrlLcmOutput = { + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; /** - * Generated video file. + * Describe what you want to inpaint. Default value: `""` */ - video: File; + inpaint_additional_prompt?: string; /** - * The seed used to generate the video. + * The directions to outpaint. Default value: `` */ - seed: number; -}; -export type PulidInput = { + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; /** - * List of reference faces, ideally 4 images. + * If set to true, the advanced inpaint options ('inpaint_disable_initial_latent', + * 'inpaint_engine', 'inpaint_strength', 'inpaint_respective_field', + * 'inpaint_erode_or_dilate') will be overridden. + * Otherwise, the default values will be used. */ - reference_images: Array; + override_inpaint_options?: boolean; /** - * Prompt to generate the face from + * If set to true, the initial preprocessing will be disabled. */ - prompt: string; + inpaint_disable_initial_latent?: boolean; /** - * Negative prompt to generate the face from Default value: `"flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, low resolution, partially rendered objects, deformed or partially rendered eyes, deformed, deformed eyeballs, cross-eyed,blurry"` + * Version of Fooocus inpaint model Default value: `"v2.6"` */ - negative_prompt?: string; + inpaint_engine?: "None" | "v1" | "v2.5" | "v2.6"; /** - * Number of images to generate Default value: `1` + * Same as the denoising strength in A1111 inpaint. Only used in inpaint, not + * used in outpaint. (Outpaint always use 1.0) Default value: `1` */ - num_images?: number; + inpaint_strength?: number; /** - * Guidance scale Default value: `1.2` + * The area to inpaint. Value 0 is same as "Only Masked" in A1111. Value 1 is + * same as "Whole Image" in A1111. Only used in inpaint, not used in outpaint. + * (Outpaint always use 1.0) Default value: `0.618` */ - guidance_scale?: number; + inpaint_respective_field?: number; /** - * Number of steps to take Default value: `4` + * Positive value will make white area in the mask larger, negative value will + * make white area smaller. (default is 0, always process before any mask + * invert) */ - num_inference_steps?: number; + inpaint_erode_or_dilate?: number; /** - * Random seed for reproducibility + * If set to true, the mask will be inverted. 
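// ---------------------------------------------------------------------------
// Illustrative aside: a hedged sketch of a Fooocus inpainting request built
// from FooocusInpaintInput. The endpoint ID "fal-ai/fooocus/inpaint" is an
// assumption based on the type name; the image URLs are placeholders.
// ---------------------------------------------------------------------------
import { createFalClient } from "@fal-ai/client";

const fal = createFalClient();

async function fooocusInpaintExample() {
  const { data } = await fal.subscribe("fal-ai/fooocus/inpaint", {
    input: {
      prompt: "a red leather armchair",
      inpaint_image_url: "https://example.com/room.png", // required image to edit
      mask_image_url: "https://example.com/mask.png", // white = area to fill
      inpaint_mode: "Inpaint or Outpaint (default)", // default mode
      performance: "Extreme Speed", // default performance preset
    },
  });
  console.log(data.images); // generated image files info
}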
*/ - seed?: number; + invert_mask?: boolean; /** - * Size of the generated image Default value: `[object Object]` + * */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + image_prompt_1?: ImagePrompt; /** - * ID scale Default value: `0.8` + * */ - id_scale?: number; + image_prompt_2?: ImagePrompt; /** - * Mode of generation Default value: `"fidelity"` + * */ - mode?: "fidelity" | "extreme style"; + image_prompt_3?: ImagePrompt; /** - * if you want to mix two ID image, please turn this on, otherwise, turn this off + * */ - id_mix?: boolean; -}; -export type PulidOutput = { + image_prompt_4?: ImagePrompt; /** - * List of generated images + * Mixing Image Prompt and Inpaint */ - images: Array; + mixing_image_prompt_and_inpaint?: boolean; /** - * Random seed used for reproducibility + * If set to false, the safety checker will be disabled. Default value: `true` */ - seed: number; + enable_safety_checker?: boolean; }; -export type IpAdapterFaceIdInput = { - /** - * The model type to use. 1_5 is the default and is recommended for most use cases. Default value: `"1_5-v1"` - */ - model_type?: - | "1_5-v1" - | "1_5-v1-plus" - | "1_5-v2-plus" - | "SDXL-v1" - | "SDXL-v2-plus" - | "1_5-auraface-v1"; - /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. - */ - prompt: string; - /** - * An image of a face to match. If an image with a size of 640x640 is not provided, it will be scaled and cropped to that size. - */ - face_image_url?: string | Blob | File; +export type FooocusInput = { /** - * URL to zip archive with images of faces. The images embedding will be averaged to - * create a more accurate face id. + * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` */ - face_images_data_url?: string | Blob | File; + prompt?: string; /** - * The negative prompt to use.Use it to address details that you don't want + * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"` + * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. - */ - seed?: number; - /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` - */ - guidance_scale?: number; - /** - * The number of inference steps to use for generating the image. The more steps - * the better the image will be but it will also take longer to generate. Default value: `50` - */ - num_inference_steps?: number; - /** - * The number of samples for face id. The more samples the better the image will - * be but it will also take longer to generate. Default is 4. Default value: `4` - */ - num_samples?: number; - /** - * The width of the generated image. Default value: `512` - */ - width?: number; - /** - * The height of the generated image. Default value: `512` - */ - height?: number; - /** - * The size of the face detection model. 
The higher the number the more accurate - * the detection will be but it will also take longer to run. The higher the number the more - * likely it will fail to find a face as well. Lower it if you are having trouble - * finding a face in the image. Default value: `640` - */ - face_id_det_size?: number; - /** - * The URL to the base 1.5 model. Default is SG161222/Realistic_Vision_V4.0_noVAE Default value: `"SG161222/Realistic_Vision_V4.0_noVAE"` - */ - base_1_5_model_repo?: string; - /** - * The URL to the base SDXL model. Default is SG161222/RealVisXL_V3.0 Default value: `"SG161222/RealVisXL_V3.0"` + * The style to use. Default value: `Fooocus Enhance,Fooocus V2,Fooocus Sharp` */ - base_sdxl_model_repo?: string; -}; -export type IpAdapterFaceIdOutput = { + styles?: Array< + | "Fooocus V2" + | "Fooocus Enhance" + | "Fooocus Sharp" + | "Fooocus Semi Realistic" + | "Fooocus Masterpiece" + | "Fooocus Photograph" + | "Fooocus Negative" + | "Fooocus Cinematic" + | "SAI 3D Model" + | "SAI Analog Film" + | "SAI Anime" + | "SAI Cinematic" + | "SAI Comic Book" + | "SAI Craft Clay" + | "SAI Digital Art" + | "SAI Enhance" + | "SAI Fantasy Art" + | "SAI Isometric" + | "SAI Line Art" + | "SAI Lowpoly" + | "SAI Neonpunk" + | "SAI Origami" + | "SAI Photographic" + | "SAI Pixel Art" + | "SAI Texture" + | "MRE Cinematic Dynamic" + | "MRE Spontaneous Picture" + | "MRE Artistic Vision" + | "MRE Dark Dream" + | "MRE Gloomy Art" + | "MRE Bad Dream" + | "MRE Underground" + | "MRE Surreal Painting" + | "MRE Dynamic Illustration" + | "MRE Undead Art" + | "MRE Elemental Art" + | "MRE Space Art" + | "MRE Ancient Illustration" + | "MRE Brave Art" + | "MRE Heroic Fantasy" + | "MRE Dark Cyberpunk" + | "MRE Lyrical Geometry" + | "MRE Sumi E Symbolic" + | "MRE Sumi E Detailed" + | "MRE Manga" + | "MRE Anime" + | "MRE Comic" + | "Ads Advertising" + | "Ads Automotive" + | "Ads Corporate" + | "Ads Fashion Editorial" + | "Ads Food Photography" + | "Ads Gourmet Food Photography" + | "Ads Luxury" + | "Ads Real Estate" + | "Ads Retail" + | "Artstyle Abstract" + | "Artstyle Abstract Expressionism" + | "Artstyle Art Deco" + | "Artstyle Art Nouveau" + | "Artstyle Constructivist" + | "Artstyle Cubist" + | "Artstyle Expressionist" + | "Artstyle Graffiti" + | "Artstyle Hyperrealism" + | "Artstyle Impressionist" + | "Artstyle Pointillism" + | "Artstyle Pop Art" + | "Artstyle Psychedelic" + | "Artstyle Renaissance" + | "Artstyle Steampunk" + | "Artstyle Surrealist" + | "Artstyle Typography" + | "Artstyle Watercolor" + | "Futuristic Biomechanical" + | "Futuristic Biomechanical Cyberpunk" + | "Futuristic Cybernetic" + | "Futuristic Cybernetic Robot" + | "Futuristic Cyberpunk Cityscape" + | "Futuristic Futuristic" + | "Futuristic Retro Cyberpunk" + | "Futuristic Retro Futurism" + | "Futuristic Sci Fi" + | "Futuristic Vaporwave" + | "Game Bubble Bobble" + | "Game Cyberpunk Game" + | "Game Fighting Game" + | "Game Gta" + | "Game Mario" + | "Game Minecraft" + | "Game Pokemon" + | "Game Retro Arcade" + | "Game Retro Game" + | "Game Rpg Fantasy Game" + | "Game Strategy Game" + | "Game Streetfighter" + | "Game Zelda" + | "Misc Architectural" + | "Misc Disco" + | "Misc Dreamscape" + | "Misc Dystopian" + | "Misc Fairy Tale" + | "Misc Gothic" + | "Misc Grunge" + | "Misc Horror" + | "Misc Kawaii" + | "Misc Lovecraftian" + | "Misc Macabre" + | "Misc Manga" + | "Misc Metropolis" + | "Misc Minimalist" + | "Misc Monochrome" + | "Misc Nautical" + | "Misc Space" + | "Misc Stained Glass" + | "Misc Techwear Fashion" + | "Misc Tribal" + | "Misc Zentangle" 
+ | "Papercraft Collage" + | "Papercraft Flat Papercut" + | "Papercraft Kirigami" + | "Papercraft Paper Mache" + | "Papercraft Paper Quilling" + | "Papercraft Papercut Collage" + | "Papercraft Papercut Shadow Box" + | "Papercraft Stacked Papercut" + | "Papercraft Thick Layered Papercut" + | "Photo Alien" + | "Photo Film Noir" + | "Photo Glamour" + | "Photo Hdr" + | "Photo Iphone Photographic" + | "Photo Long Exposure" + | "Photo Neon Noir" + | "Photo Silhouette" + | "Photo Tilt Shift" + | "Cinematic Diva" + | "Abstract Expressionism" + | "Academia" + | "Action Figure" + | "Adorable 3D Character" + | "Adorable Kawaii" + | "Art Deco" + | "Art Nouveau" + | "Astral Aura" + | "Avant Garde" + | "Baroque" + | "Bauhaus Style Poster" + | "Blueprint Schematic Drawing" + | "Caricature" + | "Cel Shaded Art" + | "Character Design Sheet" + | "Classicism Art" + | "Color Field Painting" + | "Colored Pencil Art" + | "Conceptual Art" + | "Constructivism" + | "Cubism" + | "Dadaism" + | "Dark Fantasy" + | "Dark Moody Atmosphere" + | "Dmt Art Style" + | "Doodle Art" + | "Double Exposure" + | "Dripping Paint Splatter Art" + | "Expressionism" + | "Faded Polaroid Photo" + | "Fauvism" + | "Flat 2d Art" + | "Fortnite Art Style" + | "Futurism" + | "Glitchcore" + | "Glo Fi" + | "Googie Art Style" + | "Graffiti Art" + | "Harlem Renaissance Art" + | "High Fashion" + | "Idyllic" + | "Impressionism" + | "Infographic Drawing" + | "Ink Dripping Drawing" + | "Japanese Ink Drawing" + | "Knolling Photography" + | "Light Cheery Atmosphere" + | "Logo Design" + | "Luxurious Elegance" + | "Macro Photography" + | "Mandola Art" + | "Marker Drawing" + | "Medievalism" + | "Minimalism" + | "Neo Baroque" + | "Neo Byzantine" + | "Neo Futurism" + | "Neo Impressionism" + | "Neo Rococo" + | "Neoclassicism" + | "Op Art" + | "Ornate And Intricate" + | "Pencil Sketch Drawing" + | "Pop Art 2" + | "Rococo" + | "Silhouette Art" + | "Simple Vector Art" + | "Sketchup" + | "Steampunk 2" + | "Surrealism" + | "Suprematism" + | "Terragen" + | "Tranquil Relaxing Atmosphere" + | "Sticker Designs" + | "Vibrant Rim Light" + | "Volumetric Lighting" + | "Watercolor 2" + | "Whimsical And Playful" + | "Mk Chromolithography" + | "Mk Cross Processing Print" + | "Mk Dufaycolor Photograph" + | "Mk Herbarium" + | "Mk Punk Collage" + | "Mk Mosaic" + | "Mk Van Gogh" + | "Mk Coloring Book" + | "Mk Singer Sargent" + | "Mk Pollock" + | "Mk Basquiat" + | "Mk Andy Warhol" + | "Mk Halftone Print" + | "Mk Gond Painting" + | "Mk Albumen Print" + | "Mk Aquatint Print" + | "Mk Anthotype Print" + | "Mk Inuit Carving" + | "Mk Bromoil Print" + | "Mk Calotype Print" + | "Mk Color Sketchnote" + | "Mk Cibulak Porcelain" + | "Mk Alcohol Ink Art" + | "Mk One Line Art" + | "Mk Blacklight Paint" + | "Mk Carnival Glass" + | "Mk Cyanotype Print" + | "Mk Cross Stitching" + | "Mk Encaustic Paint" + | "Mk Embroidery" + | "Mk Gyotaku" + | "Mk Luminogram" + | "Mk Lite Brite Art" + | "Mk Mokume Gane" + | "Pebble Art" + | "Mk Palekh" + | "Mk Suminagashi" + | "Mk Scrimshaw" + | "Mk Shibori" + | "Mk Vitreous Enamel" + | "Mk Ukiyo E" + | "Mk Vintage Airline Poster" + | "Mk Vintage Travel Poster" + | "Mk Bauhaus Style" + | "Mk Afrofuturism" + | "Mk Atompunk" + | "Mk Constructivism" + | "Mk Chicano Art" + | "Mk De Stijl" + | "Mk Dayak Art" + | "Mk Fayum Portrait" + | "Mk Illuminated Manuscript" + | "Mk Kalighat Painting" + | "Mk Madhubani Painting" + | "Mk Pictorialism" + | "Mk Pichwai Painting" + | "Mk Patachitra Painting" + | "Mk Samoan Art Inspired" + | "Mk Tlingit Art" + | "Mk Adnate Style" + | 
"Mk Ron English Style" + | "Mk Shepard Fairey Style" + >; /** - * The generated image file info. + * You can choose Speed or Quality Default value: `"Extreme Speed"` */ - image: Image; + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ - seed: number; -}; -export type ImageutilsMarigoldDepthInput = { + guidance_scale?: number; /** - * Input image url. + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` */ - image_url: string | Blob | File; -}; -export type ImageutilsMarigoldDepthOutput = { + sharpness?: number; /** - * Combined image of all detected masks + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` */ - image?: Image; -}; -export type StableAudioInput = { + aspect_ratio?: string; /** - * The prompt to generate audio from + * Number of images to generate in one request Default value: `1` */ - prompt: string; + num_images?: number; /** - * The start point of the audio clip to generate + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` */ - seconds_start?: number; + loras?: Array; /** - * The duration of the audio clip to generate Default value: `30` + * Refiner (SDXL or SD 1.5) Default value: `"None"` */ - seconds_total?: number; + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; /** - * The number of steps to denoise the audio for Default value: `100` + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` */ - steps?: number; -}; -export type StableAudioOutput = { + refiner_switch?: number; /** - * The generated audio clip + * The format of the generated image. Default value: `"jpeg"` */ - audio_file: File; -}; -export type DiffusionEdgeInput = { + output_format?: "png" | "jpeg" | "webp"; /** - * The text prompt you would like to convert to speech. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - image_url: string | Blob | File; -}; -export type DiffusionEdgeOutput = { + sync_mode?: boolean; /** - * The generated image file info. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - image: Image; -}; -export type TriposrOutput = { + seed?: number; /** - * Generated 3D object file. + * The image to use as a reference for the generated image. */ - model_mesh: File; + control_image_url?: string | Blob | File; /** - * Inference timings. + * The type of image control Default value: `"PyraCanny"` */ - timings: Record; + control_type?: "ImagePrompt" | "PyraCanny" | "CPDS" | "FaceSwap"; /** - * Directory containing textures for the remeshed model. 
+ * The strength of the control image. Use it to control how much the generated image + * should look like the control image. Default value: `1` */ - remeshing_dir?: File; -}; -export type TriposrInput = { + control_image_weight?: number; /** - * Path for the image file to be processed. + * The stop at value of the control image. Use it to control how much the generated image + * should look like the control image. Default value: `1` */ - image_url: string | Blob | File; + control_image_stop_at?: number; /** - * Output format for the 3D model. Default value: `"glb"` + * The image to use as a reference for inpainting. */ - output_format?: "glb" | "obj"; + inpaint_image_url?: string | Blob | File; /** - * Whether to remove the background from the input image. Default value: `true` + * The image to use as a mask for the generated image. */ - do_remove_background?: boolean; + mask_image_url?: string | Blob | File; /** - * Ratio of the foreground image to the original image. Default value: `0.9` + * */ - foreground_ratio?: number; + mixing_image_prompt_and_inpaint?: boolean; /** - * Resolution of the marching cubes. Above 512 is not recommended. Default value: `256` + * If set to false, the safety checker will be disabled. Default value: `true` */ - mc_resolution?: number; + enable_safety_checker?: boolean; }; export type FooocusUpscaleOrVaryInput = { /** @@ -8058,10 +10153,24 @@ export type FooocusUpscaleOrVaryInput = { * will output the same image every time. */ seed?: number; + /** + * The image to upscale or vary. + */ + uov_image_url: string | Blob | File; + /** + * The method to use for upscaling or varying. Default value: `"Vary (Strong)"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; /** * */ - image_prompt_1: ImagePrompt; + image_prompt_1?: ImagePrompt; /** * */ @@ -8074,47 +10183,6 @@ export type FooocusUpscaleOrVaryInput = { * */ image_prompt_4?: ImagePrompt; - /** - * The image to use as a reference for inpainting. - */ - inpaint_image_url?: string | Blob | File; - /** - * The image to use as a mask for the generated image. - */ - mask_image_url?: string | Blob | File; - /** - * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` - */ - inpaint_mode?: - | "Inpaint or Outpaint (default)" - | "Improve Detail (face, hand, eyes, etc.)" - | "Modify Content (add objects, change background, etc.)"; - /** - * Describe what you want to inpaint. Default value: `""` - */ - inpaint_additional_prompt?: string; - /** - * The directions to outpaint. Default value: `` - */ - outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; - /** - * Mixing Image Prompt and Inpaint - */ - mixing_image_prompt_and_inpaint?: boolean; - /** - * The image to upscale or vary. - */ - uov_image_url?: string | Blob | File; - /** - * The method to use for upscaling or varying. Default value: `"Disabled"` - */ - uov_method?: - | "Disabled" - | "Vary (Subtle)" - | "Vary (Strong)" - | "Upscale (1.5x)" - | "Upscale (2x)" - | "Upscale (Fast 2x)"; /** * Mixing Image Prompt and Vary/Upscale */ @@ -8124,20 +10192,6 @@ export type FooocusUpscaleOrVaryInput = { */ enable_safety_checker?: boolean; }; -export type FooocusUpscaleOrVaryOutput = { - /** - * The generated image file info. - */ - images: Array; - /** - * The time taken for the generation process. - */ - timings: Record; - /** - * Whether the generated images contain NSFW concepts. 
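// ---------------------------------------------------------------------------
// Illustrative aside: the hunk above makes uov_image_url a required field of
// FooocusUpscaleOrVaryInput and changes the uov_method default to
// "Vary (Strong)". A hedged sketch; the endpoint ID
// "fal-ai/fooocus/upscale-or-vary" is an assumption based on the type name.
// ---------------------------------------------------------------------------
import { createFalClient } from "@fal-ai/client";

const fal = createFalClient();

async function fooocusUpscaleOrVaryExample() {
  const { data } = await fal.subscribe("fal-ai/fooocus/upscale-or-vary", {
    input: {
      uov_image_url: "https://example.com/photo.png", // now required
      uov_method: "Upscale (2x)", // or omit for the "Vary (Strong)" default
      prompt: "", // optional; defaults to an empty prompt
    },
  });
  console.log(data.images);
}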
- */ - has_nsfw_concepts: Array; -}; export type FooocusImagePromptInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` @@ -8432,138 +10486,1732 @@ export type FooocusImagePromptInput = { | "Mk Shepard Fairey Style" >; /** - * You can choose Speed or Quality Default value: `"Extreme Speed"` + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * + */ + image_prompt_1: ImagePrompt; + /** + * + */ + image_prompt_2?: ImagePrompt; + /** + * + */ + image_prompt_3?: ImagePrompt; + /** + * + */ + image_prompt_4?: ImagePrompt; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + */ + inpaint_mode?: + | "Inpaint or Outpaint (default)" + | "Improve Detail (face, hand, eyes, etc.)" + | "Modify Content (add objects, change background, etc.)"; + /** + * Describe what you want to inpaint. Default value: `""` + */ + inpaint_additional_prompt?: string; + /** + * The directions to outpaint. Default value: `` + */ + outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + /** + * Mixing Image Prompt and Inpaint + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * The image to upscale or vary. + */ + uov_image_url?: string | Blob | File; + /** + * The method to use for upscaling or varying. 
Default value: `"Disabled"` + */ + uov_method?: + | "Disabled" + | "Vary (Subtle)" + | "Vary (Strong)" + | "Upscale (1.5x)" + | "Upscale (2x)" + | "Upscale (Fast 2x)"; + /** + * Mixing Image Prompt and Vary/Upscale + */ + mixing_image_prompt_and_vary_upscale?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusOutput = { + /** + * The generated image file info. + */ + images: Array; + /** + * The time taken for the generation process. + */ + timings: any; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type AnimateDiffV2VTurboInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type AnimatediffV2vInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7` + */ + guidance_scale?: number; + /** + * Base model to use for animation generation. Default value: `"cardosAnimev20"` + */ + base_model?: "darkSushiMixMix_colorful" | "cardosAnimev20"; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * Select every Nth frame from the video. 
+ * This can be used to reduce the number of frames to process, which can reduce the time and the cost. + * However, it can also reduce the quality of the final video. Default value: `2` + */ + select_every_nth_frame?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; +}; +export type AnimatediffV2vOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; + /** + * + */ + timings: any; +}; +export type AnimateDiffV2VTurboOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; + /** + * + */ + timings: any; +}; +export type AnimatediffV2vTurboInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `8` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2.2` + */ + guidance_scale?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Select every Nth frame from the video. + * This can be used to reduce the number of frames to process, which can reduce the time and the cost. + * However, it can also reduce the quality of the final video. Default value: `2` + */ + select_every_nth_frame?: number; +}; +export type AnimateDiffV2VInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. 
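// ---------------------------------------------------------------------------
// Illustrative aside: a hedged sketch of a turbo video-to-video call shaped by
// AnimatediffV2vTurboInput above. The endpoint ID "fal-ai/animatediff-v2v/turbo"
// is inferred from the type name and is an assumption, as are the values.
// ---------------------------------------------------------------------------
import { createFalClient } from "@fal-ai/client";

const fal = createFalClient();

async function animatediffV2vTurboExample() {
  const { data } = await fal.subscribe("fal-ai/animatediff-v2v/turbo", {
    input: {
      video_url: "https://example.com/clip.mp4", // required source video
      prompt: "anime style, vibrant colors",
      num_inference_steps: 8, // turbo default
      select_every_nth_frame: 2, // trades quality for speed/cost (default 2)
    },
  });
  console.log(data.video, data.seed); // generated video file and seed used
}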
+ */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type AnimateDiffV2VOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type AnimatediffV2vTurboOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; + /** + * + */ + timings: any; +}; +export type AnimateDiffT2VTurboInput = { + /** + * The prompt to use for generating the video. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of frames to generate for the video. Default value: `16` + */ + num_frames?: number; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `4` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; + /** + * The size of the video to generate. Default value: `square` + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; +}; +export type FastAnimatediffTextToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTextToVideoInput = { + /** + * The prompt to use for generating the video. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of frames to generate for the video. Default value: `16` + */ + num_frames?: number; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. 
Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; + /** + * The size of the video to generate. Default value: `square` + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; +}; +export type FastAnimatediffVideoToVideoInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; +}; +export type AnimateDiffT2VOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type AnimateDiffT2VInput = { + /** + * The prompt to use for generating the video. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of frames to generate for the video. Default value: `16` + */ + num_frames?: number; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; + /** + * The size of the video to generate. 
Default value: `square` + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; +}; +export type FastAnimatediffVideoToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTurboTextToVideoInput = { + /** + * The prompt to use for generating the video. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of frames to generate for the video. Default value: `16` + */ + num_frames?: number; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `4` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1` + */ + guidance_scale?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * Number of frames per second to extract from the video. Default value: `8` + */ + fps?: number; + /** + * The motions to apply to the video. + */ + motions?: Array< + "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down" + >; + /** + * The size of the video to generate. Default value: `square` + */ + video_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; +}; +export type FastAnimatediffTurboTextToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTurboVideoToVideoOutput = { + /** + * Generated video file. + */ + video: File; + /** + * Seed used for generating the video. + */ + seed: number; +}; +export type FastAnimatediffTurboVideoToVideoInput = { + /** + * URL of the video. + */ + video_url: string | Blob | File; + /** + * The first N number of seconds of video to animate. Default value: `3` + */ + first_n_seconds?: number; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `"(bad quality, worst quality:1.2), ugly faces, bad anime"` + */ + negative_prompt?: string; + /** + * The number of inference steps to perform. 4-12 is recommended for turbo mode. Default value: `8` + */ + num_inference_steps?: number; + /** + * The strength of the input video in the final output. Default value: `0.7` + */ + strength?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. 
Default value: `1`
+   */
+  guidance_scale?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * Number of frames per second to extract from the video. Default value: `8`
+   */
+  fps?: number;
+  /**
+   * The motions to apply to the video.
+   */
+  motions?: Array<
+    "zoom-out" | "zoom-in" | "pan-left" | "pan-right" | "tilt-up" | "tilt-down"
+  >;
+};
+export type IllusionDiffusionOutput = {
+  /**
+   * The generated image file info.
+   */
+  image: Image;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+};
+export type IllusionDiffusionInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * The scale of the ControlNet. Default value: `1`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   *
+   */
+  control_guidance_start?: number;
+  /**
+   * Default value: `1`
+   */
+  control_guidance_end?: number;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed?: number;
+  /**
+   * Scheduler / sampler to use for the image denoising process. Default value: `"Euler"`
+   */
+  scheduler?: "DPM++ Karras SDE" | "Euler";
+  /**
+   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
+   * to generate your final result which can increase the amount of detail in your image. Default value: `40`
+   */
+  num_inference_steps?: number;
+  /**
+   * The size of the generated image. You can choose between some presets or
+   * custom height and width that **must be multiples of 8**. Default value: `square_hd`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+};
+export type MarigoldDepthMapInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * Number of denoising steps. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10`
+   */
+  num_inference_steps?: number;
+  /**
+   * Number of predictions to average over. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10`
+   */
+  ensemble_size?: number;
+  /**
+   * Maximum processing resolution. Defaults to `0`, which means it uses the size of the input image.
+   */
+  processing_res?: number;
+};
+export type MarigoldDepthMapOutput = {
+  /**
+   * The depth map.
+   */
+  image: Image;
+};
+export type RemoveBackgroundOutput = {
+  /**
+   * Background removed image.
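+   *
+   * @example
+   * // Hypothetical usage sketch, not part of the generated schema: the
+   * // endpoint id is illustrative and the call shape follows the demo apps
+   * // in this repo.
+   * const { data } = await fal.subscribe("fal-ai/imageutils/rembg", {
+   *   input: { image_url: "https://example.com/portrait.png" },
+   * });
+   * console.log(data.image.url); // the background-removed image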
+   */
+  image: Image;
+};
+export type UpscaleInput = {
+  /**
+   * Url to input image
+   */
+  image_url: string | Blob | File;
+  /**
+   * Rescaling factor Default value: `2`
+   */
+  scale?: number;
+  /**
+   * Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200
+   */
+  tile?: number;
+  /**
+   * Upscaling a face
+   */
+  face?: boolean;
+  /**
+   * Model to use for upscaling Default value: `"RealESRGAN_x4plus"`
+   */
+  model?:
+    | "RealESRGAN_x4plus"
+    | "RealESRGAN_x2plus"
+    | "RealESRGAN_x4plus_anime_6B"
+    | "RealESRGAN_x4_v3"
+    | "RealESRGAN_x4_wdn_v3"
+    | "RealESRGAN_x4_anime_v3";
+};
+export type ImageutilsDepthOutput = {
+  /**
+   * The depth map.
+   */
+  image: Image;
+};
+export type RemoveBackgroundInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * If set to true, the resulting image will be cropped to a bounding box around the subject
+   */
+  crop_to_bbox?: boolean;
+};
+export type ImageutilsDepthInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * a Default value: `6.283185307179586`
+   */
+  a?: number;
+  /**
+   * bg_th Default value: `0.1`
+   */
+  bg_th?: number;
+  /**
+   * depth_and_normal
+   */
+  depth_and_normal?: boolean;
+};
+export type UpscaleOutput = {
+  /**
+   * Upscaled image
+   */
+  image: Image;
+};
+export type NSFWImageDetectionOutput = {
+  /**
+   * The probability of the image being NSFW.
+   */
+  nsfw_probability: number;
+};
+export type SamInput = {
+  /**
+   * URL of the image to process
+   */
+  image_url: string | Blob | File;
+};
+export type SamOutput = {
+  /**
+   * Image with SAM segmentation map
+   */
+  image: Image;
+};
+export type NSFWImageDetectionInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+};
+export type ImageutilsRembgOutput = {
+  /**
+   * Background removed image.
+   */
+  image: Image;
+};
+export type DepthMapOutput = {
+  /**
+   * The depth map.
+   */
+  image: Image;
+};
+export type ImageutilsRembgInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * If set to true, the resulting image will be cropped to a bounding box around the subject
+   */
+  crop_to_bbox?: boolean;
+};
+export type DepthMapInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+  /**
+   * a Default value: `6.283185307179586`
+   */
+  a?: number;
+  /**
+   * bg_th Default value: `0.1`
+   */
+  bg_th?: number;
+  /**
+   * depth_and_normal
+   */
+  depth_and_normal?: boolean;
+};
+export type EsrganOutput = {
+  /**
+   * Upscaled image
+   */
+  image: Image;
+};
+export type EsrganInput = {
+  /**
+   * Url to input image
+   */
+  image_url: string | Blob | File;
+  /**
+   * Rescaling factor Default value: `2`
+   */
+  scale?: number;
+  /**
+   * Tile size. Default is 0, that is no tile.
When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200
+   */
+  tile?: number;
+  /**
+   * Upscaling a face
+   */
+  face?: boolean;
+  /**
+   * Model to use for upscaling Default value: `"RealESRGAN_x4plus"`
+   */
+  model?:
+    | "RealESRGAN_x4plus"
+    | "RealESRGAN_x2plus"
+    | "RealESRGAN_x4plus_anime_6B"
+    | "RealESRGAN_x4_v3"
+    | "RealESRGAN_x4_wdn_v3"
+    | "RealESRGAN_x4_anime_v3";
+  /**
+   * Output image format (png or jpeg) Default value: `"png"`
+   */
+  output_format?: "png" | "jpeg";
+};
+export type ControlnetsdxlOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+};
+export type ControlnetsdxlInput = {
+  /**
+   * Url to input image
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The scale of the ControlNet. Default value: `0.5`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
+   * to generate your final result which can increase the amount of detail in your image. Default value: `50`
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+};
+export type FastSdxlControlnetCannyOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   *
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+};
+export type FastSdxlControlnetCannyInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The URL of the control image.
+   */
+  control_image_url: string | Blob | File;
+  /**
+   * The scale of the controlnet conditioning. Default value: `0.5`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it none to automatically infer from the control image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, DeepCache will be enabled.
TBD
+   */
+  enable_deep_cache?: boolean;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+};
+export type FastSdxlControlnetCannyImageToImageOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   *
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+};
+export type FastSdxlControlnetCannyImageToImageInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The URL of the control image.
+   */
+  control_image_url: string | Blob | File;
+  /**
+   * The scale of the controlnet conditioning. Default value: `0.5`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it none to automatically infer from the control image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * determines how much the generated image resembles the initial image Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use.
Default value: ``
+   */
+  loras?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+};
+export type TextToImageControlNetInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The URL of the control image.
+   */
+  control_image_url: string | Blob | File;
+  /**
+   * The scale of the controlnet conditioning. Default value: `0.5`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it none to automatically infer from the control image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, DeepCache will be enabled. TBD
+   */
+  enable_deep_cache?: boolean;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+};
+export type FastSdxlControlnetCannyInpaintingOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   *
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+};
+export type FastSdxlControlnetCannyInpaintingInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The URL of the control image.
+   */
+  control_image_url: string | Blob | File;
+  /**
+   * The scale of the controlnet conditioning. Default value: `0.5`
+   */
+  controlnet_conditioning_scale?: number;
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The URL of the mask to use for inpainting.
+   */
+  mask_url: string | Blob | File;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image.
This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it none to automatically infer from the control image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * determines how much the generated image resembles the initial image Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+};
+export type InpaintOutput = {
+  /**
+   * The generated image file info.
+   */
+  image: Image;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+};
+export type AnimatediffSparsectrlLcmInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to specify what you don't want. Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The type of controlnet to use for generating the video. The controlnet determines how the video will be animated. Default value: `"scribble"`
+   */
+  controlnet_type?: "scribble" | "rgb";
+  /**
+   * Increasing the amount of steps tells Stable Diffusion that it should take more steps to generate your final result which can increase the amount of detail in your image. Default value: `4`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `1`
+   */
+  guidance_scale?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable
+   * Diffusion will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The URL of the first keyframe to use for the generation.
+   */
+  keyframe_0_image_url?: string | Blob | File;
+  /**
+   * The frame index of the first keyframe to use for the generation.
+   */
+  keyframe_0_index?: number;
+  /**
+   * The URL of the second keyframe to use for the generation.
+   */
+  keyframe_1_image_url?: string | Blob | File;
+  /**
+   * The frame index of the second keyframe to use for the generation.
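+   *
+   * @example
+   * // Hypothetical sketch, not part of the generated schema: keyframe URLs
+   * // and frame indices are supplied in pairs; the values are illustrative.
+   * const input: AnimatediffSparsectrlLcmInput = {
+   *   prompt: "a campfire at night",
+   *   keyframe_0_image_url: "https://example.com/frame-0.png",
+   *   keyframe_0_index: 0,
+   *   keyframe_1_image_url: "https://example.com/frame-8.png",
+   *   keyframe_1_index: 8,
+   * };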
+   */
+  keyframe_1_index?: number;
+  /**
+   * The URL of the third keyframe to use for the generation.
+   */
+  keyframe_2_image_url?: string | Blob | File;
+  /**
+   * The frame index of the third keyframe to use for the generation.
+   */
+  keyframe_2_index?: number;
+};
+export type AnimatediffSparsectrlLcmOutput = {
+  /**
+   * Generated video file.
+   */
+  video: File;
+  /**
+   * The seed used to generate the video.
+   */
+  seed: number;
+};
+export type PulidInput = {
+  /**
+   * List of reference faces, ideally 4 images.
    */
-  performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning";
+  reference_images: Array;
   /**
-   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
-   * the model to stick to your prompt when looking for a related image to show you. Default value: `4`
+   * Prompt to generate the face from
+   */
+  prompt: string;
+  /**
+   * Negative prompt to generate the face from Default value: `"flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, low resolution, partially rendered objects, deformed or partially rendered eyes, deformed, deformed eyeballs, cross-eyed,blurry"`
+   */
+  negative_prompt?: string;
+  /**
+   * Number of images to generate Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * Guidance scale Default value: `1.2`
    */
   guidance_scale?: number;
   /**
-   * The sharpness of the generated image. Use it to control how sharp the generated
-   * image should be. Higher value means image and texture are sharper. Default value: `2`
+   * Number of steps to take Default value: `4`
    */
-  sharpness?: number;
+  num_inference_steps?: number;
   /**
-   * The size of the generated image. You can choose between some presets or
-   * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"`
+   * Random seed for reproducibility
    */
-  aspect_ratio?: string;
+  seed?: number;
   /**
-   * Number of images to generate in one request Default value: `1`
+   * Size of the generated image Default value: `[object Object]`
    */
-  num_images?: number;
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
   /**
-   * The LoRAs to use for the image generation. You can use up to 5 LoRAs
-   * and they will be merged together to generate the final image. Default value: `[object Object]`
+   * ID scale Default value: `0.8`
    */
-  loras?: Array;
+  id_scale?: number;
   /**
-   * Refiner (SDXL or SD 1.5) Default value: `"None"`
+   * Mode of generation Default value: `"fidelity"`
    */
-  refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors";
+  mode?: "fidelity" | "extreme style";
   /**
-   * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models
-   * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8`
+   * If you want to mix two ID images, please turn this on; otherwise, turn it off
    */
-  refiner_switch?: number;
+  id_mix?: boolean;
+};
+export type PulidOutput = {
   /**
-   * The format of the generated image. Default value: `"jpeg"`
+   * List of generated images
    */
-  output_format?: "png" | "jpeg" | "webp";
+  images: Array;
   /**
-   * If set to true, the function will wait for the image to be generated and uploaded
-   * before returning the response. This will increase the latency of the function but
-   * it allows you to get the image directly in the response without going through the CDN.
+   * Random seed used for reproducibility
    */
-  sync_mode?: boolean;
+  seed: number;
+};
+export type IpAdapterFaceIdInput = {
+  /**
+   * The model type to use. 1_5 is the default and is recommended for most use cases. Default value: `"1_5-v1"`
+   */
+  model_type?:
+    | "1_5-v1"
+    | "1_5-v1-plus"
+    | "1_5-v2-plus"
+    | "SDXL-v1"
+    | "SDXL-v2-plus"
+    | "1_5-auraface-v1";
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * An image of a face to match. If an image with a size of 640x640 is not provided, it will be scaled and cropped to that size.
+   */
+  face_image_url?: string | Blob | File;
+  /**
+   * URL to zip archive with images of faces. The images embedding will be averaged to
+   * create a more accurate face id.
+   */
+  face_images_data_url?: string | Blob | File;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `"blurry, low resolution, bad, ugly, low quality, pixelated, interpolated, compression artifacts, noisey, grainy"`
+   */
+  negative_prompt?: string;
   /**
    * The same seed and the same prompt given to the same version of Stable Diffusion
    * will output the same image every time.
    */
   seed?: number;
   /**
-   *
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
    */
-  image_prompt_1: ImagePrompt;
+  guidance_scale?: number;
   /**
-   *
+   * The number of inference steps to use for generating the image. The more steps
+   * the better the image will be but it will also take longer to generate. Default value: `50`
    */
-  image_prompt_2?: ImagePrompt;
+  num_inference_steps?: number;
   /**
-   *
+   * The number of samples for face id. The more samples the better the image will
+   * be but it will also take longer to generate. Default is 4. Default value: `4`
    */
-  image_prompt_3?: ImagePrompt;
+  num_samples?: number;
   /**
-   *
+   * The width of the generated image. Default value: `512`
    */
-  image_prompt_4?: ImagePrompt;
+  width?: number;
   /**
-   * The image to use as a reference for inpainting.
+   * The height of the generated image. Default value: `512`
    */
-  inpaint_image_url?: string | Blob | File;
+  height?: number;
   /**
-   * The image to use as a mask for the generated image.
+   * The size of the face detection model. The higher the number the more accurate
+   * the detection will be but it will also take longer to run. The higher the number the more
+   * likely it will fail to find a face as well. Lower it if you are having trouble
+   * finding a face in the image. Default value: `640`
    */
-  mask_image_url?: string | Blob | File;
+  face_id_det_size?: number;
   /**
-   * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"`
+   * The URL to the base 1.5 model. Default is SG161222/Realistic_Vision_V4.0_noVAE Default value: `"SG161222/Realistic_Vision_V4.0_noVAE"`
    */
-  inpaint_mode?:
-    | "Inpaint or Outpaint (default)"
-    | "Improve Detail (face, hand, eyes, etc.)"
-    | "Modify Content (add objects, change background, etc.)";
+  base_1_5_model_repo?: string;
   /**
-   * Describe what you want to inpaint. Default value: `""`
+   * The URL to the base SDXL model.
Default is SG161222/RealVisXL_V3.0 Default value: `"SG161222/RealVisXL_V3.0"`
    */
-  inpaint_additional_prompt?: string;
+  base_sdxl_model_repo?: string;
+};
+export type IpAdapterFaceIdOutput = {
   /**
-   * The directions to outpaint. Default value: ``
+   * The generated image file info.
    */
-  outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">;
+  image: Image;
   /**
-   * Mixing Image Prompt and Inpaint
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
    */
-  mixing_image_prompt_and_inpaint?: boolean;
+  seed: number;
+};
+export type ImageutilsMarigoldDepthInput = {
   /**
-   * The image to upscale or vary.
+   * Input image url.
    */
-  uov_image_url?: string | Blob | File;
+  image_url: string | Blob | File;
   /**
-   * The method to use for upscaling or varying. Default value: `"Disabled"`
+   * Number of denoising steps. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10`
    */
-  uov_method?:
-    | "Disabled"
-    | "Vary (Subtle)"
-    | "Vary (Strong)"
-    | "Upscale (1.5x)"
-    | "Upscale (2x)"
-    | "Upscale (Fast 2x)";
+  num_inference_steps?: number;
   /**
-   * Mixing Image Prompt and Vary/Upscale
+   * Number of predictions to average over. Defaults to `10`. The higher the number, the more accurate the result, but the slower the inference. Default value: `10`
    */
-  mixing_image_prompt_and_vary_upscale?: boolean;
+  ensemble_size?: number;
   /**
-   * If set to false, the safety checker will be disabled. Default value: `true`
+   * Maximum processing resolution. Defaults to `0`, which means it uses the size of the input image.
    */
-  enable_safety_checker?: boolean;
+  processing_res?: number;
 };
-export type FooocusImagePromptOutput = {
+export type ImageutilsMarigoldDepthOutput = {
+  /**
+   * The depth map.
+   */
+  image: Image;
+};
+export type StableAudioInput = {
+  /**
+   * The prompt to generate audio from
+   */
+  prompt: string;
+  /**
+   * The start point of the audio clip to generate
+   */
+  seconds_start?: number;
+  /**
+   * The duration of the audio clip to generate Default value: `30`
+   */
+  seconds_total?: number;
+  /**
+   * The number of steps to denoise the audio for Default value: `100`
+   */
+  steps?: number;
+};
+export type StableAudioOutput = {
+  /**
+   * The generated audio clip
+   */
+  audio_file: File;
+};
+export type DiffusionEdgeInput = {
+  /**
+   * Input image url.
+   */
+  image_url: string | Blob | File;
+};
+export type DiffusionEdgeOutput = {
   /**
    * The generated image file info.
    */
-  images: Array;
+  image: Image;
+};
+export type TriposrOutput = {
   /**
-   * The time taken for the generation process.
+   * Generated 3D object file.
    */
-  timings: Record;
+  model_mesh: File;
   /**
-   * Whether the generated images contain NSFW concepts.
+   * Inference timings.
+   */
+  timings: any;
+  /**
+   * Directory containing textures for the remeshed model.
+   */
+  remeshing_dir?: File;
+};
+export type RemeshingInput = {
+  /**
+   * Path for the object file to be remeshed.
+   */
+  object_url: string | Blob | File;
+  /**
+   * Output format for the 3D model.
Default value: `"glb"` + */ + output_format?: "glb" | "fbx" | "obj" | "stl" | "usdc"; + /** + * Number of faces for remesh Default value: `5000` + */ + faces?: number; + /** + * Merge duplicate vertices before exporting Default value: `true` + */ + merge?: boolean; + /** + * Preserve UVs during remeshing Default value: `true` + */ + preserve_uvs?: boolean; +}; +export type TriposrInput = { + /** + * Path for the image file to be processed. + */ + image_url: string | Blob | File; + /** + * Output format for the 3D model. Default value: `"glb"` + */ + output_format?: "glb" | "obj"; + /** + * Whether to remove the background from the input image. Default value: `true` + */ + do_remove_background?: boolean; + /** + * Ratio of the foreground image to the original image. Default value: `0.9` + */ + foreground_ratio?: number; + /** + * Resolution of the marching cubes. Above 512 is not recommended. Default value: `256` */ - has_nsfw_concepts: Array; + mc_resolution?: number; }; -export type FooocusInpaintInput = { +export type FooocusLegacyInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` */ @@ -8857,337 +12505,2261 @@ export type FooocusInpaintInput = { | "Mk Shepard Fairey Style" >; /** - * You can choose Speed or Quality Default value: `"Extreme Speed"` + * You can choose Speed or Quality Default value: `"Extreme Speed"` + */ + performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + */ + guidance_scale?: number; + /** + * The sharpness of the generated image. Use it to control how sharp the generated + * image should be. Higher value means image and texture are sharper. Default value: `2` + */ + sharpness?: number; + /** + * The size of the generated image. You can choose between some presets or + * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + */ + aspect_ratio?: string; + /** + * Number of images to generate in one request Default value: `1` + */ + num_images?: number; + /** + * The LoRAs to use for the image generation. You can use up to 5 LoRAs + * and they will be merged together to generate the final image. Default value: `[object Object]` + */ + loras?: Array; + /** + * Refiner (SDXL or SD 1.5) Default value: `"None"` + */ + refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + /** + * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models + * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + */ + refiner_switch?: number; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "png" | "jpeg" | "webp"; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The image to use as a reference for the generated image. 
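+   *
+   * @example
+   * // Hypothetical usage sketch, not part of the generated schema: a control
+   * // image paired with a `control_type`; the endpoint id is illustrative and
+   * // the call shape follows the demo apps in this repo.
+   * const { data } = await fal.subscribe("fal-ai/fooocus", {
+   *   input: {
+   *     prompt: "a watercolor fox",
+   *     control_image_url: "https://example.com/pose.png",
+   *     control_type: "PyraCanny",
+   *   },
+   * });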
+ */ + control_image_url?: string | Blob | File; + /** + * The type of image control Default value: `"PyraCanny"` + */ + control_type?: "ImagePrompt" | "PyraCanny" | "CPDS" | "FaceSwap"; + /** + * The strength of the control image. Use it to control how much the generated image + * should look like the control image. Default value: `1` + */ + control_image_weight?: number; + /** + * The stop at value of the control image. Use it to control how much the generated image + * should look like the control image. Default value: `1` + */ + control_image_stop_at?: number; + /** + * The image to use as a reference for inpainting. + */ + inpaint_image_url?: string | Blob | File; + /** + * The image to use as a mask for the generated image. + */ + mask_image_url?: string | Blob | File; + /** + * + */ + mixing_image_prompt_and_inpaint?: boolean; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FooocusUpscaleOrVaryOutput = { + /** + * The generated image file info. + */ + images: Array; + /** + * The time taken for the generation process. + */ + timings: any; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FooocusImagePromptOutput = { + /** + * The generated image file info. + */ + images: Array; + /** + * The time taken for the generation process. + */ + timings: any; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type FooocusInpaintOutput = { + /** + * The generated image file info. + */ + images: Array; + /** + * The time taken for the generation process. + */ + timings: any; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; +}; +export type RetoucherInput = { + /** + * The URL of the image to be retouched. + */ + image_url: string | Blob | File; + /** + * Seed for reproducibility. Different seeds will make slightly different results. + */ + seed?: number; +}; +export type RetoucherOutput = { + /** + * The generated image file info. + */ + image: Image; + /** + * The seed used for the generation. + */ + seed: number; +}; +export type AnyLlmOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; + /** + * Error message if an error occurred + */ + error?: string; +}; +export type AnyLlmInput = { + /** + * Name of the model to use. Premium models are charged at 10x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-5-haiku" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "meta-llama/llama-3.2-1b-instruct" + | "meta-llama/llama-3.2-3b-instruct" + | "meta-llama/llama-3.1-8b-instruct" + | "meta-llama/llama-3.1-70b-instruct" + | "openai/gpt-4o-mini" + | "openai/gpt-4o"; + /** + * Prompt to be used for the chat completion + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; +}; +export type VisionInput = { + /** + * Name of the model to use. Premium models are charged at 3x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. 
Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "openai/gpt-4o"; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; +}; +export type AnyLlmVisionOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; + /** + * Error message if an error occurred + */ + error?: string; +}; +export type ChatInput = { + /** + * Name of the model to use. Premium models are charged at 10x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-5-haiku" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "meta-llama/llama-3.2-1b-instruct" + | "meta-llama/llama-3.2-3b-instruct" + | "meta-llama/llama-3.1-8b-instruct" + | "meta-llama/llama-3.1-70b-instruct" + | "openai/gpt-4o-mini" + | "openai/gpt-4o"; + /** + * Prompt to be used for the chat completion + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; +}; +export type AnyLlmVisionInput = { + /** + * Name of the model to use. Premium models are charged at 3x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. 
Default value: `"google/gemini-flash-1.5"` + */ + model?: + | "anthropic/claude-3.5-sonnet" + | "anthropic/claude-3-haiku" + | "google/gemini-pro-1.5" + | "google/gemini-flash-1.5" + | "google/gemini-flash-1.5-8b" + | "openai/gpt-4o"; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * System prompt to provide context or instructions to the model + */ + system_prompt?: string; + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; +}; +export type Llavav1513bInput = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * Maximum number of tokens to generate Default value: `64` + */ + max_tokens?: number; + /** + * Temperature for sampling Default value: `0.2` + */ + temperature?: number; + /** + * Top P for sampling Default value: `1` + */ + top_p?: number; +}; +export type Llavav1513bOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; +}; +export type LlavaNextInput = { + /** + * URL of the image to be processed + */ + image_url: string | Blob | File; + /** + * Prompt to be used for the image + */ + prompt: string; + /** + * Maximum number of tokens to generate Default value: `64` + */ + max_tokens?: number; + /** + * Temperature for sampling Default value: `0.2` + */ + temperature?: number; + /** + * Top P for sampling Default value: `1` + */ + top_p?: number; +}; +export type LlavaNextOutput = { + /** + * Generated output + */ + output: string; + /** + * Whether the output is partial + */ + partial?: boolean; +}; +export type ImageutilsNsfwOutput = { + /** + * The probability of the image being NSFW. + */ + nsfw_probability: number; +}; +export type ImageutilsNsfwInput = { + /** + * Input image url. + */ + image_url: string | Blob | File; +}; +export type FastFooocusSdxlOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastFooocusSdxlInput = { + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `8` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
Default value: `2`
+   */
+  guidance_scale?: number;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+   */
+  safety_checker_version?: "v1" | "v2";
+  /**
+   * If set to true, the prompt will be expanded with additional prompts. Default value: `true`
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+  /**
+   * The rescale factor for the CFG.
+   */
+  guidance_rescale?: number;
+  /**
+   * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true`
+   */
+  enable_refiner?: boolean;
+};
+export type InpaintingFooocusInput = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The URL of the mask to use for inpainting.
+   */
+  mask_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Default value: `square_hd`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `8`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `2`
+   */
+  guidance_scale?: number;
+  /**
+   * determines how much the generated image resembles the initial image Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+   */
+  safety_checker_version?: "v1" | "v2";
+  /**
+   * If set to true, the prompt will be expanded with additional prompts. Default value: `true`
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+  /**
+   * The rescale factor for the CFG.
+   */
+  guidance_rescale?: number;
+  /**
+   * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true`
+   */
+  enable_refiner?: boolean;
+};
+export type ImageToImageFooocusInput = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
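+   *
+   * @example
+   * // Hypothetical usage sketch, not part of the generated schema: `strength`
+   * // balances this starting image against the prompt; the endpoint id is
+   * // illustrative and the call shape follows the demo apps in this repo.
+   * const { data } = await fal.subscribe("fal-ai/fast-fooocus-sdxl/image-to-image", {
+   *   input: {
+   *     image_url: "https://example.com/sketch.png",
+   *     prompt: "a detailed oil painting",
+   *     strength: 0.6,
+   *   },
+   * });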
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it none to automatically infer from the prompt image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `8`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `2`
+   */
+  guidance_scale?: number;
+  /**
+   * determines how much the generated image resembles the initial image Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+   */
+  safety_checker_version?: "v1" | "v2";
+  /**
+   * If set to true, the prompt will be expanded with additional prompts. Default value: `true`
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+  /**
+   * The rescale factor for the CFG.
+   */
+  guidance_rescale?: number;
+  /**
+   * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true`
+   */
+  enable_refiner?: boolean;
+};
+export type FastFooocusSdxlImageToImageOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   *
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+  /**
+   * The prompt used for generating the image.
+   */
+  prompt: string;
+};
+export type TextToImageFooocusInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Default value: `square_hd`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform.
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `2`
+   */
+  guidance_scale?: number;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+   */
+  safety_checker_version?: "v1" | "v2";
+  /**
+   * If set to true, the prompt will be expanded with additional prompts. Default value: `true`
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+  /**
+   * The rescale factor for the CFG.
+   */
+  guidance_rescale?: number;
+  /**
+   * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true`
+   */
+  enable_refiner?: boolean;
+};
+export type FastFooocusSdxlImageToImageInput = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Leave it unset to automatically infer it from the prompt image.
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `8`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `2`
+   */
+  guidance_scale?: number;
+  /**
+   * Determines how much the generated image resembles the initial image. Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"`
+   */
+  safety_checker_version?: "v1" | "v2";
+  /**
+   * If set to true, the prompt will be expanded with additional prompts. Default value: `true`
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The rescale factor for the CFG. + */ + guidance_rescale?: number; + /** + * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` + */ + enable_refiner?: boolean; +}; +export type FaceToStickerInput = { + /** + * URL of the video. + */ + image_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `20` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + */ + guidance_scale?: number; + /** + * The strength of the instant ID. Default value: `0.7` + */ + instant_id_strength?: number; + /** + * The weight of the IP adapter. Default value: `0.2` + */ + ip_adapter_weight?: number; + /** + * The amount of noise to add to the IP adapter. Default value: `0.5` + */ + ip_adapter_noise?: number; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * Whether to upscale the image 2x. + */ + upscale?: boolean; + /** + * The number of steps to use for upscaling. Only used if `upscale` is `true`. Default value: `10` + */ + upscale_steps?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to false, the safety checker will be disabled. Default value: `true` + */ + enable_safety_checker?: boolean; +}; +export type FaceToStickerOutput = { + /** + * The generated images. + */ + images: Array; + /** + * The generated face sticker image. + */ + sticker_image: Image; + /** + * The generated face sticker image with the background removed. + */ + sticker_image_background_removed: Image; + /** + * Seed used during the inference. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + * The key is the image type and the value is a boolean. 
+   */
+  has_nsfw_concepts: any;
+};
+export type MoondreamBatchedInput = {
+  /**
+   * Model ID to use for inference. Default value: `"vikhyatk/moondream2"`
+   */
+  model_id?: "vikhyatk/moondream2" | "fal-ai/moondream2-docci";
+  /**
+   * List of input prompts and image URLs
+   */
+  inputs: Array;
+  /**
+   * Maximum number of new tokens to generate. Default value: `64`
+   */
+  max_tokens?: number;
+  /**
+   * Temperature for sampling. Default value: `0.2`
+   */
+  temperature?: number;
+  /**
+   * Top P for sampling. Default value: `1`
+   */
+  top_p?: number;
+  /**
+   * Repetition penalty for sampling. Default value: `1`
+   */
+  repetition_penalty?: number;
+};
+export type MoondreamBatchedOutput = {
+  /**
+   * List of generated outputs
+   */
+  outputs: Array;
+  /**
+   * Whether the output is partial
+   */
+  partial?: boolean;
+  /**
+   * Timings for different parts of the process
+   */
+  timings: any;
+  /**
+   * Filenames of the images processed
+   */
+  filenames?: Array;
+};
+export type SadtalkerInput = {
+  /**
+   * URL of the source image
+   */
+  source_image_url: string | Blob | File;
+  /**
+   * URL of the driven audio
+   */
+  driven_audio_url: string | Blob | File;
+  /**
+   * The style of the pose
+   */
+  pose_style?: number;
+  /**
+   * The resolution of the face model. Default value: `"256"`
+   */
+  face_model_resolution?: "256" | "512";
+  /**
+   * The scale of the expression. Default value: `1`
+   */
+  expression_scale?: number;
+  /**
+   * The type of face enhancer to use
+   */
+  face_enhancer?: "gfpgan";
+  /**
+   * Whether to use still mode. Less head motion; works with preprocess `full`.
+   */
+  still_mode?: boolean;
+  /**
+   * The type of preprocessing to use. Default value: `"crop"`
+   */
+  preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull";
+};
+export type SadtalkerOutput = {
+  /**
+   * URL of the generated video
+   */
+  video: File;
+};
+export type SadTalkerRefVideoInput = {
+  /**
+   * URL of the source image
+   */
+  source_image_url: string | Blob | File;
+  /**
+   * URL of the driven audio
+   */
+  driven_audio_url: string | Blob | File;
+  /**
+   * URL of the reference video
+   */
+  reference_pose_video_url: string | Blob | File;
+  /**
+   * The style of the pose
+   */
+  pose_style?: number;
+  /**
+   * The resolution of the face model. Default value: `"256"`
+   */
+  face_model_resolution?: "256" | "512";
+  /**
+   * The scale of the expression. Default value: `1`
+   */
+  expression_scale?: number;
+  /**
+   * The type of face enhancer to use
+   */
+  face_enhancer?: "gfpgan";
+  /**
+   * Whether to use still mode. Less head motion; works with preprocess `full`.
+   */
+  still_mode?: boolean;
+  /**
+   * The type of preprocessing to use. Default value: `"crop"`
+   */
+  preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull";
+};
+export type MusetalkInput = {
+  /**
+   * URL of the source video
+   */
+  source_video_url: string | Blob | File;
+  /**
+   * URL of the audio
+   */
+  audio_url: string | Blob | File;
+};
+export type MusetalkOutput = {
+  /**
+   * The generated video file.
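+   *
+   * Hedged usage sketch (assumes a `fal` client from `createFalClient`; the
+   * endpoint id "fal-ai/musetalk" is an assumption, not taken from this file):
+   * @example
+   * ```ts
+   * const { data } = await fal.subscribe("fal-ai/musetalk", {
+   *   input: {
+   *     source_video_url: "https://example.com/talk.mp4",
+   *     audio_url: "https://example.com/voice.wav",
+   *   },
+   * });
+   * // `data.video` carries the generated file's info (e.g. its hosted URL).
+   * ```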
+   */
+  video: File;
+};
+export type SadTalkerInput = {
+  /**
+   * URL of the source image
+   */
+  source_image_url: string | Blob | File;
+  /**
+   * URL of the driven audio
+   */
+  driven_audio_url: string | Blob | File;
+  /**
+   * The style of the pose
+   */
+  pose_style?: number;
+  /**
+   * The resolution of the face model. Default value: `"256"`
+   */
+  face_model_resolution?: "256" | "512";
+  /**
+   * The scale of the expression. Default value: `1`
+   */
+  expression_scale?: number;
+  /**
+   * The type of face enhancer to use
+   */
+  face_enhancer?: "gfpgan";
+  /**
+   * Whether to use still mode. Less head motion; works with preprocess `full`.
+   */
+  still_mode?: boolean;
+  /**
+   * The type of preprocessing to use. Default value: `"crop"`
+   */
+  preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull";
+};
+export type SadtalkerReferenceOutput = {
+  /**
+   * URL of the generated video
+   */
+  video: File;
+};
+export type SadtalkerReferenceInput = {
+  /**
+   * URL of the source image
+   */
+  source_image_url: string | Blob | File;
+  /**
+   * URL of the driven audio
+   */
+  driven_audio_url: string | Blob | File;
+  /**
+   * URL of the reference video
+   */
+  reference_pose_video_url: string | Blob | File;
+  /**
+   * The style of the pose
+   */
+  pose_style?: number;
+  /**
+   * The resolution of the face model. Default value: `"256"`
+   */
+  face_model_resolution?: "256" | "512";
+  /**
+   * The scale of the expression. Default value: `1`
+   */
+  expression_scale?: number;
+  /**
+   * The type of face enhancer to use
+   */
+  face_enhancer?: "gfpgan";
+  /**
+   * Whether to use still mode. Less head motion; works with preprocess `full`.
+   */
+  still_mode?: boolean;
+  /**
+   * The type of preprocessing to use. Default value: `"crop"`
+   */
+  preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull";
+};
+export type LayerDiffusionInput = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""`
+   */
+  prompt?: string;
+  /**
+   * The prompt to use for generating the negative image. Be as descriptive as possible for best results. Default value: `"text, watermark"`
+   */
+  negative_prompt?: string;
+  /**
+   * The guidance scale for the model. Default value: `8`
+   */
+  guidance_scale?: number;
+  /**
+   * The number of inference steps for the model. Default value: `20`
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to false, the safety checker will be disabled. Default value: `true`
+   */
+  enable_safety_checker?: boolean;
+};
+export type LayerDiffusionOutput = {
+  /**
+   * The URL of the generated image.
+   */
+  image: Image;
+  /**
+   * The seed used to generate the image.
+   */
+  seed: number;
+};
+export type StableDiffusionV15Input = {
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Default value: `square`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+};
+export type StableDiffusionV15Output = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   * The timings of the different steps of the generation process.
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+  /**
+   * The prompt used for generating the image.
+   */
+  prompt: string;
+};
+export type InpaintingSD15Input = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The URL of the mask to use for inpainting.
+   */
+  mask_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Default value: `square`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * Determines how much the generated image resembles the initial image. Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
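+   *
+   * A minimal sketch of the trade-off (assumes a `fal` client from
+   * `createFalClient`; the endpoint id is illustrative, not taken from this file):
+   * @example
+   * ```ts
+   * const image_url = "https://example.com/damaged.png";
+   * const mask_url = "https://example.com/mask.png";
+   * // Slower request, but the image comes back inline rather than via the CDN:
+   * const { data } = await fal.subscribe("fal-ai/inpainting", {
+   *   input: { image_url, mask_url, prompt: "restore the missing corner", sync_mode: true },
+   * });
+   * ```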
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+};
+export type ImageToImageSD15Input = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * The size of the generated image. Default value: `square`
+   */
+  image_size?:
+    | ImageSize
+    | "square_hd"
+    | "square"
+    | "portrait_4_3"
+    | "portrait_16_9"
+    | "landscape_4_3"
+    | "landscape_16_9";
+  /**
+   * The number of inference steps to perform. Default value: `25`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * Determines how much the generated image resembles the initial image. Default value: `0.95`
+   */
+  strength?: number;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * If set to true, the function will wait for the image to be generated and uploaded
+   * before returning the response. This will increase the latency of the function but
+   * it allows you to get the image directly in the response without going through the CDN.
+   */
+  sync_mode?: boolean;
+  /**
+   * The number of images to generate. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * The list of LoRA weights to use. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * The list of embeddings to use. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * If set to true, the prompt will be expanded with additional prompts.
+   */
+  expand_prompt?: boolean;
+  /**
+   * The format of the generated image. Default value: `"jpeg"`
+   */
+  format?: "jpeg" | "png";
+};
+export type LoraImageToImageOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+  /**
+   * The latents saved for debugging.
+   */
+  debug_latents?: File;
+  /**
+   * The latents saved for debugging per pass.
+   */
+  debug_per_pass_latents?: File;
+};
+export type LoraImageToImageInput = {
+  /**
+   * URL or HuggingFace ID of the base model to generate the image.
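+   *
+   * Illustrative values only (both identifiers below are assumptions, not taken
+   * from this file):
+   * @example
+   * ```ts
+   * // A HuggingFace model id:
+   * const byId = "runwayml/stable-diffusion-v1-5";
+   * // Or a direct URL to hosted weights:
+   * const byUrl = "https://example.com/checkpoints/custom-model.safetensors";
+   * ```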
+   */
+  model_name: string;
+  /**
+   * URL or HuggingFace ID of the custom U-Net model to use for the image generation.
+   */
+  unet_name?: string;
+  /**
+   * The variant of the model to use for huggingface models, e.g. 'fp16'.
+   */
+  variant?: string;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
+   */
+  prompt: string;
+  /**
+   * The negative prompt to use. Use it to address details that you don't want
+   * in the image. This could be colors, objects, scenery and even the small details
+   * (e.g. moustache, blurry, low resolution). Default value: `""`
+   */
+  negative_prompt?: string;
+  /**
+   * If set to true, the prompt weighting syntax will be used.
+   * Additionally, this will lift the 77 token limit by averaging embeddings.
+   */
+  prompt_weighting?: boolean;
+  /**
+   * URL of image to use for image to image/inpainting.
+   */
+  image_url?: string | Blob | File;
+  /**
+   * The amount of noise to add to the noise image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5`
+   */
+  noise_strength?: number;
+  /**
+   * The LoRAs to use for the image generation. You can use any number of LoRAs
+   * and they will be merged together to generate the final image. Default value: ``
+   */
+  loras?: Array;
+  /**
+   * The embeddings to use for the image generation. Only a single embedding is supported at the moment.
+   * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: ``
+   */
+  embeddings?: Array;
+  /**
+   * The control nets to use for the image generation. You can use any number of control nets
+   * and they will be applied to the image at the specified timesteps. Default value: ``
+   */
+  controlnets?: Array;
+  /**
+   * If set to true, the controlnet will be applied to only the conditional predictions.
+   */
+  controlnet_guess_mode?: boolean;
+  /**
+   * The IP adapter to use for the image generation. Default value: ``
+   */
+  ip_adapter?: Array;
+  /**
+   * The path to the image encoder model to use for the image generation.
+   */
+  image_encoder_path?: string;
+  /**
+   * The subfolder of the image encoder model to use for the image generation.
+   */
+  image_encoder_subfolder?: string;
+  /**
+   * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"`
+   */
+  image_encoder_weight_name?: string;
+  /**
+   * The URL of the IC Light model to use for the image generation.
+   */
+  ic_light_model_url?: string | Blob | File;
+  /**
+   * The URL of the IC Light model background image to use for the image generation.
+   * Make sure to use a background compatible with the model.
+   */
+  ic_light_model_background_image_url?: string | Blob | File;
+  /**
+   * The URL of the IC Light model image to use for the image generation.
+   */
+  ic_light_image_url?: string | Blob | File;
+  /**
+   * The same seed and the same prompt given to the same version of Stable Diffusion
+   * will output the same image every time.
+   */
+  seed?: number;
+  /**
+   * Increasing the amount of steps tells Stable Diffusion that it should take more steps
+   * to generate your final result, which can increase the amount of detail in your image. Default value: `30`
+   */
+  num_inference_steps?: number;
+  /**
+   * The CFG (Classifier Free Guidance) scale is a measure of how close you want
+   * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5`
+   */
+  guidance_scale?: number;
+  /**
+   * Skips part of the image generation process, leading to slightly different results.
+   * This means the image renders faster, too.
+   */
+  clip_skip?: number;
+  /**
+   * Scheduler / sampler to use for the image denoising process.
+   */
+  scheduler?:
+    | "DPM++ 2M"
+    | "DPM++ 2M Karras"
+    | "DPM++ 2M SDE"
+    | "DPM++ 2M SDE Karras"
+    | "Euler"
+    | "Euler A"
+    | "Euler (trailing timesteps)"
+    | "LCM"
+    | "LCM (trailing timesteps)"
+    | "DDIM"
+    | "TCD";
+  /**
+   * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method.
+   * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter.
+   * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set.
+   */
+  timesteps?: TimestepsInput;
+  /**
+   * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method.
+   * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter.
+   * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set.
+   */
+  sigmas?: SigmasInput;
+  /**
+   * The format of the generated image. Default value: `"png"`
+   */
+  image_format?: "jpeg" | "png";
+  /**
+   * Number of images to generate in one request. Note that the higher the batch size,
+   * the longer it will take to generate the images. Default value: `1`
+   */
+  num_images?: number;
+  /**
+   * If set to true, the safety checker will be enabled.
+   */
+  enable_safety_checker?: boolean;
+  /**
+   * The size of the tiles to be used for the image generation. Default value: `4096`
+   */
+  tile_width?: number;
+  /**
+   * The size of the tiles to be used for the image generation. Default value: `4096`
+   */
+  tile_height?: number;
+  /**
+   * The stride of the tiles to be used for the image generation. Default value: `2048`
+   */
+  tile_stride_width?: number;
+  /**
+   * The stride of the tiles to be used for the image generation. Default value: `2048`
+   */
+  tile_stride_height?: number;
+  /**
+   * The eta value to be used for the image generation.
+   */
+  eta?: number;
+  /**
+   * If set to true, the latents will be saved for debugging.
+   */
+  debug_latents?: boolean;
+  /**
+   * If set to true, the latents will be saved for debugging per pass.
+   */
+  debug_per_pass_latents?: boolean;
+};
+export type FastSdxlImageToImageOutput = {
+  /**
+   * The generated image files info.
+   */
+  images: Array;
+  /**
+   * The timings of the different steps of the generation process.
+   */
+  timings: any;
+  /**
+   * Seed of the generated Image. It will be the same value of the one passed in the
+   * input or the randomly generated that was used in case none was passed.
+   */
+  seed: number;
+  /**
+   * Whether the generated images contain NSFW concepts.
+   */
+  has_nsfw_concepts: Array;
+  /**
+   * The prompt used for generating the image.
+   */
+  prompt: string;
+};
+export type FastSdxlImageToImageInput = {
+  /**
+   * The URL of the image to use as a starting point for the generation.
+   */
+  image_url: string | Blob | File;
+  /**
+   * The prompt to use for generating the image. Be as descriptive as possible for best results.
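+   *
+   * Illustrative prompt only (an assumption, not from the schema); descriptive
+   * prompts generally produce better results than terse ones:
+   * @example
+   * ```ts
+   * const prompt =
+   *   "a watercolor painting of a coastal village at golden hour, " +
+   *   "soft light, detailed rooftops, muted palette";
+   * ```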
+ */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` + */ + loras?: Array; + /** + * The list of embeddings to use. Default value: `` + */ + embeddings?: Array; + /** + * If set to true, the safety checker will be enabled. Default value: `true` */ - performance?: "Speed" | "Quality" | "Extreme Speed" | "Lightning"; + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type FastSdxlInpaintingOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; +export type FastSdxlInpaintingInput = { + /** + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). 
Default value: `""` + */ + negative_prompt?: string; + /** + * The size of the generated image. Default value: `square_hd` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `25` + */ + num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** - * The sharpness of the generated image. Use it to control how sharp the generated - * image should be. Higher value means image and texture are sharper. Default value: `2` + * determines how much the generated image resembles the initial image Default value: `0.95` */ - sharpness?: number; + strength?: number; /** - * The size of the generated image. You can choose between some presets or - * custom height and width that **must be multiples of 8**. Default value: `"1024x1024"` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - aspect_ratio?: string; + seed?: number; /** - * Number of images to generate in one request Default value: `1` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` */ num_images?: number; /** - * The LoRAs to use for the image generation. You can use up to 5 LoRAs - * and they will be merged together to generate the final image. Default value: `[object Object]` + * The list of LoRA weights to use. Default value: `` */ loras?: Array; /** - * Refiner (SDXL or SD 1.5) Default value: `"None"` + * The list of embeddings to use. Default value: `` */ - refiner_model?: "None" | "realisticVisionV60B1_v51VAE.safetensors"; + embeddings?: Array; /** - * Use 0.4 for SD1.5 realistic models; 0.667 for SD1.5 anime models - * 0.8 for XL-refiners; or any value for switching two SDXL models. Default value: `0.8` + * If set to true, the safety checker will be enabled. Default value: `true` */ - refiner_switch?: number; + enable_safety_checker?: boolean; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ - output_format?: "png" | "jpeg" | "webp"; + format?: "jpeg" | "png"; + /** + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` + */ + request_id?: string; +}; +export type LoraInpaintOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. 
+ */ + has_nsfw_concepts: Array; + /** + * The latents saved for debugging. + */ + debug_latents?: File; + /** + * The latents saved for debugging per pass. + */ + debug_per_pass_latents?: File; +}; +export type LoraInpaintInput = { + /** + * URL or HuggingFace ID of the base model to generate the image. + */ + model_name: string; + /** + * URL or HuggingFace ID of the custom U-Net model to use for the image generation. + */ + unet_name?: string; + /** + * The variant of the model to use for huggingface models, e.g. 'fp16'. + */ + variant?: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * If set to true, the prompt weighting syntax will be used. + * Additionally, this will lift the 77 token limit by averaging embeddings. + */ + prompt_weighting?: boolean; + /** + * URL of image to use for image to image/inpainting. + */ + image_url?: string | Blob | File; + /** + * URL of black-and-white image to use as mask during inpainting. + */ + mask_url?: string | Blob | File; + /** + * The amount of noise to add to noise image for image. Only used if the image_url is provided. 1.0 is complete noise and 0 is no noise. Default value: `0.5` + */ + noise_strength?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; + /** + * The embeddings to use for the image generation. Only a single embedding is supported at the moment. + * The embeddings will be used to map the tokens in the prompt to the embedding weights. Default value: `` + */ + embeddings?: Array; + /** + * The control nets to use for the image generation. You can use any number of control nets + * and they will be applied to the image at the specified timesteps. Default value: `` + */ + controlnets?: Array; + /** + * If set to true, the controlnet will be applied to only the conditional predictions. + */ + controlnet_guess_mode?: boolean; + /** + * The IP adapter to use for the image generation. Default value: `` + */ + ip_adapter?: Array; + /** + * The path to the image encoder model to use for the image generation. + */ + image_encoder_path?: string; + /** + * The subfolder of the image encoder model to use for the image generation. + */ + image_encoder_subfolder?: string; + /** + * The weight name of the image encoder model to use for the image generation. Default value: `"pytorch_model.bin"` + */ + image_encoder_weight_name?: string; + /** + * The URL of the IC Light model to use for the image generation. + */ + ic_light_model_url?: string | Blob | File; + /** + * The URL of the IC Light model background image to use for the image generation. + * Make sure to use a background compatible with the model. + */ + ic_light_model_background_image_url?: string | Blob | File; + /** + * The URL of the IC Light model image to use for the image generation. + */ + ic_light_image_url?: string | Blob | File; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. 
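+   *
+   * Reproducibility sketch (assumes a `fal` client from `createFalClient`; the
+   * endpoint id and model id are assumptions, not taken from this file):
+   * @example
+   * ```ts
+   * const input = {
+   *   model_name: "runwayml/stable-diffusion-v1-5",
+   *   prompt: "a ceramic teapot on a wooden table",
+   *   image_url: "https://example.com/teapot.png",
+   *   mask_url: "https://example.com/teapot-mask.png",
+   * };
+   * const first = await fal.subscribe("fal-ai/lora/inpaint", { input });
+   * // Re-running with the returned seed reproduces the same image:
+   * const again = await fal.subscribe("fal-ai/lora/inpaint", {
+   *   input: { ...input, seed: first.data.seed },
+   * });
+   * ```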
+ */ + seed?: number; + /** + * Increasing the amount of steps tells Stable Diffusion that it should take more steps + * to generate your final result which can increase the amount of detail in your image. Default value: `30` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * Skips part of the image generation process, leading to slightly different results. + * This means the image renders faster, too. + */ + clip_skip?: number; + /** + * Scheduler / sampler to use for the image denoising process. + */ + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "Euler" + | "Euler A" + | "Euler (trailing timesteps)" + | "LCM" + | "LCM (trailing timesteps)" + | "DDIM" + | "TCD"; + /** + * Optionally override the timesteps to use for the denoising process. Only works with schedulers which support the `timesteps` argument in their `set_timesteps` method. + * Defaults to not overriding, in which case the scheduler automatically sets the timesteps based on the `num_inference_steps` parameter. + * If set to a custom timestep schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `sigmas` is set. Default value: `[object Object]` + */ + timesteps?: TimestepsInput; + /** + * Optionally override the sigmas to use for the denoising process. Only works with schedulers which support the `sigmas` argument in their `set_sigmas` method. + * Defaults to not overriding, in which case the scheduler automatically sets the sigmas based on the `num_inference_steps` parameter. + * If set to a custom sigma schedule, the `num_inference_steps` parameter will be ignored. Cannot be set if `timesteps` is set. Default value: `[object Object]` + */ + sigmas?: SigmasInput; + /** + * The format of the generated image. Default value: `"png"` + */ + image_format?: "jpeg" | "png"; + /** + * Number of images to generate in one request. Note that the higher the batch size, + * the longer it will take to generate the images. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. + */ + enable_safety_checker?: boolean; + /** + * The size of the tiles to be used for the image generation. Default value: `4096` + */ + tile_width?: number; + /** + * The size of the tiles to be used for the image generation. Default value: `4096` + */ + tile_height?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - sync_mode?: boolean; + tile_stride_width?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The stride of the tiles to be used for the image generation. Default value: `2048` */ - seed?: number; + tile_stride_height?: number; /** - * + * The eta value to be used for the image generation. */ - image_prompt_1: ImagePrompt; + eta?: number; /** - * + * If set to true, the latents will be saved for debugging. 
*/ - image_prompt_2?: ImagePrompt; + debug_latents?: boolean; /** - * + * If set to true, the latents will be saved for debugging per pass. */ - image_prompt_3?: ImagePrompt; + debug_per_pass_latents?: boolean; +}; +export type PixartSigmaInput = { /** - * + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - image_prompt_4?: ImagePrompt; + prompt: string; /** - * The image to use as a reference for inpainting. + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - inpaint_image_url?: string | Blob | File; + negative_prompt?: string; /** - * The image to use as a mask for the generated image. + * The style to apply to the image. Default value: `"(No style)"` */ - mask_image_url?: string | Blob | File; + style?: + | "(No style)" + | "Cinematic" + | "Photographic" + | "Anime" + | "Manga" + | "Digital Art" + | "Pixel art" + | "Fantasy art" + | "Neonpunk" + | "3D Model"; /** - * The mode to use for inpainting. Default value: `"Inpaint or Outpaint (default)"` + * The size of the generated image. Default value: `square_hd` */ - inpaint_mode?: - | "Inpaint or Outpaint (default)" - | "Improve Detail (face, hand, eyes, etc.)" - | "Modify Content (add objects, change background, etc.)"; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Describe what you want to inpaint. Default value: `""` + * The number of inference steps to perform. Default value: `35` */ - inpaint_additional_prompt?: string; + num_inference_steps?: number; /** - * The directions to outpaint. Default value: `` + * The scheduler to use for the model. Default value: `"DPM-SOLVER"` */ - outpaint_selections?: Array<"Left" | "Right" | "Top" | "Bottom">; + scheduler?: "DPM-SOLVER" | "SA-SOLVER"; /** - * Mixing Image Prompt and Inpaint + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` */ - mixing_image_prompt_and_inpaint?: boolean; + guidance_scale?: number; /** - * The image to upscale or vary. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - uov_image_url?: string | Blob | File; + seed?: number; /** - * The method to use for upscaling or varying. Default value: `"Disabled"` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - uov_method?: - | "Disabled" - | "Vary (Subtle)" - | "Vary (Strong)" - | "Upscale (1.5x)" - | "Upscale (2x)" - | "Upscale (Fast 2x)"; + sync_mode?: boolean; /** - * Mixing Image Prompt and Vary/Upscale + * The number of images to generate. Default value: `1` */ - mixing_image_prompt_and_vary_upscale?: boolean; + num_images?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * If set to true, the safety checker will be enabled. */ enable_safety_checker?: boolean; }; -export type FooocusInpaintOutput = { +export type PixartSigmaOutput = { /** - * The generated image file info. + * The generated image files info. 
*/ images: Array; /** - * The time taken for the generation process. + * The timings of the different steps of the generation process. */ - timings: Record; + timings: any; /** - * Whether the generated images contain NSFW concepts. + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - has_nsfw_concepts: Array; -}; -export type RetoucherInput = { + seed: number; /** - * The URL of the image to be retouched. + * Whether the generated images contain NSFW concepts. */ - image_url: string | Blob | File; + has_nsfw_concepts: Array; /** - * Seed for reproducibility. Different seeds will make slightly different results. + * The prompt used for generating the image. */ - seed?: number; + prompt: string; }; -export type RetoucherOutput = { +export type DreamshaperOutput = { /** - * The generated image file info. + * The generated image files info. */ - image: Image; + images: Array; /** - * The seed used for the generation. + * */ - seed: number; -}; -export type AnyLlmOutput = { + timings: any; /** - * Generated output + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - output: string; + seed: number; /** - * Whether the output is partial + * Whether the generated images contain NSFW concepts. */ - partial?: boolean; + has_nsfw_concepts: Array; /** - * Error message if an error occurred + * The prompt used for generating the image. */ - error?: string; + prompt: string; }; -export type AnyLlmInput = { +export type DreamshaperInput = { /** - * Name of the model to use. Premium models are charged at 3x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + * The Dreamshaper model to use. */ - model?: - | "anthropic/claude-3.5-sonnet" - | "anthropic/claude-3-haiku" - | "google/gemini-pro-1.5" - | "google/gemini-flash-1.5" - | "google/gemini-flash-1.5-8b" - | "openai/gpt-4o"; + model_name?: + | "Lykon/dreamshaper-xl-1-0" + | "Lykon/dreamshaper-xl-v2-turbo" + | "Lykon/dreamshaper-8"; /** - * Prompt to be used for the image + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * System prompt to provide context or instructions to the model + * The negative prompt to use. Use it to address details that you don't want in the image. 
Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"` */ - system_prompt?: string; + negative_prompt?: string; /** - * URL of the image to be processed + * Default value: `[object Object]` */ - image_url: string | Blob | File; -}; -export type AnyLlmVisionOutput = { + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Generated output + * The number of inference steps to perform. Default value: `35` */ - output: string; + num_inference_steps?: number; /** - * Whether the output is partial + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ - partial?: boolean; + guidance_scale?: number; /** - * Error message if an error occurred + * The list of LoRA weights to use. Default value: `` */ - error?: string; -}; -export type AnyLlmVisionInput = { + loras?: Array; /** - * Name of the model to use. Premium models are charged at 3x the rate of standard models, they include: anthropic/claude-3.5-sonnet, anthropic/claude-3-5-haiku, google/gemini-pro-1.5, openai/gpt-4o. Default value: `"google/gemini-flash-1.5"` + * The list of embeddings to use. Default value: `` */ - model?: - | "anthropic/claude-3.5-sonnet" - | "anthropic/claude-3-haiku" - | "google/gemini-pro-1.5" - | "google/gemini-flash-1.5" - | "google/gemini-flash-1.5-8b" - | "openai/gpt-4o"; + embeddings?: Array; /** - * Prompt to be used for the image + * If set to true, the prompt will be expanded with additional prompts. */ - prompt: string; + expand_prompt?: boolean; /** - * System prompt to provide context or instructions to the model + * The number of images to generate. Default value: `1` */ - system_prompt?: string; + num_images?: number; /** - * URL of the image to be processed + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - image_url: string | Blob | File; -}; -export type Llavav1513bInput = { + seed?: number; /** - * URL of the image to be processed + * If set to true, the safety checker will be enabled. Default value: `true` */ - image_url: string | Blob | File; + enable_safety_checker?: boolean; /** - * Prompt to be used for the image + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - prompt: string; + sync_mode?: boolean; /** - * Maximum number of tokens to generate Default value: `64` + * The format of the generated image. 
Default value: `"jpeg"` */ - max_tokens?: number; + format?: "jpeg" | "png"; /** - * Temperature for sampling Default value: `0.2` + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - temperature?: number; + safety_checker_version?: "v1" | "v2"; +}; +export type DreamshaperImageToImageInput = { /** - * Top P for sampling Default value: `1` + * The Dreamshaper model to use. */ - top_p?: number; -}; -export type Llavav1513bOutput = { + model_name?: + | "Lykon/dreamshaper-xl-1-0" + | "Lykon/dreamshaper-xl-v2-turbo" + | "Lykon/dreamshaper-8"; /** - * Generated output + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - output: string; + prompt: string; /** - * Whether the output is partial + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - partial?: boolean; -}; -export type LlavaNextInput = { + negative_prompt?: string; /** - * URL of the image to be processed + * Default value: `[object Object]` */ - image_url: string | Blob | File; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Prompt to be used for the image + * The number of inference steps to perform. Default value: `25` */ - prompt: string; + num_inference_steps?: number; /** - * Maximum number of tokens to generate Default value: `64` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - max_tokens?: number; + guidance_scale?: number; /** - * Temperature for sampling Default value: `0.2` + * The list of LoRA weights to use. Default value: `` */ - temperature?: number; + loras?: Array; /** - * Top P for sampling Default value: `1` + * The list of embeddings to use. Default value: `` */ - top_p?: number; -}; -export type LlavaNextOutput = { + embeddings?: Array; /** - * Generated output + * If set to true, the prompt will be expanded with additional prompts. */ - output: string; + expand_prompt?: boolean; /** - * Whether the output is partial + * The number of images to generate. Default value: `1` */ - partial?: boolean; -}; -export type ImageutilsNsfwInput = { + num_images?: number; /** - * Input image url. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - image_url: string | Blob | File; -}; -export type ImageutilsNsfwOutput = { + seed?: number; /** - * Combined image of all detected masks + * If set to true, the safety checker will be enabled. Default value: `true` */ - image?: Image; -}; -export type FastFooocusSdxlOutput = { + enable_safety_checker?: boolean; /** - * The generated image files info. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - images: Array; + sync_mode?: boolean; /** - * + * The format of the generated image. Default value: `"jpeg"` */ - timings: Record; + format?: "jpeg" | "png"; /** - * Seed of the generated Image. 
It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The URL of the image to use as a starting point for the generation. */ - seed: number; + image_url: string | Blob | File; /** - * Whether the generated images contain NSFW concepts. + * determines how much the generated image resembles the initial image Default value: `0.95` */ - has_nsfw_concepts: Array; + strength?: number; /** - * The prompt used for generating the image. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - prompt: string; + safety_checker_version?: "v1" | "v2"; }; -export type FastFooocusSdxlInput = { +export type DreamshaperInpaintingInput = { /** - * The URL of the image to use as a starting point for the generation. + * The Dreamshaper model to use. */ - image_url: string | Blob | File; + model_name?: + | "Lykon/dreamshaper-xl-1-0" + | "Lykon/dreamshaper-xl-v2-turbo" + | "Lykon/dreamshaper-8"; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -9199,7 +14771,7 @@ export type FastFooocusSdxlInput = { */ negative_prompt?: string; /** - * The size of the generated image. Leave it none to automatically infer from the prompt image. + * Default value: `[object Object]` */ image_size?: | ImageSize @@ -9210,57 +14782,67 @@ export type FastFooocusSdxlInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `8` + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The list of LoRA weights to use. Default value: `` */ - strength?: number; + loras?: Array; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The list of embeddings to use. Default value: `` */ - seed?: number; + embeddings?: Array; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** - * The list of embeddings to use. Default value: `` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - embeddings?: Array; + seed?: number; /** * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` - */ - safety_checker_version?: "v1" | "v2"; - /** - * If set to true, the prompt will be expanded with additional prompts. Default value: `true` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. 
*/ - expand_prompt?: boolean; + sync_mode?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** - * The rescale factor for the CFG. + * The URL of the image to use as a starting point for the generation. */ - guidance_rescale?: number; + image_url: string | Blob | File; /** - * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` + * The URL of the mask to use for inpainting. */ - enable_refiner?: boolean; + mask_url: string | Blob | File; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; }; -export type FastFooocusSdxlImageToImageOutput = { +export type RealisticVisionOutput = { /** * The generated image files info. */ @@ -9268,7 +14850,7 @@ export type FastFooocusSdxlImageToImageOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -9283,11 +14865,11 @@ export type FastFooocusSdxlImageToImageOutput = { */ prompt: string; }; -export type FastFooocusSdxlImageToImageInput = { +export type RealisticVisionImageToImageInput = { /** - * The URL of the image to use as a starting point for the generation. + * The Realistic Vision model to use. */ - image_url: string | Blob | File; + model_name?: string; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -9299,7 +14881,7 @@ export type FastFooocusSdxlImageToImageInput = { */ negative_prompt?: string; /** - * The size of the generated image. Leave it none to automatically infer from the prompt image. + * Default value: `[object Object]` */ image_size?: | ImageSize @@ -9310,95 +14892,151 @@ export type FastFooocusSdxlImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `8` + * The number of inference steps to perform. Default value: `25` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The list of LoRA weights to use. Default value: `` */ - strength?: number; + loras?: Array; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The list of embeddings to use. Default value: `` */ - seed?: number; + embeddings?: Array; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** - * The list of embeddings to use. Default value: `` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - embeddings?: Array; + seed?: number; /** * If set to true, the safety checker will be enabled. 
Default value: `true` */ enable_safety_checker?: boolean; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` - */ - safety_checker_version?: "v1" | "v2"; - /** - * If set to true, the prompt will be expanded with additional prompts. Default value: `true` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - expand_prompt?: boolean; + sync_mode?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; /** - * The rescale factor for the CFG. + * The URL of the image to use as a starting point for the generation. */ - guidance_rescale?: number; + image_url: string | Blob | File; /** - * If set to true, a smaller model will try to refine the output after it was processed. Default value: `true` + * determines how much the generated image resembles the initial image Default value: `0.95` */ - enable_refiner?: boolean; + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; }; -export type FaceToStickerInput = { +export type RealisticVisionInput = { /** - * URL of the video. + * The Realistic Vision model to use. */ - image_url: string | Blob | File; + model_name?: string; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The negative prompt to use. Use it to address details that you don't want in the image. Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"` */ negative_prompt?: string; /** - * Increasing the amount of steps tells Stable Diffusion that it should take more steps - * to generate your final result which can increase the amount of detail in your image. Default value: `20` + * Default value: `[object Object]` + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. 
Default value: `4.5` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `5` */ guidance_scale?: number; /** - * The strength of the instant ID. Default value: `0.7` + * The list of LoRA weights to use. Default value: `` */ - instant_id_strength?: number; + loras?: Array; /** - * The weight of the IP adapter. Default value: `0.2` + * The list of embeddings to use. Default value: `` */ - ip_adapter_weight?: number; + embeddings?: Array; /** - * The amount of noise to add to the IP adapter. Default value: `0.5` + * If set to true, the prompt will be expanded with additional prompts. */ - ip_adapter_noise?: number; + expand_prompt?: boolean; /** - * The size of the generated image. Default value: `square_hd` + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + format?: "jpeg" | "png"; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; +}; +export type RealisticVisionInpaintingInput = { + /** + * The Realistic Vision model to use. + */ + model_name?: string; + /** + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` + */ + negative_prompt?: string; + /** + * Default value: `[object Object]` */ image_size?: | ImageSize @@ -9409,236 +15047,288 @@ export type FaceToStickerInput = { | "landscape_4_3" | "landscape_16_9"; /** - * Whether to upscale the image 2x. - */ - upscale?: boolean; - /** - * The number of steps to use for upscaling. Only used if `upscale` is `true`. Default value: `10` + * The number of inference steps to perform. Default value: `25` */ - upscale_steps?: number; + num_inference_steps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - seed?: number; + guidance_scale?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * The list of LoRA weights to use. Default value: `` */ - enable_safety_checker?: boolean; -}; -export type FaceToStickerOutput = { + loras?: Array; /** - * The generated images. + * The list of embeddings to use. Default value: `` */ - images: Array; + embeddings?: Array; /** - * The generated face sticker image. 
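A sketch for the Realistic Vision text-to-image input above. Note that model_name is a free-form string here rather than a union, and leaving negative_prompt unset keeps the long quality-tag default. Endpoint id and type re-export are assumed:

import { createFalClient } from "@fal-ai/client";
import type { RealisticVisionInput } from "@fal-ai/client";

const fal = createFalClient();

// Only prompt is required; 35 steps and guidance 5 are the documented defaults.
const input: RealisticVisionInput = {
  prompt: "portrait photo of an elderly fisherman, overcast light",
  num_images: 2,
};

const { data } = await fal.subscribe("fal-ai/realistic-vision", { input });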
+ * If set to true, the prompt will be expanded with additional prompts. */ - sticker_image: Image; + expand_prompt?: boolean; /** - * The generated face sticker image with the background removed. + * The number of images to generate. Default value: `1` */ - sticker_image_background_removed: Image; + num_images?: number; /** - * Seed used during the inference. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - seed: number; + seed?: number; /** - * Whether the generated images contain NSFW concepts. - * The key is the image type and the value is a boolean. + * If set to true, the safety checker will be enabled. Default value: `true` */ - has_nsfw_concepts: any; -}; -export type MoondreamBatchedInput = { + enable_safety_checker?: boolean; /** - * Model ID to use for inference Default value: `"vikhyatk/moondream2"` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - model_id?: "vikhyatk/moondream2" | "fal-ai/moondream2-docci"; + sync_mode?: boolean; /** - * List of input prompts and image URLs + * The format of the generated image. Default value: `"jpeg"` */ - inputs: Array; + format?: "jpeg" | "png"; /** - * Maximum number of new tokens to generate Default value: `64` + * The URL of the image to use as a starting point for the generation. */ - max_tokens?: number; + image_url: string | Blob | File; /** - * Temperature for sampling Default value: `0.2` + * The URL of the mask to use for inpainting. */ - temperature?: number; + mask_url: string | Blob | File; /** - * Top P for sampling Default value: `1` + * determines how much the generated image resembles the initial image Default value: `0.95` */ - top_p?: number; + strength?: number; /** - * Repetition penalty for sampling Default value: `1` + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - repetition_penalty?: number; + safety_checker_version?: "v1" | "v2"; }; -export type MoondreamBatchedOutput = { +export type LightningModelsInput = { /** - * List of generated outputs + * The Lightning model to use. */ - outputs: Array; + model_name?: string; /** - * Whether the output is partial + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - partial?: boolean; + prompt: string; /** - * Timings for different parts of the process + * The negative prompt to use. Use it to address details that you don't want in the image. 
Default value: `"(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"` */ - timings: Record; + negative_prompt?: string; /** - * Filenames of the images processed + * Default value: `[object Object]` */ - filenames?: Array; -}; -export type SadtalkerInput = { + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * URL of the source image + * The number of inference steps to perform. Default value: `5` */ - source_image_url: string | Blob | File; + num_inference_steps?: number; /** - * URL of the driven audio + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ - driven_audio_url: string | Blob | File; + guidance_scale?: number; /** - * URL of the reference video + * The list of LoRA weights to use. Default value: `` */ - reference_pose_video_url: string | Blob | File; + loras?: Array; /** - * The style of the pose + * The list of embeddings to use. Default value: `` */ - pose_style?: number; + embeddings?: Array; /** - * The resolution of the face model Default value: `"256"` + * Scheduler / sampler to use for the image denoising process. */ - face_model_resolution?: "256" | "512"; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "DPM++ SDE" + | "DPM++ SDE Karras" + | "KDPM 2A" + | "Euler" + | "Euler (trailing timesteps)" + | "Euler A" + | "LCM" + | "EDMDPMSolverMultistepScheduler" + | "TCDScheduler"; /** - * The scale of the expression Default value: `1` + * If set to true, the prompt will be expanded with additional prompts. */ - expression_scale?: number; + expand_prompt?: boolean; /** - * The type of face enhancer to use + * The number of images to generate. Default value: `1` */ - face_enhancer?: "gfpgan"; + num_images?: number; /** - * Whether to use still mode. Fewer head motion, works with preprocess `full`. + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - still_mode?: boolean; + seed?: number; /** - * The type of preprocessing to use Default value: `"crop"` + * If set to true, the safety checker will be enabled. Default value: `true` */ - preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; -}; -export type SadtalkerOutput = { + enable_safety_checker?: boolean; /** - * URL of the generated video + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. 
*/ - video: File; -}; -export type MusetalkInput = { + sync_mode?: boolean; /** - * URL of the source video + * The format of the generated image. Default value: `"jpeg"` */ - source_video_url: string | Blob | File; + format?: "jpeg" | "png"; /** - * URL of the audio + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - audio_url: string | Blob | File; + safety_checker_version?: "v1" | "v2"; }; -export type MusetalkOutput = { +export type LightningModelsOutput = { /** - * The generated video file. + * The generated image files info. */ - video: File; -}; -export type SadtalkerReferenceInput = { + images: Array; /** - * URL of the source image + * */ - source_image_url: string | Blob | File; + timings: any; /** - * URL of the driven audio + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - driven_audio_url: string | Blob | File; + seed: number; /** - * URL of the reference video + * Whether the generated images contain NSFW concepts. */ - reference_pose_video_url: string | Blob | File; + has_nsfw_concepts: Array; /** - * The style of the pose + * The prompt used for generating the image. */ - pose_style?: number; + prompt: string; +}; +export type LightningModelsInpaintingInput = { /** - * The resolution of the face model Default value: `"256"` + * The Lightning model to use. */ - face_model_resolution?: "256" | "512"; + model_name?: string; /** - * The scale of the expression Default value: `1` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - expression_scale?: number; + prompt: string; /** - * The type of face enhancer to use + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - face_enhancer?: "gfpgan"; + negative_prompt?: string; /** - * Whether to use still mode. Fewer head motion, works with preprocess `full`. + * Default value: `[object Object]` */ - still_mode?: boolean; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The type of preprocessing to use Default value: `"crop"` + * The number of inference steps to perform. Default value: `5` */ - preprocess?: "crop" | "extcrop" | "resize" | "full" | "extfull"; -}; -export type SadtalkerReferenceOutput = { + num_inference_steps?: number; /** - * URL of the generated video + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ - video: File; -}; -export type LayerDiffusionInput = { + guidance_scale?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. Default value: `""` + * The list of LoRA weights to use. Default value: `` */ - prompt?: string; + loras?: Array; /** - * The prompt to use for generating the negative image. Be as descriptive as possible for best results. Default value: `"text, watermark"` + * The list of embeddings to use. Default value: `` */ - negative_prompt?: string; + embeddings?: Array; /** - * The guidance scale for the model. Default value: `8` + * Scheduler / sampler to use for the image denoising process. 
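The Lightning inpainting variant being defined here pairs the prompt with the image_url and mask_url fields that follow below. A sketch, with the endpoint id, the type re-export, the URLs, and the mask convention all assumed:

import { createFalClient } from "@fal-ai/client";
import type { LightningModelsInpaintingInput } from "@fal-ai/client";

const fal = createFalClient();

// Lightning checkpoints are tuned for few-step sampling: the documented
// defaults are 5 steps and guidance 2, so neither is overridden here.
const input: LightningModelsInpaintingInput = {
  prompt: "a red brick fireplace",
  image_url: "https://example.com/room.jpg",     // hypothetical
  mask_url: "https://example.com/room-mask.png", // assumed: white region is repainted
  scheduler: "DPM++ 2M Karras",
};

const { data } = await fal.subscribe("fal-ai/lightning-models/inpainting", { input });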
*/ - guidance_scale?: number; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "DPM++ SDE" + | "DPM++ SDE Karras" + | "KDPM 2A" + | "Euler" + | "Euler (trailing timesteps)" + | "Euler A" + | "LCM" + | "EDMDPMSolverMultistepScheduler" + | "TCDScheduler"; + /** + * If set to true, the prompt will be expanded with additional prompts. + */ + expand_prompt?: boolean; /** - * The number of inference steps for the model. Default value: `20` + * The number of images to generate. Default value: `1` */ - num_inference_steps?: number; + num_images?: number; /** * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; -}; -export type LayerDiffusionOutput = { /** - * The URL of the generated image. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - image: Image; + sync_mode?: boolean; /** - * The seed used to generate the image. + * The format of the generated image. Default value: `"jpeg"` */ - seed: number; -}; -export type StableDiffusionV15Input = { + format?: "jpeg" | "png"; /** * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + */ + safety_checker_version?: "v1" | "v2"; +}; +export type LightningModelsImageToImageInput = { + /** + * The Lightning model to use. + */ + model_name?: string; /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -9650,7 +15340,7 @@ export type StableDiffusionV15Input = { */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square` + * Default value: `[object Object]` */ image_size?: | ImageSize @@ -9661,156 +15351,175 @@ export type StableDiffusionV15Input = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `25` + * The number of inference steps to perform. Default value: `5` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` */ guidance_scale?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The list of LoRA weights to use. Default value: `` */ - strength?: number; + loras?: Array; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * The list of embeddings to use. 
Default value: `` */ - seed?: number; + embeddings?: Array; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Scheduler / sampler to use for the image denoising process. */ - sync_mode?: boolean; + scheduler?: + | "DPM++ 2M" + | "DPM++ 2M Karras" + | "DPM++ 2M SDE" + | "DPM++ 2M SDE Karras" + | "DPM++ SDE" + | "DPM++ SDE Karras" + | "KDPM 2A" + | "Euler" + | "Euler (trailing timesteps)" + | "Euler A" + | "LCM" + | "EDMDPMSolverMultistepScheduler" + | "TCDScheduler"; /** - * The number of images to generate. Default value: `1` + * If set to true, the prompt will be expanded with additional prompts. */ - num_images?: number; + expand_prompt?: boolean; /** - * The list of LoRA weights to use. Default value: `` + * The number of images to generate. Default value: `1` */ - loras?: Array; + num_images?: number; /** - * The list of embeddings to use. Default value: `` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - embeddings?: Array; + seed?: number; /** - * If set to true, the safety checker will be enabled. + * If set to true, the safety checker will be enabled. Default value: `true` */ enable_safety_checker?: boolean; /** - * If set to true, the prompt will be expanded with additional prompts. + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - expand_prompt?: boolean; + sync_mode?: boolean; /** * The format of the generated image. Default value: `"jpeg"` */ format?: "jpeg" | "png"; -}; -export type StableDiffusionV15Output = { /** - * The generated image files info. + * The URL of the image to use as a starting point for the generation. */ - images: Array; + image_url: string | Blob | File; /** - * + * determines how much the generated image resembles the initial image Default value: `0.95` */ - timings: Record; + strength?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed: number; + safety_checker_version?: "v1" | "v2"; +}; +export type OmniZeroOutput = { /** - * Whether the generated images contain NSFW concepts. + * The generated image. */ - has_nsfw_concepts: Array; + image: Image; +}; +export type OmniZeroInput = { /** - * The prompt used for generating the image. + * Prompt to guide the image generation. */ prompt: string; -}; -export type LoraImageToImageInput = { /** - * The method to use for the sigmas. If set to 'custom', the sigmas will be set based - * on the provided sigmas schedule in the `array` field. - * Defaults to 'default' which means the scheduler will use the sigmas of the scheduler. Default value: `"default"` + * Negative prompt to guide the image generation. Default value: `""` */ - method?: "default" | "array"; + negative_prompt?: string; /** - * Sigmas schedule to be used if 'custom' method is selected. Default value: `` + * Input image url. 
*/ - array?: Array; -}; -export type LoraImageToImageOutput = { + image_url: string | Blob | File; /** - * The generated image files info. + * Composition image url. */ - images: Array; + composition_image_url: string | Blob | File; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Style image url. */ - seed: number; + style_image_url: string | Blob | File; /** - * Whether the generated images contain NSFW concepts. + * Identity image url. */ - has_nsfw_concepts: Array; + identity_image_url: string | Blob | File; /** - * The latents saved for debugging. + * Image strength. Default value: `0.75` */ - debug_latents?: File; + image_strength?: number; /** - * The latents saved for debugging per pass. + * Composition strength. Default value: `1` */ - debug_per_pass_latents?: File; -}; -export type FastSdxlImageToImageOutput = { + composition_strength?: number; /** - * The generated image files info. + * Depth strength. Default value: `0.5` */ - images: Array; + depth_strength?: number; /** - * + * Style strength. Default value: `1` + */ + style_strength?: number; + /** + * Face strength. Default value: `1` */ - timings: Record; + face_strength?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Identity strength. Default value: `1` */ - seed: number; + identity_strength?: number; /** - * Whether the generated images contain NSFW concepts. + * Guidance scale. Default value: `5` */ - has_nsfw_concepts: Array; + guidance_scale?: number; /** - * The prompt used for generating the image. + * Seed. Default value: `42` */ - prompt: string; + seed?: number; + /** + * Number of images. Default value: `1` + */ + number_of_images?: number; }; -export type FastSdxlImageToImageInput = { +export type CatVtonOutput = { /** - * The URL of the image to use as a starting point for the generation. + * The output image. */ - image_url: string | Blob | File; + image: Image; +}; +export type CatVtonInput = { /** - * The URL of the mask to use for inpainting. + * Url for the human image. */ - mask_url: string | Blob | File; + human_image_url: string | Blob | File; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Url to the garment image. */ - prompt: string; + garment_image_url: string | Blob | File; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Type of the Cloth to be tried on. + * + * Options: + * upper: Upper body cloth + * lower: Lower body cloth + * overall: Full body cloth + * inner: Inner cloth, like T-shirt inside a jacket + * outer: Outer cloth, like a jacket over a T-shirt */ - negative_prompt?: string; + cloth_type: "upper" | "lower" | "overall" | "inner" | "outer"; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated image. Default value: `portrait_4_3` */ image_size?: | ImageSize @@ -9821,64 +15530,33 @@ export type FastSdxlImageToImageInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `25` + * The number of inference steps to perform. 
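Omni Zero above is unusual in requiring four reference images alongside the prompt, each with its own strength dial. A sketch with the same caveats as the earlier ones (endpoint id, re-export, and URLs assumed):

import { createFalClient } from "@fal-ai/client";
import type { OmniZeroInput } from "@fal-ai/client";

const fal = createFalClient();

// All four image URLs are required; the *_strength fields default to the
// values documented above (image 0.75, depth 0.5, the rest 1).
const input: OmniZeroInput = {
  prompt: "a renaissance oil painting",
  image_url: "https://example.com/base.jpg",
  composition_image_url: "https://example.com/layout.jpg",
  style_image_url: "https://example.com/style.jpg",
  identity_image_url: "https://example.com/face.jpg",
};

const { data } = await fal.subscribe("fal-ai/omni-zero", { input });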
Default value: `30` */ num_inference_steps?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` */ guidance_scale?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` - */ - strength?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion + * The same seed and the same input given to the same version of the model * will output the same image every time. */ seed?: number; +}; +export type DwposeOutput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. - */ - sync_mode?: boolean; - /** - * The number of images to generate. Default value: `1` - */ - num_images?: number; - /** - * The list of LoRA weights to use. Default value: `` - */ - loras?: Array; - /** - * The list of embeddings to use. Default value: `` - */ - embeddings?: Array; - /** - * If set to true, the safety checker will be enabled. Default value: `true` - */ - enable_safety_checker?: boolean; - /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` - */ - safety_checker_version?: "v1" | "v2"; - /** - * If set to true, the prompt will be expanded with additional prompts. - */ - expand_prompt?: boolean; - /** - * The format of the generated image. Default value: `"jpeg"` + * The predicted pose image */ - format?: "jpeg" | "png"; + image: Image; +}; +export type DwposeInput = { /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * URL of the image to be processed */ - request_id?: string; + image_url: string | Blob | File; }; -export type FastSdxlInpaintingOutput = { +export type StableCascadeSoteDiffusionOutput = { /** * The generated image files info. */ @@ -9886,7 +15564,7 @@ export type FastSdxlInpaintingOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -9901,27 +15579,37 @@ export type FastSdxlInpaintingOutput = { */ prompt: string; }; -export type FastSdxlInpaintingInput = { - /** - * The URL of the image to use as a starting point for the generation. - */ - image_url: string | Blob | File; - /** - * The URL of the mask to use for inpainting. - */ - mask_url: string | Blob | File; +export type StableCascadeSoteDiffusionInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use.Use it to address details that you don't want + * The negative prompt to use. Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ negative_prompt?: string; /** - * The size of the generated image. Default value: `square_hd` + * Number of steps to run the first stage for. 
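The CatVTON try-on input above needs no prompt at all; cloth_type picks the body region the garment replaces. A sketch (endpoint id, re-export, and URLs assumed):

import { createFalClient } from "@fal-ai/client";
import type { CatVtonInput } from "@fal-ai/client";

const fal = createFalClient();

const input: CatVtonInput = {
  human_image_url: "https://example.com/person.jpg",   // hypothetical
  garment_image_url: "https://example.com/jacket.png", // hypothetical
  cloth_type: "upper",
};

// Returns a single composited image rather than an images array.
const { data } = await fal.subscribe("fal-ai/cat-vton", { input });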
Default value: `25` + */ + first_stage_steps?: number; + /** + * Number of steps to run the second stage for. Default value: `10` + */ + second_stage_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `8` + */ + guidance_scale?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + */ + second_stage_guidance_scale?: number; + /** + * The size of the generated image. Default value: `[object Object]` */ image_size?: | ImageSize @@ -9932,632 +15620,654 @@ export type FastSdxlInpaintingInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `25` + * The same seed and the same prompt given to the same version of Stable Cascade + * will output the same image every time. */ - num_inference_steps?: number; + seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * If set to false, the safety checker will be disabled. Default value: `true` */ - guidance_scale?: number; + enable_safety_checker?: boolean; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * The number of images to generate. Default value: `1` */ - strength?: number; + num_images?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * If set to true, the image will be returned as base64 encoded string. */ - seed?: number; + sync_mode?: boolean; +}; +export type PolygonOutput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * List of polygons */ - sync_mode?: boolean; + polygons: Array; +}; +export type Florence2LargeCaptionOutput = { /** - * The number of images to generate. Default value: `1` + * Results from the model */ - num_images?: number; + results: string; +}; +export type Florence2LargeCaptionInput = { /** - * The list of LoRA weights to use. Default value: `` + * The URL of the image to be processed. */ - loras?: Array; + image_url: string | Blob | File; +}; +export type ImageWithUserCoordinatesInput = { /** - * The list of embeddings to use. Default value: `` + * The URL of the image to be processed. */ - embeddings?: Array; + image_url: string | Blob | File; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The user input coordinates */ - enable_safety_checker?: boolean; + region: Region; +}; +export type ImageWithTextInput = { /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * The URL of the image to be processed. */ - safety_checker_version?: "v1" | "v2"; + image_url: string | Blob | File; /** - * If set to true, the prompt will be expanded with additional prompts. + * Text input for the task */ - expand_prompt?: boolean; + text_input: string; +}; +export type Florence2LargeDetailedCaptionOutput = { /** - * The format of the generated image. 
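Stable Cascade splits sampling across two stages, which is why the input above carries separate step and guidance knobs for each stage. A sketch, with the endpoint id an assumption:

import { createFalClient } from "@fal-ai/client";
import type { StableCascadeSoteDiffusionInput } from "@fal-ai/client";

const fal = createFalClient();

// Documented defaults: 25 first-stage steps at guidance 8,
// 10 second-stage steps at guidance 2.
const input: StableCascadeSoteDiffusionInput = {
  prompt: "an anime screencap of a city street in the rain",
  second_stage_steps: 14,
};

const { data } = await fal.subscribe("fal-ai/stable-cascade/sote-diffusion", { input });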
Default value: `"jpeg"` + * Results from the model */ - format?: "jpeg" | "png"; + results: string; +}; +export type Florence2LargeDetailedCaptionInput = { /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * The URL of the image to be processed. */ - request_id?: string; + image_url: string | Blob | File; }; -export type LoraInpaintInput = { +export type Florence2LargeMoreDetailedCaptionOutput = { /** - * The method to use for the sigmas. If set to 'custom', the sigmas will be set based - * on the provided sigmas schedule in the `array` field. - * Defaults to 'default' which means the scheduler will use the sigmas of the scheduler. Default value: `"default"` + * Results from the model */ - method?: "default" | "array"; + results: string; +}; +export type Florence2LargeMoreDetailedCaptionInput = { /** - * Sigmas schedule to be used if 'custom' method is selected. Default value: `` + * The URL of the image to be processed. */ - array?: Array; + image_url: string | Blob | File; }; -export type LoraInpaintOutput = { +export type TextOutput = { /** - * The generated image files info. + * The output text */ - images: Array; + text: string; +}; +export type Florence2LargeObjectDetectionInput = { /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The URL of the image to be processed. */ - seed: number; + image_url: string | Blob | File; +}; +export type Florence2LargeObjectDetectionOutput = { /** - * Whether the generated images contain NSFW concepts. + * Results from the model */ - has_nsfw_concepts: Array; + results: BoundingBoxes; /** - * The latents saved for debugging. + * Processed image */ - debug_latents?: File; + image?: Image; +}; +export type Florence2LargeDenseRegionCaptionInput = { /** - * The latents saved for debugging per pass. + * The URL of the image to be processed. */ - debug_per_pass_latents?: File; + image_url: string | Blob | File; }; -export type PixartSigmaInput = { +export type Florence2LargeDenseRegionCaptionOutput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Results from the model */ - prompt: string; + results: BoundingBoxes; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Processed image */ - negative_prompt?: string; + image?: Image; +}; +export type Florence2LargeRegionProposalInput = { /** - * The style to apply to the image. Default value: `"(No style)"` + * The URL of the image to be processed. */ - style?: - | "(No style)" - | "Cinematic" - | "Photographic" - | "Anime" - | "Manga" - | "Digital Art" - | "Pixel art" - | "Fantasy art" - | "Neonpunk" - | "3D Model"; + image_url: string | Blob | File; +}; +export type Florence2LargeRegionProposalOutput = { /** - * The size of the generated image. Default value: `square_hd` + * Results from the model */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + results: BoundingBoxes; + /** + * Processed image + */ + image?: Image; +}; +export type ImageInput = { + /** + * Input image url. 
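The single-image Florence-2 tasks above all follow one pattern: an image_url in, and either a plain string or a BoundingBoxes payload out. A caption sketch (endpoint id and re-export assumed):

import { createFalClient } from "@fal-ai/client";
import type { Florence2LargeCaptionInput } from "@fal-ai/client";

const fal = createFalClient();

const input: Florence2LargeCaptionInput = {
  image_url: "https://example.com/photo.jpg", // hypothetical
};

const { data } = await fal.subscribe("fal-ai/florence-2-large/caption", { input });
console.log(data.results); // plain string caption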
+ */ + image_url: string | Blob | File; +}; +export type Florence2LargeCaptionToPhraseGroundingOutput = { + /** + * Results from the model + */ + results: BoundingBoxes; + /** + * Processed image + */ + image?: Image; +}; +export type Florence2LargeCaptionToPhraseGroundingInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeReferringExpressionSegmentationOutput = { + /** + * Results from the model + */ + results: PolygonOutput; + /** + * Processed image + */ + image?: Image; +}; +export type Florence2LargeReferringExpressionSegmentationInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * Text input for the task + */ + text_input: string; +}; +export type Florence2LargeRegionToSegmentationOutput = { + /** + * Results from the model + */ + results: PolygonOutput; + /** + * Processed image + */ + image?: Image; +}; +export type Florence2LargeRegionToSegmentationInput = { + /** + * The URL of the image to be processed. + */ + image_url: string | Blob | File; + /** + * The user input coordinates + */ + region: Region; +}; +export type Florence2LargeOpenVocabularyDetectionOutput = { /** - * The number of inference steps to perform. Default value: `35` + * Results from the model */ - num_inference_steps?: number; + results: BoundingBoxes; /** - * The scheduler to use for the model. Default value: `"DPM-SOLVER"` + * Processed image */ - scheduler?: "DPM-SOLVER" | "SA-SOLVER"; + image?: Image; +}; +export type Florence2LargeOpenVocabularyDetectionInput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4.5` + * The URL of the image to be processed. */ - guidance_scale?: number; + image_url: string | Blob | File; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Text input for the task */ - seed?: number; + text_input: string; +}; +export type Florence2LargeRegionToCategoryOutput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Results from the model */ - sync_mode?: boolean; + results: string; +}; +export type Florence2LargeRegionToCategoryInput = { /** - * The number of images to generate. Default value: `1` + * The URL of the image to be processed. */ - num_images?: number; + image_url: string | Blob | File; /** - * If set to true, the safety checker will be enabled. + * The user input coordinates */ - enable_safety_checker?: boolean; + region: Region; }; -export type PixartSigmaOutput = { +export type Florence2LargeRegionToDescriptionOutput = { /** - * The generated image files info. + * Results from the model */ - images: Array; + results: string; +}; +export type Florence2LargeRegionToDescriptionInput = { /** - * The timings of the different steps of the generation process. + * The URL of the image to be processed. */ - timings: Record; + image_url: string | Blob | File; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. 
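The grounding and segmentation variants add a text_input query (or a Region) on top of the image. A phrase-grounding sketch, same caveats as the earlier Florence-2 example:

import { createFalClient } from "@fal-ai/client";
import type { Florence2LargeCaptionToPhraseGroundingInput } from "@fal-ai/client";

const fal = createFalClient();

const input: Florence2LargeCaptionToPhraseGroundingInput = {
  image_url: "https://example.com/street.jpg", // hypothetical
  text_input: "a yellow taxi",
};

// data.results is a BoundingBoxes payload locating the phrase in the image.
const { data } = await fal.subscribe(
  "fal-ai/florence-2-large/caption-to-phrase-grounding",
  { input },
);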
+ * The user input coordinates */ - seed: number; + region: Region; +}; +export type Florence2LargeOcrOutput = { /** - * Whether the generated images contain NSFW concepts. + * Results from the model */ - has_nsfw_concepts: Array; + results: string; +}; +export type Florence2LargeOcrInput = { /** - * The prompt used for generating the image. + * The URL of the image to be processed. */ - prompt: string; + image_url: string | Blob | File; }; -export type DreamshaperOutput = { +export type Florence2LargeOcrWithRegionOutput = { /** - * The generated image files info. + * Results from the model */ - images: Array; + results: OCRBoundingBox; /** - * + * Processed image */ - timings: Record; + image?: Image; +}; +export type Florence2LargeOcrWithRegionInput = { /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The URL of the image to be processed. */ - seed: number; + image_url: string | Blob | File; +}; +export type Era3dOutput = { /** - * Whether the generated images contain NSFW concepts. + * Images with background removed */ - has_nsfw_concepts: Array; + images: Array; /** - * The prompt used for generating the image. + * Normal images with background removed */ - prompt: string; -}; -export type DreamshaperInput = { + normal_images: Array; /** - * The Dreamshaper model to use. + * Seed used for random number generation */ - model_name?: - | "Lykon/dreamshaper-xl-1-0" - | "Lykon/dreamshaper-xl-v2-turbo" - | "Lykon/dreamshaper-8"; + seed: number; +}; +export type Era3dInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * URL of the image to remove background from */ - prompt: string; + image_url: string | Blob | File; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `4` */ - negative_prompt?: string; + cfg?: number; /** - * Default value: `[object Object]` + * Number of steps to run the model for Default value: `40` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + steps?: number; /** - * The number of inference steps to perform. Default value: `25` + * Size of the image to crop to Default value: `400` */ - num_inference_steps?: number; + crop_size?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * Seed for random number generation Default value: `-1` */ - guidance_scale?: number; + seed?: number; /** - * The list of LoRA weights to use. Default value: `` + * Background removal Default value: `true` */ - loras?: Array; + background_removal?: boolean; +}; +export type LivePortraitImageOutput = { /** - * The list of embeddings to use. Default value: `` + * The generated image file. */ - embeddings?: Array; + image: Image; +}; +export type LivePortraitOutput = { /** - * If set to true, the prompt will be expanded with additional prompts. + * The generated video file. 
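Era3D above turns one object photo into a set of multi-view color and normal images. A sketch (endpoint id, re-export, and URL assumed):

import { createFalClient } from "@fal-ai/client";
import type { Era3dInput } from "@fal-ai/client";

const fal = createFalClient();

// Documented defaults: cfg 4, 40 steps, 400px crop, background removal on.
const input: Era3dInput = {
  image_url: "https://example.com/figurine.png", // hypothetical
  seed: 42,
};

const { data } = await fal.subscribe("fal-ai/era-3d", { input });
// data.images and data.normal_images hold the background-removed views.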
*/ - expand_prompt?: boolean; + video: File; +}; +export type LivePortraitInput = { /** - * The number of images to generate. Default value: `1` + * URL of the video to drive the lip syncing. */ - num_images?: number; + video_url: string | Blob | File; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * URL of the image to be animated */ - seed?: number; + image_url: string | Blob | File; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Amount to blink the eyes */ - enable_safety_checker?: boolean; + blink?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Amount to raise or lower eyebrows */ - sync_mode?: boolean; + eyebrow?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * Amount to wink */ - format?: "jpeg" | "png"; + wink?: number; /** - * The URL of the image to use as a starting point for the generation. + * Amount to move pupils horizontally */ - image_url: string | Blob | File; + pupil_x?: number; /** - * The URL of the mask to use for inpainting. + * Amount to move pupils vertically */ - mask_url: string | Blob | File; + pupil_y?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * Amount to open mouth in 'aaa' shape */ - strength?: number; + aaa?: number; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * Amount to shape mouth in 'eee' position */ - safety_checker_version?: "v1" | "v2"; -}; -export type RealisticVisionOutput = { + eee?: number; /** - * The generated image files info. + * Amount to shape mouth in 'woo' position */ - images: Array; + woo?: number; /** - * + * Amount to smile */ - timings: Record; + smile?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Whether to set the lip to closed state before animation. Only takes effect when flag_eye_retargeting and flag_lip_retargeting are False. Default value: `true` */ - seed: number; + flag_lip_zero?: boolean; /** - * Whether the generated images contain NSFW concepts. + * Amount to rotate the face in pitch */ - has_nsfw_concepts: Array; + rotate_pitch?: number; /** - * The prompt used for generating the image. + * Amount to rotate the face in yaw */ - prompt: string; -}; -export type RealisticVisionInput = { + rotate_yaw?: number; /** - * The Realistic Vision model to use. + * Amount to rotate the face in roll */ - model_name?: string; + rotate_roll?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Whether to enable eye retargeting. */ - prompt: string; + flag_eye_retargeting?: boolean; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Whether to enable lip retargeting. */ - negative_prompt?: string; + flag_lip_retargeting?: boolean; /** - * Default value: `[object Object]` + * Whether to enable stitching. 
Recommended to set to True. Default value: `true` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + flag_stitching?: boolean; /** - * The number of inference steps to perform. Default value: `25` + * Whether to use relative motion. Default value: `true` */ - num_inference_steps?: number; + flag_relative?: boolean; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` */ - guidance_scale?: number; + flag_pasteback?: boolean; /** - * The list of LoRA weights to use. Default value: `` + * Whether to crop the source portrait to the face-cropping space. Default value: `true` */ - loras?: Array; + flag_do_crop?: boolean; /** - * The list of embeddings to use. Default value: `` + * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` */ - embeddings?: Array; + flag_do_rot?: boolean; /** - * If set to true, the prompt will be expanded with additional prompts. + * Size of the output image. Default value: `512` */ - expand_prompt?: boolean; + dsize?: number; /** - * The number of images to generate. Default value: `1` + * Scaling factor for the face crop. Default value: `2.3` */ - num_images?: number; + scale?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Horizontal offset ratio for face crop. */ - seed?: number; + vx_ratio?: number; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` */ - enable_safety_checker?: boolean; + vy_ratio?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Batch size for the model. The larger the batch size, the faster the model will run, but the more memory it will consume. Default value: `32` */ - sync_mode?: boolean; + batch_size?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. + * The safety checker will process the input image */ - format?: "jpeg" | "png"; + enable_safety_checker?: boolean; +}; +export type LivePortraitImageInput = { /** - * The URL of the image to use as a starting point for the generation. + * URL of the image to be animated */ image_url: string | Blob | File; /** - * The URL of the mask to use for inpainting. + * Amount to blink the eyes */ - mask_url: string | Blob | File; + blink?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * Amount to raise or lower eyebrows */ - strength?: number; + eyebrow?: number; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. 
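A sketch for the video-driven LivePortraitInput above; the expression fields (blink, smile, rotate_*) are additive offsets and can stay unset. Endpoint id, re-export, and URLs are assumptions:

import { createFalClient } from "@fal-ai/client";
import type { LivePortraitInput } from "@fal-ai/client";

const fal = createFalClient();

const input: LivePortraitInput = {
  video_url: "https://example.com/driver.mp4",   // hypothetical driving video
  image_url: "https://example.com/portrait.jpg", // hypothetical still portrait
  smile: 0.3,
};

const { data } = await fal.subscribe("fal-ai/live-portrait", { input });
// data.video is the animated portrait.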
Default value: `"v1"` + * Amount to wink */ - safety_checker_version?: "v1" | "v2"; -}; -export type LightningModelsInput = { + wink?: number; /** - * The Lightning model to use. + * Amount to move pupils horizontally */ - model_name?: string; + pupil_x?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Amount to move pupils vertically */ - prompt: string; + pupil_y?: number; /** - * The negative prompt to use.Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Amount to open mouth in 'aaa' shape */ - negative_prompt?: string; + aaa?: number; /** - * Default value: `[object Object]` + * Amount to shape mouth in 'eee' position */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + eee?: number; /** - * The number of inference steps to perform. Default value: `5` + * Amount to shape mouth in 'woo' position */ - num_inference_steps?: number; + woo?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `2` + * Amount to smile */ - guidance_scale?: number; + smile?: number; /** - * The list of LoRA weights to use. Default value: `` + * Amount to rotate the face in pitch */ - loras?: Array; + rotate_pitch?: number; /** - * The list of embeddings to use. Default value: `` + * Amount to rotate the face in yaw */ - embeddings?: Array; + rotate_yaw?: number; /** - * Scheduler / sampler to use for the image denoising process. + * Amount to rotate the face in roll */ - scheduler?: - | "DPM++ 2M" - | "DPM++ 2M Karras" - | "DPM++ 2M SDE" - | "DPM++ 2M SDE Karras" - | "DPM++ SDE" - | "DPM++ SDE Karras" - | "KDPM 2A" - | "Euler" - | "Euler (trailing timesteps)" - | "Euler A" - | "LCM" - | "EDMDPMSolverMultistepScheduler" - | "TCDScheduler"; + rotate_roll?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` */ - expand_prompt?: boolean; + flag_pasteback?: boolean; /** - * The number of images to generate. Default value: `1` + * Whether to crop the source portrait to the face-cropping space. Default value: `true` */ - num_images?: number; + flag_do_crop?: boolean; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` */ - seed?: number; + flag_do_rot?: boolean; /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Size of the output image. Default value: `512` */ - enable_safety_checker?: boolean; + dsize?: number; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Scaling factor for the face crop. Default value: `2.3` */ - sync_mode?: boolean; + scale?: number; /** - * The format of the generated image. Default value: `"jpeg"` + * Horizontal offset ratio for face crop. 
*/ - format?: "jpeg" | "png"; + vx_ratio?: number; /** - * The URL of the image to use as a starting point for the generation. + * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` */ - image_url: string | Blob | File; + vy_ratio?: number; /** - * determines how much the generated image resembles the initial image Default value: `0.95` + * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. + * The safety checker will process the input image */ - strength?: number; + enable_safety_checker?: boolean; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * Output format Default value: `"jpeg"` */ - safety_checker_version?: "v1" | "v2"; + output_format?: "jpeg" | "png"; }; -export type LightningModelsOutput = { +export type MusePoseInput = { /** - * The generated image files info. + * URL of the image to animate. */ - images: Array; + image_url: string | Blob | File; /** - * + * The URL of the video to drive the animation */ - timings: Record; + video_url: string | Blob | File; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * The resolution to use for the pose detection. Default value: `512` */ - seed: number; + dwpose_detection_resolution?: number; /** - * Whether the generated images contain NSFW concepts. + * The resolution to use for the image during pose calculation. Default value: `720` */ - has_nsfw_concepts: Array; + dwpose_image_resolution?: number; /** - * The prompt used for generating the image. + * The frame to align the pose to. */ - prompt: string; -}; -export type OmniZeroOutput = { + dwpose_align_frame?: number; /** - * The generated image. + * The width of the output video. Default value: `748` */ - image: Image; -}; -export type OmniZeroInput = { + width?: number; /** - * Prompt to guide the image generation. + * The height of the output video. Default value: `748` */ - prompt: string; + height?: number; /** - * Negative prompt to guide the image generation. Default value: `""` + * The length of the output video. Default value: `300` */ - negative_prompt?: string; + length?: number; /** - * Input image url. + * The video slice frame number Default value: `48` */ - image_url: string | Blob | File; + slice?: number; /** - * Composition image url. + * The video slice overlap frame number Default value: `4` */ - composition_image_url: string | Blob | File; + overlap?: number; /** - * Style image url. + * Classifier free guidance Default value: `3.5` */ - style_image_url: string | Blob | File; + cfg?: number; /** - * Identity image url. + * The seed to use for the random number generator. */ - identity_image_url: string | Blob | File; + seed?: number; /** - * Image strength. Default value: `0.75` + * DDIM sampling steps Default value: `20` */ - image_strength?: number; + steps?: number; /** - * Composition strength. Default value: `1` + * The frames per second of the output video. */ - composition_strength?: number; + fps?: number; /** - * Depth strength. Default value: `0.5` + * Number of input frames to skip. Skipping 1 effectively reduces the fps in half. Default value: `1` */ - depth_strength?: number; + skip?: number; +}; +export type MusePoseOutput = { /** - * Style strength. 
Default value: `1` + * The generated video with the lip sync. */ - style_strength?: number; + video: File; +}; +export type KolorsInput = { /** - * Face strength. Default value: `1` + * The prompt to use for generating the image. Be as descriptive as possible + * for best results. */ - face_strength?: number; + prompt: string; /** - * Identity strength. Default value: `1` + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small + * details (e.g. moustache, blurry, low resolution). Default value: `""` */ - identity_strength?: number; + negative_prompt?: string; /** - * Guidance scale. Default value: `5` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show + * you. Default value: `5` */ guidance_scale?: number; /** - * Seed. Default value: `42` - */ - seed?: number; - /** - * Number of images. Default value: `1` - */ - number_of_images?: number; -}; -export type CatVtonOutput = { - /** - * The output image. + * The number of inference steps to perform. Default value: `50` */ - image: Image; -}; -export type CatVtonInput = { + num_inference_steps?: number; /** - * Url for the human image. + * Seed */ - human_image_url: string | Blob | File; + seed?: number; /** - * Url to the garment image. + * If set to true, the function will wait for the image to be generated and + * uploaded before returning the response. This will increase the latency of + * the function but it allows you to get the image directly in the response + * without going through the CDN. */ - garment_image_url: string | Blob | File; + sync_mode?: boolean; /** - * Type of the Cloth to be tried on. - * - * Options: - * upper: Upper body cloth - * lower: Lower body cloth - * overall: Full body cloth - * inner: Inner cloth, like T-shirt inside a jacket - * outer: Outer cloth, like a jacket over a T-shirt + * Enable safety checker. Default value: `true` */ - cloth_type: "upper" | "lower" | "overall" | "inner" | "outer"; + enable_safety_checker?: boolean; /** - * The size of the generated image. Default value: `portrait_4_3` + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The size of the generated image. Default value: `square_hd` */ image_size?: | ImageSize @@ -10568,33 +16278,40 @@ export type CatVtonInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The number of inference steps to perform. Default value: `30` + * The scheduler to use for the model. Default value: `"EulerDiscreteScheduler"` */ - num_inference_steps?: number; + scheduler?: + | "EulerDiscreteScheduler" + | "EulerAncestralDiscreteScheduler" + | "DPMSolverMultistepScheduler" + | "DPMSolverMultistepScheduler_SDE_karras" + | "UniPCMultistepScheduler" + | "DEISMultistepScheduler"; +}; +export type KolorsOutput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `2.5` + * The generated image files info. */ - guidance_scale?: number; + images: Array; /** - * The same seed and the same input given to the same version of the model - * will output the same image every time. + * The timings of the different steps of the generation process. */ - seed?: number; -}; -export type DwposeOutput = { + timings: any; /** - * The predicted pose image + * Seed of the generated Image. 
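+ * @example
+ * // Editor's sketch: a basic KolorsInput text-to-image request. The endpoint
+ * // id is assumed; only `prompt` is required, the other fields show values
+ * // matching their documented defaults.
+ * const { data } = await fal.subscribe("fal-ai/kolors", {
+ *   input: {
+ *     prompt: "a watercolor fox in a snowy forest",
+ *     image_size: "square_hd",
+ *     num_inference_steps: 50,
+ *   },
+ * });
+ * // Per KolorsOutput, data.images lists the generated files and data.seed
+ * // echoes the seed that was used.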
It will be the same value of the one passed in + * the input or the randomly generated that was used in case none was passed. */ - image: Image; -}; -export type DwposeInput = { + seed: number; /** - * URL of the image to be processed + * Whether the generated images contain NSFW concepts. */ - image_url: string | Blob | File; + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; }; -export type StableCascadeSoteDiffusionOutput = { +export type SdxlControlnetUnionOutput = { /** * The generated image files info. */ @@ -10602,7 +16319,7 @@ export type StableCascadeSoteDiffusionOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -10617,37 +16334,31 @@ export type StableCascadeSoteDiffusionOutput = { */ prompt: string; }; -export type StableCascadeSoteDiffusionInput = { +export type InpaintingControlNetUnionInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ prompt: string; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` - */ - negative_prompt?: string; - /** - * Number of steps to run the first stage for. Default value: `20` + * The scale of the controlnet conditioning. Default value: `0.5` */ - first_stage_steps?: number; + controlnet_conditioning_scale?: number; /** - * Number of steps to run the second stage for. Default value: `10` + * The URL of the image to use as a starting point for the generation. */ - second_stage_steps?: number; + image_url: string | Blob | File; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `4` + * The URL of the mask to use for inpainting. */ - guidance_scale?: number; + mask_url: string | Blob | File; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - second_stage_guidance_scale?: number; + negative_prompt?: string; /** - * The size of the generated image. Default value: `square_hd` + * The size of the generated image. Leave it none to automatically infer from the control image. */ image_size?: | ImageSize @@ -10658,636 +16369,663 @@ export type StableCascadeSoteDiffusionInput = { | "landscape_4_3" | "landscape_16_9"; /** - * The same seed and the same prompt given to the same version of Stable Cascade + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. 
Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion * will output the same image every time. */ seed?: number; /** - * If set to false, the safety checker will be disabled. Default value: `true` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - enable_safety_checker?: boolean; + sync_mode?: boolean; /** * The number of images to generate. Default value: `1` */ num_images?: number; /** - * If set to true, the image will be returned as base64 encoded string. + * The list of LoRA weights to use. Default value: `` */ - sync_mode?: boolean; -}; -export type Florence2LargeCaptionOutput = { + loras?: Array; /** - * Results from the model + * The list of embeddings to use. Default value: `` */ - results: string; -}; -export type Florence2LargeCaptionInput = { + embeddings?: Array; /** - * The URL of the image to be processed. + * If set to true, the safety checker will be enabled. Default value: `true` */ - image_url: string | Blob | File; + enable_safety_checker?: boolean; /** - * Text input for the task + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - text_input: string; -}; -export type Florence2LargeDetailedCaptionOutput = { + safety_checker_version?: "v1" | "v2"; /** - * Results from the model + * If set to true, the prompt will be expanded with additional prompts. */ - results: string; -}; -export type Florence2LargeDetailedCaptionInput = { + expand_prompt?: boolean; /** - * The URL of the image to be processed. + * The format of the generated image. Default value: `"jpeg"` */ - image_url: string | Blob | File; + format?: "jpeg" | "png"; /** - * Text input for the task + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - text_input: string; -}; -export type Florence2LargeMoreDetailedCaptionOutput = { + request_id?: string; /** - * Results from the model + * The URL of the control image. */ - results: string; -}; -export type Florence2LargeMoreDetailedCaptionInput = { + openpose_image_url?: string | Blob | File; /** - * The URL of the image to be processed. + * Whether to preprocess the openpose image. Default value: `true` */ - image_url: string | Blob | File; + openpose_preprocess?: boolean; /** - * Text input for the task + * The URL of the control image. */ - text_input: string; -}; -export type Florence2LargeObjectDetectionOutput = { + depth_image_url?: string | Blob | File; /** - * Results from the model + * Whether to preprocess the depth image. Default value: `true` */ - results: string; -}; -export type Florence2LargeObjectDetectionInput = { + depth_preprocess?: boolean; /** - * The URL of the image to be processed. + * The URL of the control image. */ - image_url: string | Blob | File; + teed_image_url?: string | Blob | File; /** - * Text input for the task + * Whether to preprocess the teed image. Default value: `true` */ - text_input: string; -}; -export type Florence2LargeDenseRegionCaptionOutput = { + teed_preprocess?: boolean; /** - * Results from the model + * The URL of the control image. 
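+ * @example
+ * // Editor's sketch: InpaintingControlNetUnionInput requires a prompt plus
+ * // image and mask URLs; the control images (openpose/depth/teed/canny/...)
+ * // are all optional. Endpoint id assumed, URLs are placeholders.
+ * const { data } = await fal.subscribe("fal-ai/sdxl-controlnet-union/inpainting", {
+ *   input: {
+ *     prompt: "a red brick wall with ivy",
+ *     image_url: "https://example.com/photo.png",
+ *     mask_url: "https://example.com/mask.png",
+ *   },
+ * });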
*/ - results: string; -}; -export type Florence2LargeDenseRegionCaptionInput = { + canny_image_url?: string | Blob | File; /** - * The URL of the image to be processed. + * Whether to preprocess the canny image. Default value: `true` */ - image_url: string | Blob | File; + canny_preprocess?: boolean; /** - * Text input for the task + * The URL of the control image. */ - text_input: string; -}; -export type Florence2LargeRegionProposalOutput = { + normal_image_url?: string | Blob | File; /** - * Results from the model + * Whether to preprocess the normal image. Default value: `true` */ - results: string; + normal_preprocess?: boolean; + /** + * The URL of the control image. + */ + segmentation_image_url?: string | Blob | File; + /** + * Whether to preprocess the segmentation image. Default value: `true` + */ + segmentation_preprocess?: boolean; }; -export type Florence2LargeRegionProposalInput = { +export type ImageToImageControlNetUnionInput = { /** - * The URL of the image to be processed. + * The prompt to use for generating the image. Be as descriptive as possible for best results. + */ + prompt: string; + /** + * The scale of the controlnet conditioning. Default value: `0.5` + */ + controlnet_conditioning_scale?: number; + /** + * The URL of the image to use as a starting point for the generation. */ image_url: string | Blob | File; /** - * Text input for the task + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - text_input: string; -}; -export type Florence2LargeCaptionToPhraseGroundingOutput = { + negative_prompt?: string; /** - * Results from the model + * The size of the generated image. Leave it none to automatically infer from the control image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `35` + */ + num_inference_steps?: number; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + */ + guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * The list of LoRA weights to use. Default value: `` */ - results: string; -}; -export type Florence2LargeCaptionToPhraseGroundingInput = { + loras?: Array; /** - * The URL of the image to be processed. + * The list of embeddings to use. Default value: `` */ - image_url: string | Blob | File; + embeddings?: Array; /** - * Text input for the task + * If set to true, the safety checker will be enabled. 
Default value: `true` */ - text_input: string; -}; -export type Florence2LargeReferringExpressionSegmentationOutput = { + enable_safety_checker?: boolean; /** - * Results from the model + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - results: string; -}; -export type Florence2LargeReferringExpressionSegmentationInput = { + safety_checker_version?: "v1" | "v2"; /** - * The URL of the image to be processed. + * If set to true, the prompt will be expanded with additional prompts. */ - image_url: string | Blob | File; + expand_prompt?: boolean; /** - * Text input for the task + * The format of the generated image. Default value: `"jpeg"` */ - text_input: string; -}; -export type Florence2LargeRegionToSegmentationOutput = { + format?: "jpeg" | "png"; /** - * Results from the model + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - results: string; -}; -export type Florence2LargeRegionToSegmentationInput = { + request_id?: string; /** - * The URL of the image to be processed. + * The URL of the control image. */ - image_url: string | Blob | File; + openpose_image_url?: string | Blob | File; /** - * Text input for the task + * Whether to preprocess the openpose image. Default value: `true` */ - text_input: string; -}; -export type Florence2LargeOpenVocabularyDetectionOutput = { + openpose_preprocess?: boolean; /** - * Results from the model + * The URL of the control image. */ - results: string; -}; -export type Florence2LargeOpenVocabularyDetectionInput = { + depth_image_url?: string | Blob | File; /** - * The URL of the image to be processed. + * Whether to preprocess the depth image. Default value: `true` */ - image_url: string | Blob | File; + depth_preprocess?: boolean; /** - * Text input for the task + * The URL of the control image. */ - text_input: string; -}; -export type Florence2LargeRegionToCategoryOutput = { + teed_image_url?: string | Blob | File; /** - * Results from the model + * Whether to preprocess the teed image. Default value: `true` */ - results: string; -}; -export type Florence2LargeRegionToCategoryInput = { + teed_preprocess?: boolean; /** - * The URL of the image to be processed. + * The URL of the control image. */ - image_url: string | Blob | File; + canny_image_url?: string | Blob | File; /** - * Text input for the task + * Whether to preprocess the canny image. Default value: `true` */ - text_input: string; -}; -export type Florence2LargeRegionToDescriptionOutput = { + canny_preprocess?: boolean; /** - * Results from the model + * The URL of the control image. */ - results: string; -}; -export type Florence2LargeRegionToDescriptionInput = { + normal_image_url?: string | Blob | File; /** - * The URL of the image to be processed. + * Whether to preprocess the normal image. Default value: `true` */ - image_url: string | Blob | File; + normal_preprocess?: boolean; /** - * Text input for the task + * The URL of the control image. */ - text_input: string; -}; -export type Florence2LargeOcrOutput = { + segmentation_image_url?: string | Blob | File; /** - * Results from the model + * Whether to preprocess the segmentation image. Default value: `true` */ - results: string; + segmentation_preprocess?: boolean; }; -export type Florence2LargeOcrInput = { +export type SdxlControlnetUnionInput = { /** - * The URL of the image to be processed. + * The prompt to use for generating the image. 
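+ * @example
+ * // Editor's sketch: ImageToImageControlNetUnionInput transforms a source
+ * // image; `strength` controls how far the result departs from it.
+ * // Endpoint id assumed, URLs are placeholders.
+ * const { data } = await fal.subscribe(
+ *   "fal-ai/sdxl-controlnet-union/image-to-image",
+ *   {
+ *     input: {
+ *       prompt: "photorealistic render of the sketch",
+ *       image_url: "https://example.com/sketch.png",
+ *       strength: 0.8,
+ *     },
+ *   },
+ * );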
Be as descriptive as possible for best results. */ - image_url: string | Blob | File; + prompt: string; /** - * Text input for the task + * The scale of the controlnet conditioning. Default value: `0.5` */ - text_input: string; -}; -export type Florence2LargeOcrWithRegionOutput = { + controlnet_conditioning_scale?: number; /** - * Results from the model + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - results: string; -}; -export type Florence2LargeOcrWithRegionInput = { + negative_prompt?: string; /** - * The URL of the image to be processed. + * The size of the generated image. Leave it none to automatically infer from the control image. */ - image_url: string | Blob | File; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * Text input for the task + * The number of inference steps to perform. Default value: `35` */ - text_input: string; -}; -export type Era3dOutput = { + num_inference_steps?: number; /** - * Images with background removed + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - images: Array; + seed?: number; /** - * Normal images with background removed + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - normal_images: Array; + guidance_scale?: number; /** - * Seed used for random number generation + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - seed: number; -}; -export type Era3dInput = { + sync_mode?: boolean; /** - * URL of the image to remove background from + * The number of images to generate. Default value: `1` */ - image_url: string | Blob | File; + num_images?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want the model to stick to your prompt when looking for a related image to show you. Default value: `4` + * The list of LoRA weights to use. Default value: `` */ - cfg?: number; + loras?: Array; /** - * Number of steps to run the model for Default value: `40` + * The list of embeddings to use. Default value: `` */ - steps?: number; + embeddings?: Array; /** - * Size of the image to crop to Default value: `400` + * If set to true, the safety checker will be enabled. Default value: `true` */ - crop_size?: number; + enable_safety_checker?: boolean; /** - * Seed for random number generation Default value: `-1` + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - seed?: number; + safety_checker_version?: "v1" | "v2"; /** - * Background removal Default value: `true` + * If set to true, the prompt will be expanded with additional prompts. */ - background_removal?: boolean; -}; -export type LivePortraitOutput = { + expand_prompt?: boolean; /** - * The generated video file. + * The format of the generated image. 
Default value: `"jpeg"` */ - video: File; -}; -export type LivePortraitInput = { + format?: "jpeg" | "png"; /** - * URL of the image to be animated + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - image_url: string | Blob | File; + request_id?: string; /** - * Amount to blink the eyes + * The URL of the control image. */ - blink?: number; + openpose_image_url?: string | Blob | File; /** - * Amount to raise or lower eyebrows + * Whether to preprocess the openpose image. Default value: `true` */ - eyebrow?: number; + openpose_preprocess?: boolean; /** - * Amount to wink + * The URL of the control image. */ - wink?: number; + depth_image_url?: string | Blob | File; /** - * Amount to move pupils horizontally + * Whether to preprocess the depth image. Default value: `true` */ - pupil_x?: number; + depth_preprocess?: boolean; /** - * Amount to move pupils vertically + * The URL of the control image. */ - pupil_y?: number; + teed_image_url?: string | Blob | File; /** - * Amount to open mouth in 'aaa' shape + * Whether to preprocess the teed image. Default value: `true` */ - aaa?: number; + teed_preprocess?: boolean; /** - * Amount to shape mouth in 'eee' position + * The URL of the control image. */ - eee?: number; + canny_image_url?: string | Blob | File; /** - * Amount to shape mouth in 'woo' position + * Whether to preprocess the canny image. Default value: `true` */ - woo?: number; + canny_preprocess?: boolean; /** - * Amount to smile + * The URL of the control image. */ - smile?: number; + normal_image_url?: string | Blob | File; /** - * Amount to rotate the face in pitch + * Whether to preprocess the normal image. Default value: `true` */ - rotate_pitch?: number; + normal_preprocess?: boolean; /** - * Amount to rotate the face in yaw + * The URL of the control image. */ - rotate_yaw?: number; + segmentation_image_url?: string | Blob | File; /** - * Amount to rotate the face in roll + * Whether to preprocess the segmentation image. Default value: `true` */ - rotate_roll?: number; + segmentation_preprocess?: boolean; +}; +export type SdxlControlnetUnionImageToImageOutput = { /** - * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` + * The generated image files info. */ - flag_pasteback?: boolean; + images: Array; /** - * Whether to crop the source portrait to the face-cropping space. Default value: `true` + * */ - flag_do_crop?: boolean; + timings: any; /** - * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. */ - flag_do_rot?: boolean; + seed: number; /** - * Size of the output image. Default value: `512` + * Whether the generated images contain NSFW concepts. */ - dsize?: number; + has_nsfw_concepts: Array; /** - * Scaling factor for the face crop. Default value: `2.3` + * The prompt used for generating the image. */ - scale?: number; + prompt: string; +}; +export type SdxlControlnetUnionImageToImageInput = { /** - * Horizontal offset ratio for face crop. + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - vx_ratio?: number; + prompt: string; /** - * Vertical offset ratio for face crop. Positive values move up, negative values move down. 
Default value: `-0.125` + * The scale of the controlnet conditioning. Default value: `0.5` */ - vy_ratio?: number; + controlnet_conditioning_scale?: number; /** - * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. - * The safety checker will process the input image + * The URL of the image to use as a starting point for the generation. */ - enable_safety_checker?: boolean; + image_url: string | Blob | File; /** - * Output format Default value: `"jpeg"` + * The negative prompt to use.Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). Default value: `""` */ - output_format?: "jpeg" | "png"; -}; -export type LivePortraitImageOutput = { + negative_prompt?: string; /** - * The generated video file. + * The size of the generated image. Leave it none to automatically infer from the control image. */ - video: File; -}; -export type LivePortraitImageInput = { + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * URL of the image to be animated + * The number of inference steps to perform. Default value: `35` */ - image_url: string | Blob | File; + num_inference_steps?: number; /** - * Amount to blink the eyes + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - blink?: number; + guidance_scale?: number; /** - * Amount to raise or lower eyebrows + * determines how much the generated image resembles the initial image Default value: `0.95` */ - eyebrow?: number; + strength?: number; /** - * Amount to wink + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - wink?: number; + seed?: number; /** - * Amount to move pupils horizontally + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - pupil_x?: number; + sync_mode?: boolean; /** - * Amount to move pupils vertically + * The number of images to generate. Default value: `1` */ - pupil_y?: number; + num_images?: number; /** - * Amount to open mouth in 'aaa' shape + * The list of LoRA weights to use. Default value: `` */ - aaa?: number; + loras?: Array; /** - * Amount to shape mouth in 'eee' position + * The list of embeddings to use. Default value: `` */ - eee?: number; + embeddings?: Array; /** - * Amount to shape mouth in 'woo' position + * If set to true, the safety checker will be enabled. Default value: `true` */ - woo?: number; + enable_safety_checker?: boolean; /** - * Amount to smile + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - smile?: number; + safety_checker_version?: "v1" | "v2"; /** - * Amount to rotate the face in pitch + * If set to true, the prompt will be expanded with additional prompts. */ - rotate_pitch?: number; + expand_prompt?: boolean; /** - * Amount to rotate the face in yaw + * The format of the generated image. 
Default value: `"jpeg"` */ - rotate_yaw?: number; + format?: "jpeg" | "png"; /** - * Amount to rotate the face in roll + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - rotate_roll?: number; + request_id?: string; /** - * Whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space. Default value: `true` + * The URL of the control image. */ - flag_pasteback?: boolean; + openpose_image_url?: string | Blob | File; /** - * Whether to crop the source portrait to the face-cropping space. Default value: `true` + * Whether to preprocess the openpose image. Default value: `true` */ - flag_do_crop?: boolean; + openpose_preprocess?: boolean; /** - * Whether to conduct the rotation when flag_do_crop is True. Default value: `true` + * The URL of the control image. */ - flag_do_rot?: boolean; + depth_image_url?: string | Blob | File; /** - * Size of the output image. Default value: `512` + * Whether to preprocess the depth image. Default value: `true` */ - dsize?: number; + depth_preprocess?: boolean; /** - * Scaling factor for the face crop. Default value: `2.3` + * The URL of the control image. */ - scale?: number; + teed_image_url?: string | Blob | File; /** - * Horizontal offset ratio for face crop. + * Whether to preprocess the teed image. Default value: `true` */ - vx_ratio?: number; + teed_preprocess?: boolean; /** - * Vertical offset ratio for face crop. Positive values move up, negative values move down. Default value: `-0.125` + * The URL of the control image. */ - vy_ratio?: number; + canny_image_url?: string | Blob | File; /** - * Whether to enable the safety checker. If enabled, the model will check if the input image contains a face before processing it. - * The safety checker will process the input image + * Whether to preprocess the canny image. Default value: `true` */ - enable_safety_checker?: boolean; + canny_preprocess?: boolean; /** - * Output format Default value: `"jpeg"` + * The URL of the control image. */ - output_format?: "jpeg" | "png"; -}; -export type MusePoseInput = { + normal_image_url?: string | Blob | File; /** - * URL of the image to animate. + * Whether to preprocess the normal image. Default value: `true` */ - image_url: string | Blob | File; + normal_preprocess?: boolean; /** - * The URL of the video to drive the animation + * The URL of the control image. */ - video_url: string | Blob | File; + segmentation_image_url?: string | Blob | File; /** - * The resolution to use for the pose detection. Default value: `512` + * Whether to preprocess the segmentation image. Default value: `true` */ - dwpose_detection_resolution?: number; + segmentation_preprocess?: boolean; +}; +export type TextToImageControlNetUnionInput = { /** - * The resolution to use for the image during pose calculation. Default value: `720` + * The prompt to use for generating the image. Be as descriptive as possible for best results. */ - dwpose_image_resolution?: number; + prompt: string; /** - * The frame to align the pose to. + * The scale of the controlnet conditioning. Default value: `0.5` */ - dwpose_align_frame?: number; + controlnet_conditioning_scale?: number; /** - * The width of the output video. Default value: `748` + * The negative prompt to use. Use it to address details that you don't want + * in the image. This could be colors, objects, scenery and even the small details + * (e.g. moustache, blurry, low resolution). 
Default value: `""` */ - width?: number; + negative_prompt?: string; /** - * The height of the output video. Default value: `748` + * The size of the generated image. Leave it none to automatically infer from the control image. */ - height?: number; + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; /** - * The length of the output video. Default value: `300` + * The number of inference steps to perform. Default value: `35` */ - length?: number; + num_inference_steps?: number; /** - * The video slice frame number Default value: `48` + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. */ - slice?: number; + seed?: number; /** - * The video slice overlap frame number Default value: `4` + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ - overlap?: number; + guidance_scale?: number; /** - * Classifier free guidance Default value: `3.5` + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. */ - cfg?: number; + sync_mode?: boolean; /** - * The seed to use for the random number generator. + * The number of images to generate. Default value: `1` */ - seed?: number; + num_images?: number; /** - * DDIM sampling steps Default value: `20` + * The list of LoRA weights to use. Default value: `` */ - steps?: number; + loras?: Array; /** - * The frames per second of the output video. + * The list of embeddings to use. Default value: `` */ - fps?: number; + embeddings?: Array; /** - * Number of input frames to skip. Skipping 1 effectively reduces the fps in half. Default value: `1` + * If set to true, the safety checker will be enabled. Default value: `true` */ - skip?: number; -}; -export type MusePoseOutput = { + enable_safety_checker?: boolean; /** - * The generated video with the lip sync. + * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` */ - video: File; -}; -export type KolorsInput = { + safety_checker_version?: "v1" | "v2"; /** - * The prompt to use for generating the image. Be as descriptive as possible - * for best results. + * If set to true, the prompt will be expanded with additional prompts. */ - prompt: string; + expand_prompt?: boolean; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small - * details (e.g. moustache, blurry, low resolution). Default value: `""` + * The format of the generated image. Default value: `"jpeg"` */ - negative_prompt?: string; + format?: "jpeg" | "png"; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show - * you. Default value: `5` + * An id bound to a request, can be used with response to identify the request + * itself. Default value: `""` */ - guidance_scale?: number; + request_id?: string; /** - * The number of inference steps to perform. Default value: `50` + * The URL of the control image. 
*/ - num_inference_steps?: number; + openpose_image_url?: string | Blob | File; /** - * Seed + * Whether to preprocess the openpose image. Default value: `true` */ - seed?: number; + openpose_preprocess?: boolean; /** - * If set to true, the function will wait for the image to be generated and - * uploaded before returning the response. This will increase the latency of - * the function but it allows you to get the image directly in the response - * without going through the CDN. + * The URL of the control image. */ - sync_mode?: boolean; + depth_image_url?: string | Blob | File; /** - * Enable safety checker. Default value: `true` + * Whether to preprocess the depth image. Default value: `true` */ - enable_safety_checker?: boolean; + depth_preprocess?: boolean; /** - * The number of images to generate. Default value: `1` + * The URL of the control image. */ - num_images?: number; + teed_image_url?: string | Blob | File; /** - * The size of the generated image. Default value: `square_hd` + * Whether to preprocess the teed image. Default value: `true` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + teed_preprocess?: boolean; /** - * The scheduler to use for the model. Default value: `"EulerDiscreteScheduler"` + * The URL of the control image. */ - scheduler?: - | "EulerDiscreteScheduler" - | "EulerAncestralDiscreteScheduler" - | "DPMSolverMultistepScheduler" - | "DPMSolverMultistepScheduler_SDE_karras" - | "UniPCMultistepScheduler" - | "DEISMultistepScheduler"; -}; -export type KolorsOutput = { + canny_image_url?: string | Blob | File; /** - * The generated image files info. + * Whether to preprocess the canny image. Default value: `true` */ - images: Array; + canny_preprocess?: boolean; /** - * The timings of the different steps of the generation process. + * The URL of the control image. */ - timings: Record; + normal_image_url?: string | Blob | File; /** - * Seed of the generated Image. It will be the same value of the one passed in - * the input or the randomly generated that was used in case none was passed. + * Whether to preprocess the normal image. Default value: `true` */ - seed: number; + normal_preprocess?: boolean; /** - * Whether the generated images contain NSFW concepts. + * The URL of the control image. */ - has_nsfw_concepts: Array; + segmentation_image_url?: string | Blob | File; /** - * The prompt used for generating the image. + * Whether to preprocess the segmentation image. Default value: `true` */ - prompt: string; + segmentation_preprocess?: boolean; }; -export type SdxlControlnetUnionOutput = { +export type SdxlControlnetUnionInpaintingOutput = { /** * The generated image files info. */ @@ -11295,7 +17033,7 @@ export type SdxlControlnetUnionOutput = { /** * */ - timings: Record; + timings: any; /** * Seed of the generated Image. It will be the same value of the one passed in the * input or the randomly generated that was used in case none was passed. @@ -11310,7 +17048,7 @@ export type SdxlControlnetUnionOutput = { */ prompt: string; }; -export type SdxlControlnetUnionInput = { +export type SdxlControlnetUnionInpaintingInput = { /** * The prompt to use for generating the image. Be as descriptive as possible for best results. */ @@ -11320,7 +17058,15 @@ export type SdxlControlnetUnionInput = { */ controlnet_conditioning_scale?: number; /** - * The negative prompt to use. 
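+ * @example
+ * // Editor's sketch: TextToImageControlNetUnionInput guided by a depth map;
+ * // image_size is inferred from the control image when left unset.
+ * // Endpoint id assumed, URLs are placeholders.
+ * const { data } = await fal.subscribe("fal-ai/sdxl-controlnet-union/text-to-image", {
+ *   input: {
+ *     prompt: "an armchair shaped like an avocado",
+ *     depth_image_url: "https://example.com/depth.png",
+ *   },
+ * });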
Use it to address details that you don't want + * The URL of the image to use as a starting point for the generation. + */ + image_url: string | Blob | File; + /** + * The URL of the mask to use for inpainting. + */ + mask_url: string | Blob | File; + /** + * The negative prompt to use.Use it to address details that you don't want * in the image. This could be colors, objects, scenery and even the small details * (e.g. moustache, blurry, low resolution). Default value: `""` */ @@ -11340,16 +17086,20 @@ export type SdxlControlnetUnionInput = { * The number of inference steps to perform. Default value: `35` */ num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. - */ - seed?: number; /** * The CFG (Classifier Free Guidance) scale is a measure of how close you want * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` */ guidance_scale?: number; + /** + * determines how much the generated image resembles the initial image Default value: `0.95` + */ + strength?: number; + /** + * The same seed and the same prompt given to the same version of Stable Diffusion + * will output the same image every time. + */ + seed?: number; /** * If set to true, the function will wait for the image to be generated and uploaded * before returning the response. This will increase the latency of the function but @@ -11438,503 +17188,752 @@ export type SdxlControlnetUnionInput = { */ segmentation_preprocess?: boolean; }; -export type SdxlControlnetUnionImageToImageOutput = { +export type SAM2VideoRLEInput = { + /** + * The URL of the video to be segmented. + */ + video_url: string | Blob | File; + /** + * List of prompts to segment the video Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; + /** + * Return the Run Length Encoding of the mask. + */ + return_rle?: boolean; +}; +export type Sam2ImageInput = { + /** + * URL of the image to be segmented + */ + image_url: string | Blob | File; + /** + * List of prompts to segment the image Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type SAM2VideoOutput = { + /** + * The segmented video. + */ + video: File; +}; +export type SAM2RLEOutput = { + /** + * Run Length Encoding of the mask. + */ + rle: string | Array; +}; +export type Sam2ImageOutput = { + /** + * Segmented image. + */ + image: Image; +}; +export type SAM2VideoInput = { + /** + * The URL of the video to be segmented. + */ + video_url: string | Blob | File; + /** + * List of prompts to segment the video Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type SAM2ImageInput = { + /** + * URL of the image to be segmented + */ + image_url: string | Blob | File; + /** + * List of prompts to segment the image Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type Sam2VideoOutput = { + /** + * The segmented video. + */ + video: File; +}; +export type SAM2ImageOutput = { + /** + * Segmented image. + */ + image: Image; +}; +export type Sam2VideoInput = { + /** + * The URL of the video to be segmented. 
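+ * @example
+ * // Editor's sketch: segmenting a clip with SAM2VideoInput. The element types
+ * // of `prompts`/`box_prompts` are elided in this generated diff, so only the
+ * // required field is shown. Endpoint id assumed, URL is a placeholder.
+ * const { data } = await fal.subscribe("fal-ai/sam2/video", {
+ *   input: { video_url: "https://example.com/clip.mp4" },
+ * });
+ * // Per SAM2VideoOutput, data.video is the segmented video.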
+ */ + video_url: string | Blob | File; + /** + * List of prompts to segment the video Default value: `` + */ + prompts?: Array; + /** + * Coordinates for boxes Default value: `` + */ + box_prompts?: Array; +}; +export type ImageutilsSamInput = { + /** + * Url to input image + */ + image_url: string | Blob | File; + /** + * The prompt to use when generating masks + */ + text_prompt?: string; + /** + * Image size Default value: `1024` + */ + size?: number; + /** + * IOU threshold for filtering the annotations Default value: `0.9` + */ + iou?: number; + /** + * Draw high-resolution segmentation masks Default value: `true` + */ + retina?: boolean; + /** + * Object confidence threshold Default value: `0.4` + */ + confidence?: number; + /** + * Coordinates for multiple boxes, e.g. [[x,y,w,h],[x2,y2,w2,h2]] Default value: `0,0,0,0` + */ + box_prompt?: Array>; + /** + * Coordinates for multiple points [[x1,y1],[x2,y2]] Default value: `0,0` + */ + point_prompt?: Array>; + /** + * Label for point, [1,0], 0 = background, 1 = foreground Default value: `0` + */ + point_label?: Array; + /** + * Draw the edges of the masks + */ + with_contours?: boolean; + /** + * Attempt better quality output using morphologyEx + */ + better_quality?: boolean; + /** + * Output black and white, multiple masks will be combined into one mask + */ + black_white?: boolean; + /** + * Invert mask colors + */ + invert?: boolean; +}; +export type ImageutilsSamOutput = { + /** + * Combined image of all detected masks + */ + image?: Image; +}; +export type MiniCpmInput = { + /** + * List of image URLs to be used for the image description + */ + image_urls: Array; + /** + * Prompt to be used for the image description + */ + prompt: string; +}; +export type MiniCpmOutput = { + /** + * Response from the model + */ + output: string; +}; +export type MiniCPMV26VideoInput = { + /** + * URL of the video to be analyzed + */ + video_url: string | Blob | File; + /** + * Prompt to be used for the video description + */ + prompt: string; +}; +export type MiniCPMV26ImageInput = { + /** + * List of image URLs to be used for the image description + */ + image_urls: Array; + /** + * Prompt to be used for the image description + */ + prompt: string; +}; +export type MiniCpmVideoOutput = { + /** + * Response from the model + */ + output: string; +}; +export type MiniCpmVideoInput = { + /** + * URL of the video to be analyzed + */ + video_url: string | Blob | File; + /** + * Prompt to be used for the video description + */ + prompt: string; +}; +export type ControlnextInput = { + /** + * URL of the reference image. + */ + image_url: string | Blob | File; + /** + * URL of the input video. + */ + video_url: string | Blob | File; + /** + * Height of the output video. Default value: `1024` + */ + height?: number; /** - * The generated image files info. + * Width of the output video. Default value: `576` */ - images: Array; + width?: number; /** - * + * Guidance scale for the diffusion process. Default value: `3` */ - timings: Record; + guidance_scale?: number; /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Number of inference steps. Default value: `25` */ - seed: number; + num_inference_steps?: number; /** - * Whether the generated images contain NSFW concepts. + * Maximum number of frames to process. Default value: `240` */ - has_nsfw_concepts: Array; + max_frame_num?: number; /** - * The prompt used for generating the image. 
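+ * @example
+ * // Editor's sketch: a MiniCpmInput image-description request. The element
+ * // type of `image_urls` is elided in this generated diff; string URLs are
+ * // assumed. Endpoint id assumed, URL is a placeholder.
+ * const { data } = await fal.subscribe("fal-ai/mini-cpm", {
+ *   input: {
+ *     image_urls: ["https://example.com/photo.jpg"],
+ *     prompt: "Describe this image in one sentence.",
+ *   },
+ * });
+ * // Per MiniCpmOutput, data.output carries the model's text response.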
+ * Number of frames to process in each batch. Default value: `24` */ - prompt: string; -}; -export type SdxlControlnetUnionImageToImageInput = { + batch_frames?: number; /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Number of overlapping frames between batches. Default value: `6` */ - prompt: string; + overlap?: number; /** - * The scale of the controlnet conditioning. Default value: `0.5` + * Stride for sampling frames from the input video. Default value: `2` */ - controlnet_conditioning_scale?: number; + sample_stride?: number; /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Chunk size for decoding frames. Default value: `2` */ - negative_prompt?: string; + decode_chunk_size?: number; /** - * The size of the generated image. Leave it none to automatically infer from the control image. + * Motion bucket ID for the pipeline. Default value: `127` */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + motion_bucket_id?: number; /** - * The number of inference steps to perform. Default value: `35` + * Frames per second for the output video. Default value: `7` */ - num_inference_steps?: number; + fps?: number; /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Condition scale for ControlNeXt. Default value: `1` */ - seed?: number; + controlnext_cond_scale?: number; +}; +export type ControlnextOutput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * The generated video. */ - guidance_scale?: number; + video: File; +}; +export type GrowMaskOutput = { /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * The mask */ - sync_mode?: boolean; + image: Image; +}; +export type WorkflowutilsCannyInput = { /** - * The number of images to generate. Default value: `1` + * Input image url. */ - num_images?: number; + image_url: string | Blob | File; /** - * The list of LoRA weights to use. Default value: `` + * Low threshold for the hysteresis procedure Default value: `100` */ - loras?: Array; + low_threshold?: number; /** - * The list of embeddings to use. Default value: `` + * High threshold for the hysteresis procedure Default value: `200` */ - embeddings?: Array; + high_threshold?: number; +}; +export type BlurMaskOutput = { /** - * If set to true, the safety checker will be enabled. Default value: `true` + * The mask */ - enable_safety_checker?: boolean; + image: Image; +}; +export type InsightfaceOutput = { /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * faces detected sorted by size */ - safety_checker_version?: "v1" | "v2"; + faces: Array; /** - * If set to true, the prompt will be expanded with additional prompts. + * Bounding box of the face. */ - expand_prompt?: boolean; + bbox: Array; /** - * The format of the generated image. 
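+ * @example
+ * // Editor's sketch: ControlnextInput animates a reference image with the
+ * // motion of an input video; every other field has a typed default.
+ * // Endpoint id assumed, URLs are placeholders.
+ * const { data } = await fal.subscribe("fal-ai/controlnext", {
+ *   input: {
+ *     image_url: "https://example.com/reference.png",
+ *     video_url: "https://example.com/motion.mp4",
+ *   },
+ * });
+ * // Per ControlnextOutput, data.video is the generated video.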
Default value: `"jpeg"` + * Keypoints of the face. */ - format?: "jpeg" | "png"; + kps?: Array>; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * Keypoints of the face on the image. */ - request_id?: string; + kps_image: Image; /** - * The URL of the control image. + * Confidence score of the detection. */ - openpose_image_url?: string | Blob | File; + det_score: number; /** - * Whether to preprocess the openpose image. Default value: `true` + * Embedding of the face. */ - openpose_preprocess?: boolean; + embedding_file: File; /** - * The URL of the control image. + * Either M or F if available. */ - depth_image_url?: string | Blob | File; + sex?: string; +}; +export type CompositeImageInput = { /** - * Whether to preprocess the depth image. Default value: `true` + * Input image url. */ - depth_preprocess?: boolean; + background_image_url: string | Blob | File; /** - * The URL of the control image. + * Overlay image url. */ - teed_image_url?: string | Blob | File; + overlay_image_url: string | Blob | File; /** - * Whether to preprocess the teed image. Default value: `true` + * Optional mask image url. */ - teed_preprocess?: boolean; + mask_image_url?: string | Blob | File; +}; +export type ResizeToPixelsInput = { /** - * The URL of the control image. + * Input image url. */ - canny_image_url?: string | Blob | File; + image_url: string | Blob | File; /** - * Whether to preprocess the canny image. Default value: `true` + * Maximum number of pixels in the output image. Default value: `1000000` */ - canny_preprocess?: boolean; + max_pixels?: number; /** - * The URL of the control image. + * If set, the output dimensions will be divisible by this value. */ - normal_image_url?: string | Blob | File; + enforce_divisibility?: number; +}; +export type ShrinkMaskOutput = { /** - * Whether to preprocess the normal image. Default value: `true` + * The mask */ - normal_preprocess?: boolean; + image: Image; +}; +export type TransparentImageToMaskOutput = { /** - * The URL of the control image. + * The mask */ - segmentation_image_url?: string | Blob | File; + image: Image; +}; +export type MaskInput = { /** - * Whether to preprocess the segmentation image. Default value: `true` + * Input image url. */ - segmentation_preprocess?: boolean; + image_url: string | Blob | File; }; -export type SdxlControlnetUnionInpaintingOutput = { +export type WorkflowutilsCannyOutput = { /** - * The generated image files info. + * The output image */ - images: Array; + image: Image; +}; +export type InvertMaskOutput = { /** - * + * The mask */ - timings: Record; + image: Image; +}; +export type TeedInput = { /** - * Seed of the generated Image. It will be the same value of the one passed in the - * input or the randomly generated that was used in case none was passed. + * Input image url. */ - seed: number; + image_url: string | Blob | File; +}; +export type BlurMaskInput = { /** - * Whether the generated images contain NSFW concepts. + * Input image url. */ - has_nsfw_concepts: Array; + image_url: string | Blob | File; /** - * The prompt used for generating the image. + * The radius of the Gaussian blur. Default value: `5` */ - prompt: string; + radius?: number; }; -export type SdxlControlnetUnionInpaintingInput = { +export type InsertTextInput = { /** - * The prompt to use for generating the image. Be as descriptive as possible for best results. + * Input text */ - prompt: string; + text: string; /** - * The scale of the controlnet conditioning. 
Default value: `0.5` + * Template to insert text into */ - controlnet_conditioning_scale?: number; + template: string; +}; +export type RegexReplaceInput = { /** - * The negative prompt to use. Use it to address details that you don't want - * in the image. This could be colors, objects, scenery and even the small details - * (e.g. moustache, blurry, low resolution). Default value: `""` + * Input text */ - negative_prompt?: string; + text: string; /** - * The size of the generated image. Leave it none to automatically infer from the control image. + * Pattern to replace */ - image_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; + pattern: string; /** - * The number of inference steps to perform. Default value: `35` + * Replacement text */ - num_inference_steps?: number; + replace: string; +}; +export type ImageSizeOutput = { /** - * The same seed and the same prompt given to the same version of Stable Diffusion - * will output the same image every time. + * Image size */ - seed?: number; + image_size: any; +}; +export type CompareTextInput = { /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related image to show you. Default value: `7.5` + * Input text */ - guidance_scale?: number; + text: string; /** - * If set to true, the function will wait for the image to be generated and uploaded - * before returning the response. This will increase the latency of the function but - * it allows you to get the image directly in the response without going through the CDN. + * Text to compare against */ - sync_mode?: boolean; + compare_text: string; /** - * The number of images to generate. Default value: `1` + * Text to return if the input text matches the compare text */ - num_images?: number; + return_text: string; /** - * The list of LoRA weights to use. Default value: `` + * Text to return if the input text does not match the compare text */ - loras?: Array; + fail_text: string; +}; +export type TeedOutput = { /** - * The list of embeddings to use. Default value: `` + * The edge map. */ - embeddings?: Array; + image: Image; +}; +export type GrowMaskInput = { /** - * If set to true, the safety checker will be enabled. Default value: `true` + * Input image url. */ - enable_safety_checker?: boolean; + image_url: string | Blob | File; /** - * The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model. Default value: `"v1"` + * The number of pixels to grow the mask. Default value: `5` */ - safety_checker_version?: "v1" | "v2"; + pixels?: number; /** - * If set to true, the prompt will be expanded with additional prompts. + * The threshold to convert the image to a mask. 0-255. Default value: `128` */ - expand_prompt?: boolean; + threshold?: number; +}; +export type RGBAToRGBImageInput = { /** - * The format of the generated image. Default value: `"jpeg"` + * Input image url. */ - format?: "jpeg" | "png"; + image_url: string | Blob | File; /** - * An id bound to a request, can be used with response to identify the request - * itself. Default value: `""` + * Color to replace the transparent pixels with */ - request_id?: string; + transparent_color: Color; +}; +export type ResizeImageInput = { /** - * The URL of the control image. + * Input image url. */ - openpose_image_url?: string | Blob | File; + image_url: string | Blob | File; /** - * Whether to preprocess the openpose image. 
Default value: `true` + * Width of the resized image */ - openpose_preprocess?: boolean; + width: number; /** - * The URL of the control image. + * Height of the resized image */ - depth_image_url?: string | Blob | File; + height: number; /** - * Whether to preprocess the depth image. Default value: `true` + * Resizing mode */ - depth_preprocess?: boolean; + mode: "crop" | "pad" | "scale"; /** - * The URL of the control image. + * Resizing strategy. Only used when mode is 'scale', default is nearest Default value: `"nearest"` */ - teed_image_url?: string | Blob | File; + resampling?: "nearest" | "bilinear" | "bicubic" | "lanczos"; /** - * Whether to preprocess the teed image. Default value: `true` + * Proportions of the image. Only used when mode is 'scale', default is fit Default value: `"fit"` */ - teed_preprocess?: boolean; + scaling_proportions?: "fit" | "fill" | "stretch"; /** - * The URL of the control image. + * Position of cropping. Only used when mode is 'crop', default is center Default value: `"center"` */ - canny_image_url?: string | Blob | File; + cropping_position?: + | "center" + | "top_left" + | "top_right" + | "bottom_left" + | "bottom_right"; /** - * Whether to preprocess the canny image. Default value: `true` + * Color of padding. Only used when mode is 'pad', default is black Default value: `"black"` */ - canny_preprocess?: boolean; + padding_color?: "black" | "white" | "red" | "green" | "blue"; +}; +export type TransparentImageToMaskInput = { /** - * The URL of the control image. + * Input image url. */ - normal_image_url?: string | Blob | File; + image_url: string | Blob | File; + /** + * The threshold to convert the image to a mask. Default value: `128` + */ + threshold?: number; +}; +export type ShrinkMaskInput = { /** - * Whether to preprocess the normal image. Default value: `true` + * Input image url. */ - normal_preprocess?: boolean; + image_url: string | Blob | File; /** - * The URL of the control image. + * The number of pixels to shrink the mask. Default value: `5` */ - segmentation_image_url?: string | Blob | File; + pixels?: number; /** - * Whether to preprocess the segmentation image. Default value: `true` + * The threshold to convert the image to a mask. 0-255. Default value: `128` */ - segmentation_preprocess?: boolean; + threshold?: number; }; -export type Sam2ImageInput = { +export type InsightfaceInput = { /** - * The URL of the video to be segmented. + * Input image url. */ - video_url: string | Blob | File; + image_url: string | Blob | File; /** - * List of prompts to segment the video Default value: `` + * Threshold for the edge map. Default value: `0.5` */ - prompts?: Array; + threshold?: number; /** - * Coordinates for boxes Default value: `` + * Size of the detection. Default value: `640` */ - box_prompts?: Array; -}; -export type Sam2ImageOutput = { + det_size_width?: number; /** - * Segmented image. + * Size of the detection. Default value: `640` */ - image: Image; -}; -export type Sam2VideoInput = { + det_size_height?: number; /** - * The URL of the video to be segmented. + * Maximum number of faces to detect. Default value: `1` */ - video_url: string | Blob | File; + max_face_num?: number; /** - * List of prompts to segment the video Default value: `` + * URL of the model weights. Default value: `"buffalo_l"` */ - prompts?: Array; + model_url?: string | Blob | File; /** - * Coordinates for boxes Default value: `` + * Sorting of the faces. 
Default value: `"size"` */ - box_prompts?: Array; -}; -export type Sam2VideoOutput = { + sorting?: string; /** - * Segmented image. + * Whether to run in sync mode. Default value: `true` */ - image: Image; + sync_mode?: boolean; }; -export type ImageutilsSamInput = { +export type PiDiOutput = { /** - * Input image url. + * Image with Pidi lines detected */ - image_url: string | Blob | File; + image: Image; }; -export type ImageutilsSamOutput = { +export type CannyInput = { /** - * Combined image of all detected masks + * URL of the image to process */ - image?: Image; -}; -export type MiniCpmInput = { + image_url: string | Blob | File; /** - * URL of the video to be analyzed + * Low threshold for the hysteresis procedure. Edges with a strength higher than the low threshold will appear in the output image, if there are strong edges nearby. Default value: `100` */ - video_url: string | Blob | File; + low_threshold?: number; /** - * Prompt to be used for the video description + * High threshold for the hysteresis procedure. Edges with a strength higher than the high threshold will always appear as edges in the output image. Default value: `200` */ - prompt: string; + high_threshold?: number; }; -export type MiniCpmOutput = { +export type HEDInput = { /** - * Response from the model + * URL of the image to process */ - output: string; -}; -export type MiniCpmVideoInput = { + image_url: string | Blob | File; /** - * URL of the video to be analyzed + * Whether to use the safe version of the HED detector */ - video_url: string | Blob | File; + safe?: boolean; /** - * Prompt to be used for the video description + * Whether to use the scribble version of the HED detector */ - prompt: string; + scribble?: boolean; }; -export type MiniCpmVideoOutput = { +export type CannyOutput = { /** - * Response from the model + * Image with edges detected using the Canny algorithm */ - output: string; + image: Image; }; -export type ControlnextInput = { +export type ScribbleOutput = { /** - * URL of the reference image. - */ - image_url: string | Blob | File; - /** - * URL of the input video. + * Image with lines detected using the Scribble detector */ - video_url: string | Blob | File; + image: Image; +}; +export type ZoeInput = { /** - * Height of the output video. Default value: `1024` + * URL of the image to process */ - height?: number; + image_url: string | Blob | File; +}; +export type MiDaSInput = { /** - * Width of the output video. Default value: `576` + * URL of the image to process */ - width?: number; + image_url: string | Blob | File; /** - * Guidance scale for the diffusion process. Default value: `3` + * A parameter for the MiDaS detector Default value: `6.283185307179586` */ - guidance_scale?: number; + a?: number; /** - * Number of inference steps. Default value: `25` + * Background threshold for the MiDaS detector Default value: `0.1` */ - num_inference_steps?: number; + background_threshold?: number; +}; +export type ImagePreprocessorsDepthAnythingV2Output = { /** - * Maximum number of frames to process. Default value: `240` + * Image with depth map */ - max_frame_num?: number; + image: Image; +}; +export type TeeDInput = { /** - * Number of frames to process in each batch. Default value: `24` + * URL of the image to process */ - batch_frames?: number; + image_url: string | Blob | File; +}; +export type MiDaSOutput = { /** - * Number of overlapping frames between batches. 
Default value: `6` + * Image with MiDaS depth map */ - overlap?: number; + depth_map: Image; /** - * Stride for sampling frames from the input video. Default value: `2` + * Image with MiDaS normal map */ - sample_stride?: number; + normal_map: Image; +}; +export type TeeDOutput = { /** - * Chunk size for decoding frames. Default value: `2` + * Image with TeeD lines detected */ - decode_chunk_size?: number; + image: Image; +}; +export type MLSDInput = { /** - * Motion bucket ID for the pipeline. Default value: `127` + * URL of the image to process */ - motion_bucket_id?: number; + image_url: string | Blob | File; /** - * Frames per second for the output video. Default value: `7` + * Score threshold for the MLSD detector Default value: `0.1` */ - fps?: number; + score_threshold?: number; /** - * Condition scale for ControlNeXt. Default value: `1` + * Distance threshold for the MLSD detector Default value: `0.1` */ - controlnext_cond_scale?: number; + distance_threshold?: number; }; -export type ControlnextOutput = { +export type ImagePreprocessorsDepthAnythingV2Input = { /** - * The generated video. + * URL of the image to process */ - video: File; + image_url: string | Blob | File; }; -export type WorkflowutilsCannyOutput = { +export type ZoeOutput = { /** - * The edge map. + * Image with depth map */ image: Image; }; -export type WorkflowutilsCannyInput = { +export type LineartInput = { /** - * Input image url. + * URL of the image to process */ image_url: string | Blob | File; /** - * Threshold for the edge map. Default value: `0.5` - */ - threshold?: number; - /** - * Size of the detection. Default value: `640` + * Whether to use the coarse model */ - det_size_width?: number; + coarse?: boolean; +}; +export type PiDiInput = { /** - * Size of the detection. Default value: `640` + * URL of the image to process */ - det_size_height?: number; + image_url: string | Blob | File; /** - * Maximum number of faces to detect. Default value: `1` + * Whether to use the safe version of the Pidi detector */ - max_face_num?: number; + safe?: boolean; /** - * URL of the model weights. Default value: `"buffalo_l"` + * Whether to use the scribble version of the Pidi detector */ - model_url?: string | Blob | File; + scribble?: boolean; /** - * Sorting of the faces. Default value: `"size"` + * Whether to apply the filter to the image. */ - sorting?: string; + apply_filter?: boolean; +}; +export type HEDOutput = { /** - * Whether to run in sync mode. 
Default value: `true` + * Image with lines detected using the HED detector */ - sync_mode?: boolean; + image: Image; }; -export type ImagePreprocessorsDepthAnythingV2Output = { +export type LineartOutput = { /** - * Image with lines detected using the MLSD detector + * Image with edges detected using the Canny algorithm */ image: Image; }; -export type ImagePreprocessorsDepthAnythingV2Input = { +export type ScribbleInput = { /** * URL of the image to process */ @@ -11948,7 +17947,7 @@ export type ImagePreprocessorsDepthAnythingV2Input = { */ safe?: boolean; }; -export type ImagePreprocessorsHedOutput = { +export type MLSDOutput = { /** * Image with lines detected using the MLSD detector */ @@ -11960,37 +17959,45 @@ export type ImagePreprocessorsHedInput = { */ image_url: string | Blob | File; /** - * The model to use for the Scribble detector Default value: `"HED"` + * Whether to use the safe version of the HED detector */ - model?: "HED" | "PiDi"; + safe?: boolean; /** - * Whether to use the safe version of the Scribble detector + * Whether to use the scribble version of the HED detector */ - safe?: boolean; + scribble?: boolean; }; -export type ImagePreprocessorsLineartOutput = { +export type DepthAnythingV2Output = { /** - * Image with lines detected using the MLSD detector + * Image with depth map */ image: Image; }; -export type ImagePreprocessorsLineartInput = { +export type DepthAnythingV2Input = { /** * URL of the image to process */ image_url: string | Blob | File; +}; +export type ImagePreprocessorsHedOutput = { /** - * The model to use for the Scribble detector Default value: `"HED"` + * Image with lines detected using the HED detector */ - model?: "HED" | "PiDi"; + image: Image; +}; +export type ImagePreprocessorsLineartInput = { /** - * Whether to use the safe version of the Scribble detector + * URL of the image to process */ - safe?: boolean; + image_url: string | Blob | File; + /** + * Whether to use the coarse model + */ + coarse?: boolean; }; -export type ImagePreprocessorsMidasOutput = { +export type ImagePreprocessorsLineartOutput = { /** - * Image with lines detected using the MLSD detector + * Image with edges detected using the Canny algorithm */ image: Image; }; @@ -12000,19 +18007,23 @@ export type ImagePreprocessorsMidasInput = { */ image_url: string | Blob | File; /** - * The model to use for the Scribble detector Default value: `"HED"` + * A parameter for the MiDaS detector Default value: `6.283185307179586` */ - model?: "HED" | "PiDi"; + a?: number; /** - * Whether to use the safe version of the Scribble detector + * Background threshold for the MiDaS detector Default value: `0.1` */ - safe?: boolean; + background_threshold?: number; }; -export type ImagePreprocessorsMlsdOutput = { +export type ImagePreprocessorsMidasOutput = { /** - * Image with lines detected using the MLSD detector + * Image with MiDaS depth map */ - image: Image; + depth_map: Image; + /** + * Image with MiDaS normal map + */ + normal_map: Image; }; export type ImagePreprocessorsMlsdInput = { /** @@ -12020,57 +18031,59 @@ export type ImagePreprocessorsMlsdInput = { */ image_url: string | Blob | File; /** - * The model to use for the Scribble detector Default value: `"HED"` + * Score threshold for the MLSD detector Default value: `0.1` */ - model?: "HED" | "PiDi"; + score_threshold?: number; /** - * Whether to use the safe version of the Scribble detector + * Distance threshold for the MLSD detector Default value: `0.1` */ - safe?: boolean; + distance_threshold?: number; }; -export type 
ImagePreprocessorsPidiOutput = { +export type ImagePreprocessorsMlsdOutput = { /** * Image with lines detected using the MLSD detector */ image: Image; }; +export type ImagePreprocessorsPidiOutput = { + /** + * Image with Pidi lines detected + */ + image: Image; +}; export type ImagePreprocessorsPidiInput = { /** * URL of the image to process */ image_url: string | Blob | File; /** - * The model to use for the Scribble detector Default value: `"HED"` + * Whether to use the safe version of the Pidi detector */ - model?: "HED" | "PiDi"; + safe?: boolean; /** - * Whether to use the safe version of the Scribble detector + * Whether to use the scribble version of the Pidi detector */ - safe?: boolean; -}; -export type ImagePreprocessorsSamOutput = { + scribble?: boolean; /** - * Image with lines detected using the MLSD detector + * Whether to apply the filter to the image. */ - image: Image; + apply_filter?: boolean; }; export type ImagePreprocessorsSamInput = { /** * URL of the image to process */ image_url: string | Blob | File; +}; +export type ImagePreprocessorsSamOutput = { /** - * The model to use for the Scribble detector Default value: `"HED"` - */ - model?: "HED" | "PiDi"; - /** - * Whether to use the safe version of the Scribble detector + * Image with SAM segmentation map */ - safe?: boolean; + image: Image; }; export type ImagePreprocessorsScribbleOutput = { /** - * Image with lines detected using the MLSD detector + * Image with lines detected using the Scribble detector */ image: Image; }; @@ -12088,29 +18101,15 @@ export type ImagePreprocessorsScribbleInput = { */ safe?: boolean; }; -export type ImagePreprocessorsTeedOutput = { - /** - * Image with lines detected using the MLSD detector - */ - image: Image; -}; export type ImagePreprocessorsTeedInput = { /** * URL of the image to process */ image_url: string | Blob | File; - /** - * The model to use for the Scribble detector Default value: `"HED"` - */ - model?: "HED" | "PiDi"; - /** - * Whether to use the safe version of the Scribble detector - */ - safe?: boolean; }; -export type ImagePreprocessorsZoeOutput = { +export type ImagePreprocessorsTeedOutput = { /** - * Image with lines detected using the MLSD detector + * Image with TeeD lines detected */ image: Image; }; @@ -12119,14 +18118,12 @@ export type ImagePreprocessorsZoeInput = { * URL of the image to process */ image_url: string | Blob | File; +}; +export type ImagePreprocessorsZoeOutput = { /** - * The model to use for the Scribble detector Default value: `"HED"` - */ - model?: "HED" | "PiDi"; - /** - * Whether to use the safe version of the Scribble detector + * Image with depth map */ - safe?: boolean; + image: Image; }; export type F5TtsOutput = { /** From 81909d13d94a672f6cb2ab7e692fe4f35a36cf52 Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Tue, 12 Nov 2024 01:00:36 -0800 Subject: [PATCH 3/6] fix(demo): streaming page type --- .../app/streaming/page.tsx | 180 ++++++++++++------ 1 file changed, 122 insertions(+), 58 deletions(-) diff --git a/apps/demo-nextjs-app-router/app/streaming/page.tsx b/apps/demo-nextjs-app-router/app/streaming/page.tsx index 245e7f2..2d061b4 100644 --- a/apps/demo-nextjs-app-router/app/streaming/page.tsx +++ b/apps/demo-nextjs-app-router/app/streaming/page.tsx @@ -1,85 +1,149 @@ "use client"; -import { createFalClient } from "@fal-ai/client"; +import { fal } from "@fal-ai/client"; import { useState } from "react"; -const fal = createFalClient({ +fal.config({ proxyUrl: "/api/fal/proxy", }); -type LlavaInput = { - prompt: string; - 
image_url: string; - max_new_tokens?: number; - temperature?: number; - top_p?: number; +type ErrorProps = { + error: any; }; -type LlavaOutput = { - output: string; - partial: boolean; - stats: { - num_input_tokens: number; - num_output_tokens: number; - }; +function Error(props: ErrorProps) { + if (!props.error) { + return null; + } + return ( +
+ Error {props.error.message} +
+ ); +} + +const DEFAULT_ENDPOINT_ID = "fal-ai/llavav15-13b"; +const DEFAULT_INPUT = { + prompt: "Do you know who drew this picture and what is the name of it?", + image_url: "https://llava-vl.github.io/static/images/monalisa.jpg", + max_new_tokens: 100, + temperature: 0.2, + top_p: 1, }; -export default function StreamingDemo() { - const [answer, setAnswer] = useState(""); - const [streamStatus, setStreamStatus] = useState("idle"); +export default function StreamingTest() { + // Input state + const [endpointId, setEndpointId] = useState(DEFAULT_ENDPOINT_ID); + const [input, setInput] = useState( + JSON.stringify(DEFAULT_INPUT, null, 2), + ); + // Result state + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [events, setEvents] = useState([]); + const [elapsedTime, setElapsedTime] = useState(0); + + const reset = () => { + setLoading(false); + setError(null); + setEvents([]); + setElapsedTime(0); + }; - const runInference = async () => { - const stream = await fal.stream( - "fal-ai/llavav15-13b", - { - input: { - prompt: - "Do you know who drew this picture and what is the name of it?", - image_url: "https://llava-vl.github.io/static/images/monalisa.jpg", - max_new_tokens: 100, - temperature: 0.2, - top_p: 1, - }, - }, - ); - setStreamStatus("running"); + const run = async () => { + reset(); + setLoading(true); + const start = Date.now(); + try { + const stream = await fal.stream(endpointId, { + input: JSON.parse(input), + }); - for await (const partial of stream) { - setAnswer(partial.output); - } + for await (const partial of stream) { + setEvents((events) => [partial, ...events]); + } - const result = await stream.done(); - setStreamStatus("done"); - setAnswer(result.output); + const result = await stream.done(); + setEvents((events) => [result, ...events]); + } catch (error: any) { + setError(error); + } finally { + setLoading(false); + setElapsedTime(Date.now() - start); + } }; - return (

- Hello fal +{" "} - streaming + fal + queue

- -
- +
+ + setEndpointId(e.target.value)} + /> +
+
+ +
+ + + +
-
-

Answer

- - streaming: {streamStatus} - +
+

JSON Result

+

+ {`Elapsed Time (seconds): ${(elapsedTime / 1000).toFixed(2)}`} +

+
+ {events.map((event, index) => ( +
+                  {JSON.stringify(event, null, 2)}
+                
+ ))} +
-

- {answer} -

From 769ec46e79836a78775b56060dffb841b805fb87 Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Wed, 13 Nov 2024 00:08:55 -0800 Subject: [PATCH 4/6] chore: update reference docs --- docs/reference/classes/ApiError.html | 4 +-- docs/reference/classes/FalStream.html | 12 +++---- docs/reference/classes/ValidationError.html | 4 +-- docs/reference/functions/createFalClient.html | 4 +-- .../functions/isCompletedQueueStatus.html | 2 +- docs/reference/functions/isQueueStatus.html | 2 +- docs/reference/functions/parseEndpointId.html | 2 +- docs/reference/functions/withMiddleware.html | 4 +-- docs/reference/functions/withProxy.html | 2 +- docs/reference/hierarchy.html | 2 +- docs/reference/index.html | 4 +-- .../interfaces/CompletedQueueStatus.html | 4 +-- docs/reference/interfaces/FalClient.html | 36 +++++++++---------- .../interfaces/InProgressQueueStatus.html | 4 +-- .../interfaces/InQueueQueueStatus.html | 4 +-- docs/reference/interfaces/QueueClient.html | 16 ++++----- docs/reference/interfaces/RealtimeClient.html | 4 +-- docs/reference/interfaces/StorageClient.html | 8 ++--- .../reference/interfaces/StreamingClient.html | 14 ++++---- docs/reference/types/Metrics.html | 2 +- docs/reference/types/QueueStatus.html | 2 +- docs/reference/types/RequestLog.html | 2 +- docs/reference/types/RequestMiddleware.html | 2 +- docs/reference/types/ResponseHandler.html | 2 +- docs/reference/types/Result.html | 4 +-- docs/reference/types/RunOptions.html | 4 +-- docs/reference/types/UrlOptions.html | 4 +-- docs/reference/types/ValidationErrorInfo.html | 2 +- docs/reference/types/WebHookResponse.html | 4 +-- docs/reference/variables/fal.html | 4 +-- typedoc.json | 2 +- 31 files changed, 83 insertions(+), 83 deletions(-) diff --git a/docs/reference/classes/ApiError.html b/docs/reference/classes/ApiError.html index d74bec3..b25a479 100644 --- a/docs/reference/classes/ApiError.html +++ b/docs/reference/classes/ApiError.html @@ -1,4 +1,4 @@ -ApiError | @fal-ai/client - v1.0.4

Class ApiError<Body>

Type Parameters

  • Body

Hierarchy (view full)

Constructors

constructor +ApiError | @fal-ai/client - v1.1.0-alpha.2

Class ApiError<Body>

Type Parameters

  • Body

Hierarchy (view full)

Constructors

Properties

Constructors

Properties

body: Body
status: number
+

Constructors

Properties

body: Body
status: number
diff --git a/docs/reference/classes/FalStream.html b/docs/reference/classes/FalStream.html index d56429b..a70fbaa 100644 --- a/docs/reference/classes/FalStream.html +++ b/docs/reference/classes/FalStream.html @@ -1,5 +1,5 @@ -FalStream | @fal-ai/client - v1.0.4

Class FalStream<Input, Output>

The class representing a streaming response. With t

-

Type Parameters

  • Input
  • Output

Constructors

constructor +FalStream | @fal-ai/client - v1.1.0-alpha.2

Class FalStream<Input, Output>

The class representing a streaming response. With t

+

Type Parameters

  • Input
  • Output

Constructors

Properties

config endpointId options @@ -9,16 +9,16 @@ abort done on -

Constructors

Properties

config: Required<Config>
endpointId: string
options: StreamOptions<Input>
url: string

Accessors

  • get signal(): AbortSignal
  • Gets the AbortSignal instance that can be used to listen for abort events.

    +

Constructors

Properties

config: Required<Config>
endpointId: string
options: StreamOptions<Input>
url: string

Accessors

Methods

Methods

  • Aborts the streaming request.

    Note: This method is a no-op in case the request is already done.

    Parameters

    • Optionalreason: string | Error

      optional cause for aborting the request.

      -

    Returns void

  • Gets a reference to the Promise that indicates whether the streaming +

Returns void

  • Gets a reference to the Promise that indicates whether the streaming is done or not. Developers should always call this in their apps to ensure the request is over.

    An alternative to this, is to use on('done') in case your application architecture works best with event listeners.

    Returns Promise<Output>

    the promise that resolves when the request is done.

    -
  • Parameters

    • type: FalStreamEventType
    • listener: EventHandler<any>

    Returns void

+
  • Parameters

    • type: FalStreamEventType
    • listener: EventHandler<any>

    Returns void
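For reviewers, a minimal sketch of how the FalStream surface documented above is consumed; the endpoint id and input payload are illustrative, not part of this patch:

  import { fal } from "@fal-ai/client";

  const stream = await fal.stream("fal-ai/llavav15-13b", {
    input: { prompt: "Describe the image in one sentence." },
  });

  // Event-listener style: fires once the request is over.
  stream.on("done", (result) => console.log("done", result));

  // AsyncIterator style: yields partial results as they arrive.
  for await (const partial of stream) {
    console.log(partial);
  }

  // done() resolves to the final output (the same value the listener receives).
  const result = await stream.done();
  console.log(result);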

diff --git a/docs/reference/classes/ValidationError.html b/docs/reference/classes/ValidationError.html index 3b5ff7a..a39e1bb 100644 --- a/docs/reference/classes/ValidationError.html +++ b/docs/reference/classes/ValidationError.html @@ -1,6 +1,6 @@ -ValidationError | @fal-ai/client - v1.0.4

Class ValidationError

Hierarchy (view full)

  • ApiError<ValidationErrorBody>
    • ValidationError

Constructors

constructor +ValidationError | @fal-ai/client - v1.1.0-alpha.2

Class ValidationError

Hierarchy (view full)

  • ApiError<ValidationErrorBody>
    • ValidationError

Constructors

Properties

Accessors

Methods

Constructors

Properties

body: ValidationErrorBody
status: number

Accessors

Methods

+

Constructors

Properties

body: ValidationErrorBody
status: number

Accessors

Methods
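A short sketch of catching these error classes; the endpoint id and input are illustrative, and only the status/body fields documented above are used:

  import { fal, ApiError, ValidationError } from "@fal-ai/client";

  try {
    await fal.run("fal-ai/fast-sdxl", { input: { prompt: "a cat" } });
  } catch (error) {
    if (error instanceof ValidationError) {
      // body is a ValidationErrorBody whose entries carry loc, msg and type.
      console.error("invalid input:", error.body);
    } else if (error instanceof ApiError) {
      console.error(`request failed with status ${error.status}`, error.body);
    } else {
      throw error;
    }
  }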

diff --git a/docs/reference/functions/createFalClient.html b/docs/reference/functions/createFalClient.html index e66ab61..f620b83 100644 --- a/docs/reference/functions/createFalClient.html +++ b/docs/reference/functions/createFalClient.html @@ -1,4 +1,4 @@ -createFalClient | @fal-ai/client - v1.0.4

Function createFalClient

  • Creates a new reference of the FalClient.

    +createFalClient | @fal-ai/client - v1.1.0-alpha.2

    Function createFalClient

    • Creates a new reference of the FalClient.

      Parameters

      • userConfig: Config = {}

        Optional configuration to override the default settings.

      Returns FalClient

      a new instance of the FalClient.

      -
    +
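To make the userConfig parameter concrete, a sketch of creating a scoped client; proxyUrl mirrors the demo apps in this repository, and other Config fields are omitted on purpose:

  import { createFalClient } from "@fal-ai/client";

  // A dedicated client instance instead of the shared `fal` singleton.
  const client = createFalClient({
    proxyUrl: "/api/fal/proxy",
  });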
diff --git a/docs/reference/functions/isCompletedQueueStatus.html b/docs/reference/functions/isCompletedQueueStatus.html index 85548ab..4e24c15 100644 --- a/docs/reference/functions/isCompletedQueueStatus.html +++ b/docs/reference/functions/isCompletedQueueStatus.html @@ -1 +1 @@ -isCompletedQueueStatus | @fal-ai/client - v1.0.4

Function isCompletedQueueStatus

+isCompletedQueueStatus | @fal-ai/client - v1.1.0-alpha.2

Function isCompletedQueueStatus

diff --git a/docs/reference/functions/isQueueStatus.html b/docs/reference/functions/isQueueStatus.html index 1cd2969..42ea62f 100644 --- a/docs/reference/functions/isQueueStatus.html +++ b/docs/reference/functions/isQueueStatus.html @@ -1 +1 @@ -isQueueStatus | @fal-ai/client - v1.0.4

Function isQueueStatus

+isQueueStatus | @fal-ai/client - v1.1.0-alpha.2

Function isQueueStatus

diff --git a/docs/reference/functions/parseEndpointId.html b/docs/reference/functions/parseEndpointId.html index d12ecd4..2b6c04a 100644 --- a/docs/reference/functions/parseEndpointId.html +++ b/docs/reference/functions/parseEndpointId.html @@ -1 +1 @@ -parseEndpointId | @fal-ai/client - v1.0.4

Function parseEndpointId

  • Parameters

    • id: string

    Returns EndpointId

+parseEndpointId | @fal-ai/client - v1.1.0-alpha.2

Function parseEndpointId

  • Parameters

    • id: string

    Returns EndpointId

diff --git a/docs/reference/functions/withMiddleware.html b/docs/reference/functions/withMiddleware.html index 9cec8eb..a4dc9b4 100644 --- a/docs/reference/functions/withMiddleware.html +++ b/docs/reference/functions/withMiddleware.html @@ -1,4 +1,4 @@ -withMiddleware | @fal-ai/client - v1.0.4

Function withMiddleware

  • Sets up an execution chain of middleware functions.

    +withMiddleware | @fal-ai/client - v1.1.0-alpha.2

    Function withMiddleware

    +
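A sketch of the middleware chain this function sets up, assuming withMiddleware composes one or more RequestMiddleware functions into a single one and that RequestConfig carries a headers record; the header name and the requestMiddleware Config slot are assumptions for illustration:

  import {
    createFalClient,
    withMiddleware,
    type RequestMiddleware,
  } from "@fal-ai/client";

  // Each RequestMiddleware maps a RequestConfig to a Promise of a new one.
  const addTraceHeader: RequestMiddleware = async (request) => ({
    ...request,
    headers: { ...request.headers, "x-trace-id": crypto.randomUUID() },
  });

  const client = createFalClient({
    requestMiddleware: withMiddleware(addTraceHeader),
  });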
diff --git a/docs/reference/functions/withProxy.html b/docs/reference/functions/withProxy.html index c0276aa..d7287e0 100644 --- a/docs/reference/functions/withProxy.html +++ b/docs/reference/functions/withProxy.html @@ -1 +1 @@ -withProxy | @fal-ai/client - v1.0.4

Function withProxy

+withProxy | @fal-ai/client - v1.1.0-alpha.2

Function withProxy

diff --git a/docs/reference/hierarchy.html b/docs/reference/hierarchy.html index d4818e2..446abe0 100644 --- a/docs/reference/hierarchy.html +++ b/docs/reference/hierarchy.html @@ -1 +1 @@ -@fal-ai/client - v1.0.4

@fal-ai/client - v1.0.4

Class Hierarchy

+@fal-ai/client - v1.1.0-alpha.2

@fal-ai/client - v1.1.0-alpha.2

Class Hierarchy

diff --git a/docs/reference/index.html b/docs/reference/index.html index 9cfb732..0593b5b 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,4 +1,4 @@ -@fal-ai/client - v1.0.4

@fal-ai/client - v1.0.4

Index

Classes

ApiError +@fal-ai/client - v1.1.0-alpha.2

@fal-ai/client - v1.1.0-alpha.2

Index

Classes

Interfaces

+
diff --git a/docs/reference/interfaces/CompletedQueueStatus.html b/docs/reference/interfaces/CompletedQueueStatus.html index d839d79..b21e076 100644 --- a/docs/reference/interfaces/CompletedQueueStatus.html +++ b/docs/reference/interfaces/CompletedQueueStatus.html @@ -1,6 +1,6 @@ -CompletedQueueStatus | @fal-ai/client - v1.0.4

Interface CompletedQueueStatus

interface CompletedQueueStatus {
    logs: RequestLog[];
    metrics?: Metrics;
    request_id: string;
    response_url: string;
    status: "COMPLETED";
}

Hierarchy

  • BaseQueueStatus
    • CompletedQueueStatus

Properties

logs +CompletedQueueStatus | @fal-ai/client - v1.1.0-alpha.2

Interface CompletedQueueStatus

interface CompletedQueueStatus {
    logs: RequestLog[];
    metrics?: Metrics;
    request_id: string;
    response_url: string;
    status: "COMPLETED";
}

Hierarchy

  • BaseQueueStatus
    • CompletedQueueStatus

Properties

logs: RequestLog[]
metrics?: Metrics
request_id: string
response_url: string
status: "COMPLETED"
+

Properties

logs: RequestLog[]
metrics?: Metrics
request_id: string
response_url: string
status: "COMPLETED"
diff --git a/docs/reference/interfaces/FalClient.html b/docs/reference/interfaces/FalClient.html index 88ff040..e02ac19 100644 --- a/docs/reference/interfaces/FalClient.html +++ b/docs/reference/interfaces/FalClient.html @@ -1,7 +1,7 @@ -FalClient | @fal-ai/client - v1.0.4

Interface FalClient

The main client type, it provides access to simple API model usage, +FalClient | @fal-ai/client - v1.1.0-alpha.2

Interface FalClient

The main client type, it provides access to simple API model usage, as well as access to the queue and storage APIs.

createFalClient

-
interface FalClient {
    queue: QueueClient;
    realtime: RealtimeClient;
    storage: StorageClient;
    stream: (<Output, Input>(endpointId: string, options: StreamOptions<Input>) => Promise<FalStream<Input, Output>>);
    streaming: StreamingClient;
    run<Output, Input>(endpointId: string, options: RunOptions<Input>): Promise<Result<Output>>;
    subscribe<Output, Input>(endpointId: string, options: RunOptions<Input> & QueueSubscribeOptions): Promise<Result<Output>>;
}

Properties

interface FalClient {
    queue: QueueClient;
    realtime: RealtimeClient;
    storage: StorageClient;
    stream: (<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>) => Promise<FalStream<InputType<Id>, OutputType<Id>>>);
    streaming: StreamingClient;
    run<Id>(endpointId: Id, options: RunOptions<InputType<Id>>): Promise<Result<OutputType<Id>>>;
    subscribe<Id>(endpointId: Id, options: RunOptions<InputType<Id>> & QueueSubscribeOptions): Promise<Result<OutputType<Id>>>;
}

Properties

queue realtime storage stream @@ -9,32 +9,32 @@

Methods

Properties

The queue client to interact with the queue API.

-
realtime: RealtimeClient

The realtime client to interact with the realtime API +

realtime: RealtimeClient

The realtime client to interact with the realtime API and receive updates in real-time.

  • #RealtimeClient
  • #RealtimeClient.connect
-
storage: StorageClient

The storage client to interact with the storage API.

-
stream: (<Output, Input>(endpointId: string, options: StreamOptions<Input>) => Promise<FalStream<Input, Output>>)

Calls a fal app that supports streaming and provides a streaming-capable +

storage: StorageClient

The storage client to interact with the storage API.

+
stream: (<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>) => Promise<FalStream<InputType<Id>, OutputType<Id>>>)

Calls a fal app that supports streaming and provides a streaming-capable object as a result, that can be used to get partial results through either AsyncIterator or through an event listener.

-

Type declaration

    • <Output, Input>(endpointId, options): Promise<FalStream<Input, Output>>
    • Calls a fal app that supports streaming and provides a streaming-capable +

      Type declaration

        • <Id>(endpointId, options): Promise<FalStream<InputType<Id>, OutputType<Id>>>
        • Calls a fal app that supports streaming and provides a streaming-capable object as a result, that can be used to get partial results through either AsyncIterator or through an event listener.

          -

          Type Parameters

          • Output = any
          • Input = Record<string, any>

          Parameters

          • endpointId: string

            the endpoint id, e.g. fal-ai/llavav15-13b.

            -
          • options: StreamOptions<Input>

            the request options, including the input payload.

            -

          Returns Promise<FalStream<Input, Output>>

          the FalStream instance.

          +

      Type Parameters

      • Id extends EndpointType

      Parameters

      • endpointId: Id

        the endpoint id, e.g. fal-ai/llavav15-13b.

        +
      • options: StreamOptions<InputType<Id>>

        the request options, including the input payload.

        +

      Returns Promise<FalStream<InputType<Id>, OutputType<Id>>>

      the FalStream instance.

the endpoint id, e.g. fal-ai/llavav15-13b.

the request options, including the input payload.

the FalStream instance.

-
streaming: StreamingClient

The streaming client to interact with the streaming API.

+
streaming: StreamingClient

The streaming client to interact with the streaming API.

#stream

-

Methods

  • Subscribes to updates for a specific request in the queue.

    -

    Type Parameters

    • Output = any
    • Input = Record<string, any>

    Parameters

    • endpointId: string

      The ID of the API endpoint.

      -
    • options: RunOptions<Input> & QueueSubscribeOptions

      Options to configure how the request is run and how updates are received.

      -

    Returns Promise<Result<Output>>

    A promise that resolves to the result of the request once it's completed.

    -
+

Methods

  • Runs a fal endpoint identified by its endpointId.

    +

    Type Parameters

    • Id extends EndpointType

    Parameters

    • endpointId: Id

      the registered function revision id or alias.

      +
    • options: RunOptions<InputType<Id>>

    Returns Promise<Result<OutputType<Id>>>

    the remote function output

    +
  • Subscribes to updates for a specific request in the queue.

    +

    Type Parameters

    • Id extends EndpointType

    Parameters

    • endpointId: Id

      The ID of the API endpoint.

      +
    • options: RunOptions<InputType<Id>> & QueueSubscribeOptions

      Options to configure how the request is run and how updates are received.

      +

    Returns Promise<Result<OutputType<Id>>>

    A promise that resolves to the result of the request once it's completed.

    +
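The practical effect of the Id extends EndpointType signatures above: the endpoint id literal now drives both input and output inference. A sketch with an illustrative endpoint id and payload:

  import { fal } from "@fal-ai/client";

  // input is checked against InputType<"fal-ai/flux/dev"> and data is
  // typed as OutputType<"fal-ai/flux/dev">, with no manual generics.
  const { data, requestId } = await fal.subscribe("fal-ai/flux/dev", {
    input: { prompt: "a small robot watering plants" },
  });
  console.log(requestId, data);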
diff --git a/docs/reference/interfaces/InProgressQueueStatus.html b/docs/reference/interfaces/InProgressQueueStatus.html index 45834c6..9df11f2 100644 --- a/docs/reference/interfaces/InProgressQueueStatus.html +++ b/docs/reference/interfaces/InProgressQueueStatus.html @@ -1,5 +1,5 @@ -InProgressQueueStatus | @fal-ai/client - v1.0.4

Interface InProgressQueueStatus

interface InProgressQueueStatus {
    logs: RequestLog[];
    request_id: string;
    response_url: string;
    status: "IN_PROGRESS";
}

Hierarchy

  • BaseQueueStatus
    • InProgressQueueStatus

Properties

logs +InProgressQueueStatus | @fal-ai/client - v1.1.0-alpha.2

Interface InProgressQueueStatus

interface InProgressQueueStatus {
    logs: RequestLog[];
    request_id: string;
    response_url: string;
    status: "IN_PROGRESS";
}

Hierarchy

  • BaseQueueStatus
    • InProgressQueueStatus

Properties

logs: RequestLog[]
request_id: string
response_url: string
status: "IN_PROGRESS"
+

Properties

logs: RequestLog[]
request_id: string
response_url: string
status: "IN_PROGRESS"
diff --git a/docs/reference/interfaces/InQueueQueueStatus.html b/docs/reference/interfaces/InQueueQueueStatus.html index 86d9d7e..7a6038c 100644 --- a/docs/reference/interfaces/InQueueQueueStatus.html +++ b/docs/reference/interfaces/InQueueQueueStatus.html @@ -1,5 +1,5 @@ -InQueueQueueStatus | @fal-ai/client - v1.0.4

Interface InQueueQueueStatus

interface InQueueQueueStatus {
    queue_position: number;
    request_id: string;
    response_url: string;
    status: "IN_QUEUE";
}

Hierarchy

  • BaseQueueStatus
    • InQueueQueueStatus

Properties

queue_position +InQueueQueueStatus | @fal-ai/client - v1.1.0-alpha.2

Interface InQueueQueueStatus

interface InQueueQueueStatus {
    queue_position: number;
    request_id: string;
    response_url: string;
    status: "IN_QUEUE";
}

Hierarchy

  • BaseQueueStatus
    • InQueueQueueStatus

Properties

queue_position: number
request_id: string
response_url: string
status: "IN_QUEUE"
+

Properties

queue_position: number
request_id: string
response_url: string
status: "IN_QUEUE"
diff --git a/docs/reference/interfaces/QueueClient.html b/docs/reference/interfaces/QueueClient.html index dfa176e..5a8b24c 100644 --- a/docs/reference/interfaces/QueueClient.html +++ b/docs/reference/interfaces/QueueClient.html @@ -1,6 +1,6 @@ -QueueClient | @fal-ai/client - v1.0.4

Interface QueueClient

Represents a request queue with methods for submitting requests, +QueueClient | @fal-ai/client - v1.1.0-alpha.2

Interface QueueClient

Represents a request queue with methods for submitting requests, checking their status, retrieving results, and subscribing to updates.

-
interface QueueClient {
    cancel(endpointId: string, options: BaseQueueOptions): Promise<void>;
    result<Output>(endpointId: string, options: BaseQueueOptions): Promise<Result<Output>>;
    status(endpointId: string, options: QueueStatusOptions): Promise<QueueStatus>;
    streamStatus(endpointId: string, options: QueueStatusStreamOptions): Promise<FalStream<unknown, QueueStatus>>;
    submit<Input>(endpointId: string, options: SubmitOptions<Input>): Promise<InQueueQueueStatus>;
    subscribeToStatus(endpointId: string, options: QueueStatusSubscriptionOptions): Promise<CompletedQueueStatus>;
}

Methods

interface QueueClient {
    cancel(endpointId: string, options: BaseQueueOptions): Promise<void>;
    result<Output>(endpointId: string, options: BaseQueueOptions): Promise<Result<Output>>;
    status(endpointId: string, options: QueueStatusOptions): Promise<QueueStatus>;
    streamStatus(endpointId: string, options: QueueStatusStreamOptions): Promise<FalStream<unknown, QueueStatus>>;
    submit<Input>(endpointId: string, options: SubmitOptions<Input>): Promise<InQueueQueueStatus>;
    subscribeToStatus(endpointId: string, options: QueueStatusSubscriptionOptions): Promise<CompletedQueueStatus>;
}

Methods

cancel result status streamStatus @@ -12,25 +12,25 @@ is run and how updates are received.

Returns Promise<void>

A promise that resolves once the request is cancelled.

If the request cannot be cancelled.

-
  • Retrieves the result of a specific request from the queue.

    Type Parameters

    • Output

    Parameters

    • endpointId: string

      The ID of the function web endpoint.

    • options: BaseQueueOptions

      Options to configure how the request is run.

    Returns Promise<Result<Output>>

    A promise that resolves to the result of the request.

    -
  • Retrieves the status of a specific request in the queue.

    Parameters

    • endpointId: string

      The ID of the function web endpoint.

    • options: QueueStatusOptions

      Options to configure how the request is run.

    Returns Promise<QueueStatus>

    A promise that resolves to the status of the request.

    -
  • Subscribes to updates for a specific request in the queue using HTTP streaming events.

    Parameters

    • endpointId: string

      The ID of the function web endpoint.

    • options: QueueStatusStreamOptions

      Options to configure how the request is run and how updates are received.

    Returns Promise<FalStream<unknown, QueueStatus>>

    The streaming object that can be used to listen for updates.

    -
  • Submits a request to the queue.

    Type Parameters

    • Input

    Parameters

    • endpointId: string

      The ID of the function web endpoint.

    • options: SubmitOptions<Input>

      Options to configure how the request is run.

    Returns Promise<InQueueQueueStatus>

    A promise that resolves to the result of enqueuing the request.

    -
  • Subscribes to updates for a specific request in the queue using polling or streaming. See options.mode for more details.

    Parameters

    • endpointId: string

      The ID of the function web endpoint.

    • options: QueueStatusSubscriptionOptions

      Options to configure how the request is run and how updates are received.

    Returns Promise<CompletedQueueStatus>

    A promise that resolves to the final status of the request.

    -
+
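A sketch of the submit/status/result round-trip these methods describe; the endpoint id is illustrative, and requestId is assumed to be the field BaseQueueOptions uses to identify a request:

  import { fal } from "@fal-ai/client";

  const { request_id } = await fal.queue.submit("fal-ai/fast-sdxl", {
    input: { prompt: "a lighthouse at dusk" },
  });

  // Poll once, then fetch the result when the request has completed.
  const status = await fal.queue.status("fal-ai/fast-sdxl", {
    requestId: request_id,
  });
  if (status.status === "COMPLETED") {
    const { data } = await fal.queue.result("fal-ai/fast-sdxl", {
      requestId: request_id,
    });
    console.log(data);
  }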
diff --git a/docs/reference/interfaces/RealtimeClient.html b/docs/reference/interfaces/RealtimeClient.html index 023c412..e20a9bd 100644 --- a/docs/reference/interfaces/RealtimeClient.html +++ b/docs/reference/interfaces/RealtimeClient.html @@ -1,6 +1,6 @@ -RealtimeClient | @fal-ai/client - v1.0.4

Interface RealtimeClient

interface RealtimeClient {
    connect<Input, Output>(app: string, handler: RealtimeConnectionHandler<Output>): RealtimeConnection<Input>;
}

Methods

connect +RealtimeClient | @fal-ai/client - v1.1.0-alpha.2

Interface RealtimeClient

interface RealtimeClient {
    connect<Input, Output>(app: string, handler: RealtimeConnectionHandler<Output>): RealtimeConnection<Input>;
}

Methods

Methods

  • Connects to the realtime endpoint. The default implementation uses WebSockets to connect to fal function endpoints that support WSS.

    Type Parameters

    • Input = any
    • Output = any

    Parameters

    • app: string

      the app alias or identifier.

    • handler: RealtimeConnectionHandler<Output>

      the connection handler.

      -

    Returns RealtimeConnection<Input>

+

Returns RealtimeConnection<Input>
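For context, a sketch of the connect contract; the onResult/onError handler shape is an assumption about RealtimeConnectionHandler, and the app id is illustrative:

  import { fal } from "@fal-ai/client";

  const connection = fal.realtime.connect("fal-ai/lcm", {
    onResult: (result) => console.log(result),
    onError: (error) => console.error(error),
  });

  // RealtimeConnection<Input> is assumed to expose send() for new payloads.
  connection.send({ prompt: "a watercolor fox" });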

diff --git a/docs/reference/interfaces/StorageClient.html b/docs/reference/interfaces/StorageClient.html index 6726fbb..17d018c 100644 --- a/docs/reference/interfaces/StorageClient.html +++ b/docs/reference/interfaces/StorageClient.html @@ -1,14 +1,14 @@ -StorageClient | @fal-ai/client - v1.0.4

Interface StorageClient

File support for the client. This interface establishes the contract for +StorageClient | @fal-ai/client - v1.1.0-alpha.2

Interface StorageClient

File support for the client. This interface establishes the contract for uploading files to the server and transforming the input to replace file objects with URLs.

-
interface StorageClient {
    transformInput: ((input: Record<string, any>) => Promise<Record<string, any>>);
    upload: ((file: Blob) => Promise<string>);
}

Properties

interface StorageClient {
    transformInput: ((input: Record<string, any>) => Promise<Record<string, any>>);
    upload: ((file: Blob) => Promise<string>);
}

Properties

transformInput: ((input: Record<string, any>) => Promise<Record<string, any>>)

Transform the input to replace file objects with URLs. This is used to transform the input before sending it to the server and ensures that the server receives URLs instead of file objects.

Type declaration

upload: ((file: Blob) => Promise<string>)

Upload a file to the server. Returns the URL of the uploaded file.

+
upload: ((file: Blob) => Promise<string>)

Upload a file to the server. Returns the URL of the uploaded file.

Type declaration

    • (file): Promise<string>
    • Parameters

      • file: Blob

        the file to upload

      Returns Promise<string>

      the URL of the uploaded file

      -
+
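A sketch of the two storage operations documented above; the file construction is illustrative:

  import { fal } from "@fal-ai/client";

  const file = new Blob(["hello"], { type: "text/plain" });
  const url = await fal.storage.upload(file);

  // transformInput swaps nested Blob/File values for uploaded URLs, so the
  // server only ever receives strings.
  const input = await fal.storage.transformInput({ some_file: file });
  console.log(url, input);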
diff --git a/docs/reference/interfaces/StreamingClient.html b/docs/reference/interfaces/StreamingClient.html index 044771e..4a696b5 100644 --- a/docs/reference/interfaces/StreamingClient.html +++ b/docs/reference/interfaces/StreamingClient.html @@ -1,9 +1,9 @@ -StreamingClient | @fal-ai/client - v1.0.4

Interface StreamingClient

The streaming client interface.

-
interface StreamingClient {
    stream<Output, Input>(endpointId: string, options: StreamOptions<Input>): Promise<FalStream<Input, Output>>;
}

Methods

Methods

  • Calls a fal app that supports streaming and provides a streaming-capable +StreamingClient | @fal-ai/client - v1.1.0-alpha.2

    Interface StreamingClient

    The streaming client interface.

    +
    interface StreamingClient {
        stream<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>): Promise<FalStream<InputType<Id>, OutputType<Id>>>;
    }

    Methods

    Methods

    • Calls a fal app that supports streaming and provides a streaming-capable object as a result, that can be used to get partial results through either AsyncIterator or through an event listener.

      -

      Type Parameters

      • Output = any
      • Input = Record<string, any>

      Parameters

      • endpointId: string

        the endpoint id, e.g. fal-ai/llavav15-13b.

        -
      • options: StreamOptions<Input>

        the request options, including the input payload.

        -

      Returns Promise<FalStream<Input, Output>>

      the FalStream instance.

      -
    +

    Type Parameters

    • Id extends EndpointType

    Parameters

    • endpointId: Id

      the endpoint id, e.g. fal-ai/llavav15-13b.

      +
    • options: StreamOptions<InputType<Id>>

      the request options, including the input payload.

      +

    Returns Promise<FalStream<InputType<Id>, OutputType<Id>>>

    the FalStream instance.

    +
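Tying the typed signature above to usage, a minimal sketch via the streaming client on FalClient; the endpoint id and input are illustrative:

  import { fal } from "@fal-ai/client";

  // Both the input payload and the partial events are inferred from Id.
  const stream = await fal.streaming.stream("fal-ai/llavav15-13b", {
    input: { prompt: "What is in this image?" },
  });
  for await (const partial of stream) {
    console.log(partial);
  }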
diff --git a/docs/reference/types/Metrics.html b/docs/reference/types/Metrics.html index db281ff..da76467 100644 --- a/docs/reference/types/Metrics.html +++ b/docs/reference/types/Metrics.html @@ -1 +1 @@ -Metrics | @fal-ai/client - v1.0.4

Type Alias Metrics

Metrics: {
    inference_time: number | null;
}
+Metrics | @fal-ai/client - v1.1.0-alpha.2

Type Alias Metrics

Metrics: {
    inference_time: number | null;
}
diff --git a/docs/reference/types/QueueStatus.html b/docs/reference/types/QueueStatus.html index 4abf496..ab7e2b0 100644 --- a/docs/reference/types/QueueStatus.html +++ b/docs/reference/types/QueueStatus.html @@ -1 +1 @@ -QueueStatus | @fal-ai/client - v1.0.4
+QueueStatus | @fal-ai/client - v1.1.0-alpha.2
diff --git a/docs/reference/types/RequestLog.html b/docs/reference/types/RequestLog.html index 5bbe12a..60e439b 100644 --- a/docs/reference/types/RequestLog.html +++ b/docs/reference/types/RequestLog.html @@ -1 +1 @@ -RequestLog | @fal-ai/client - v1.0.4

Type Alias RequestLog

RequestLog: {
    level:
        | "STDERR"
        | "STDOUT"
        | "ERROR"
        | "INFO"
        | "WARN"
        | "DEBUG";
    message: string;
    source: "USER";
    timestamp: string;
}
+RequestLog | @fal-ai/client - v1.1.0-alpha.2

Type Alias RequestLog

RequestLog: {
    level:
        | "STDERR"
        | "STDOUT"
        | "ERROR"
        | "INFO"
        | "WARN"
        | "DEBUG";
    message: string;
    source: "USER";
    timestamp: string;
}
diff --git a/docs/reference/types/RequestMiddleware.html b/docs/reference/types/RequestMiddleware.html index 6f203b5..4c25938 100644 --- a/docs/reference/types/RequestMiddleware.html +++ b/docs/reference/types/RequestMiddleware.html @@ -1 +1 @@ -RequestMiddleware | @fal-ai/client - v1.0.4

Type Alias RequestMiddleware

RequestMiddleware: ((request: RequestConfig) => Promise<RequestConfig>)
+RequestMiddleware | @fal-ai/client - v1.1.0-alpha.2

Type Alias RequestMiddleware

RequestMiddleware: ((request: RequestConfig) => Promise<RequestConfig>)
diff --git a/docs/reference/types/ResponseHandler.html b/docs/reference/types/ResponseHandler.html index 036faa4..ce494b9 100644 --- a/docs/reference/types/ResponseHandler.html +++ b/docs/reference/types/ResponseHandler.html @@ -1 +1 @@ -ResponseHandler | @fal-ai/client - v1.0.4

Type Alias ResponseHandler<Output>

ResponseHandler<Output>: ((response: Response) => Promise<Output>)

Type Parameters

  • Output
+ResponseHandler | @fal-ai/client - v1.1.0-alpha.2

Type Alias ResponseHandler<Output>

ResponseHandler<Output>: ((response: Response) => Promise<Output>)

Type Parameters

  • Output
diff --git a/docs/reference/types/Result.html b/docs/reference/types/Result.html index b50c9e5..09eeb67 100644 --- a/docs/reference/types/Result.html +++ b/docs/reference/types/Result.html @@ -1,3 +1,3 @@ -Result | @fal-ai/client - v1.0.4

Type Alias Result<T>

Result<T>: {
    data: T;
    requestId: string;
}

Represents an API result, containing the data, +Result | @fal-ai/client - v1.1.0-alpha.2

Type Alias Result<T>

Result<T>: {
    data: T;
    requestId: string;
}

Represents an API result, containing the data, the request ID and any other relevant information.

-

Type Parameters

  • T
+

Type Parameters

  • T
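In practice this envelope is what every high-level call resolves to; a sketch using queue.result, where the Output generic and the requestId value are illustrative:

  import { fal } from "@fal-ai/client";

  type SdxlOutput = { images: { url: string }[] }; // illustrative shape

  const { data, requestId } = await fal.queue.result<SdxlOutput>(
    "fal-ai/fast-sdxl",
    { requestId: "a-previously-submitted-request-id" },
  );
  console.log(requestId, data.images[0]?.url);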
diff --git a/docs/reference/types/RunOptions.html b/docs/reference/types/RunOptions.html index 57be25e..7313028 100644 --- a/docs/reference/types/RunOptions.html +++ b/docs/reference/types/RunOptions.html @@ -1,6 +1,6 @@ -RunOptions | @fal-ai/client - v1.0.4

Type Alias RunOptions<Input>

RunOptions<Input>: {
    input?: Input;
    method?:
        | "get"
        | "post"
        | "put"
        | "delete"
        | string;
}

The function input and other configuration when running +RunOptions | @fal-ai/client - v1.1.0-alpha.2

Type Alias RunOptions<Input>

RunOptions<Input>: {
    input?: Input;
    method?:
        | "get"
        | "post"
        | "put"
        | "delete"
        | string;
}

The function input and other configuration when running the function, such as the HTTP method to use.

Type Parameters

  • Input

Type declaration

  • Optional Readonlyinput?: Input

    The function input. It will be submitted either as query params or the body payload, depending on the method.

  • Optional Readonlymethod?:
        | "get"
        | "post"
        | "put"
        | "delete"
        | string

    The HTTP method, defaults to post;

    -
+
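The method option maps directly onto the HTTP verb used for the request; a sketch of a GET-style call where the input travels as query params (endpoint id illustrative):

  import { fal } from "@fal-ai/client";

  // method defaults to "post"; with "get" the input is submitted as
  // query params instead of a body payload.
  await fal.run("fal-ai/fast-sdxl", {
    method: "get",
    input: { prompt: "hello" },
  });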
diff --git a/docs/reference/types/UrlOptions.html b/docs/reference/types/UrlOptions.html index e27765d..21059d4 100644 --- a/docs/reference/types/UrlOptions.html +++ b/docs/reference/types/UrlOptions.html @@ -1,6 +1,6 @@ -UrlOptions | @fal-ai/client - v1.0.4

Type Alias UrlOptions

UrlOptions: {
    path?: string;
    query?: Record<string, string>;
    subdomain?: string;
}

Type declaration

  • Optionalpath?: string

    The path to append to the function URL.

    +UrlOptions | @fal-ai/client - v1.1.0-alpha.2

    Type Alias UrlOptions

    UrlOptions: {
        path?: string;
        query?: Record<string, string>;
        subdomain?: string;
    }

    Type declaration

    • Optionalpath?: string

      The path to append to the function URL.

    • Optional Readonlyquery?: Record<string, string>

      The query parameters to include in the URL.

    • Optional Readonlysubdomain?: string

      If true, the function will use the queue to run the function asynchronously and return the result in a separate call. This influences how the URL is built.

      -
    +
diff --git a/docs/reference/types/ValidationErrorInfo.html b/docs/reference/types/ValidationErrorInfo.html index a78d1ce..177f351 100644 --- a/docs/reference/types/ValidationErrorInfo.html +++ b/docs/reference/types/ValidationErrorInfo.html @@ -1 +1 @@ -ValidationErrorInfo | @fal-ai/client - v1.0.4

Type Alias ValidationErrorInfo

ValidationErrorInfo: {
    loc: (string | number)[];
    msg: string;
    type: string;
}
+ValidationErrorInfo | @fal-ai/client - v1.1.0-alpha.2

Type Alias ValidationErrorInfo

ValidationErrorInfo: {
    loc: (string | number)[];
    msg: string;
    type: string;
}
diff --git a/docs/reference/types/WebHookResponse.html b/docs/reference/types/WebHookResponse.html index a5b1dfb..c200a4e 100644 --- a/docs/reference/types/WebHookResponse.html +++ b/docs/reference/types/WebHookResponse.html @@ -1,4 +1,4 @@ -WebHookResponse | @fal-ai/client - v1.0.4

Type Alias WebHookResponse<Payload>

WebHookResponse<Payload>: {
    error: never;
    payload: Payload;
    request_id: string;
    status: "OK";
} | {
    error: string;
    payload: Payload;
    request_id: string;
    status: "ERROR";
}

Represents the response from a WebHook request. +WebHookResponse | @fal-ai/client - v1.1.0-alpha.2

Type Alias WebHookResponse<Payload>

WebHookResponse<Payload>: {
    error: never;
    payload: Payload;
    request_id: string;
    status: "OK";
} | {
    error: string;
    payload: Payload;
    request_id: string;
    status: "ERROR";
}

Represents the response from a WebHook request. This is a union type that varies based on the status property.

Type Parameters

  • Payload = any

    The type of the payload in the response. It defaults to any, allowing for flexibility in specifying the structure of the payload.

    @@ -10,4 +10,4 @@
  • payload: Payload

    The payload of the response, structure determined by the Payload type.

  • request_id: string

    The unique identifier for the request.

  • status: "ERROR"

    Indicates an unsuccessful response.

    -
+
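Because the union discriminates on status, handlers can narrow it without casts; a sketch where the payload shape is illustrative:

  import type { WebHookResponse } from "@fal-ai/client";

  function handleWebhook(body: WebHookResponse<{ output: string }>) {
    if (body.status === "OK") {
      // Narrowed branch: error is never, payload is fully typed.
      console.log(body.request_id, body.payload.output);
    } else {
      console.error(body.request_id, body.error);
    }
  }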
diff --git a/docs/reference/variables/fal.html b/docs/reference/variables/fal.html index 35f68d8..ac58f72 100644 --- a/docs/reference/variables/fal.html +++ b/docs/reference/variables/fal.html @@ -1,3 +1,3 @@ -fal | @fal-ai/client - v1.0.4

Variable falConst

fal: SingletonFalClient = ...

Creates a singleton instance of the client. This is useful as a compatibility +fal | @fal-ai/client - v1.1.0-alpha.2

Variable falConst

fal: SingletonFalClient = ...

Creates a singleton instance of the client. This is useful as a compatibility layer for existing code that uses client versions prior to 1.0.0.

-
+
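A sketch of the compatibility singleton in use, mirroring the streaming demo earlier in this series; the endpoint id and input are illustrative:

  import { fal } from "@fal-ai/client";

  // Same configuration surface as createFalClient, applied to the shared
  // instance; proxyUrl matches the demo apps in this repo.
  fal.config({ proxyUrl: "/api/fal/proxy" });

  const { data } = await fal.subscribe("fal-ai/fast-sdxl", {
    input: { prompt: "a tiny cabin in the woods" },
  });
  console.log(data);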
diff --git a/typedoc.json b/typedoc.json index fea30c7..7382342 100644 --- a/typedoc.json +++ b/typedoc.json @@ -2,7 +2,7 @@ "$schema": "https://typedoc.org/schema.json", "out": "docs/reference", "entryPoints": ["./libs/client/src/index.ts"], - "exclude": ["./src/__tests__/**", "*.spec.ts", "./src/types/endpoints.ts"], + "exclude": ["./src/__tests__/**", "*.spec.ts", "./libs/client/src/types/endpoints.ts"], "excludeExternals": true, "excludeInternal": false, "includeVersion": true, From c2e44f775f6128c3b43337b6f614f4138fdef727 Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Wed, 13 Nov 2024 00:12:25 -0800 Subject: [PATCH 5/6] chore: prepare client release --- libs/client/package.json | 2 +- libs/client/src/types/endpoints.ts | 24 ++++++++++++++++++++++++ typedoc.json | 6 +++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/libs/client/package.json b/libs/client/package.json index 9bed0ba..c342849 100644 --- a/libs/client/package.json +++ b/libs/client/package.json @@ -1,7 +1,7 @@ { "name": "@fal-ai/client", "description": "The fal.ai client for JavaScript and TypeScript", - "version": "1.1.0-alpha.2", + "version": "1.1.0", "license": "MIT", "repository": { "type": "git", diff --git a/libs/client/src/types/endpoints.ts b/libs/client/src/types/endpoints.ts index 407e571..4535715 100644 --- a/libs/client/src/types/endpoints.ts +++ b/libs/client/src/types/endpoints.ts @@ -590,6 +590,10 @@ export type FluxProTextToImageInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; }; export type FluxProV11UltraInput = { /** @@ -619,6 +623,10 @@ export type FluxProV11UltraInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; /** * The aspect ratio of the generated image. Default value: `"16:9"` */ @@ -690,6 +698,10 @@ export type FluxProPlusTextToImageInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; }; export type FluxLoraFastTrainingInput = { /** @@ -1693,6 +1705,10 @@ export type FluxProUltraTextToImageInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; /** * The aspect ratio of the generated image. Default value: `"16:9"` */ @@ -1764,6 +1780,10 @@ export type FluxProV11Input = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. 
Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; }; export type FluxProNewInput = { /** @@ -1809,6 +1829,10 @@ export type FluxProNewInput = { * The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive. Default value: `"2"` */ safety_tolerance?: "1" | "2" | "3" | "4" | "5" | "6"; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; }; export type FluxProNewOutput = { /** diff --git a/typedoc.json b/typedoc.json index 7382342..04d1c26 100644 --- a/typedoc.json +++ b/typedoc.json @@ -2,7 +2,11 @@ "$schema": "https://typedoc.org/schema.json", "out": "docs/reference", "entryPoints": ["./libs/client/src/index.ts"], - "exclude": ["./src/__tests__/**", "*.spec.ts", "./libs/client/src/types/endpoints.ts"], + "exclude": [ + "./src/__tests__/**", + "*.spec.ts", + "./libs/client/src/types/endpoints.ts" + ], "excludeExternals": true, "excludeInternal": false, "includeVersion": true, From 6405bbe233a1fc8e94b33cec4e894c9da773bdcf Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Wed, 13 Nov 2024 00:13:04 -0800 Subject: [PATCH 6/6] chore: update reference docs --- docs/reference/classes/ApiError.html | 4 ++-- docs/reference/classes/FalStream.html | 12 ++++++------ docs/reference/classes/ValidationError.html | 4 ++-- docs/reference/functions/createFalClient.html | 4 ++-- .../functions/isCompletedQueueStatus.html | 2 +- docs/reference/functions/isQueueStatus.html | 2 +- docs/reference/functions/parseEndpointId.html | 2 +- docs/reference/functions/withMiddleware.html | 4 ++-- docs/reference/functions/withProxy.html | 2 +- docs/reference/hierarchy.html | 2 +- docs/reference/index.html | 4 ++-- .../interfaces/CompletedQueueStatus.html | 4 ++-- docs/reference/interfaces/FalClient.html | 18 +++++++++--------- .../interfaces/InProgressQueueStatus.html | 4 ++-- .../interfaces/InQueueQueueStatus.html | 4 ++-- docs/reference/interfaces/QueueClient.html | 16 ++++++++-------- docs/reference/interfaces/RealtimeClient.html | 4 ++-- docs/reference/interfaces/StorageClient.html | 8 ++++---- docs/reference/interfaces/StreamingClient.html | 6 +++--- docs/reference/types/Metrics.html | 2 +- docs/reference/types/QueueStatus.html | 2 +- docs/reference/types/RequestLog.html | 2 +- docs/reference/types/RequestMiddleware.html | 2 +- docs/reference/types/ResponseHandler.html | 2 +- docs/reference/types/Result.html | 4 ++-- docs/reference/types/RunOptions.html | 4 ++-- docs/reference/types/UrlOptions.html | 4 ++-- docs/reference/types/ValidationErrorInfo.html | 2 +- docs/reference/types/WebHookResponse.html | 4 ++-- docs/reference/variables/fal.html | 4 ++-- 30 files changed, 69 insertions(+), 69 deletions(-) diff --git a/docs/reference/classes/ApiError.html b/docs/reference/classes/ApiError.html index b25a479..46aadab 100644 --- a/docs/reference/classes/ApiError.html +++ b/docs/reference/classes/ApiError.html @@ -1,4 +1,4 @@ -ApiError | @fal-ai/client - v1.1.0-alpha.2

+ApiError | @fal-ai/client - v1.1.0

Class ApiError<Body>

Type Parameters
  • Body

Hierarchy (view full)

Constructors
  constructor

Properties
  body: Body
  status: number
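For illustration, a minimal sketch of handling this error type; the endpoint id and input are placeholders:

import { fal, ApiError } from "@fal-ai/client";

try {
  // placeholder endpoint id and input
  await fal.run("fal-ai/some-model", { input: {} });
} catch (error) {
  if (error instanceof ApiError) {
    // status is the HTTP status code; body is typed by the Body type parameter
    console.error(error.status, error.body);
  }
}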
diff --git a/docs/reference/classes/FalStream.html b/docs/reference/classes/FalStream.html index a70fbaa..c6100c2 100644 --- a/docs/reference/classes/FalStream.html +++ b/docs/reference/classes/FalStream.html @@ -1,5 +1,5 @@ -FalStream | @fal-ai/client - v1.1.0-alpha.2

+FalStream | @fal-ai/client - v1.1.0

Class FalStream<Input, Output>

The class representing a streaming response. With it, partial results can be
consumed as they arrive, through either an AsyncIterator or event listeners.

Type Parameters
  • Input
  • Output

Properties
  config: Required<Config>
  endpointId: string
  options: StreamOptions<Input>
  url: string

Accessors
  • get signal(): AbortSignal
    Gets the AbortSignal instance that can be used to listen for abort events.

Methods
  • abort(reason?: string | Error): void
    Aborts the streaming request. Note: this method is a no-op in case the
    request is already done. The optional reason is the cause for aborting
    the request.
  • done(): Promise<Output>
    Gets a reference to the Promise that indicates whether the streaming is
    done or not. Developers should always call this in their apps to ensure
    the request is over. An alternative to this is to use on('done'), in case
    your application architecture works best with event listeners.
    Returns the promise that resolves when the request is done.
  • on(type: FalStreamEventType, listener: EventHandler<any>): void
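For illustration, a hedged sketch of consuming a FalStream; the input is a placeholder and the partial/output shapes depend on the endpoint:

import { fal } from "@fal-ai/client";

const stream = await fal.stream("fal-ai/llavav15-13b", {
  input: { prompt: "What is in this picture?" },
});

// partial results through the AsyncIterator...
for await (const partial of stream) {
  console.log(partial);
}

// ...or through event listeners, with done() to await the final output
stream.on("done", (result) => console.log("final:", result));
const output = await stream.done();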

diff --git a/docs/reference/classes/ValidationError.html b/docs/reference/classes/ValidationError.html index a39e1bb..864f5fd 100644 --- a/docs/reference/classes/ValidationError.html +++ b/docs/reference/classes/ValidationError.html @@ -1,6 +1,6 @@ -ValidationError | @fal-ai/client - v1.1.0-alpha.2

+ValidationError | @fal-ai/client - v1.1.0

Class ValidationError

Hierarchy (view full)
  • ApiError<ValidationErrorBody>
    • ValidationError

Constructors
  constructor

Properties
  body: ValidationErrorBody
  status: number

Accessors

Methods

diff --git a/docs/reference/functions/createFalClient.html b/docs/reference/functions/createFalClient.html index f620b83..5d163c2 100644 --- a/docs/reference/functions/createFalClient.html +++ b/docs/reference/functions/createFalClient.html @@ -1,4 +1,4 @@ -createFalClient | @fal-ai/client - v1.1.0-alpha.2

+createFalClient | @fal-ai/client - v1.1.0

Function createFalClient

  • Creates a new reference of the FalClient.

    Parameters
    • userConfig: Config = {}
      Optional configuration to override the default settings.

    Returns FalClient
      a new instance of the FalClient.
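For illustration, a minimal sketch; whether credentials are set directly or resolved through a proxy depends on the application (the env var name is an assumption):

import { createFalClient } from "@fal-ai/client";

const client = createFalClient({
  // assumption: API key provided via environment variable
  credentials: process.env.FAL_KEY,
});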
    +
diff --git a/docs/reference/functions/isCompletedQueueStatus.html b/docs/reference/functions/isCompletedQueueStatus.html index 4e24c15..82cb4d9 100644 --- a/docs/reference/functions/isCompletedQueueStatus.html +++ b/docs/reference/functions/isCompletedQueueStatus.html @@ -1 +1 @@ -isCompletedQueueStatus | @fal-ai/client - v1.1.0-alpha.2

+isCompletedQueueStatus | @fal-ai/client - v1.1.0

Function isCompletedQueueStatus

diff --git a/docs/reference/functions/isQueueStatus.html b/docs/reference/functions/isQueueStatus.html index 42ea62f..1c50ee7 100644 --- a/docs/reference/functions/isQueueStatus.html +++ b/docs/reference/functions/isQueueStatus.html @@ -1 +1 @@ -isQueueStatus | @fal-ai/client - v1.1.0-alpha.2

+isQueueStatus | @fal-ai/client - v1.1.0

Function isQueueStatus

diff --git a/docs/reference/functions/parseEndpointId.html b/docs/reference/functions/parseEndpointId.html index 2b6c04a..39b23a0 100644 --- a/docs/reference/functions/parseEndpointId.html +++ b/docs/reference/functions/parseEndpointId.html @@ -1 +1 @@ -parseEndpointId | @fal-ai/client - v1.1.0-alpha.2

+parseEndpointId | @fal-ai/client - v1.1.0

Function parseEndpointId

  • Parameters
    • id: string

    Returns EndpointId

diff --git a/docs/reference/functions/withMiddleware.html b/docs/reference/functions/withMiddleware.html index a4dc9b4..8be19b3 100644 --- a/docs/reference/functions/withMiddleware.html +++ b/docs/reference/functions/withMiddleware.html @@ -1,4 +1,4 @@ -withMiddleware | @fal-ai/client - v1.1.0-alpha.2

Function withMiddleware
+withMiddleware | @fal-ai/client - v1.1.0

Function withMiddleware

diff --git a/docs/reference/functions/withProxy.html b/docs/reference/functions/withProxy.html index d7287e0..c4cfa4a 100644 --- a/docs/reference/functions/withProxy.html +++ b/docs/reference/functions/withProxy.html @@ -1 +1 @@ -withProxy | @fal-ai/client - v1.1.0-alpha.2

+withProxy | @fal-ai/client - v1.1.0

Function withProxy

diff --git a/docs/reference/hierarchy.html b/docs/reference/hierarchy.html index 446abe0..cff7b0c 100644 --- a/docs/reference/hierarchy.html +++ b/docs/reference/hierarchy.html @@ -1 +1 @@ -@fal-ai/client - v1.1.0-alpha.2

+@fal-ai/client - v1.1.0

@fal-ai/client - v1.1.0

Class Hierarchy

diff --git a/docs/reference/index.html b/docs/reference/index.html index 0593b5b..176bb3f 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,4 +1,4 @@ -@fal-ai/client - v1.1.0-alpha.2

+@fal-ai/client - v1.1.0

@fal-ai/client - v1.1.0

Index

Classes
  ApiError

Interfaces
diff --git a/docs/reference/interfaces/CompletedQueueStatus.html b/docs/reference/interfaces/CompletedQueueStatus.html index b21e076..885bd17 100644 --- a/docs/reference/interfaces/CompletedQueueStatus.html +++ b/docs/reference/interfaces/CompletedQueueStatus.html @@ -1,6 +1,6 @@ -CompletedQueueStatus | @fal-ai/client - v1.1.0-alpha.2

+CompletedQueueStatus | @fal-ai/client - v1.1.0

Interface CompletedQueueStatus

interface CompletedQueueStatus {
    logs: RequestLog[];
    metrics?: Metrics;
    request_id: string;
    response_url: string;
    status: "COMPLETED";
}

Hierarchy
  • BaseQueueStatus
    • CompletedQueueStatus

Properties
  logs: RequestLog[]
  metrics?: Metrics
  request_id: string
  response_url: string
  status: "COMPLETED"
diff --git a/docs/reference/interfaces/FalClient.html b/docs/reference/interfaces/FalClient.html index e02ac19..7de76c6 100644 --- a/docs/reference/interfaces/FalClient.html +++ b/docs/reference/interfaces/FalClient.html @@ -1,7 +1,7 @@ -FalClient | @fal-ai/client - v1.1.0-alpha.2

+FalClient | @fal-ai/client - v1.1.0

Interface FalClient

The main client type, it provides access to simple API model usage,
as well as access to the queue and storage APIs.

See createFalClient

interface FalClient {
    queue: QueueClient;
    realtime: RealtimeClient;
    storage: StorageClient;
    stream: (<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>) => Promise<FalStream<InputType<Id>, OutputType<Id>>>);
    streaming: StreamingClient;
    run<Id>(endpointId: Id, options: RunOptions<InputType<Id>>): Promise<Result<OutputType<Id>>>;
    subscribe<Id>(endpointId: Id, options: RunOptions<InputType<Id>> & QueueSubscribeOptions): Promise<Result<OutputType<Id>>>;
}

Properties

  queue: QueueClient
    The queue client to interact with the queue API.

  realtime: RealtimeClient
    The realtime client to interact with the realtime API and receive
    updates in real-time.
    See RealtimeClient, RealtimeClient.connect

  storage: StorageClient
    The storage client to interact with the storage API.

  stream: (<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>) => Promise<FalStream<InputType<Id>, OutputType<Id>>>)
    Calls a fal app that supports streaming and provides a streaming-capable
    object as a result, that can be used to get partial results through
    either AsyncIterator or through an event listener.
    Parameters: endpointId — the endpoint id, e.g. fal-ai/llavav15-13b;
    options — the request options, including the input payload.
    Returns the FalStream instance.

  streaming: StreamingClient
    The streaming client to interact with the streaming API.
    See #stream

Methods

  • run<Id extends EndpointType>(endpointId: Id, options: RunOptions<InputType<Id>>): Promise<Result<OutputType<Id>>>
    Runs a fal endpoint identified by its endpointId, the registered
    function revision id or alias. Returns the remote function output.

  • subscribe<Id extends EndpointType>(endpointId: Id, options: RunOptions<InputType<Id>> & QueueSubscribeOptions): Promise<Result<OutputType<Id>>>
    Subscribes to updates for a specific request in the queue. endpointId is
    the ID of the API endpoint; options configure how the request is run and
    how updates are received. Returns a promise that resolves to the result
    of the request once it's completed.
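For illustration, a sketch contrasting run() and subscribe(); the endpoint id and input are placeholders, and onQueueUpdate is assumed to be one of the QueueSubscribeOptions:

import { fal } from "@fal-ai/client";

// run(): direct execution, resolves with the endpoint output
const direct = await fal.run("fal-ai/some-model", {
  input: { prompt: "a red panda" },
});

// subscribe(): queue-backed execution with status updates
const queued = await fal.subscribe("fal-ai/some-model", {
  input: { prompt: "a red panda" },
  onQueueUpdate: (status) => console.log(status.status),
});

console.log(direct.data, queued.requestId);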
diff --git a/docs/reference/interfaces/InProgressQueueStatus.html b/docs/reference/interfaces/InProgressQueueStatus.html index 9df11f2..f7ec17f 100644 --- a/docs/reference/interfaces/InProgressQueueStatus.html +++ b/docs/reference/interfaces/InProgressQueueStatus.html @@ -1,5 +1,5 @@ -InProgressQueueStatus | @fal-ai/client - v1.1.0-alpha.2

+InProgressQueueStatus | @fal-ai/client - v1.1.0

Interface InProgressQueueStatus

interface InProgressQueueStatus {
    logs: RequestLog[];
    request_id: string;
    response_url: string;
    status: "IN_PROGRESS";
}

Hierarchy
  • BaseQueueStatus
    • InProgressQueueStatus

Properties
  logs: RequestLog[]
  request_id: string
  response_url: string
  status: "IN_PROGRESS"
diff --git a/docs/reference/interfaces/InQueueQueueStatus.html b/docs/reference/interfaces/InQueueQueueStatus.html index 7a6038c..5b9eb02 100644 --- a/docs/reference/interfaces/InQueueQueueStatus.html +++ b/docs/reference/interfaces/InQueueQueueStatus.html @@ -1,5 +1,5 @@ -InQueueQueueStatus | @fal-ai/client - v1.1.0-alpha.2

+InQueueQueueStatus | @fal-ai/client - v1.1.0

Interface InQueueQueueStatus

interface InQueueQueueStatus {
    queue_position: number;
    request_id: string;
    response_url: string;
    status: "IN_QUEUE";
}

Hierarchy
  • BaseQueueStatus
    • InQueueQueueStatus

Properties
  queue_position: number
  request_id: string
  response_url: string
  status: "IN_QUEUE"
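Because QueueStatus is a discriminated union on status, the exported type guards can narrow it; a sketch (endpoint id and request id are placeholders):

import { fal, isCompletedQueueStatus } from "@fal-ai/client";

const status = await fal.queue.status("fal-ai/some-model", {
  requestId: "request-id-from-submit",
});

if (isCompletedQueueStatus(status)) {
  console.log(status.logs, status.metrics?.inference_time);
} else if (status.status === "IN_QUEUE") {
  console.log("position in queue:", status.queue_position);
}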
diff --git a/docs/reference/interfaces/QueueClient.html b/docs/reference/interfaces/QueueClient.html index 5a8b24c..100978e 100644 --- a/docs/reference/interfaces/QueueClient.html +++ b/docs/reference/interfaces/QueueClient.html @@ -1,6 +1,6 @@ -QueueClient | @fal-ai/client - v1.1.0-alpha.2

+QueueClient | @fal-ai/client - v1.1.0

Interface QueueClient

Represents a request queue with methods for submitting requests,
checking their status, retrieving results, and subscribing to updates.

interface QueueClient {
    cancel(endpointId: string, options: BaseQueueOptions): Promise<void>;
    result<Output>(endpointId: string, options: BaseQueueOptions): Promise<Result<Output>>;
    status(endpointId: string, options: QueueStatusOptions): Promise<QueueStatus>;
    streamStatus(endpointId: string, options: QueueStatusStreamOptions): Promise<FalStream<unknown, QueueStatus>>;
    submit<Input>(endpointId: string, options: SubmitOptions<Input>): Promise<InQueueQueueStatus>;
    subscribeToStatus(endpointId: string, options: QueueStatusSubscriptionOptions): Promise<CompletedQueueStatus>;
}

Methods

  • cancel(endpointId: string, options: BaseQueueOptions): Promise<void>
    Cancels a request in the queue. endpointId is the ID of the function web
    endpoint; options configure how the request is run and how updates are
    received. Returns a promise that resolves once the request is cancelled.
    Throws if the request cannot be cancelled.

  • result<Output>(endpointId: string, options: BaseQueueOptions): Promise<Result<Output>>
    Retrieves the result of a specific request from the queue. Returns a
    promise that resolves to the result of the request.

  • status(endpointId: string, options: QueueStatusOptions): Promise<QueueStatus>
    Retrieves the status of a specific request in the queue. Returns a
    promise that resolves to the status of the request.

  • streamStatus(endpointId: string, options: QueueStatusStreamOptions): Promise<FalStream<unknown, QueueStatus>>
    Subscribes to updates for a specific request in the queue using HTTP
    streaming events. Returns the streaming object that can be used to
    listen for updates.

  • submit<Input>(endpointId: string, options: SubmitOptions<Input>): Promise<InQueueQueueStatus>
    Submits a request to the queue. Returns a promise that resolves to the
    result of enqueuing the request.

  • subscribeToStatus(endpointId: string, options: QueueStatusSubscriptionOptions): Promise<CompletedQueueStatus>
    Subscribes to updates for a specific request in the queue using polling
    or streaming; see options.mode for more details. Returns a promise that
    resolves to the final status of the request.
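For illustration, the submit → status → result flow described above (endpoint id and input are placeholders; the logs flag is an assumption about QueueStatusOptions):

import { fal } from "@fal-ai/client";

const { request_id } = await fal.queue.submit("fal-ai/some-model", {
  input: { prompt: "a red panda" },
});

const status = await fal.queue.status("fal-ai/some-model", {
  requestId: request_id,
  logs: true, // assumption: include logs in the status response
});

if (status.status === "COMPLETED") {
  const { data } = await fal.queue.result("fal-ai/some-model", {
    requestId: request_id,
  });
  console.log(data);
}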
diff --git a/docs/reference/interfaces/RealtimeClient.html b/docs/reference/interfaces/RealtimeClient.html index e20a9bd..f518fcf 100644 --- a/docs/reference/interfaces/RealtimeClient.html +++ b/docs/reference/interfaces/RealtimeClient.html @@ -1,6 +1,6 @@ -RealtimeClient | @fal-ai/client - v1.1.0-alpha.2

+RealtimeClient | @fal-ai/client - v1.1.0

Interface RealtimeClient

interface RealtimeClient {
    connect<Input, Output>(app: string, handler: RealtimeConnectionHandler<Output>): RealtimeConnection<Input>;
}

Methods

  • connect<Input = any, Output = any>(app: string, handler: RealtimeConnectionHandler<Output>): RealtimeConnection<Input>
    Connect to the realtime endpoint. The default implementation uses
    WebSockets to connect to fal function endpoints that support WSS.
    app is the app alias or identifier; handler is the connection handler.
    Returns RealtimeConnection<Input>.
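For illustration, a hedged sketch of connect(); the app id and payload are placeholders, and the handler callbacks are assumptions about RealtimeConnectionHandler:

import { fal } from "@fal-ai/client";

const connection = fal.realtime.connect("fal-ai/some-realtime-app", {
  // assumption: result/error callbacks on the connection handler
  onResult: (result) => console.log(result),
  onError: (error) => console.error(error),
});

// Input payload; its shape depends on the app
connection.send({ prompt: "a red panda" });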

diff --git a/docs/reference/interfaces/StorageClient.html b/docs/reference/interfaces/StorageClient.html index 17d018c..4ad638c 100644 --- a/docs/reference/interfaces/StorageClient.html +++ b/docs/reference/interfaces/StorageClient.html @@ -1,14 +1,14 @@ -StorageClient | @fal-ai/client - v1.1.0-alpha.2

+StorageClient | @fal-ai/client - v1.1.0

Interface StorageClient

File support for the client. This interface establishes the contract for
uploading files to the server and transforming the input to replace file
objects with URLs.

interface StorageClient {
    transformInput: ((input: Record<string, any>) => Promise<Record<string, any>>);
    upload: ((file: Blob) => Promise<string>);
}

Properties

  transformInput: ((input: Record<string, any>) => Promise<Record<string, any>>)
    Transform the input to replace file objects with URLs. This is used to
    transform the input before sending it to the server and ensures that the
    server receives URLs instead of file objects.

  upload: ((file: Blob) => Promise<string>)
    Upload a file to the server. Takes the file to upload and returns the
    URL of the uploaded file.
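For illustration, the contract above in use:

import { fal } from "@fal-ai/client";

const file = new Blob(["hello"], { type: "text/plain" });

// upload(): returns the URL of the uploaded file
const url = await fal.storage.upload(file);

// transformInput(): replaces file objects in the input with URLs
const input = await fal.storage.transformInput({ file });
console.log(url, input);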
diff --git a/docs/reference/interfaces/StreamingClient.html b/docs/reference/interfaces/StreamingClient.html index 4a696b5..163f18e 100644 --- a/docs/reference/interfaces/StreamingClient.html +++ b/docs/reference/interfaces/StreamingClient.html @@ -1,9 +1,9 @@ -StreamingClient | @fal-ai/client - v1.1.0-alpha.2

+StreamingClient | @fal-ai/client - v1.1.0

Interface StreamingClient

The streaming client interface.

interface StreamingClient {
    stream<Id>(endpointId: Id, options: StreamOptions<InputType<Id>>): Promise<FalStream<InputType<Id>, OutputType<Id>>>;
}

Methods

  • stream<Id extends EndpointType>(endpointId: Id, options: StreamOptions<InputType<Id>>): Promise<FalStream<InputType<Id>, OutputType<Id>>>
    Calls a fal app that supports streaming and provides a streaming-capable
    object as a result, that can be used to get partial results through
    either AsyncIterator or through an event listener. endpointId is the
    endpoint id, e.g. fal-ai/llavav15-13b; options are the request options,
    including the input payload. Returns the FalStream instance.
diff --git a/docs/reference/types/Metrics.html b/docs/reference/types/Metrics.html index da76467..ed4ac32 100644 --- a/docs/reference/types/Metrics.html +++ b/docs/reference/types/Metrics.html @@ -1 +1 @@ -Metrics | @fal-ai/client - v1.1.0-alpha.2

+Metrics | @fal-ai/client - v1.1.0

Type Alias Metrics

Metrics: {
    inference_time: number | null;
}
diff --git a/docs/reference/types/QueueStatus.html b/docs/reference/types/QueueStatus.html index ab7e2b0..c8c9181 100644 --- a/docs/reference/types/QueueStatus.html +++ b/docs/reference/types/QueueStatus.html @@ -1 +1 @@ -QueueStatus | @fal-ai/client - v1.1.0-alpha.2
+QueueStatus | @fal-ai/client - v1.1.0
diff --git a/docs/reference/types/RequestLog.html b/docs/reference/types/RequestLog.html index 60e439b..f90840f 100644 --- a/docs/reference/types/RequestLog.html +++ b/docs/reference/types/RequestLog.html @@ -1 +1 @@ -RequestLog | @fal-ai/client - v1.1.0-alpha.2

+RequestLog | @fal-ai/client - v1.1.0

Type Alias RequestLog

RequestLog: {
    level:
        | "STDERR"
        | "STDOUT"
        | "ERROR"
        | "INFO"
        | "WARN"
        | "DEBUG";
    message: string;
    source: "USER";
    timestamp: string;
}
diff --git a/docs/reference/types/RequestMiddleware.html b/docs/reference/types/RequestMiddleware.html index 4c25938..be94b39 100644 --- a/docs/reference/types/RequestMiddleware.html +++ b/docs/reference/types/RequestMiddleware.html @@ -1 +1 @@ -RequestMiddleware | @fal-ai/client - v1.1.0-alpha.2

+RequestMiddleware | @fal-ai/client - v1.1.0

Type Alias RequestMiddleware

RequestMiddleware: ((request: RequestConfig) => Promise<RequestConfig>)
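For illustration, a middleware conforming to this alias; it passes the config through untouched, since RequestConfig's fields are not detailed here, and composing it via withMiddleware is an assumption about that helper's signature:

import { withMiddleware, type RequestMiddleware } from "@fal-ai/client";

const logRequests: RequestMiddleware = async (request) => {
  console.log("fal request:", request);
  return request;
};

// assumption: withMiddleware composes one or more middlewares into one
const middleware = withMiddleware(logRequests);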
diff --git a/docs/reference/types/ResponseHandler.html b/docs/reference/types/ResponseHandler.html index ce494b9..2b5471f 100644 --- a/docs/reference/types/ResponseHandler.html +++ b/docs/reference/types/ResponseHandler.html @@ -1 +1 @@ -ResponseHandler | @fal-ai/client - v1.1.0-alpha.2

+ResponseHandler | @fal-ai/client - v1.1.0

Type Alias ResponseHandler<Output>

ResponseHandler<Output>: ((response: Response) => Promise<Output>)

Type Parameters
  • Output
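For illustration, a handler conforming to this alias (Response is the standard fetch Response):

import { type ResponseHandler } from "@fal-ai/client";

const jsonHandler: ResponseHandler<unknown> = async (response) => {
  if (!response.ok) {
    throw new Error(`Request failed with status ${response.status}`);
  }
  return response.json();
};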
diff --git a/docs/reference/types/Result.html b/docs/reference/types/Result.html index 09eeb67..d45c2ea 100644 --- a/docs/reference/types/Result.html +++ b/docs/reference/types/Result.html @@ -1,3 +1,3 @@ -Result | @fal-ai/client - v1.1.0-alpha.2

+Result | @fal-ai/client - v1.1.0

Type Alias Result<T>

Result<T>: {
    data: T;
    requestId: string;
}

Represents an API result, containing the data,
the request ID and any other relevant information.

Type Parameters
  • T
diff --git a/docs/reference/types/RunOptions.html b/docs/reference/types/RunOptions.html index 7313028..21bd01a 100644 --- a/docs/reference/types/RunOptions.html +++ b/docs/reference/types/RunOptions.html @@ -1,6 +1,6 @@ -RunOptions | @fal-ai/client - v1.1.0-alpha.2

+RunOptions | @fal-ai/client - v1.1.0

Type Alias RunOptions<Input>

RunOptions<Input>: {
    input?: Input;
    method?:
        | "get"
        | "post"
        | "put"
        | "delete"
        | string;
}

The function input and other configuration when running
the function, such as the HTTP method to use.

Type Parameters
  • Input

Type declaration
  • Optional Readonly input?: Input
    The function input. It will be submitted either as query params or the
    body payload, depending on the method.
  • Optional Readonly method?: "get" | "post" | "put" | "delete" | string
    The HTTP method, defaults to post.
diff --git a/docs/reference/types/UrlOptions.html b/docs/reference/types/UrlOptions.html index 21059d4..111f914 100644 --- a/docs/reference/types/UrlOptions.html +++ b/docs/reference/types/UrlOptions.html @@ -1,6 +1,6 @@ -UrlOptions | @fal-ai/client - v1.1.0-alpha.2

+UrlOptions | @fal-ai/client - v1.1.0

Type Alias UrlOptions

UrlOptions: {
    path?: string;
    query?: Record<string, string>;
    subdomain?: string;
}

Type declaration
  • Optional path?: string
    The path to append to the function URL.
  • Optional Readonly query?: Record<string, string>
    The query parameters to include in the URL.
  • Optional Readonly subdomain?: string
    The subdomain to use when building the function URL, e.g. when the
    function runs asynchronously through the queue and returns the result
    in a separate call. This influences how the URL is built.
diff --git a/docs/reference/types/ValidationErrorInfo.html b/docs/reference/types/ValidationErrorInfo.html index 177f351..c92f12d 100644 --- a/docs/reference/types/ValidationErrorInfo.html +++ b/docs/reference/types/ValidationErrorInfo.html @@ -1 +1 @@ -ValidationErrorInfo | @fal-ai/client - v1.1.0-alpha.2

+ValidationErrorInfo | @fal-ai/client - v1.1.0

Type Alias ValidationErrorInfo

ValidationErrorInfo: {
    loc: (string | number)[];
    msg: string;
    type: string;
}
diff --git a/docs/reference/types/WebHookResponse.html b/docs/reference/types/WebHookResponse.html index c200a4e..b10ffea 100644 --- a/docs/reference/types/WebHookResponse.html +++ b/docs/reference/types/WebHookResponse.html @@ -1,4 +1,4 @@ -WebHookResponse | @fal-ai/client - v1.1.0-alpha.2

+WebHookResponse | @fal-ai/client - v1.1.0

Type Alias WebHookResponse<Payload>

WebHookResponse<Payload>: {
    error: never;
    payload: Payload;
    request_id: string;
    status: "OK";
} | {
    error: string;
    payload: Payload;
    request_id: string;
    status: "ERROR";
}

Represents the response from a WebHook request. This is a union type that
varies based on the status property.

Type Parameters
  • Payload = any
    The type of the payload in the response. It defaults to any, allowing
    for flexibility in specifying the structure of the payload.

Type declaration
  • payload: Payload
    The payload of the response, structure determined by the Payload type.
  • request_id: string
    The unique identifier for the request.
  • status: "ERROR"
    Indicates an unsuccessful response.
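For illustration, narrowing the union on status as described above; the payload shape is a placeholder:

import { type WebHookResponse } from "@fal-ai/client";

function handleWebhook(body: WebHookResponse<{ images?: unknown[] }>) {
  if (body.status === "OK") {
    console.log("request", body.request_id, "succeeded:", body.payload);
  } else {
    console.error("request", body.request_id, "failed:", body.error);
  }
}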
diff --git a/docs/reference/variables/fal.html b/docs/reference/variables/fal.html index ac58f72..9d6b341 100644 --- a/docs/reference/variables/fal.html +++ b/docs/reference/variables/fal.html @@ -1,3 +1,3 @@ -fal | @fal-ai/client - v1.1.0-alpha.2

+fal | @fal-ai/client - v1.1.0

Variable fal (Const)

fal: SingletonFalClient = ...

Creates a singleton instance of the client. This is useful as a compatibility
layer for existing code that uses client versions prior to 1.0.0.