From bc8e9ac19f7da8bf434516099f8184d8394ca9d4 Mon Sep 17 00:00:00 2001 From: Matteo Ferrando Date: Mon, 25 Nov 2024 21:25:46 -0400 Subject: [PATCH 1/3] feat: introduce fal cdn v3 with storage API (#106) --- libs/client/src/storage.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/client/src/storage.ts b/libs/client/src/storage.ts index adb1981..ebb4186 100644 --- a/libs/client/src/storage.ts +++ b/libs/client/src/storage.ts @@ -66,7 +66,8 @@ async function initiateUpload( file.name || `${Date.now()}.${getExtensionFromContentType(contentType)}`; return await dispatchRequest({ method: "POST", - targetUrl: `${getRestApiUrl()}/storage/upload/initiate`, + // NOTE: We want to test V3 without making it the default at the API level + targetUrl: `${getRestApiUrl()}/storage/upload/initiate?storage_type=fal-cdn-v3`, input: { content_type: contentType, file_name: filename, From 9170021b77cae866996d16cda36e98a4b7ed6529 Mon Sep 17 00:00:00 2001 From: Matteo Ferrando Date: Mon, 25 Nov 2024 21:27:27 -0400 Subject: [PATCH 2/3] feat: automatically do multi-part upload on 90+ MB files (#111) * feat: introduce fal cdn v3 with storage API * feat: automatically do multi-part upload on 90+ MB files * fix: use new API from CDN * alpha * fix: include dependency * chore: bump alpha * refactor: remove semaphore * refactor: remove console --- libs/client/package.json | 2 +- libs/client/src/storage.ts | 120 +++++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 6 deletions(-) diff --git a/libs/client/package.json b/libs/client/package.json index 5b13b60..b197974 100644 --- a/libs/client/package.json +++ b/libs/client/package.json @@ -1,7 +1,7 @@ { "name": "@fal-ai/client", "description": "The fal.ai client for JavaScript and TypeScript", - "version": "1.1.3", + "version": "1.2.0-alpha.5", "license": "MIT", "repository": { "type": "git", diff --git a/libs/client/src/storage.ts b/libs/client/src/storage.ts index ebb4186..5bd86a7 100644 --- a/libs/client/src/storage.ts +++ b/libs/client/src/storage.ts @@ -1,7 +1,6 @@ import { getRestApiUrl, RequiredConfig } from "./config"; import { dispatchRequest } from "./request"; import { isPlainObject } from "./utils"; - /** * File support for the client. This interface establishes the contract for * uploading files to the server and transforming the input to replace file @@ -53,17 +52,15 @@ function getExtensionFromContentType(contentType: string): string { /** * Initiate the upload of a file to the server. This returns the URL to upload * the file to and the URL of the file once it is uploaded. - * - * @param file the file to upload - * @returns the URL to upload the file to and the URL of the file once it is uploaded. */ async function initiateUpload( file: Blob, config: RequiredConfig, + contentType: string, ): Promise { - const contentType = file.type || "application/octet-stream"; const filename = file.name || `${Date.now()}.${getExtensionFromContentType(contentType)}`; + return await dispatchRequest({ method: "POST", // NOTE: We want to test V3 without making it the default at the API level @@ -76,6 +73,111 @@ async function initiateUpload( }); } +/** + * Initiate the multipart upload of a file to the server. This returns the URL to upload + * the file to and the URL of the file once it is uploaded. + */ +async function initiateMultipartUpload( + file: Blob, + config: RequiredConfig, + contentType: string, +): Promise { + const filename = + file.name || `${Date.now()}.${getExtensionFromContentType(contentType)}`; + + return await dispatchRequest({ + method: "POST", + targetUrl: `${getRestApiUrl()}/storage/upload/initiate-multipart?storage_type=fal-cdn-v3`, + input: { + content_type: contentType, + file_name: filename, + }, + config, + }); +} + +type MultipartObject = { + partNumber: number; + etag: string; +}; + +async function partUploadRetries( + uploadUrl: string, + chunk: Blob, + config: RequiredConfig, + tries: number = 3, +): Promise { + if (tries === 0) { + throw new Error("Part upload failed, retries exhausted"); + } + + const { fetch, responseHandler } = config; + + try { + const response = await fetch(uploadUrl, { + method: "PUT", + body: chunk, + }); + + return (await responseHandler(response)) as MultipartObject; + } catch (error) { + return await partUploadRetries(uploadUrl, chunk, config, tries - 1); + } +} + +async function multipartUpload( + file: Blob, + config: RequiredConfig, +): Promise { + const { fetch, responseHandler } = config; + const contentType = file.type || "application/octet-stream"; + const { upload_url: uploadUrl, file_url: url } = + await initiateMultipartUpload(file, config, contentType); + + // Break the file into 10MB chunks + const chunkSize = 10 * 1024 * 1024; + const chunks = Math.ceil(file.size / chunkSize); + + const parsedUrl = new URL(uploadUrl); + + const responses: MultipartObject[] = []; + + try { + for (let i = 0; i < chunks; i++) { + const start = i * chunkSize; + const end = Math.min(start + chunkSize, file.size); + + const chunk = file.slice(start, end); + + const partNumber = i + 1; + // {uploadUrl}/{part_number}?uploadUrlParams=... + const partUploadUrl = `${parsedUrl.origin}${parsedUrl.pathname}/${partNumber}${parsedUrl.search}`; + + responses.push(await partUploadRetries(partUploadUrl, chunk, config)); + } + } catch (error) { + throw error; + } + + // Complete the upload + const completeUrl = `${parsedUrl.origin}${parsedUrl.pathname}/complete${parsedUrl.search}`; + const response = await fetch(completeUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + parts: responses.map((mpart) => ({ + partNumber: mpart.partNumber, + etag: mpart.etag, + })), + }), + }); + await responseHandler(response); + + return url; +} + // eslint-disable-next-line @typescript-eslint/no-explicit-any type KeyValuePair = [string, any]; @@ -88,10 +190,18 @@ export function createStorageClient({ }: StorageClientDependencies): StorageClient { const ref: StorageClient = { upload: async (file: Blob) => { + // Check for 90+ MB file size to do multipart upload + if (file.size > 90 * 1024 * 1024) { + return await multipartUpload(file, config); + } + + const contentType = file.type || "application/octet-stream"; + const { fetch, responseHandler } = config; const { upload_url: uploadUrl, file_url: url } = await initiateUpload( file, config, + contentType, ); const response = await fetch(uploadUrl, { method: "PUT", From 8b2f66b63f1998854b7eec07ccb9552963f67f18 Mon Sep 17 00:00:00 2001 From: Daniel Rochetti Date: Mon, 25 Nov 2024 17:28:05 -0800 Subject: [PATCH 3/3] chore(client): update endpoint types (#113) --- libs/client/src/types/endpoints.ts | 335 ++++++++++++++++++++++++++--- 1 file changed, 303 insertions(+), 32 deletions(-) diff --git a/libs/client/src/types/endpoints.ts b/libs/client/src/types/endpoints.ts index 4d4003e..b539fe1 100644 --- a/libs/client/src/types/endpoints.ts +++ b/libs/client/src/types/endpoints.ts @@ -2069,7 +2069,7 @@ export type CreativeUpscalerInput = { */ shape_preservation?: number; /** - * The suffix to add to the generated prompt. Not used for a custom prompt. This is useful to add a common ending to all prompts such as 'high quality' etc or embedding tokens. Default value: `" high quality, highly detailed, high resolution, sharp"` + * The suffix to add to the prompt. This is useful to add a common ending to all prompts such as 'high quality' etc or embedding tokens. Default value: `" high quality, highly detailed, high resolution, sharp"` */ prompt_suffix?: string; /** @@ -6286,6 +6286,91 @@ export type FluxLoraFastTrainingOutput = { */ config_file: File; }; +export type FluxLoraFillInput = { + /** + * The prompt to generate an image from. + */ + prompt: string; + /** + * The size of the generated image. + */ + image_size?: + | ImageSize + | "square_hd" + | "square" + | "portrait_4_3" + | "portrait_16_9" + | "landscape_4_3" + | "landscape_16_9"; + /** + * The number of inference steps to perform. Default value: `28` + */ + num_inference_steps?: number; + /** + * The same seed and the same prompt given to the same version of the model + * will output the same image every time. + */ + seed?: number; + /** + * The LoRAs to use for the image generation. You can use any number of LoRAs + * and they will be merged together to generate the final image. Default value: `` + */ + loras?: Array; + /** + * The CFG (Classifier Free Guidance) scale is a measure of how close you want + * the model to stick to your prompt when looking for a related image to show you. Default value: `30` + */ + guidance_scale?: number; + /** + * If set to true, the function will wait for the image to be generated and uploaded + * before returning the response. This will increase the latency of the function but + * it allows you to get the image directly in the response without going through the CDN. + */ + sync_mode?: boolean; + /** + * The number of images to generate. Default value: `1` + */ + num_images?: number; + /** + * If set to true, the safety checker will be enabled. Default value: `true` + */ + enable_safety_checker?: boolean; + /** + * The format of the generated image. Default value: `"jpeg"` + */ + output_format?: "jpeg" | "png"; + /** + * URL of image to use for fill operation + */ + image_url: string | Blob | File; + /** + * The mask to area to Inpaint in. + */ + mask_url: string | Blob | File; +}; +export type FluxLoraFillOutput = { + /** + * The generated image files info. + */ + images: Array; + /** + * + */ + timings: any; + /** + * Seed of the generated Image. It will be the same value of the one passed in the + * input or the randomly generated that was used in case none was passed. + */ + seed: number; + /** + * Whether the generated images contain NSFW concepts. + */ + has_nsfw_concepts: Array; + /** + * The prompt used for generating the image. + */ + prompt: string; +}; export type FluxLoraImageToImageInput = { /** * The prompt to generate an image from. @@ -6537,6 +6622,55 @@ export type FluxLoraOutput = { */ prompt: string; }; +export type FluxLoraPortraitTrainerInput = { + /** + * URL to zip archive with images of a consistent style. Try to use at least 10 images, although more is better. + * + * In addition to images the archive can contain text files with captions. Each text file should have the same name as the image file it corresponds to. + * + * The captions can include a special string `[trigger]`. If a trigger_word is specified, it will replace `[trigger]` in the captions. + */ + images_data_url: string | Blob | File; + /** + * Trigger phrase to be used in the captions. If None, a trigger word will not be used. + * If no captions are provide the trigger_work will be used instead of captions. If captions are provided, the trigger word will replace the `[trigger]` string in the captions. + */ + trigger_phrase?: string; + /** + * Learning rate to use for training. Default value: `0.00009` + */ + learning_rate?: number; + /** + * Number of steps to train the LoRA on. Default value: `2500` + */ + steps?: number; + /** + * If True, multiresolution training will be used. Default value: `true` + */ + multiresolution_training?: boolean; + /** + * If True, the subject will be cropped from the image. Default value: `true` + */ + subject_crop?: boolean; + /** + * The format of the archive. If not specified, the format will be inferred from the URL. + */ + data_archive_format?: string; + /** + * URL to a checkpoint to resume training from. Default value: `""` + */ + resume_from_checkpoint?: string; +}; +export type FluxLoraPortraitTrainerOutput = { + /** + * URL to the trained diffusers lora weights. + */ + diffusers_lora_file: File; + /** + * URL to the training configuration file. + */ + config_file: File; +}; export type FluxProCannyControlInput = { /** * The prompt to generate an image from. @@ -11382,44 +11516,23 @@ export type ImageToVideoInput = { */ prompt: string; /** - * The size of the generated video. Default value: `[object Object]` - */ - video_size?: - | ImageSize - | "square_hd" - | "square" - | "portrait_4_3" - | "portrait_16_9" - | "landscape_4_3" - | "landscape_16_9"; - /** - * The negative prompt to generate video from Default value: `""` + * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` */ negative_prompt?: string; /** - * The number of inference steps to perform. Default value: `50` - */ - num_inference_steps?: number; - /** - * The same seed and the same prompt given to the same version of the model - * will output the same video every time. + * The seed to use for random number generation. */ seed?: number; /** - * The CFG (Classifier Free Guidance) scale is a measure of how close you want - * the model to stick to your prompt when looking for a related video to show you. Default value: `7` - */ - guidance_scale?: number; - /** - * Use RIFE for video interpolation Default value: `true` + * The number of inference steps to take. Default value: `30` */ - use_rife?: boolean; + num_inference_steps?: number; /** - * The target FPS of the video Default value: `16` + * The guidance scale to use. Default value: `3` */ - export_fps?: number; + guidance_scale?: number; /** - * The URL to the image to generate the video from. + * The URL of the image to generate the video from. */ image_url: string | Blob | File; }; @@ -12806,6 +12919,50 @@ export type IpAdapterFaceIdOutput = { */ seed: number; }; +export type KlingVideoV15ProImageToVideoInput = { + /** + * + */ + prompt: string; + /** + * + */ + image_url: string | Blob | File; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV15ProImageToVideoOutput = { + /** + * The generated video + */ + video: File; +}; +export type KlingVideoV15ProTextToVideoInput = { + /** + * + */ + prompt: string; + /** + * The duration of the generated video in seconds Default value: `"5"` + */ + duration?: "5" | "10"; + /** + * The aspect ratio of the generated video frame Default value: `"16:9"` + */ + aspect_ratio?: "16:9" | "9:16" | "1:1"; +}; +export type KlingVideoV15ProTextToVideoOutput = { + /** + * The generated video + */ + video: File; +}; export type KlingVideoV1ProImageToVideoInput = { /** * @@ -14637,6 +14794,74 @@ export type LoraOutput = { */ debug_per_pass_latents?: File; }; +export type LtxVideoImageToVideoInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` + */ + negative_prompt?: string; + /** + * The seed to use for random number generation. + */ + seed?: number; + /** + * The number of inference steps to take. Default value: `30` + */ + num_inference_steps?: number; + /** + * The guidance scale to use. Default value: `3` + */ + guidance_scale?: number; + /** + * The URL of the image to generate the video from. + */ + image_url: string | Blob | File; +}; +export type LtxVideoImageToVideoOutput = { + /** + * The generated video. + */ + video: File; + /** + * The seed used for random number generation. + */ + seed: number; +}; +export type LtxVideoInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` + */ + negative_prompt?: string; + /** + * The seed to use for random number generation. + */ + seed?: number; + /** + * The number of inference steps to take. Default value: `30` + */ + num_inference_steps?: number; + /** + * The guidance scale to use. Default value: `3` + */ + guidance_scale?: number; +}; +export type LtxVideoOutput = { + /** + * The generated video. + */ + video: File; + /** + * The seed used for random number generation. + */ + seed: number; +}; export type LumaDreamMachineImageToVideoInput = { /** * @@ -18877,6 +19102,28 @@ export type TextToImageTurboInput = { */ expand_prompt?: boolean; }; +export type TextToVideoInput = { + /** + * The prompt to generate the video from. + */ + prompt: string; + /** + * The negative prompt to generate the video from. Default value: `"low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly"` + */ + negative_prompt?: string; + /** + * The seed to use for random number generation. + */ + seed?: number; + /** + * The number of inference steps to take. Default value: `30` + */ + num_inference_steps?: number; + /** + * The guidance scale to use. Default value: `3` + */ + guidance_scale?: number; +}; export type TimestepsInput = { /** * The method to use for the timesteps. If set to 'array', the timesteps will be set based @@ -19494,6 +19741,10 @@ export type EndpointTypeMap = { input: FluxLoraFastTrainingInput; output: FluxLoraFastTrainingOutput; }; + "fal-ai/flux-lora-portrait-trainer": { + input: FluxLoraPortraitTrainerInput; + output: FluxLoraPortraitTrainerOutput; + }; "fal-ai/recraft-v3": { input: RecraftV3Input; output: RecraftV3Output; @@ -19518,6 +19769,10 @@ export type EndpointTypeMap = { input: FluxLoraInput; output: FluxLoraOutput; }; + "fal-ai/flux-lora/inpainting": { + input: FluxLoraInpaintingInput; + output: FluxLoraInpaintingOutput; + }; "fal-ai/flux/schnell": { input: FluxSchnellInput; output: FluxSchnellOutput; @@ -19590,9 +19845,9 @@ export type EndpointTypeMap = { input: FluxRealismInput; output: FluxRealismOutput; }; - "fal-ai/flux-lora/inpainting": { - input: FluxLoraInpaintingInput; - output: FluxLoraInpaintingOutput; + "fal-ai/flux-lora-fill": { + input: FluxLoraFillInput; + output: FluxLoraFillOutput; }; "fal-ai/flux-lora/image-to-image": { input: FluxLoraImageToImageInput; @@ -19694,6 +19949,14 @@ export type EndpointTypeMap = { input: KlingVideoV1ProImageToVideoInput; output: KlingVideoV1ProImageToVideoOutput; }; + "fal-ai/kling-video/v1.5/pro/image-to-video": { + input: KlingVideoV15ProImageToVideoInput; + output: KlingVideoV15ProImageToVideoOutput; + }; + "fal-ai/kling-video/v1.5/pro/text-to-video": { + input: KlingVideoV15ProTextToVideoInput; + output: KlingVideoV15ProTextToVideoOutput; + }; "fal-ai/cogvideox-5b": { input: Cogvideox5bInput; output: Cogvideox5bOutput; @@ -19706,6 +19969,14 @@ export type EndpointTypeMap = { input: Cogvideox5bImageToVideoInput; output: Cogvideox5bImageToVideoOutput; }; + "fal-ai/ltx-video": { + input: LtxVideoInput; + output: LtxVideoOutput; + }; + "fal-ai/ltx-video/image-to-video": { + input: LtxVideoImageToVideoInput; + output: LtxVideoImageToVideoOutput; + }; "fal-ai/stable-video": { input: StableVideoInput; output: StableVideoOutput;