diff --git a/api/src/processing/match-action.js b/api/src/processing/match-action.js index 5852b19d7..73888326d 100644 --- a/api/src/processing/match-action.js +++ b/api/src/processing/match-action.js @@ -181,6 +181,7 @@ export default function({ case "ok": case "xiaohongshu": case "newgrounds": + case "sora": params = { type: "proxy" }; break; diff --git a/api/src/processing/match.js b/api/src/processing/match.js index 1265297cd..57193039e 100644 --- a/api/src/processing/match.js +++ b/api/src/processing/match.js @@ -30,6 +30,7 @@ import facebook from "./services/facebook.js"; import bluesky from "./services/bluesky.js"; import xiaohongshu from "./services/xiaohongshu.js"; import newgrounds from "./services/newgrounds.js"; +import sora from "./services/sora.js"; let freebind; @@ -276,6 +277,14 @@ export default async function({ host, patternMatch, params, authType }) { }); break; + case "sora": + r = await sora({ + postId: patternMatch.postId, + quality: params.videoQuality, + isAudioOnly, + }); + break; + default: return createResponse("error", { code: "error.api.service.unsupported" diff --git a/api/src/processing/service-config.js b/api/src/processing/service-config.js index 0a35838bd..aa62876f1 100644 --- a/api/src/processing/service-config.js +++ b/api/src/processing/service-config.js @@ -221,6 +221,10 @@ export const services = { "v/:id" ], subdomains: ["music", "m"], + }, + sora: { + patterns: ["p/:postId"], + altDomains: ["sora.chatgpt.com"] } } diff --git a/api/src/processing/service-patterns.js b/api/src/processing/service-patterns.js index 6dc3ccbd0..854f63cfb 100644 --- a/api/src/processing/service-patterns.js +++ b/api/src/processing/service-patterns.js @@ -87,4 +87,7 @@ export const testers = { "youtube": pattern => pattern.id?.length <= 11, + + "sora": pattern => + pattern.postId?.length <= 64, } diff --git a/api/src/processing/services/sora.js b/api/src/processing/services/sora.js new file mode 100644 index 000000000..edac46fae --- /dev/null +++ 
// Sora (sora.chatgpt.com) post extractor.
// Depends on `genericUserAgent`, which the module imports from "../../config.js".

/** Delay unit for retries: attempt N waits N x this many milliseconds. */
const RETRY_BASE_DELAY_MS = 1000;

/** Matches the page's `og:video` meta tag; capture group 1 is the content URL. */
const OG_VIDEO_PATTERN =
  /<meta\s+[^>]*property=["']og:video["'][^>]*content=["']([^"']+)["']/i;

/**
 * Fallback patterns for a direct videos.openai.com MP4 link, tried in order
 * (bare, double-quoted, single-quoted). Capture group 1 is the URL without quotes.
 * None of them is global, so no `lastIndex` state is shared between calls.
 */
const FALLBACK_VIDEO_PATTERNS = [
  /(https:\/\/videos\.openai\.com\/[^"'>\s\\]+\.mp4[^"'>\s]*)/,
  /"(https:\/\/videos\.openai\.com\/[^"]+\.mp4[^"]*)"/,
  /'(https:\/\/videos\.openai\.com\/[^']+\.mp4[^']*)'/,
];

/** Matches the document title; capture group 1 is the title text. */
const TITLE_PATTERN = /<title>([^<]+)<\/title>/i;

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Decide whether a response looks like a Cloudflare challenge / block page
 * that is worth retrying.
 *
 * A response counts as a challenge when it carries `cf-mitigated: challenge`
 * or when its status is 403 or 503. A plain 200 that merely has
 * `server: cloudflare` is NOT a challenge: treating it as one made every
 * successful Cloudflare-served response go through all retry delays.
 *
 * @param {{ status: number, headers: { get(name: string): string | null } }} response
 * @returns {boolean}
 */
const isCloudflareChallenge = (response) => {
  const mitigation = response.headers.get("cf-mitigated");
  if (mitigation?.toLowerCase() === "challenge") {
    return true;
  }
  return response.status === 403 || response.status === 503;
};

/**
 * `fetch` with retries for network errors and Cloudflare challenges.
 *
 * Waits `baseDelayMs * attempt` between attempts (linear backoff). The response
 * of the final attempt is returned as-is, even if it is still a challenge, so
 * the caller can inspect its status.
 *
 * @param {string | URL} url
 * @param {object} options - passed to `fetch` unchanged
 * @param {number} [maxRetries=3] - total number of attempts; values that are
 *   not a positive integer are treated as 1
 * @param {number} [baseDelayMs=RETRY_BASE_DELAY_MS] - backoff unit in milliseconds
 * @returns {Promise<Response>}
 * @throws whatever `fetch` threw on the final attempt
 */
const fetchWithRetry = async (
  url,
  options,
  maxRetries = 3,
  baseDelayMs = RETRY_BASE_DELAY_MS,
) => {
  const attempts = Number.isInteger(maxRetries) && maxRetries > 0 ? maxRetries : 1;

  // The final attempt always returns or throws, so the loop needs no exit condition.
  for (let attempt = 1; ; attempt++) {
    const isLastAttempt = attempt >= attempts;

    let response;
    try {
      response = await fetch(url, options);
    } catch (error) {
      if (isLastAttempt) {
        throw error;
      }
    }

    if (response) {
      if (isLastAttempt || !isCloudflareChallenge(response)) {
        return response;
      }
      // Best effort: release the discarded body before retrying. A failure to
      // cancel must not turn a retryable challenge into an error.
      await response.body?.cancel().catch(() => {});
    }

    await delay(baseDelayMs * attempt);
  }
};

/**
 * Service entry point: resolve a Sora post to a proxied video download.
 *
 * @param {{ postId?: string }} obj - request parameters; only `postId` is read here
 * @returns {Promise<object>} `{ type, urls, filename, fileMetadata }` on success,
 *   `{ error: "fetch.empty" }` when there is no post id or no video was found,
 *   `{ error: "fetch.fail" }` when the page could not be fetched
 */
export default async function sora(obj) {
  const postId = obj?.postId;
  if (!postId) {
    return { error: "fetch.empty" };
  }

  try {
    return await handlePostUrl(postId, obj);
  } catch (error) {
    console.error("Sora service error:", error);
    return { error: "fetch.fail" };
  }
}

/**
 * Find the video URL in a post page: `og:video` first, then the first direct
 * videos.openai.com MP4 link.
 *
 * @param {string} html
 * @returns {string | undefined} the URL as it appears in the markup (entities not decoded)
 */
function extractVideoUrl(html) {
  const ogMatch = html.match(OG_VIDEO_PATTERN);
  if (ogMatch) {
    return ogMatch[1];
  }

  for (const pattern of FALLBACK_VIDEO_PATTERNS) {
    const match = html.match(pattern);
    if (match) {
      return match[1];
    }
  }

  return undefined;
}

/**
 * Fetch a `/p/<postId>` page and extract its video URL and title.
 *
 * @param {string} postId
 * @param {object} obj - full request object; currently unused, kept for the call signature
 * @returns {Promise<object>} see the default export
 */
async function handlePostUrl(postId, obj) {
  // postId originates from the user-supplied URL; escape it before building the path.
  const targetUrl = `https://sora.chatgpt.com/p/${encodeURIComponent(postId)}`;

  const res = await fetchWithRetry(targetUrl, {
    headers: {
      "user-agent": genericUserAgent,
      accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
      "accept-language": "en-US,en;q=0.9",
      "accept-encoding": "gzip, deflate, br",
      "sec-ch-ua":
        '"Google Chrome";v="138", "Chromium";v="138", "Not=A?Brand";v="99"',
      "sec-ch-ua-mobile": "?0",
      "sec-ch-ua-platform": '"Windows"',
      "sec-fetch-dest": "document",
      "sec-fetch-mode": "navigate",
      "sec-fetch-site": "none",
      "sec-fetch-user": "?1",
      "upgrade-insecure-requests": "1",
      "cache-control": "max-age=0",
      dnt: "1",
    },
  });

  if (!res.ok) {
    return { error: "fetch.fail" };
  }

  const html = await res.text();

  const rawVideoUrl = extractVideoUrl(html);
  if (!rawVideoUrl) {
    return { error: "fetch.empty" };
  }

  // URLs lifted from HTML attributes have their ampersands entity-encoded.
  const videoUrl = rawVideoUrl.replace(/&amp;/g, "&");

  let title;
  const titleMatch = html.match(TITLE_PATTERN);
  if (titleMatch) {
    title = titleMatch[1].replace(" - Sora", "").replace(" | Sora", "").trim();
  }

  // Keep only filename-safe characters from the id.
  const cleanId = postId.replace(/[^a-zA-Z0-9_-]/g, "");

  return {
    type: "proxy",
    urls: videoUrl,
    filename: `sora_${cleanId}.mp4`,
    fileMetadata: {
      title: title || `Sora Video ${cleanId}`,
    },
  };
}
changing https://v.redd.it/<id> to https://reddit.com/video/<id>.*/