From cbfafc28f5a2e7d8b160eb0c393968c8771281d9 Mon Sep 17 00:00:00 2001 From: NanditaPatil-dotcom Date: Sat, 29 Nov 2025 00:13:06 +0530 Subject: [PATCH 1/2] fix : screenshot error --- electron/ScreenshotHelper.ts | 30 ++++++++++++++++++++++++------ package.json | 5 ++++- start.sh | 3 +++ 3 files changed, 31 insertions(+), 7 deletions(-) create mode 100755 start.sh diff --git a/electron/ScreenshotHelper.ts b/electron/ScreenshotHelper.ts index caa957cd..184ba80f 100644 --- a/electron/ScreenshotHelper.ts +++ b/electron/ScreenshotHelper.ts @@ -2,9 +2,8 @@ import path from "node:path" import fs from "node:fs" -import { app } from "electron" +import { app, desktopCapturer } from "electron" import { v4 as uuidv4 } from "uuid" -import screenshot from "screenshot-desktop" export class ScreenshotHelper { private screenshotQueue: string[] = [] @@ -28,10 +27,10 @@ export class ScreenshotHelper { // Create directories if they don't exist if (!fs.existsSync(this.screenshotDir)) { - fs.mkdirSync(this.screenshotDir) + fs.mkdirSync(this.screenshotDir, { recursive: true }) } if (!fs.existsSync(this.extraScreenshotDir)) { - fs.mkdirSync(this.extraScreenshotDir) + fs.mkdirSync(this.extraScreenshotDir, { recursive: true }) } } @@ -84,11 +83,30 @@ export class ScreenshotHelper { // Add a small delay to ensure window is hidden await new Promise(resolve => setTimeout(resolve, 100)) + // Use Electron's desktopCapturer to get screen sources + const sources = await desktopCapturer.getSources({ + types: ['screen'], + thumbnailSize: { width: 1920, height: 1080 } + }) + + if (sources.length === 0) { + throw new Error("No screen sources available") + } + + // Get the primary screen (first source) + const primaryScreen = sources[0] + const image = primaryScreen.thumbnail + + if (image.isEmpty()) { + throw new Error("Failed to capture screen - image is empty") + } + let screenshotPath = "" + const screenshotBuffer = image.toPNG() if (this.view === "queue") { screenshotPath = path.join(this.screenshotDir, `${uuidv4()}.png`) - await screenshot({ filename: screenshotPath }) + await fs.promises.writeFile(screenshotPath, screenshotBuffer) this.screenshotQueue.push(screenshotPath) if (this.screenshotQueue.length > this.MAX_SCREENSHOTS) { @@ -103,7 +121,7 @@ export class ScreenshotHelper { } } else { screenshotPath = path.join(this.extraScreenshotDir, `${uuidv4()}.png`) - await screenshot({ filename: screenshotPath }) + await fs.promises.writeFile(screenshotPath, screenshotBuffer) this.extraScreenshotQueue.push(screenshotPath) if (this.extraScreenshotQueue.length > this.MAX_SCREENSHOTS) { diff --git a/package.json b/package.json index c3ca446c..b3a9adc8 100644 --- a/package.json +++ b/package.json @@ -155,5 +155,8 @@ "tailwind-merge": "^2.5.4", "tesseract.js": "^5.0.5", "uuid": "^11.0.3" - } + }, + "bin": { + "start-cluely": "./start.sh" +} } diff --git a/start.sh b/start.sh new file mode 100755 index 00000000..4d76df7e --- /dev/null +++ b/start.sh @@ -0,0 +1,3 @@ +cd "/home/nandita/Documents/free-cluely" +echo "Launching Free Cluely... hold tight!" +npm start From 75f761d8a5a9d4a7d0b88cdefecc21390b5b9921 Mon Sep 17 00:00:00 2001 From: NanditaPatil-dotcom Date: Sat, 29 Nov 2025 04:58:38 +0530 Subject: [PATCH 2/2] fix : ollama screenshot error - but no analysis-model switch --- electron/LLMHelper.ts | 193 +++++++++++++++++++++++++++-------- electron/ScreenshotHelper.ts | 2 +- 2 files changed, 153 insertions(+), 42 deletions(-) diff --git a/electron/LLMHelper.ts b/electron/LLMHelper.ts index 5edd592b..ff561723 100644 --- a/electron/LLMHelper.ts +++ b/electron/LLMHelper.ts @@ -1,11 +1,17 @@ import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai" import fs from "fs" +import sharp from "sharp" interface OllamaResponse { response: string done: boolean } +interface OllamaVisionResponse { + response: string + done: boolean +} + export class LLMHelper { private model: GenerativeModel | null = null private readonly systemPrompt = `You are Wingman AI, a helpful, proactive assistant for any kind of problem or situation (not just coding). For any user input, analyze the situation, provide a clear problem statement, relevant context, and suggest several possible responses or actions the user could take next. Always explain your reasoning. Present your suggestions as a list of options or next steps.` @@ -15,12 +21,12 @@ export class LLMHelper { constructor(apiKey?: string, useOllama: boolean = false, ollamaModel?: string, ollamaUrl?: string) { this.useOllama = useOllama - + if (useOllama) { this.ollamaUrl = ollamaUrl || "http://localhost:11434" - this.ollamaModel = ollamaModel || "gemma:latest" // Default fallback + this.ollamaModel = ollamaModel // Default fallback console.log(`[LLMHelper] Using Ollama with model: ${this.ollamaModel}`) - + // Auto-detect and use first available model if specified model doesn't exist this.initializeOllamaModel() } else if (apiKey) { @@ -50,6 +56,7 @@ export class LLMHelper { return text; } + private async callOllama(prompt: string): Promise { try { const response = await fetch(`${this.ollamaUrl}/api/generate`, { @@ -80,6 +87,51 @@ export class LLMHelper { } } + private supportsOllamaVision(): boolean { + const model = this.ollamaModel.toLowerCase() + const visionHints = ["vision", "llava", "moondream", "pixtral", "minicpm", "gpt4o", "flux", "reka"] + return visionHints.some((hint) => model.includes(hint)) + } + + private async callOllamaVision(prompt: string, images: string[]): Promise { + if (!this.supportsOllamaVision()) { + throw new Error(`Current Ollama model (${this.ollamaModel}) does not support image understanding. Switch to a vision-capable model such as llama3.2-vision, llava, gemma2:vision, etc.`) + } + + try { + const response = await fetch(`${this.ollamaUrl}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify({ + model: this.ollamaModel, + prompt, + images, + stream: false, + options: { + temperature: 0.4, + top_p: 0.9 + } + }) + }) + + if (!response.ok) { + throw new Error(`Ollama vision API error: ${response.status} ${response.statusText}`) + } + + const data: OllamaVisionResponse = await response.json() + if (!data?.response) { + throw new Error("Ollama vision API returned an empty response") + } + + return data.response + } catch (error) { + console.error("[LLMHelper] Error calling Ollama vision endpoint:", error) + throw error + } + } + private async checkOllamaAvailable(): Promise { try { const response = await fetch(`${this.ollamaUrl}/api/tags`) @@ -123,8 +175,6 @@ export class LLMHelper { public async extractProblemFromImages(imagePaths: string[]) { try { - const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path))) - const prompt = `${this.systemPrompt}\n\nYou are a wingman. Please analyze these images and extract the following information in JSON format:\n{ "problem_statement": "A clear statement of the problem or situation depicted in the images.", "context": "Relevant background or context from the images.", @@ -132,6 +182,19 @@ export class LLMHelper { "reasoning": "Explanation of why these suggestions are appropriate." }\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.` + if (this.useOllama) { + const imagePayloads = await Promise.all( + imagePaths.map(async (imagePath) => (await fs.promises.readFile(imagePath)).toString("base64")) + ) + const text = await this.callOllamaVision(prompt, imagePayloads) + return JSON.parse(this.cleanJsonResponse(text)) + } + + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.") + } + + const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path))) const result = await this.model.generateContent([prompt, ...imageParts]) const response = await result.response const text = this.cleanJsonResponse(response.text()) @@ -153,13 +216,21 @@ export class LLMHelper { } }\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.` - console.log("[LLMHelper] Calling Gemini LLM for solution..."); + console.log("[LLMHelper] Calling LLM for solution..."); try { - const result = await this.model.generateContent(prompt) - console.log("[LLMHelper] Gemini LLM returned result."); - const response = await result.response - const text = this.cleanJsonResponse(response.text()) - const parsed = JSON.parse(text) + const text = this.useOllama + ? await this.callOllama(prompt) + : await (async () => { + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.") + } + const result = await this.model.generateContent(prompt) + const response = await result.response + return response.text() + })() + + const cleaned = this.cleanJsonResponse(text) + const parsed = JSON.parse(cleaned) console.log("[LLMHelper] Parsed LLM response:", parsed) return parsed } catch (error) { @@ -170,8 +241,6 @@ export class LLMHelper { public async debugSolutionWithImages(problemInfo: any, currentCode: string, debugImagePaths: string[]) { try { - const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path))) - const prompt = `${this.systemPrompt}\n\nYou are a wingman. Given:\n1. The original problem or situation: ${JSON.stringify(problemInfo, null, 2)}\n2. The current response or approach: ${currentCode}\n3. The debug information in the provided images\n\nPlease analyze the debug information and provide feedback in this JSON format:\n{ "solution": { "code": "The code or main answer here.", @@ -182,6 +251,21 @@ export class LLMHelper { } }\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.` + if (this.useOllama) { + const imagePayloads = await Promise.all( + debugImagePaths.map(async (imagePath) => (await fs.promises.readFile(imagePath)).toString("base64")) + ) + const text = await this.callOllamaVision(prompt, imagePayloads) + const parsed = JSON.parse(this.cleanJsonResponse(text)) + console.log("[LLMHelper] Parsed debug LLM response:", parsed) + return parsed + } + + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.") + } + + const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path))) const result = await this.model.generateContent([prompt, ...imageParts]) const response = await result.response const text = this.cleanJsonResponse(response.text()) @@ -195,61 +279,88 @@ export class LLMHelper { } public async analyzeAudioFile(audioPath: string) { + if (this.useOllama) { + throw new Error("Audio analysis is currently only supported when using Gemini. Switch off USE_OLLAMA or supply a Gemini API key.") + } + try { - const audioData = await fs.promises.readFile(audioPath); + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or disable Ollama mode.") + } + + const audioData = await fs.promises.readFile(audioPath) const audioPart = { inlineData: { data: audioData.toString("base64"), mimeType: "audio/mp3" } - }; - const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user.`; - const result = await this.model.generateContent([prompt, audioPart]); - const response = await result.response; - const text = response.text(); - return { text, timestamp: Date.now() }; + } + const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user.` + const result = await this.model.generateContent([prompt, audioPart]) + const response = await result.response + const text = response.text() + return { text, timestamp: Date.now() } } catch (error) { - console.error("Error analyzing audio file:", error); - throw error; + console.error("Error analyzing audio file:", error) + throw error } } public async analyzeAudioFromBase64(data: string, mimeType: string) { + if (this.useOllama) { + throw new Error("Audio analysis is currently only supported when using Gemini. Switch off USE_OLLAMA or supply a Gemini API key.") + } + try { + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or disable Ollama mode.") + } + const audioPart = { inlineData: { data, mimeType } - }; - const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user and be concise.`; - const result = await this.model.generateContent([prompt, audioPart]); - const response = await result.response; - const text = response.text(); - return { text, timestamp: Date.now() }; + } + const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user and be concise.` + const result = await this.model.generateContent([prompt, audioPart]) + const response = await result.response + const text = response.text() + return { text, timestamp: Date.now() } } catch (error) { - console.error("Error analyzing audio from base64:", error); - throw error; + console.error("Error analyzing audio from base64:", error) + throw error } } public async analyzeImageFile(imagePath: string) { try { - const imageData = await fs.promises.readFile(imagePath); + const base64Image = (await fs.promises.readFile(imagePath)).toString("base64") + const prompt = `${this.systemPrompt}\n\nDescribe the content of this image in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the image. Do not return a structured JSON object, just answer naturally as you would to a user. Be concise and brief.` + + if (this.useOllama) { + const text = await this.callOllamaVision(prompt, [base64Image]) + return { text: text.trim(), timestamp: Date.now() } + } + + if (!this.model) { + throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.") + } + const imagePart = { inlineData: { - data: imageData.toString("base64"), + data: base64Image, mimeType: "image/png" } - }; - const prompt = `${this.systemPrompt}\n\nDescribe the content of this image in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the image. Do not return a structured JSON object, just answer naturally as you would to a user. Be concise and brief.`; - const result = await this.model.generateContent([prompt, imagePart]); - const response = await result.response; - const text = response.text(); - return { text, timestamp: Date.now() }; + } + + const result = await this.model.generateContent([prompt, imagePart]) + const response = await result.response + const text = response.text() + return { text, timestamp: Date.now() } } catch (error) { - console.error("Error analyzing image file:", error); - throw error; + console.error("Error analyzing image file:", error) + throw error } } @@ -357,4 +468,4 @@ export class LLMHelper { return { success: false, error: error.message }; } } -} \ No newline at end of file +} diff --git a/electron/ScreenshotHelper.ts b/electron/ScreenshotHelper.ts index 184ba80f..d5d59e5a 100644 --- a/electron/ScreenshotHelper.ts +++ b/electron/ScreenshotHelper.ts @@ -86,7 +86,7 @@ export class ScreenshotHelper { // Use Electron's desktopCapturer to get screen sources const sources = await desktopCapturer.getSources({ types: ['screen'], - thumbnailSize: { width: 1920, height: 1080 } + thumbnailSize: { width: 1024, height: 1024 } }) if (sources.length === 0) {