Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ node_modules/
jspm_packages/
dist-electron/
dist/
release/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
Expand Down
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ If you’re looking for a hosted desktop recording API, consider checking out [R
- Git installed on your computer
- **Either** a Gemini API key (get it from [Google AI Studio](https://makersuite.google.com/app/apikey))
- **Or** Ollama installed locally for private LLM usage (recommended for privacy)
- *(Optional)* A free **Deepgram API key** for Live Voice Transcription & Translation

### Installation Steps

Expand Down Expand Up @@ -114,7 +115,8 @@ The built app will be in the `release` folder.
3. **Keyboard Shortcuts**:
- `Cmd/Ctrl + B`: Toggle window visibility
- `Cmd/Ctrl + H`: Take screenshot
- `Cmd/Enter`: Get solution
- `Cmd/Ctrl + Shift + R`: Toggle Live Voice Recording & Transcription
- `Cmd/Enter`: Get solution
- `Cmd/Ctrl + Arrow Keys`: Move window

## 🔧 Troubleshooting
Expand Down Expand Up @@ -169,10 +171,12 @@ If you see other errors:
- AI analyzes images, documents, presentations, or problems
- Get instant explanations, answers, and solutions

### **Audio Intelligence**
- Process audio files and recordings
- Real-time transcription and analysis
- Perfect for meeting notes and content review
### **Audio Intelligence & Live Transcription**
- Trigger live voice recording instantly with `Cmd/Ctrl + Shift + R`
- Captures both **system audio** (meetings) and **microphone** simultaneously
- **Real-time Transcription & Translation** powered by Deepgram `nova-3`
- Instantly translates foreign languages to English (or other targets) live on screen
- Submits exact transcripts directly to Gemini for instantaneous insights

### **Contextual Chat**
- Chat with AI about anything you see on screen
Expand Down Expand Up @@ -230,7 +234,7 @@ If you see other errors:
## Technical Details

### **AI Models Supported**
- **Gemini 2.0 Flash** - Latest Google AI with vision capabilities
- **Gemini 2.5 Flash** / **Gemini 3 Flash** / **Gemini 2.5 Flash Lite** - High-speed, high-limit daily requests
- **Llama 3.2** - Meta's advanced local model via Ollama
- **CodeLlama** - Specialized coding assistance
- **Mistral** - Lightweight, fast responses
Expand Down
21 changes: 21 additions & 0 deletions electron-launcher.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Launcher that strips ELECTRON_RUN_AS_NODE before starting Electron.
// If ELECTRON_RUN_AS_NODE=1 leaks in from the system environment,
// Electron boots as plain Node.js instead of the browser process.
delete process.env.ELECTRON_RUN_AS_NODE;

const { spawn } = require('child_process');
const electronBinary = require('electron');

// Hand the child a copy of the environment with the flag removed as well.
const childEnv = { ...process.env };
delete childEnv.ELECTRON_RUN_AS_NODE;

const electronProcess = spawn(electronBinary, ['.'], {
  stdio: 'inherit',
  env: childEnv
});

// Mirror the child's exit code; a null code (killed by signal) maps to 0.
electronProcess.on('close', (exitCode) => process.exit(exitCode || 0));
electronProcess.on('error', (spawnError) => {
  console.error('Failed to start electron:', spawnError);
  process.exit(1);
});
142 changes: 124 additions & 18 deletions electron/LLMHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,34 @@ export class LLMHelper {
// Provider toggle: true → local Ollama HTTP API, false → Google Gemini SDK.
private useOllama: boolean = false
// Active Ollama model tag; the constructor may override or auto-detect it.
private ollamaModel: string = "llama3.2"
// Base URL of the local Ollama server.
private ollamaUrl: string = "http://localhost:11434"
// Free-form context about the user, injected into meeting-assistant prompts.
private userInfo: string = ""
// Gemini model id used whenever a Gemini model instance is (re)created.
private geminiModelName: string = "gemini-2.5-flash"
// Stored Gemini API key so the model can be re-created on model switches.
private apiKey: string = ""

/**
 * Builds an LLM helper backed by either Google Gemini or a local Ollama server.
 *
 * Diff residue resolved: the pasted hunk contained both the old hard-coded
 * `gemini-2.0-flash` model creation and the new `this.geminiModelName`-based
 * one; only the configurable version is kept.
 *
 * @param apiKey Gemini API key (required unless `useOllama` is true).
 * @param useOllama When true, talk to a local Ollama server instead of Gemini.
 * @param ollamaModel Optional Ollama model tag; auto-detected when omitted.
 * @param ollamaUrl Optional Ollama base URL (defaults to localhost:11434).
 * @throws Error when neither an API key nor Ollama mode is provided.
 */
constructor(apiKey?: string, useOllama: boolean = false, ollamaModel?: string, ollamaUrl?: string) {
  this.useOllama = useOllama

  if (useOllama) {
    this.ollamaUrl = ollamaUrl || "http://localhost:11434"
    this.ollamaModel = ollamaModel || "gemma:latest" // Default fallback
    console.log(`[LLMHelper] Using Ollama with model: ${this.ollamaModel}`)

    // Auto-detect and use first available model if specified model doesn't exist
    this.initializeOllamaModel()
  } else if (apiKey) {
    // Keep the key so switchToGemini can re-create the model later.
    this.apiKey = apiKey
    const genAI = new GoogleGenerativeAI(apiKey)
    this.model = genAI.getGenerativeModel({ model: this.geminiModelName })
    console.log(`[LLMHelper] Using Google Gemini (${this.geminiModelName})`)
  } else {
    throw new Error("Either provide Gemini API key or enable Ollama mode")
  }
}

/** Stores free-form user context later appended to meeting-assistant prompts. */
public setUserInfo(info: string) {
this.userInfo = info;
}

private async fileToGenerativePart(imagePath: string) {
const imageData = await fs.promises.readFile(imagePath)
return {
Expand Down Expand Up @@ -124,7 +132,7 @@ export class LLMHelper {
public async extractProblemFromImages(imagePaths: string[]) {
try {
const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Please analyze these images and extract the following information in JSON format:\n{
"problem_statement": "A clear statement of the problem or situation depicted in the images.",
"context": "Relevant background or context from the images.",
Expand Down Expand Up @@ -171,7 +179,7 @@ export class LLMHelper {
public async debugSolutionWithImages(problemInfo: any, currentCode: string, debugImagePaths: string[]) {
try {
const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Given:\n1. The original problem or situation: ${JSON.stringify(problemInfo, null, 2)}\n2. The current response or approach: ${currentCode}\n3. The debug information in the provided images\n\nPlease analyze the debug information and provide feedback in this JSON format:\n{
"solution": {
"code": "The code or main answer here.",
Expand Down Expand Up @@ -203,7 +211,7 @@ export class LLMHelper {
mimeType: "audio/mp3"
}
};
const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user.`;
const prompt = `${this.systemPrompt}\n\nThe provided audio clip contains a question or a problem statement. Please act as a solver and provide the direct answer or solution to whatever is being asked in the audio. Do not return a structured JSON object, just answer naturally as you would to a user, and format your answer clearly with markdown.`;
const result = await this.model.generateContent([prompt, audioPart]);
const response = await result.response;
const text = response.text();
Expand All @@ -222,7 +230,16 @@ export class LLMHelper {
mimeType
}
};
const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user and be concise.`;

const userContextStr = this.userInfo ? `\n\nAbout the User:\n${this.userInfo}` : "";
const prompt = `You are an invisible, real-time meeting assistant. The user is currently in a live professional meeting. The following text is a direct transcription of a question the user just spoke into their microphone.${userContextStr}

Your Constraints:
- Answer the question directly and concisely.
- NEVER acknowledge that this text comes from an audio transcription.
- NEVER describe the speaker's voice, tone, or audio quality.
- Provide actionable answers that the user can immediately use in their meeting.`;

const result = await this.model.generateContent([prompt, audioPart]);
const response = await result.response;
const text = response.text();
Expand All @@ -233,6 +250,38 @@ export class LLMHelper {
}
}

/**
 * Streams a Gemini answer for a base64-encoded audio clip.
 * Each partial response is forwarded to onChunk as it arrives; the full
 * concatenated text (plus a timestamp) is returned once the stream completes.
 * Errors are logged and rethrown to the caller.
 */
public async analyzeAudioFromBase64Stream(data: string, mimeType: string, onChunk: (chunk: string) => void) {
  try {
    const audioPayload = {
      inlineData: {
        data,
        mimeType
      }
    };

    // Optional "About the User" section, only when user info was provided.
    const userContextStr = this.userInfo ? `\n\nAbout the User:\n${this.userInfo}` : "";
    const prompt = `You are an invisible, real-time meeting assistant. The user is currently in a live professional meeting. The following text is a direct transcription of a question the user just spoke into their microphone.${userContextStr}

Your Constraints:
- Answer the question directly and concisely.
- NEVER acknowledge that this text comes from an audio transcription.
- NEVER describe the speaker's voice, tone, or audio quality.
- Provide actionable answers that the user can immediately use in their meeting.`;

    const streaming = await this.model.generateContentStream([prompt, audioPayload]);
    let aggregated = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      aggregated += piece;
      onChunk(piece);
    }
    return { text: aggregated, timestamp: Date.now() };
  } catch (error) {
    console.error("Error analyzing audio from base64 stream:", error);
    throw error;
  }
}

public async analyzeImageFile(imagePath: string) {
try {
const imageData = await fs.promises.readFile(imagePath);
Expand All @@ -253,6 +302,31 @@ export class LLMHelper {
}
}

/**
 * Streams a short natural-language description of an image file on disk.
 * Partial text is forwarded through onChunk; returns the full text plus a
 * timestamp. Errors are logged and rethrown.
 */
public async analyzeImageFileStream(imagePath: string, onChunk: (chunk: string) => void) {
  try {
    const bytes = await fs.promises.readFile(imagePath);
    const imagePart = {
      inlineData: {
        data: bytes.toString("base64"),
        // NOTE(review): mime type is fixed — assumes callers only pass PNG
        // screenshots; confirm before feeding other formats through here.
        mimeType: "image/png"
      }
    };
    const prompt = `${this.systemPrompt}\n\nDescribe the content of this image in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the image. Do not return a structured JSON object, just answer naturally as you would to a user. Be concise and brief.`;

    const streaming = await this.model.generateContentStream([prompt, imagePart]);
    let combined = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      combined += piece;
      onChunk(piece);
    }
    return { text: combined, timestamp: Date.now() };
  } catch (error) {
    console.error("Error analyzing image file stream:", error);
    throw error;
  }
}

public async chatWithGemini(message: string): Promise<string> {
try {
if (this.useOllama) {
Expand All @@ -270,6 +344,30 @@ export class LLMHelper {
}
}

/**
 * Streams a chat reply from the active provider.
 * With Ollama the reply arrives as one chunk (no streaming API is used here);
 * with Gemini, chunks are forwarded through onChunk as they arrive.
 * @returns the complete reply text.
 * @throws Error when no provider is configured; other errors are logged and rethrown.
 */
public async chatWithGeminiStream(message: string, onChunk: (chunk: string) => void): Promise<string> {
  try {
    // Ollama path: single-shot call, delivered as one chunk.
    if (this.useOllama) {
      const reply = await this.callOllama(message);
      onChunk(reply);
      return reply;
    }
    if (!this.model) {
      throw new Error("No LLM provider configured");
    }
    // Gemini path: true token streaming.
    const streaming = await this.model.generateContentStream(message);
    let assembled = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      assembled += piece;
      onChunk(piece);
    }
    return assembled;
  } catch (error) {
    console.error("[LLMHelper] Error in chatWithGeminiStream:", error);
    throw error;
  }
}

/** Convenience alias: delegates to chatWithGemini (which routes to the active provider). */
public async chat(message: string): Promise<string> {
return this.chatWithGemini(message);
}
Expand All @@ -280,11 +378,11 @@ export class LLMHelper {

public async getOllamaModels(): Promise<string[]> {
if (!this.useOllama) return [];

try {
const response = await fetch(`${this.ollamaUrl}/api/tags`);
if (!response.ok) throw new Error('Failed to fetch models');

const data = await response.json();
return data.models?.map((model: any) => model.name) || [];
} catch (error) {
Expand All @@ -298,35 +396,43 @@ export class LLMHelper {
}

/**
 * Returns the model identifier for the active provider.
 * Diff residue resolved: the pasted hunk contained both the old hard-coded
 * "gemini-2.0-flash" return and the new field-based one; only the
 * configurable version is kept.
 */
public getCurrentModel(): string {
  return this.useOllama ? this.ollamaModel : this.geminiModelName
}

/**
 * Activates the Ollama provider.
 * Optionally overrides the server URL and model tag; when no model is given,
 * the first available model on the server is auto-detected.
 */
public async switchToOllama(model?: string, url?: string): Promise<void> {
  this.useOllama = true;
  if (url) {
    this.ollamaUrl = url;
  }

  if (model) {
    this.ollamaModel = model;
  } else {
    // No explicit model: ask the server which models exist and pick one.
    await this.initializeOllamaModel();
  }

  console.log(`[LLMHelper] Switched to Ollama: ${this.ollamaModel} at ${this.ollamaUrl}`);
}

public async switchToGemini(apiKey?: string): Promise<void> {
public async switchToGemini(apiKey?: string, modelName?: string): Promise<void> {
if (modelName) {
this.geminiModelName = modelName;
}
if (apiKey) {
this.apiKey = apiKey;
const genAI = new GoogleGenerativeAI(apiKey);
this.model = genAI.getGenerativeModel({ model: "gemini-2.0-flash" });
this.model = genAI.getGenerativeModel({ model: this.geminiModelName });
} else if (this.apiKey) {
// Re-create model with new model name using existing API key
const genAI = new GoogleGenerativeAI(this.apiKey);
this.model = genAI.getGenerativeModel({ model: this.geminiModelName });
}

if (!this.model && !apiKey) {
throw new Error("No Gemini API key provided and no existing model instance");
}

this.useOllama = false;
console.log("[LLMHelper] Switched to Gemini");
console.log(`[LLMHelper] Switched to Gemini (${this.geminiModelName})`);
}

public async testConnection(): Promise<{ success: boolean; error?: string }> {
Expand Down
8 changes: 6 additions & 2 deletions electron/ProcessingHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ export class ProcessingHelper {

constructor(appState: AppState) {
this.appState = appState

// Check if user wants to use Ollama
const useOllama = process.env.USE_OLLAMA === "true"
const ollamaModel = process.env.OLLAMA_MODEL // Don't set default here, let LLMHelper auto-detect
const ollamaUrl = process.env.OLLAMA_URL || "http://localhost:11434"

if (useOllama) {
console.log("[ProcessingHelper] Initializing with Ollama")
this.llmHelper = new LLMHelper(undefined, true, ollamaModel, ollamaUrl)
Expand Down Expand Up @@ -159,6 +159,10 @@ export class ProcessingHelper {
return this.llmHelper.analyzeAudioFromBase64(data, mimeType);
}

/** Streams analysis of a base64 audio clip; partial text is forwarded via onChunk. */
public async processAudioBase64Stream(data: string, mimeType: string, onChunk: (chunk: string) => void) {
return this.llmHelper.analyzeAudioFromBase64Stream(data, mimeType, onChunk);
}

// Add audio file processing method
public async processAudioFile(filePath: string) {
return this.llmHelper.analyzeAudioFile(filePath);
Expand Down
15 changes: 10 additions & 5 deletions electron/ScreenshotHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@ export class ScreenshotHelper {
private extraScreenshotQueue: string[] = []
private readonly MAX_SCREENSHOTS = 5

private readonly screenshotDir: string
private readonly extraScreenshotDir: string
// Lazily-resolved screenshot directories; empty until ensureDirs() runs.
// NOTE(review): resolution is deferred, presumably because app.getPath must
// not be called before the Electron app is ready — confirm against main.ts.
private screenshotDir: string = ""
private extraScreenshotDir: string = ""
// Guards ensureDirs() so path resolution and mkdir happen at most once.
private dirsInitialized: boolean = false

// Which capture queue new screenshots belong to.
private view: "queue" | "solutions" = "queue"

// Directories are intentionally NOT created here — see ensureDirs().
constructor(view: "queue" | "solutions" = "queue") {
this.view = view
}

// Initialize directories
private ensureDirs(): void {
if (this.dirsInitialized) return
this.screenshotDir = path.join(app.getPath("userData"), "screenshots")
this.extraScreenshotDir = path.join(
app.getPath("userData"),
Expand All @@ -33,6 +36,7 @@ export class ScreenshotHelper {
if (!fs.existsSync(this.extraScreenshotDir)) {
fs.mkdirSync(this.extraScreenshotDir)
}
this.dirsInitialized = true
}

public getView(): "queue" | "solutions" {
Expand Down Expand Up @@ -78,12 +82,13 @@ export class ScreenshotHelper {
hideMainWindow: () => void,
showMainWindow: () => void
): Promise<string> {
this.ensureDirs()
try {
hideMainWindow()

// Add a small delay to ensure window is hidden
await new Promise(resolve => setTimeout(resolve, 100))

let screenshotPath = ""

if (this.view === "queue") {
Expand Down
Loading