diff --git a/README.md b/README.md index 3868a66..d694010 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,40 @@ The server provides the following tools for controlling macOS: - `y`: number (y-coordinate) - accepts integers, doubles, or string representations - Returns the RGBA color values (0-255) of the pixel at the specified coordinates +### 7. Capture Screen +- Tool name: `captureScreen` +- Input: + - `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality + - `scale`: number (optional, 0.1-1.0, default: 0.25) - Scale factor for image size +- Captures the entire screen and returns it as a base64-encoded JPEG image +- Default settings (10% quality, 25% scale) optimize for fast processing and prevent timeouts + +### 8. Capture Region +- Tool name: `captureRegion` +- Input: + - `x`: number (x-coordinate of the region) + - `y`: number (y-coordinate of the region) + - `width`: number (width of the region) + - `height`: number (height of the region) + - `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality + - `scale`: number (optional, 0.1-1.0, default: 0.25) - Scale factor for image size +- Captures a specific screen region and returns it as a base64-encoded JPEG image +- Default settings optimize for fast processing + +### 9. 
Save Screenshot +- Tool name: `saveScreenshot` +- Input: + - `filename`: string (path to save the screenshot) + - `x`: number (optional, x-coordinate of the region) + - `y`: number (optional, y-coordinate of the region) + - `width`: number (optional, width of the region) + - `height`: number (optional, height of the region) + - `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality + - `scale`: number (optional, 0.1-1.0, default: 0.5) - Scale factor for image size +- Captures the screen or a region and saves it to a file +- File format is determined by the filename extension (.jpg, .jpeg, .png); any other or missing extension is saved as JPEG +- Quality parameter only affects JPEG files + ## Security Considerations This server requires full accessibility permissions in System Preferences to control your mouse and keyboard. Be careful when running it and only connect trusted MCP clients. diff --git a/Sources/swift-mcp-gui/Tools/Screen/CaptureRegionTool.swift b/Sources/swift-mcp-gui/Tools/Screen/CaptureRegionTool.swift new file mode 100644 index 0000000..9f0d772 --- /dev/null +++ b/Sources/swift-mcp-gui/Tools/Screen/CaptureRegionTool.swift @@ -0,0 +1,105 @@ +import Foundation +import MCP +import SwiftAutoGUI +import AppKit + +struct CaptureRegionTool { + static func register(in registry: ToolRegistry) { + let tool = Tool( + name: "captureRegion", + description: "Capture specific screen region and return as base64 encoded image", + inputSchema: .object([ + "type": .string("object"), + "properties": .object([ + "x": .object([ + "type": .string("number"), + "description": .string("X coordinate of the region") + ]), + "y": .object([ + "type": .string("number"), + "description": .string("Y coordinate of the region") + ]), + "width": .object([ + "type": .string("number"), + "description": .string("Width of the region") + ]), + "height": .object([ + "type": .string("number"), + "description": .string("Height of the region") + ]), + "quality": .object([ + "type": .string("number"), + "description": 
.string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size.") + ]), + "scale": .object([ + "type": .string("number"), + "description": .string("Scale factor for image size (0.1-1.0, default: 0.25). Lower values reduce resolution.") + ]) + ]), + "required": .array([.string("x"), .string("y"), .string("width"), .string("height")]) + ]) + ) + + registry.registerTool(definition: tool) { arguments in + let parser = ParameterParser(arguments: arguments) + + do { + let x = try parser.parseDouble("x") + let y = try parser.parseDouble("y") + let width = try parser.parseDouble("width") + let height = try parser.parseDouble("height") + let quality = (try? parser.parseDouble("quality")) ?? 0.1 + let scale = (try? parser.parseDouble("scale")) ?? 0.25 + + let region = CGRect(x: x, y: y, width: width, height: height) + + guard let screenshot = SwiftAutoGUI.screenshot(region: region) else { + return .init(content: [.text("Failed to capture screen region")], isError: true) + } + + // Create scaled image more efficiently + let scaledWidth = Int(screenshot.size.width * scale) + let scaledHeight = Int(screenshot.size.height * scale) + + guard let cgImage = screenshot.cgImage(forProposedRect: nil, context: nil, hints: nil) else { + return .init(content: [.text("Failed to get CGImage")], isError: true) + } + + let bitmapRep = NSBitmapImageRep(bitmapDataPlanes: nil, + pixelsWide: scaledWidth, + pixelsHigh: scaledHeight, + bitsPerSample: 8, + samplesPerPixel: 4, + hasAlpha: true, + isPlanar: false, + colorSpaceName: .deviceRGB, + bytesPerRow: 0, + bitsPerPixel: 0) + + guard let bitmap = bitmapRep else { + return .init(content: [.text("Failed to create bitmap")], isError: true) + } + + NSGraphicsContext.saveGraphicsState() + NSGraphicsContext.current = NSGraphicsContext(bitmapImageRep: bitmap) + let context = NSGraphicsContext.current?.cgContext + context?.interpolationQuality = .low // Use low quality for speed + + let destRect = CGRect(x: 0, y: 0, width: 
scaledWidth, height: scaledHeight) + context?.draw(cgImage, in: destRect) + + NSGraphicsContext.restoreGraphicsState() + + // Use JPEG compression with specified quality + guard let jpegData = bitmap.representation(using: .jpeg, properties: [.compressionFactor: quality]) else { + return .init(content: [.text("Failed to convert screenshot to JPEG")], isError: true) + } + + let base64String = jpegData.base64EncodedString() + return .init(content: [.text("{\"image\": \"data:image/jpeg;base64,\(base64String)\"}")], isError: false) + } catch { + return .init(content: [.text(error.localizedDescription)], isError: true) + } + } + } +} \ No newline at end of file diff --git a/Sources/swift-mcp-gui/Tools/Screen/CaptureScreenTool.swift b/Sources/swift-mcp-gui/Tools/Screen/CaptureScreenTool.swift new file mode 100644 index 0000000..b52a778 --- /dev/null +++ b/Sources/swift-mcp-gui/Tools/Screen/CaptureScreenTool.swift @@ -0,0 +1,77 @@ +import Foundation +import MCP +import SwiftAutoGUI +import AppKit + +struct CaptureScreenTool { + static func register(in registry: ToolRegistry) { + let tool = Tool( + name: "captureScreen", + description: "Capture entire screen and return as base64 encoded image", + inputSchema: .object([ + "type": .string("object"), + "properties": .object([ + "quality": .object([ + "type": .string("number"), + "description": .string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size.") + ]), + "scale": .object([ + "type": .string("number"), + "description": .string("Scale factor for image size (0.1-1.0, default: 0.25). Lower values reduce resolution.") + ]) + ]) + ]) + ) + + registry.registerTool(definition: tool) { arguments in + let parser = ParameterParser(arguments: arguments) + let quality = (try? parser.parseDouble("quality")) ?? 0.1 + let scale = (try? parser.parseDouble("scale")) ?? 
0.25 + + guard let screenshot = SwiftAutoGUI.screenshot() else { + return .init(content: [.text("Failed to capture screen")], isError: true) + } + + // Create scaled image more efficiently + let scaledWidth = Int(screenshot.size.width * scale) + let scaledHeight = Int(screenshot.size.height * scale) + + guard let cgImage = screenshot.cgImage(forProposedRect: nil, context: nil, hints: nil) else { + return .init(content: [.text("Failed to get CGImage")], isError: true) + } + + let bitmapRep = NSBitmapImageRep(bitmapDataPlanes: nil, + pixelsWide: scaledWidth, + pixelsHigh: scaledHeight, + bitsPerSample: 8, + samplesPerPixel: 4, + hasAlpha: true, + isPlanar: false, + colorSpaceName: .deviceRGB, + bytesPerRow: 0, + bitsPerPixel: 0) + + guard let bitmap = bitmapRep else { + return .init(content: [.text("Failed to create bitmap")], isError: true) + } + + NSGraphicsContext.saveGraphicsState() + NSGraphicsContext.current = NSGraphicsContext(bitmapImageRep: bitmap) + let context = NSGraphicsContext.current?.cgContext + context?.interpolationQuality = .low // Use low quality for speed + + let destRect = CGRect(x: 0, y: 0, width: scaledWidth, height: scaledHeight) + context?.draw(cgImage, in: destRect) + + NSGraphicsContext.restoreGraphicsState() + + // Use JPEG compression with specified quality + guard let jpegData = bitmap.representation(using: .jpeg, properties: [.compressionFactor: quality]) else { + return .init(content: [.text("Failed to convert screenshot to JPEG")], isError: true) + } + + let base64String = jpegData.base64EncodedString() + return .init(content: [.text("{\"image\": \"data:image/jpeg;base64,\(base64String)\"}")], isError: false) + } + } +} \ No newline at end of file diff --git a/Sources/swift-mcp-gui/Tools/Screen/SaveScreenshotTool.swift b/Sources/swift-mcp-gui/Tools/Screen/SaveScreenshotTool.swift new file mode 100644 index 0000000..616d696 --- /dev/null +++ b/Sources/swift-mcp-gui/Tools/Screen/SaveScreenshotTool.swift @@ -0,0 +1,127 @@ +import 
Foundation +import MCP +import SwiftAutoGUI +import AppKit + +struct SaveScreenshotTool { + static func register(in registry: ToolRegistry) { + let tool = Tool( + name: "saveScreenshot", + description: "Capture screen and save to file", + inputSchema: .object([ + "type": .string("object"), + "properties": .object([ + "filename": .object([ + "type": .string("string"), + "description": .string("Filename to save the screenshot") + ]), + "x": .object([ + "type": .string("number"), + "description": .string("X coordinate of the region (optional)") + ]), + "y": .object([ + "type": .string("number"), + "description": .string("Y coordinate of the region (optional)") + ]), + "width": .object([ + "type": .string("number"), + "description": .string("Width of the region (optional)") + ]), + "height": .object([ + "type": .string("number"), + "description": .string("Height of the region (optional)") + ]), + "quality": .object([ + "type": .string("number"), + "description": .string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size. Only affects JPEG files.") + ]), + "scale": .object([ + "type": .string("number"), + "description": .string("Scale factor for image size (0.1-1.0, default: 0.5). Lower values reduce resolution.") + ]) + ]), + "required": .array([.string("filename")]) + ]) + ) + + registry.registerTool(definition: tool) { arguments in + let parser = ParameterParser(arguments: arguments) + + do { + let filename = try parser.parseString("filename") + let quality = (try? parser.parseDouble("quality")) ?? 0.1 + let scale = (try? parser.parseDouble("scale")) ?? 0.5 + + // Try to parse optional region parameters + let x = try? parser.parseDouble("x") + let y = try? parser.parseDouble("y") + let width = try? parser.parseDouble("width") + let height = try? parser.parseDouble("height") + + // Capture screenshot + let screenshot: NSImage? 
+ if let x = x, let y = y, let width = width, let height = height { + let region = CGRect(x: x, y: y, width: width, height: height) + screenshot = SwiftAutoGUI.screenshot(region: region) + } else { + screenshot = SwiftAutoGUI.screenshot() + } + + guard let image = screenshot else { + return .init(content: [.text("Failed to capture screenshot")], isError: true) + } + + // Scale down the image if needed + let scaledImage: NSImage + if scale < 1.0 { + let newSize = NSSize(width: image.size.width * scale, + height: image.size.height * scale) + scaledImage = NSImage(size: newSize) + scaledImage.lockFocus() + image.draw(in: NSRect(origin: .zero, size: newSize), + from: NSRect(origin: .zero, size: image.size), + operation: .copy, + fraction: 1.0) + scaledImage.unlockFocus() + } else { + scaledImage = image + } + + // Convert to bitmap representation + guard let tiffData = scaledImage.tiffRepresentation, + let bitmapRep = NSBitmapImageRep(data: tiffData) else { + return .init(content: [.text("Failed to convert screenshot")], isError: true) + } + + // Determine file type and save with appropriate format + let fileExtension = (filename as NSString).pathExtension.lowercased() + let imageData: Data? 
+ + switch fileExtension { + case "jpg", "jpeg": + imageData = bitmapRep.representation(using: .jpeg, properties: [.compressionFactor: quality]) + case "png": + imageData = bitmapRep.representation(using: .png, properties: [:]) + default: + // Default to JPEG with quality if no extension or unknown extension + imageData = bitmapRep.representation(using: .jpeg, properties: [.compressionFactor: quality]) + } + + guard let data = imageData else { + return .init(content: [.text("Failed to encode image")], isError: true) + } + + // Save to file + do { + try data.write(to: URL(fileURLWithPath: filename)) + return .init(content: [.text("{\"success\": true, \"filename\": \"\(filename)\"}")], isError: false) + } catch { + return .init(content: [.text("Failed to save file: \(error.localizedDescription)")], isError: true) + } + + } catch { + return .init(content: [.text(error.localizedDescription)], isError: true) + } + } + } +} \ No newline at end of file diff --git a/Sources/swift-mcp-gui/Tools/ToolRegistry.swift b/Sources/swift-mcp-gui/Tools/ToolRegistry.swift index 7b107e7..0bdbb1f 100644 --- a/Sources/swift-mcp-gui/Tools/ToolRegistry.swift +++ b/Sources/swift-mcp-gui/Tools/ToolRegistry.swift @@ -30,5 +30,8 @@ class ToolRegistry { SendKeysTool.register(in: self) GetScreenSizeTool.register(in: self) GetPixelColorTool.register(in: self) + CaptureScreenTool.register(in: self) + CaptureRegionTool.register(in: self) + SaveScreenshotTool.register(in: self) } } \ No newline at end of file diff --git a/Tests/swift-mcp-guiTests/Tools/ScreenToolsTests.swift b/Tests/swift-mcp-guiTests/Tools/ScreenToolsTests.swift index 1770b35..5b036b4 100644 --- a/Tests/swift-mcp-guiTests/Tools/ScreenToolsTests.swift +++ b/Tests/swift-mcp-guiTests/Tools/ScreenToolsTests.swift @@ -11,6 +11,9 @@ struct ScreenToolsTests { ScrollTool.register(in: toolRegistry) GetScreenSizeTool.register(in: toolRegistry) GetPixelColorTool.register(in: toolRegistry) + CaptureScreenTool.register(in: toolRegistry) + 
CaptureRegionTool.register(in: toolRegistry) + SaveScreenshotTool.register(in: toolRegistry) } @Test("Scroll tool execution") @@ -209,4 +212,139 @@ struct ScreenToolsTests { } != nil) } } + + @Test("Capture screen tool execution") + func captureScreenToolExecution() async throws { + let arguments: Value = .object([:]) + + let result = try await toolRegistry.execute(name: "captureScreen", arguments: arguments) + // This might succeed or fail depending on screen access permissions + if result.isError != true { + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("\"image\":") && + text.contains("data:image/jpeg;base64,") + } + return false + } != nil) + } + } + + @Test("Capture screen tool with low quality") + func captureScreenToolLowQuality() async throws { + let arguments: Value = .object([ + "quality": .double(0.3) + ]) + + let result = try await toolRegistry.execute(name: "captureScreen", arguments: arguments) + // This might succeed or fail depending on screen access permissions + if result.isError != true { + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("\"image\":") && + text.contains("data:image/jpeg;base64,") + } + return false + } != nil) + } + } + + @Test("Capture region tool execution") + func captureRegionToolExecution() async throws { + let arguments: Value = .object([ + "x": .int(100), + "y": .int(100), + "width": .int(200), + "height": .int(200) + ]) + + let result = try await toolRegistry.execute(name: "captureRegion", arguments: arguments) + // This might succeed or fail depending on screen access permissions + if result.isError != true { + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("\"image\":") && + text.contains("data:image/jpeg;base64,") + } + return false + } != nil) + } + } + + @Test("Capture region tool with missing parameters") + func captureRegionToolMissingParameters() async throws { + let arguments: Value = .object([ 
+ "x": .int(100), + "y": .int(100) + // Missing width and height + ]) + + let result = try await toolRegistry.execute(name: "captureRegion", arguments: arguments) + #expect(result.isError == true) + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("Missing parameter: width") + } + return false + } != nil) + } + + @Test("Save screenshot tool execution") + func saveScreenshotToolExecution() async throws { + let arguments: Value = .object([ + "filename": .string("test_screenshot.png") + ]) + + let result = try await toolRegistry.execute(name: "saveScreenshot", arguments: arguments) + // This might succeed or fail depending on screen access permissions + if result.isError != true { + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("\"success\": true") && + text.contains("\"filename\": \"test_screenshot.png\"") + } + return false + } != nil) + } + } + + @Test("Save screenshot tool with region") + func saveScreenshotToolWithRegion() async throws { + let arguments: Value = .object([ + "filename": .string("test_region_screenshot.png"), + "x": .int(50), + "y": .int(50), + "width": .int(150), + "height": .int(150) + ]) + + let result = try await toolRegistry.execute(name: "saveScreenshot", arguments: arguments) + // This might succeed or fail depending on screen access permissions + if result.isError != true { + #expect(result.content.first { + if case .text(let text) = $0 { + return text.contains("\"success\": true") && + text.contains("\"filename\": \"test_region_screenshot.png\"") + } + return false + } != nil) + } + } + + @Test("Save screenshot tool with missing filename") + func saveScreenshotToolMissingFilename() async throws { + let arguments: Value = .object([ + "x": .int(50), + "y": .int(50) + ]) + + let result = try await toolRegistry.execute(name: "saveScreenshot", arguments: arguments) + #expect(result.isError == true) + #expect(result.content.first { + if case .text(let text) = $0 { + 
return text.contains("Missing parameter: filename") + } + return false + } != nil) + } } \ No newline at end of file