Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,40 @@ The server provides the following tools for controlling macOS:
- `y`: number (y-coordinate) - accepts integers, doubles, or string representations
- Returns the RGBA color values (0-255) of the pixel at the specified coordinates

### 7. Capture Screen
- Tool name: `captureScreen`
- Input:
- `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality
- `scale`: number (optional, 0.1-1.0, default: 0.25) - Scale factor for image size
- Captures the entire screen and returns it as a base64-encoded JPEG image
- Default settings (10% quality, 25% scale) optimize for fast processing and prevent timeouts

### 8. Capture Region
- Tool name: `captureRegion`
- Input:
- `x`: number (x-coordinate of the region)
- `y`: number (y-coordinate of the region)
- `width`: number (width of the region)
- `height`: number (height of the region)
- `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality
- `scale`: number (optional, 0.1-1.0, default: 0.25) - Scale factor for image size
- Captures a specific screen region and returns it as a base64-encoded JPEG image
- Default settings optimize for fast processing

### 9. Save Screenshot
- Tool name: `saveScreenshot`
- Input:
- `filename`: string (path to save the screenshot)
- `x`: number (optional, x-coordinate of the region)
- `y`: number (optional, y-coordinate of the region)
- `width`: number (optional, width of the region)
- `height`: number (optional, height of the region)
- `quality`: number (optional, 0.0-1.0, default: 0.1) - JPEG compression quality
- `scale`: number (optional, 0.1-1.0, default: 0.5) - Scale factor for image size
- Captures the screen or a region and saves it to a file
- File format is determined by the filename extension (.jpg, .jpeg, .png); any other or missing extension is saved as JPEG
- Quality parameter only affects JPEG files

## Security Considerations

This server requires full accessibility permissions in System Preferences to control your mouse and keyboard. Be careful when running it and only connect trusted MCP clients.
Expand Down
105 changes: 105 additions & 0 deletions Sources/swift-mcp-gui/Tools/Screen/CaptureRegionTool.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import Foundation
import MCP
import SwiftAutoGUI
import AppKit

/// MCP tool `captureRegion`: captures a rectangular screen region, downscales it,
/// and returns it as a base64-encoded JPEG data URL inside a small JSON payload.
struct CaptureRegionTool {
    /// Failures that can occur while converting a captured image into JPEG data.
    private enum RenderFailure: Error {
        case missingCGImage
        case bitmapAllocation
        case graphicsContext
        case jpegEncoding

        /// Message reported back to the client for this failure.
        var message: String {
            switch self {
            case .missingCGImage: return "Failed to get CGImage"
            case .bitmapAllocation: return "Failed to create bitmap"
            case .graphicsContext: return "Failed to create graphics context"
            case .jpegEncoding: return "Failed to convert screenshot to JPEG"
            }
        }
    }

    /// Accepted range and default for the `quality` argument (matches the schema text).
    private static let qualityRange = 0.0...1.0
    private static let defaultQuality = 0.1

    /// Accepted range and default for the `scale` argument (matches the schema text).
    private static let scaleRange = 0.1...1.0
    private static let defaultScale = 0.25

    /// Registers the `captureRegion` tool and its handler in `registry`.
    ///
    /// - Parameter registry: The registry that receives the tool definition and handler.
    static func register(in registry: ToolRegistry) {
        let tool = Tool(
            name: "captureRegion",
            description: "Capture specific screen region and return as base64 encoded image",
            inputSchema: .object([
                "type": .string("object"),
                "properties": .object([
                    "x": .object([
                        "type": .string("number"),
                        "description": .string("X coordinate of the region")
                    ]),
                    "y": .object([
                        "type": .string("number"),
                        "description": .string("Y coordinate of the region")
                    ]),
                    "width": .object([
                        "type": .string("number"),
                        "description": .string("Width of the region")
                    ]),
                    "height": .object([
                        "type": .string("number"),
                        "description": .string("Height of the region")
                    ]),
                    "quality": .object([
                        "type": .string("number"),
                        "description": .string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size.")
                    ]),
                    "scale": .object([
                        "type": .string("number"),
                        "description": .string("Scale factor for image size (0.1-1.0, default: 0.25). Lower values reduce resolution.")
                    ])
                ]),
                "required": .array([.string("x"), .string("y"), .string("width"), .string("height")])
            ])
        )

        registry.registerTool(definition: tool) { arguments in
            let parser = ParameterParser(arguments: arguments)

            do {
                // Required region arguments: a parse failure is reported to the client.
                let x = try parser.parseDouble("x")
                let y = try parser.parseDouble("y")
                let width = try parser.parseDouble("width")
                let height = try parser.parseDouble("height")

                // Optional arguments: missing or unparsable values fall back to the
                // defaults, and out-of-range values are clamped to the documented range.
                let quality = CaptureRegionTool.normalized(
                    try? parser.parseDouble("quality"),
                    in: CaptureRegionTool.qualityRange,
                    fallback: CaptureRegionTool.defaultQuality
                )
                let scale = CaptureRegionTool.normalized(
                    try? parser.parseDouble("scale"),
                    in: CaptureRegionTool.scaleRange,
                    fallback: CaptureRegionTool.defaultScale
                )

                let region = CGRect(x: x, y: y, width: width, height: height)

                guard let screenshot = SwiftAutoGUI.screenshot(region: region) else {
                    return .init(content: [.text("Failed to capture screen region")], isError: true)
                }

                switch CaptureRegionTool.renderJPEG(from: screenshot, scale: scale, quality: quality) {
                case .success(let jpegData):
                    let base64String = jpegData.base64EncodedString()
                    return .init(content: [.text("{\"image\": \"data:image/jpeg;base64,\(base64String)\"}")], isError: false)
                case .failure(let failure):
                    return .init(content: [.text(failure.message)], isError: true)
                }
            } catch {
                return .init(content: [.text(error.localizedDescription)], isError: true)
            }
        }
    }

    /// Returns `value` clamped to `range`, or `fallback` when it is absent or not finite.
    private static func normalized(_ value: Double?, in range: ClosedRange<Double>, fallback: Double) -> Double {
        // NaN would slip through min/max unchanged and later trap in `Int(_:)`.
        guard let value = value, value.isFinite else { return fallback }
        return min(max(value, range.lowerBound), range.upperBound)
    }

    /// Draws `screenshot` into a bitmap scaled by `scale` and encodes it as JPEG.
    ///
    /// - Parameters:
    ///   - screenshot: The captured image.
    ///   - scale: Factor applied to the image's `size`; expected to be already clamped.
    ///   - quality: JPEG compression factor; expected to be already clamped.
    /// - Returns: The JPEG data, or the step that failed.
    private static func renderJPEG(from screenshot: NSImage, scale: Double, quality: Double) -> Result<Data, RenderFailure> {
        // At least one pixel per side so a tiny region never yields a zero-size bitmap.
        let pixelWidth = max(1, Int(Double(screenshot.size.width) * scale))
        let pixelHeight = max(1, Int(Double(screenshot.size.height) * scale))

        guard let cgImage = screenshot.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            return .failure(.missingCGImage)
        }

        guard let bitmap = NSBitmapImageRep(bitmapDataPlanes: nil,
                                            pixelsWide: pixelWidth,
                                            pixelsHigh: pixelHeight,
                                            bitsPerSample: 8,
                                            samplesPerPixel: 4,
                                            hasAlpha: true,
                                            isPlanar: false,
                                            colorSpaceName: .deviceRGB,
                                            bytesPerRow: 0,
                                            bitsPerPixel: 0) else {
            return .failure(.bitmapAllocation)
        }

        // Without a context nothing would be drawn and a blank image would be
        // returned as if it were a valid capture, so treat it as an error.
        guard let graphicsContext = NSGraphicsContext(bitmapImageRep: bitmap) else {
            return .failure(.graphicsContext)
        }

        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = graphicsContext
        let cgContext = graphicsContext.cgContext
        cgContext.interpolationQuality = .low // Use low quality for speed
        cgContext.draw(cgImage, in: CGRect(x: 0, y: 0, width: pixelWidth, height: pixelHeight))
        NSGraphicsContext.restoreGraphicsState()

        guard let jpegData = bitmap.representation(using: .jpeg, properties: [.compressionFactor: quality]) else {
            return .failure(.jpegEncoding)
        }
        return .success(jpegData)
    }
}
77 changes: 77 additions & 0 deletions Sources/swift-mcp-gui/Tools/Screen/CaptureScreenTool.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import Foundation
import MCP
import SwiftAutoGUI
import AppKit

/// MCP tool `captureScreen`: captures the whole screen, downscales it, and returns
/// it as a base64-encoded JPEG data URL inside a small JSON payload.
struct CaptureScreenTool {
    /// Failures that can occur while converting a captured image into JPEG data.
    private enum RenderFailure: Error {
        case missingCGImage
        case bitmapAllocation
        case graphicsContext
        case jpegEncoding

        /// Message reported back to the client for this failure.
        var message: String {
            switch self {
            case .missingCGImage: return "Failed to get CGImage"
            case .bitmapAllocation: return "Failed to create bitmap"
            case .graphicsContext: return "Failed to create graphics context"
            case .jpegEncoding: return "Failed to convert screenshot to JPEG"
            }
        }
    }

    /// Accepted range and default for the `quality` argument (matches the schema text).
    private static let qualityRange = 0.0...1.0
    private static let defaultQuality = 0.1

    /// Accepted range and default for the `scale` argument (matches the schema text).
    private static let scaleRange = 0.1...1.0
    private static let defaultScale = 0.25

    /// Registers the `captureScreen` tool and its handler in `registry`.
    ///
    /// - Parameter registry: The registry that receives the tool definition and handler.
    static func register(in registry: ToolRegistry) {
        let tool = Tool(
            name: "captureScreen",
            description: "Capture entire screen and return as base64 encoded image",
            inputSchema: .object([
                "type": .string("object"),
                "properties": .object([
                    "quality": .object([
                        "type": .string("number"),
                        "description": .string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size.")
                    ]),
                    "scale": .object([
                        "type": .string("number"),
                        "description": .string("Scale factor for image size (0.1-1.0, default: 0.25). Lower values reduce resolution.")
                    ])
                ])
            ])
        )

        registry.registerTool(definition: tool) { arguments in
            let parser = ParameterParser(arguments: arguments)

            // Optional arguments: missing or unparsable values fall back to the
            // defaults, and out-of-range values are clamped to the documented range.
            let quality = CaptureScreenTool.normalized(
                try? parser.parseDouble("quality"),
                in: CaptureScreenTool.qualityRange,
                fallback: CaptureScreenTool.defaultQuality
            )
            let scale = CaptureScreenTool.normalized(
                try? parser.parseDouble("scale"),
                in: CaptureScreenTool.scaleRange,
                fallback: CaptureScreenTool.defaultScale
            )

            guard let screenshot = SwiftAutoGUI.screenshot() else {
                return .init(content: [.text("Failed to capture screen")], isError: true)
            }

            switch CaptureScreenTool.renderJPEG(from: screenshot, scale: scale, quality: quality) {
            case .success(let jpegData):
                let base64String = jpegData.base64EncodedString()
                return .init(content: [.text("{\"image\": \"data:image/jpeg;base64,\(base64String)\"}")], isError: false)
            case .failure(let failure):
                return .init(content: [.text(failure.message)], isError: true)
            }
        }
    }

    /// Returns `value` clamped to `range`, or `fallback` when it is absent or not finite.
    private static func normalized(_ value: Double?, in range: ClosedRange<Double>, fallback: Double) -> Double {
        // NaN would slip through min/max unchanged and later trap in `Int(_:)`.
        guard let value = value, value.isFinite else { return fallback }
        return min(max(value, range.lowerBound), range.upperBound)
    }

    /// Draws `screenshot` into a bitmap scaled by `scale` and encodes it as JPEG.
    ///
    /// - Parameters:
    ///   - screenshot: The captured image.
    ///   - scale: Factor applied to the image's `size`; expected to be already clamped.
    ///   - quality: JPEG compression factor; expected to be already clamped.
    /// - Returns: The JPEG data, or the step that failed.
    private static func renderJPEG(from screenshot: NSImage, scale: Double, quality: Double) -> Result<Data, RenderFailure> {
        // At least one pixel per side so the bitmap is never zero-sized.
        let pixelWidth = max(1, Int(Double(screenshot.size.width) * scale))
        let pixelHeight = max(1, Int(Double(screenshot.size.height) * scale))

        guard let cgImage = screenshot.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            return .failure(.missingCGImage)
        }

        guard let bitmap = NSBitmapImageRep(bitmapDataPlanes: nil,
                                            pixelsWide: pixelWidth,
                                            pixelsHigh: pixelHeight,
                                            bitsPerSample: 8,
                                            samplesPerPixel: 4,
                                            hasAlpha: true,
                                            isPlanar: false,
                                            colorSpaceName: .deviceRGB,
                                            bytesPerRow: 0,
                                            bitsPerPixel: 0) else {
            return .failure(.bitmapAllocation)
        }

        // Without a context nothing would be drawn and a blank image would be
        // returned as if it were a valid capture, so treat it as an error.
        guard let graphicsContext = NSGraphicsContext(bitmapImageRep: bitmap) else {
            return .failure(.graphicsContext)
        }

        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = graphicsContext
        let cgContext = graphicsContext.cgContext
        cgContext.interpolationQuality = .low // Use low quality for speed
        cgContext.draw(cgImage, in: CGRect(x: 0, y: 0, width: pixelWidth, height: pixelHeight))
        NSGraphicsContext.restoreGraphicsState()

        guard let jpegData = bitmap.representation(using: .jpeg, properties: [.compressionFactor: quality]) else {
            return .failure(.jpegEncoding)
        }
        return .success(jpegData)
    }
}
127 changes: 127 additions & 0 deletions Sources/swift-mcp-gui/Tools/Screen/SaveScreenshotTool.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import Foundation
import MCP
import SwiftAutoGUI
import AppKit

/// MCP tool `saveScreenshot`: captures the full screen or a region, optionally
/// downscales it, and writes it to a file as JPEG or PNG.
struct SaveScreenshotTool {
    /// Accepted range and default for the `quality` argument (matches the schema text).
    private static let qualityRange = 0.0...1.0
    private static let defaultQuality = 0.1

    /// Accepted range and default for the `scale` argument (matches the schema text).
    private static let scaleRange = 0.1...1.0
    private static let defaultScale = 0.5

    /// Registers the `saveScreenshot` tool and its handler in `registry`.
    ///
    /// - Parameter registry: The registry that receives the tool definition and handler.
    static func register(in registry: ToolRegistry) {
        let tool = Tool(
            name: "saveScreenshot",
            description: "Capture screen and save to file",
            inputSchema: .object([
                "type": .string("object"),
                "properties": .object([
                    "filename": .object([
                        "type": .string("string"),
                        "description": .string("Filename to save the screenshot")
                    ]),
                    "x": .object([
                        "type": .string("number"),
                        "description": .string("X coordinate of the region (optional)")
                    ]),
                    "y": .object([
                        "type": .string("number"),
                        "description": .string("Y coordinate of the region (optional)")
                    ]),
                    "width": .object([
                        "type": .string("number"),
                        "description": .string("Width of the region (optional)")
                    ]),
                    "height": .object([
                        "type": .string("number"),
                        "description": .string("Height of the region (optional)")
                    ]),
                    "quality": .object([
                        "type": .string("number"),
                        "description": .string("JPEG compression quality (0.0-1.0, default: 0.1). Lower values reduce file size. Only affects JPEG files.")
                    ]),
                    "scale": .object([
                        "type": .string("number"),
                        "description": .string("Scale factor for image size (0.1-1.0, default: 0.5). Lower values reduce resolution.")
                    ])
                ]),
                "required": .array([.string("filename")])
            ])
        )

        registry.registerTool(definition: tool) { arguments in
            let parser = ParameterParser(arguments: arguments)

            do {
                let filename = try parser.parseString("filename")

                // Optional arguments: missing or unparsable values fall back to the
                // defaults, and out-of-range values are clamped to the documented range.
                let quality = SaveScreenshotTool.normalized(
                    try? parser.parseDouble("quality"),
                    in: SaveScreenshotTool.qualityRange,
                    fallback: SaveScreenshotTool.defaultQuality
                )
                let scale = SaveScreenshotTool.normalized(
                    try? parser.parseDouble("scale"),
                    in: SaveScreenshotTool.scaleRange,
                    fallback: SaveScreenshotTool.defaultScale
                )

                // Try to parse optional region parameters
                let x = try? parser.parseDouble("x")
                let y = try? parser.parseDouble("y")
                let width = try? parser.parseDouble("width")
                let height = try? parser.parseDouble("height")
                let suppliedRegionValues = [x, y, width, height].compactMap { $0 }.count

                // Capture screenshot: all four region values select a region, none
                // selects the full screen. Anything in between is reported instead of
                // silently capturing the whole screen.
                let screenshot: NSImage?
                if let x = x, let y = y, let width = width, let height = height {
                    let region = CGRect(x: x, y: y, width: width, height: height)
                    screenshot = SwiftAutoGUI.screenshot(region: region)
                } else if suppliedRegionValues == 0 {
                    screenshot = SwiftAutoGUI.screenshot()
                } else {
                    return .init(content: [.text("Region capture requires valid x, y, width and height")], isError: true)
                }

                guard let image = screenshot else {
                    return .init(content: [.text("Failed to capture screenshot")], isError: true)
                }

                // Scale down the image if needed
                // NOTE(review): lockFocus presumably renders at the display's backing
                // scale, so pixel dimensions may exceed `newSize` on Retina — confirm.
                let scaledImage: NSImage
                if scale < 1.0 {
                    let newSize = NSSize(width: image.size.width * CGFloat(scale),
                                         height: image.size.height * CGFloat(scale))
                    scaledImage = NSImage(size: newSize)
                    scaledImage.lockFocus()
                    image.draw(in: NSRect(origin: .zero, size: newSize),
                               from: NSRect(origin: .zero, size: image.size),
                               operation: .copy,
                               fraction: 1.0)
                    scaledImage.unlockFocus()
                } else {
                    scaledImage = image
                }

                // Convert to bitmap representation
                guard let tiffData = scaledImage.tiffRepresentation,
                      let bitmapRep = NSBitmapImageRep(data: tiffData) else {
                    return .init(content: [.text("Failed to convert screenshot")], isError: true)
                }

                // Determine file type and save with appropriate format
                let fileExtension = (filename as NSString).pathExtension.lowercased()
                let imageData: Data?

                switch fileExtension {
                case "png":
                    imageData = bitmapRep.representation(using: .png, properties: [:])
                default:
                    // "jpg", "jpeg", and any missing or unknown extension are written as JPEG.
                    imageData = bitmapRep.representation(using: .jpeg, properties: [.compressionFactor: quality])
                }

                guard let data = imageData else {
                    return .init(content: [.text("Failed to encode image")], isError: true)
                }

                // Escape the filename up front so the success payload is valid JSON even
                // when the path contains quotes, backslashes, or control characters.
                guard let filenameLiteral = SaveScreenshotTool.jsonStringLiteral(filename) else {
                    return .init(content: [.text("Failed to encode response")], isError: true)
                }

                // Save to file. A leading "~" is expanded to the user's home directory.
                let destinationPath = (filename as NSString).expandingTildeInPath
                do {
                    try data.write(to: URL(fileURLWithPath: destinationPath))
                    return .init(content: [.text("{\"success\": true, \"filename\": \(filenameLiteral)}")], isError: false)
                } catch {
                    return .init(content: [.text("Failed to save file: \(error.localizedDescription)")], isError: true)
                }

            } catch {
                return .init(content: [.text(error.localizedDescription)], isError: true)
            }
        }
    }

    /// Returns `value` clamped to `range`, or `fallback` when it is absent or not finite.
    private static func normalized(_ value: Double?, in range: ClosedRange<Double>, fallback: Double) -> Double {
        guard let value = value, value.isFinite else { return fallback }
        return min(max(value, range.lowerBound), range.upperBound)
    }

    /// Encodes `text` as a quoted, escaped JSON string literal.
    ///
    /// Slashes are left unescaped so ordinary paths appear verbatim in the payload.
    /// Uses `JSONSerialization` writing options that require macOS 10.15 or later.
    private static func jsonStringLiteral(_ text: String) -> String? {
        guard let encoded = try? JSONSerialization.data(
            withJSONObject: text,
            options: [.fragmentsAllowed, .withoutEscapingSlashes]
        ) else {
            return nil
        }
        return String(data: encoded, encoding: .utf8)
    }
}
3 changes: 3 additions & 0 deletions Sources/swift-mcp-gui/Tools/ToolRegistry.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,8 @@ class ToolRegistry {
SendKeysTool.register(in: self)
GetScreenSizeTool.register(in: self)
GetPixelColorTool.register(in: self)
CaptureScreenTool.register(in: self)
CaptureRegionTool.register(in: self)
SaveScreenshotTool.register(in: self)
}
}
Loading