Skip to content

Commit

Permalink
feat: adds PDF support (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinhermawan authored Nov 3, 2024
1 parent bc58fc8 commit 975b4f1
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 20 deletions.
21 changes: 10 additions & 11 deletions Playground/Playground/ViewModels/AppViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,25 @@ final class AppViewModel {
var temperature = 0.5

init() {
if let existingApiKey = UserDefaults.standard.string(forKey: "apiKey") {
self.apiKey = existingApiKey
}

fetchModels()
configureChat()
fetchModels()
}

/// Replaces the chat client with one built from the current API key and the given headers.
/// - Parameter headers: Headers handed to `LLMChatAnthropic` when the client is created.
func setHeaders(_ headers: [String: String]) {
    let client = LLMChatAnthropic(apiKey: apiKey, headers: headers)
    chat = client
}

func saveSettings() {
UserDefaults.standard.set(apiKey, forKey: "apiKey")

if let newApiKey = UserDefaults.standard.string(forKey: "apiKey") {
self.apiKey = newApiKey
}

configureChat()
}

private func configureChat() {
chat = LLMChatAnthropic(apiKey: apiKey, headers: ["anthropic-beta": "prompt-caching-2024-07-31"])
if let apiKey = UserDefaults.standard.string(forKey: "apiKey") {
self.apiKey = apiKey
}

chat = LLMChatAnthropic(apiKey: apiKey)
}

private func fetchModels() {
Expand Down
8 changes: 7 additions & 1 deletion Playground/Playground/Views/AppView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@ struct AppView: View {
NavigationLink("Tool Use") {
ToolUseView()
}

}

Section("Beta") {
NavigationLink("Prompt Caching") {
PromptCachingView()
}

NavigationLink("PDF Support") {
PDFSupportView()
}
}
}
}
Expand Down
125 changes: 125 additions & 0 deletions Playground/Playground/Views/PDFSupportView.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//
// PDFSupportView.swift
// Playground
//
// Created by Kevin Hermawan on 11/3/24.
//

import SwiftUI
import LLMChatAnthropic

/// A playground screen that sends a document plus a text prompt to the selected
/// model and shows the reply together with the reported token usage.
///
/// Requests from this screen are made through the shared `AppViewModel.chat`
/// client, which is rebuilt with the PDF beta header when the screen appears
/// and again whenever the preferences sheet is dismissed.
struct PDFSupportView: View {
    /// Header installed on the chat client for requests made from this screen.
    private static let betaHeaders = ["anthropic-beta": "pdfs-2024-09-25"]

    @Environment(AppViewModel.self) private var viewModel
    @State private var isPreferencesPresented: Bool = false

    /// Document passed to `.document(_:)`; defaults to a PDF URL.
    @State private var document: String = "https://arxiv.org/pdf/1706.03762"
    @State private var prompt: String = "Explain this document"

    @State private var response: String = ""
    @State private var inputTokens: Int = 0
    @State private var outputTokens: Int = 0
    @State private var totalTokens: Int = 0

    var body: some View {
        VStack {
            Form {
                Section("Prompts") {
                    TextField("Document", text: $document)
                    TextField("Prompt", text: $prompt)
                }

                Section("Response") {
                    Text(response)
                }

                UsageSection(inputTokens: inputTokens, outputTokens: outputTokens, totalTokens: totalTokens)
            }

            VStack {
                SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
            }
        }
        .onAppear(perform: applyBetaHeaders)
        .toolbar {
            ToolbarItem(placement: .principal) {
                NavigationTitle("PDF Support")
            }

            ToolbarItem(placement: .primaryAction) {
                Button("Preferences", systemImage: "gearshape", action: { isPreferencesPresented.toggle() })
            }
        }
        // `AppViewModel.saveSettings()` rebuilds the chat client without any headers.
        // NOTE(review): presumably PreferencesView triggers that; re-applying the beta
        // header on dismissal keeps this screen working either way.
        .sheet(isPresented: $isPreferencesPresented, onDismiss: applyBetaHeaders) {
            PreferencesView()
        }
    }

    /// Rebuilds the shared chat client with the beta header this screen needs.
    private func applyBetaHeaders() {
        viewModel.setHeaders(Self.betaHeaders)
    }

    /// Builds the conversation: the system prompt followed by a user message
    /// carrying the text prompt and the document.
    private func makeMessages() -> [ChatMessage] {
        [
            ChatMessage(role: .system, content: viewModel.systemPrompt),
            ChatMessage(role: .user, content: [.text(prompt), .document(document)])
        ]
    }

    /// Stores the token counts reported for the current request.
    private func recordUsage(inputTokens: Int, outputTokens: Int, totalTokens: Int) {
        self.inputTokens = inputTokens
        self.outputTokens = outputTokens
        self.totalTokens = totalTokens
    }

    /// Sends the prompt and document as a single request and displays the first
    /// content item's text and the usage. Errors are printed to the console.
    private func onSend() {
        clear()

        let messages = makeMessages()
        let options = ChatOptions(temperature: viewModel.temperature)

        Task {
            do {
                let completion = try await viewModel.chat.send(model: viewModel.selectedModel, messages: messages, options: options)

                if let text = completion.content.first?.text {
                    self.response = text
                }

                if let usage = completion.usage {
                    self.recordUsage(inputTokens: usage.inputTokens, outputTokens: usage.outputTokens, totalTokens: usage.totalTokens)
                }
            } catch {
                print(String(describing: error))
            }
        }
    }

    /// Sends the prompt and document as a streaming request, appending each text
    /// delta to the response as it arrives. Errors are printed to the console.
    private func onStream() {
        clear()

        let messages = makeMessages()
        let options = ChatOptions(temperature: viewModel.temperature)

        Task {
            do {
                for try await chunk in viewModel.chat.stream(model: viewModel.selectedModel, messages: messages, options: options) {
                    if let text = chunk.delta?.text {
                        self.response += text
                    }

                    if let usage = chunk.usage {
                        self.recordUsage(inputTokens: usage.inputTokens, outputTokens: usage.outputTokens, totalTokens: usage.totalTokens)
                    }
                }
            } catch {
                print(String(describing: error))
            }
        }
    }

    /// Resets the displayed response and token counters before a new request.
    private func clear() {
        response = ""
        recordUsage(inputTokens: 0, outputTokens: 0, totalTokens: 0)
    }
}
3 changes: 3 additions & 0 deletions Playground/Playground/Views/PromptCachingView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ struct PromptCachingView: View {
SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
}
}
.onAppear {
viewModel.setHeaders(["anthropic-beta": "prompt-caching-2024-07-31"])
}
.toolbar {
ToolbarItem(placement: .principal) {
NavigationTitle("Prompt Caching")
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,31 @@ let task = Task {

To learn more about prompt caching, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).

#### PDF Support (Beta)

```swift
let chat = LLMChatAnthropic(
apiKey: "<YOUR_ANTHROPIC_API_KEY>",
headers: ["anthropic-beta": "pdfs-2024-09-25"] // Required
)

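// A PDF URL (http/https) or a base64-encoded PDF string
let document = "https://arxiv.org/pdf/1706.03762"

let messages = [
ChatMessage(role: .user, content: [.text("Explain this document"), .document(document)])
]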

let task = Task {
do {
let completion = try await chat.send(model: "claude-3-5-sonnet", messages: messages)

print(completion.content.first?.text ?? "No response")
} catch {
print(String(describing: error))
}
}
```

To learn more about PDF support, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support).

### Error Handling

`LLMChatAnthropic` provides structured error handling through the `LLMChatAnthropicError` enum. This enum contains three cases that represent different types of errors you might encounter:
Expand Down
34 changes: 26 additions & 8 deletions Sources/LLMChatAnthropic/ChatMessage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ public struct ChatMessage: Encodable, Sendable {
/// The role of the participant in the chat conversation.
public let role: Role

/// The content of the message, which can be text or image.
/// The content of the message, which can be text, image, or document.
public let content: [Content]

/// The cache control settings for the message. Only applicable when the role is `system`.
/// The cache control settings for the message.
public var cacheControl: CacheControl?

/// An enum that represents the role of a participant in the chat.
Expand All @@ -33,6 +33,9 @@ public struct ChatMessage: Encodable, Sendable {
/// A case that represents image content.
case image(String)

/// A case that represents document content.
case document(String)

public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)

Expand All @@ -45,7 +48,7 @@ public struct ChatMessage: Encodable, Sendable {
var sourceContainer = container.nestedContainer(keyedBy: SourceCodingKeys.self, forKey: .source)

if imageString.hasPrefix("http://") || imageString.hasPrefix("https://") {
let (base64String, mediaType) = Content.convertImageUrlToBase64(url: imageString)
let (base64String, mediaType) = Content.convertFileToBase64(url: imageString)
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(base64String, forKey: .data)
Expand All @@ -55,6 +58,20 @@ public struct ChatMessage: Encodable, Sendable {
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(imageString, forKey: .data)
}
case .document(let documentString):
try container.encode("document", forKey: .type)
var sourceContainer = container.nestedContainer(keyedBy: SourceCodingKeys.self, forKey: .source)

if documentString.hasPrefix("http://") || documentString.hasPrefix("https://") {
let (base64String, mediaType) = Content.convertFileToBase64(url: documentString)
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(base64String, forKey: .data)
} else {
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode("application/pdf", forKey: .mediaType)
try sourceContainer.encode(documentString, forKey: .data)
}
}
}

Expand All @@ -66,13 +83,13 @@ public struct ChatMessage: Encodable, Sendable {
case type, mediaType = "media_type", data
}

private static func convertImageUrlToBase64(url: String) -> (String, String) {
guard let imageUrl = URL(string: url), let imageData = try? Data(contentsOf: imageUrl) else {
private static func convertFileToBase64(url: String) -> (String, String) {
guard let fileUrl = URL(string: url), let fileData = try? Data(contentsOf: fileUrl) else {
return ("", "")
}

let base64String = imageData.base64EncodedString()
let mediaType = detectMediaType(from: imageData)
let base64String = fileData.base64EncodedString()
let mediaType = detectMediaType(from: fileData)

return (base64String, mediaType)
}
Expand All @@ -96,6 +113,8 @@ public struct ChatMessage: Encodable, Sendable {
return "image/gif"
} else if bytes.starts(with: [0x52, 0x49, 0x46, 0x46]) && String(data: data.subdata(in: 8..<12), encoding: .ascii) == "WEBP" {
return "image/webp"
} else if bytes.starts(with: [0x25, 0x50, 0x44, 0x46]) {
return "application/pdf"
} else {
return ""
}
Expand All @@ -120,7 +139,6 @@ public struct ChatMessage: Encodable, Sendable {
}
}


/// Creates a new instance of ``ChatMessage``.
/// - Parameters:
/// - role: The role of the participant.
Expand Down
25 changes: 25 additions & 0 deletions Sources/LLMChatAnthropic/Documentation.docc/Documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,31 @@ let task = Task {

To learn more about prompt caching, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).

#### PDF Support (Beta)

```swift
let chat = LLMChatAnthropic(
apiKey: "<YOUR_ANTHROPIC_API_KEY>",
headers: ["anthropic-beta": "pdfs-2024-09-25"] // Required
)

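// A PDF URL (http/https) or a base64-encoded PDF string
let document = "https://arxiv.org/pdf/1706.03762"

let messages = [
ChatMessage(role: .user, content: [.text("Explain this document"), .document(document)])
]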

let task = Task {
do {
let completion = try await chat.send(model: "claude-3-5-sonnet", messages: messages)

print(completion.content.first?.text ?? "No response")
} catch {
print(String(describing: error))
}
}
```

To learn more about PDF support, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support).

### Error Handling

``LLMChatAnthropic`` provides structured error handling through the ``LLMChatAnthropicError`` enum. This enum contains three cases that represent different types of errors you might encounter:
Expand Down

0 comments on commit 975b4f1

Please sign in to comment.