diff --git a/README.md b/README.md index d59e2ea..2a884de 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,24 @@ let output = try Chroma.highlight(code, language: .swift) print(output) ``` +### Inferring Language IDs + +Infer a language from file names, paths, or URLs: + +```swift +let language = LanguageID.fromFileName("MyFile.swift") +let output = try Chroma.highlight(code, language: language) +``` + +`language` is optional; passing `nil` skips syntax highlighting and returns the original text. + +Fall back to plain text when the language is unavailable: + +```swift +let options = HighlightOptions(missingLanguageHandling: .fallbackToPlainText) +let output = try Chroma.highlight(code, language: "unknown", options: options) +``` + ### Themes Chroma includes two built-in themes: diff --git a/Sources/Chroma/BenchmarkSupport.swift b/Sources/Chroma/BenchmarkSupport.swift index cf956f1..e502c70 100644 --- a/Sources/Chroma/BenchmarkSupport.swift +++ b/Sources/Chroma/BenchmarkSupport.swift @@ -1,3 +1,5 @@ +import Foundation + @_spi(Benchmarking) public struct TokenBuffer { fileprivate let tokens: [Token] @@ -9,9 +11,14 @@ public struct TokenBuffer { public enum BenchmarkSupport { public static func tokenize( _ code: String, - language: LanguageID, + language: LanguageID?, registry: LanguageRegistry = .builtIn() ) throws -> TokenBuffer { + guard let language else { + let ns = code as NSString + return TokenBuffer(tokens: [Token(kind: .plain, range: NSRange(location: 0, length: ns.length))]) + } + guard let language = registry.language(for: language) else { throw Highlighter.Error.languageNotFound(language) } @@ -22,10 +29,15 @@ public enum BenchmarkSupport { public static func tokenize( _ code: String, - language: LanguageID, + language: LanguageID?, registry: LanguageRegistry = .builtIn(), metrics: inout TokenizerMetrics ) throws -> TokenBuffer { + guard let language else { + let ns = code as NSString + return TokenBuffer(tokens: [Token(kind: .plain, range: 
NSRange(location: 0, length: ns.length))]) + } + guard let language = registry.language(for: language) else { throw Highlighter.Error.languageNotFound(language) } diff --git a/Sources/Chroma/Chroma.swift b/Sources/Chroma/Chroma.swift index c310dda..6693259 100644 --- a/Sources/Chroma/Chroma.swift +++ b/Sources/Chroma/Chroma.swift @@ -8,7 +8,7 @@ public enum Chroma { /// Convenience helper for one-off highlighting using `Chroma.shared`. public static func highlight( _ code: String, - language: LanguageID, + language: LanguageID?, options: HighlightOptions = .init() ) throws -> String { try shared.highlight(code, language: language, options: options) @@ -16,14 +16,14 @@ public enum Chroma { public static func tokenize( _ code: String, - language: LanguageID + language: LanguageID? ) throws -> [Token] { try shared.tokenize(code, language: language) } public static func tokenize( _ code: String, - language: LanguageID, + language: LanguageID?, emit: (Token) -> Void ) throws { try shared.tokenize(code, language: language, emit: emit) diff --git a/Sources/Chroma/HighlightOptions.swift b/Sources/Chroma/HighlightOptions.swift index 69236a2..292097a 100644 --- a/Sources/Chroma/HighlightOptions.swift +++ b/Sources/Chroma/HighlightOptions.swift @@ -22,6 +22,11 @@ public struct LineNumberOptions: Equatable { } public struct HighlightOptions: Equatable { + public enum MissingLanguageHandling: Equatable { + case error + case fallbackToPlainText + } + public enum DiffCodeStyle: Equatable { case syntax case plain @@ -44,6 +49,7 @@ public struct HighlightOptions: Equatable { } public var theme: Theme? + public var missingLanguageHandling: MissingLanguageHandling public var diff: DiffHighlight public var highlightLines: LineRangeSet public var lineNumbers: LineNumberOptions @@ -57,12 +63,14 @@ public struct HighlightOptions: Equatable { public init( theme: Theme? 
= nil, + missingLanguageHandling: MissingLanguageHandling = .error, diff: DiffHighlight = .auto(), highlightLines: LineRangeSet = .init(), lineNumbers: LineNumberOptions = .none, indent: Int = 0 ) { self.theme = theme + self.missingLanguageHandling = missingLanguageHandling self.diff = diff self.highlightLines = highlightLines self.lineNumbers = lineNumbers diff --git a/Sources/Chroma/Highlighter.swift b/Sources/Chroma/Highlighter.swift index b8aac54..5e047e7 100644 --- a/Sources/Chroma/Highlighter.swift +++ b/Sources/Chroma/Highlighter.swift @@ -16,10 +16,17 @@ public final class Highlighter { public func highlight( _ code: String, - language: LanguageID, + language: LanguageID?, options: HighlightOptions = .init() ) throws -> String { - guard let language = registry.language(for: language) else { + guard let language else { + return code + } + + guard let definition = registry.language(for: language) else { + if options.missingLanguageHandling == .fallbackToPlainText { + return code + } throw Error.languageNotFound(language) } @@ -30,11 +37,11 @@ public final class Highlighter { let tokens = [Token(kind: .plain, range: NSRange(location: 0, length: ns.length))] return renderer.render(code: code, tokens: tokens) } - if isMarkdown(language.id) { - let tokenizer = MarkdownTokenizer(rules: language.rules, registry: registry) + if isMarkdown(definition.id) { + let tokenizer = MarkdownTokenizer(rules: definition.rules, registry: registry) return renderer.render(code: code, tokens: tokenizer.tokenize(code)) } - let tokenizer = RegexTokenizer(rules: language.rules, fastPath: language.fastPath) + let tokenizer = RegexTokenizer(rules: definition.rules, fastPath: definition.fastPath) return renderer.render(code: code) { emit in tokenizer.scan(code, emit: emit) } @@ -42,35 +49,46 @@ public final class Highlighter { public func tokenize( _ code: String, - language: LanguageID + language: LanguageID? 
) throws -> [Token] { - guard let language = registry.language(for: language) else { + guard let language else { + let ns = code as NSString + return [Token(kind: .plain, range: NSRange(location: 0, length: ns.length))] + } + + guard let definition = registry.language(for: language) else { throw Error.languageNotFound(language) } - if isMarkdown(language.id) { - let tokenizer = MarkdownTokenizer(rules: language.rules, registry: registry) + if isMarkdown(definition.id) { + let tokenizer = MarkdownTokenizer(rules: definition.rules, registry: registry) return tokenizer.tokenize(code) } - let tokenizer = RegexTokenizer(rules: language.rules, fastPath: language.fastPath) + let tokenizer = RegexTokenizer(rules: definition.rules, fastPath: definition.fastPath) return tokenizer.tokenize(code) } public func tokenize( _ code: String, - language: LanguageID, + language: LanguageID?, emit: (Token) -> Void ) throws { - guard let language = registry.language(for: language) else { + guard let language else { + let ns = code as NSString + emit(Token(kind: .plain, range: NSRange(location: 0, length: ns.length))) + return + } + + guard let definition = registry.language(for: language) else { throw Error.languageNotFound(language) } - if isMarkdown(language.id) { - let tokenizer = MarkdownTokenizer(rules: language.rules, registry: registry) + if isMarkdown(definition.id) { + let tokenizer = MarkdownTokenizer(rules: definition.rules, registry: registry) tokenizer.scan(code, emit: emit) return } - let tokenizer = RegexTokenizer(rules: language.rules, fastPath: language.fastPath) + let tokenizer = RegexTokenizer(rules: definition.rules, fastPath: definition.fastPath) tokenizer.scan(code, emit: emit) } diff --git a/Sources/Chroma/LanguageID.swift b/Sources/Chroma/LanguageID.swift index 7554d35..a86bc3e 100644 --- a/Sources/Chroma/LanguageID.swift +++ b/Sources/Chroma/LanguageID.swift @@ -1,3 +1,5 @@ +import Foundation + public struct LanguageID: Hashable, RawRepresentable, 
ExpressibleByStringLiteral, CustomStringConvertible { public var rawValue: String @@ -59,3 +61,106 @@ public extension LanguageID { static let dockerfile: Self = "dockerfile" static let makefile: Self = "makefile" } + +public extension LanguageID { + /// Infers a language from a file name or returns `nil` when no match is found. + static func fromFileName(_ fileName: String) -> LanguageID? { + let trimmed = fileName.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + + let name = trimmed + .split(whereSeparator: { $0 == "/" || $0 == "\\" }) + .last + .map(String.init) ?? trimmed + + let lowercased = name.lowercased() + if let direct = fileNameLookup[lowercased] { + return direct + } + if lowercased.hasPrefix("dockerfile.") { + return .dockerfile + } + if lowercased.hasPrefix("makefile.") { + return .makefile + } + + guard let ext = fileExtension(from: lowercased), + let language = extensionLookup[ext] else { + return nil + } + return language + } + + /// Infers a language from a file path or returns `nil` when no match is found. + static func fromFilePath(_ path: String) -> LanguageID? { + fromFileName(URL(fileURLWithPath: path).lastPathComponent) + } + + /// Infers a language from a file URL or returns `nil` when no match is found. + static func fromURL(_ url: URL) -> LanguageID? 
{ + fromFileName(url.lastPathComponent) + } +} + +private extension LanguageID { + static let fileNameLookup: [String: LanguageID] = [ + "makefile": .makefile, + "gnumakefile": .makefile, + "dockerfile": .dockerfile, + ] + + static let extensionLookup: [String: LanguageID] = [ + "swift": .swift, + "m": .objectiveC, + "mm": .objectiveC, + "c": .c, + "cpp": .cpp, + "cxx": .cxx, + "cc": .cpp, + "c++": .cplusplus, + "hpp": .cpp, + "hxx": .cxx, + "hh": .cpp, + "js": .js, + "jsx": .jsx, + "ts": .ts, + "tsx": .tsx, + "py": .py, + "rb": .rb, + "go": .go, + "rs": .rust, + "kt": .kotlin, + "kts": .kotlin, + "java": .java, + "cs": .cs, + "php": .php, + "dart": .dart, + "lua": .lua, + "sh": .sh, + "bash": .bash, + "zsh": .zsh, + "sql": .sql, + "css": .css, + "scss": .scss, + "sass": .sass, + "less": .less, + "html": .html, + "htm": .html, + "xml": .xml, + "json": .json, + "yaml": .yaml, + "yml": .yml, + "toml": .toml, + "md": .md, + "markdown": .markdown, + "dockerfile": .dockerfile, + "mk": .makefile, + ] + + static func fileExtension(from name: String) -> String? 
{ + guard let dotIndex = name.lastIndex(of: ".") else { return nil } + let nextIndex = name.index(after: dotIndex) + guard nextIndex < name.endIndex else { return nil } + return String(name[nextIndex...]) + } +} diff --git a/Tests/ChromaTests/ChromaFacadeTests.swift b/Tests/ChromaTests/ChromaFacadeTests.swift index eabd176..7e0f9c6 100644 --- a/Tests/ChromaTests/ChromaFacadeTests.swift +++ b/Tests/ChromaTests/ChromaFacadeTests.swift @@ -26,4 +26,22 @@ struct ChromaFacadeTests { _ = try Chroma.highlight("value", language: "unknown") } } + + @Test("Chroma.highlight falls back to plain text when configured") + func unknownLanguageFallback() throws { + let code = "value" + let output = try Chroma.highlight( + code, + language: "unknown", + options: .init(missingLanguageHandling: .fallbackToPlainText) + ) + #expect(output == code) + } + + @Test("Chroma.highlight returns plain text when language is nil") + func nilLanguageReturnsPlainText() throws { + let code = "let value = 1" + let output = try Chroma.highlight(code, language: nil) + #expect(output == code) + } } diff --git a/Tests/ChromaTests/HighlightOptionsTests.swift b/Tests/ChromaTests/HighlightOptionsTests.swift index 4ba8494..007a580 100644 --- a/Tests/ChromaTests/HighlightOptionsTests.swift +++ b/Tests/ChromaTests/HighlightOptionsTests.swift @@ -7,6 +7,7 @@ struct HighlightOptionsTests { func defaults() { let options = HighlightOptions() #expect(options.theme == nil) + #expect(options.missingLanguageHandling == .error) #expect(options.diff == .auto()) #expect(options.highlightLines == LineRangeSet()) #expect(options.lineNumbers == .none) diff --git a/Tests/ChromaTests/IdentifierTests.swift b/Tests/ChromaTests/IdentifierTests.swift index 0faa7ad..7eb55c3 100644 --- a/Tests/ChromaTests/IdentifierTests.swift +++ b/Tests/ChromaTests/IdentifierTests.swift @@ -1,3 +1,4 @@ +import Foundation import Testing @testable import Chroma @@ -16,4 +17,24 @@ struct IdentifierTests { #expect(kind.rawValue == "keyword") 
#expect(kind.description == "keyword") } + + @Test("LanguageID infers from file names") + func languageIDFromFileName() { + #expect(LanguageID.fromFileName("MyFile.swift") == .swift) + #expect(LanguageID.fromFileName("hello.kt") == .kotlin) + #expect(LanguageID.fromFileName("Dockerfile") == .dockerfile) + #expect(LanguageID.fromFileName("Dockerfile.dev") == .dockerfile) + #expect(LanguageID.fromFileName("Makefile") == .makefile) + #expect(LanguageID.fromFileName("Makefile.local") == .makefile) + #expect(LanguageID.fromFileName("unknown.ext") == nil) + } + + @Test("LanguageID infers from paths and URLs") + func languageIDFromPathAndURL() { + #expect(LanguageID.fromFilePath("/tmp/project/Foo.tsx") == .tsx) + #expect(LanguageID.fromFilePath("/tmp/project/GNUmakefile") == .makefile) + + let url = URL(fileURLWithPath: "/tmp/project/App.jsx") + #expect(LanguageID.fromURL(url) == .jsx) + } } diff --git a/Tests/ChromaTests/Support/TestSupport.swift b/Tests/ChromaTests/Support/TestSupport.swift index 2a48798..960ec0f 100644 --- a/Tests/ChromaTests/Support/TestSupport.swift +++ b/Tests/ChromaTests/Support/TestSupport.swift @@ -88,7 +88,7 @@ func assertGolden( func highlightWithTestTheme( _ code: String, - language: LanguageID, + language: LanguageID?, registry: LanguageRegistry = .builtIn(), options: HighlightOptions = .init() ) throws -> String {