-
Notifications
You must be signed in to change notification settings - Fork 0
/
SwiftSyllables.swift
108 lines (100 loc) · 4.52 KB
/
SwiftSyllables.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
//
// SwiftSyllables.swift
// SwiftSyllables
//
// Created by Vivian Qu on 7/30/16.
//
//
import Foundation
/// Counts syllables in English text by looking each word up in a bundled
/// pronunciation dictionary and falling back to `SwiftSyllablesHeuristic`
/// for words the dictionary does not contain.
open class SwiftSyllables {

    /// Cached syllable dictionary: upper-cased word -> syllable count.
    /// Stays empty until the dictionary has been decoded successfully.
    static var syllableDict: [String: Int] = [:]

    /// Tokenizes `text` with `NSLinguisticTagger` and collects the word tokens.
    ///
    /// - Parameters:
    ///   - text: The text to tokenize.
    ///   - scheme: Raw value of the `NSLinguisticTagScheme` to enumerate with.
    /// - Returns: Tokens tagged as `.word`, in order of appearance, excluding
    ///   any token that starts with an apostrophe.
    fileprivate class func validWords(_ text: String, scheme: String) -> [String] {
        let taggerOptions: NSLinguisticTagger.Options = [.omitWhitespace, .omitPunctuation, .omitOther]
        let tagger = NSLinguisticTagger(
            tagSchemes: NSLinguisticTagger.availableTagSchemes(forLanguage: "en"),
            options: Int(taggerOptions.rawValue)
        )
        tagger.string = text

        // NSRange is measured in UTF-16 code units. `text.count` counts
        // Characters, which is shorter whenever the text contains emoji or
        // other multi-unit characters, and would cut off the end of the text.
        let nsText = text as NSString
        let fullRange = NSRange(location: 0, length: nsText.length)

        var validWords: [String] = []
        tagger.enumerateTags(in: fullRange,
                             scheme: NSLinguisticTagScheme(rawValue: scheme),
                             options: taggerOptions) { tag, tokenRange, _, _ in
            guard tag == NSLinguisticTag.word else { return }
            let token = nsText.substring(with: tokenRange)
            // Exclude words that start with an apostrophe
            guard let firstChar = token.first, firstChar != "'" else { return }
            validWords.append(token)
        }
        return validWords
    }

    /// Loads the syllable dictionary from the `CMUDict` resource bundle on
    /// first use and caches it in `syllableDict`.
    ///
    /// - Returns: The cached dictionary, which may be empty when the bundle or
    ///   its data could not be loaded or decoded; `nil` only when the `CMUDict`
    ///   bundle exists but contains no `cmudict` resource.
    fileprivate class func configureSyllableDict() -> [String: Int]? {
        // Already loaded: nothing to do.
        guard syllableDict.isEmpty else { return syllableDict }

        // Read pronunciation dictionary from bundle
        let fileName = "cmudict"
        let podBundle = Bundle(for: self)
        guard let bundleURL = podBundle.url(forResource: "CMUDict", withExtension: "bundle") else {
            // No resource bundle present: callers get the (empty) cache.
            return syllableDict
        }
        guard let bundle = Bundle(url: bundleURL) else {
            assertionFailure("Could not load the bundle")
            return syllableDict
        }
        guard let path = bundle.path(forResource: fileName, ofType: nil) else { return nil }
        guard let foundData = NSData(contentsOfFile: path) else {
            assertionFailure("Could not find data")
            return syllableDict
        }
        // NOTE(review): init(forReadingFrom:) enables requiresSecureCoding by
        // default — confirm decodeObject(forKey:) still yields the dictionary
        // on all supported OS versions.
        guard let unarchiver = try? NSKeyedUnarchiver(forReadingFrom: foundData as Data) else {
            assertionFailure("Could not create an unarchiver for the dictionary data")
            return syllableDict
        }
        let decoded = unarchiver.decodeObject(forKey: "cmudict")
        unarchiver.finishDecoding()
        if let processedDict = decoded as? [String: Int] {
            syllableDict = processedDict
        }
        return syllableDict
    }

    /// Returns the total number of syllables in `string`.
    ///
    /// Apostrophes (straight and curly) are stripped first so contractions
    /// match their dictionary entries. Each word is looked up upper-cased in
    /// the dictionary; words not found are counted by
    /// `SwiftSyllablesHeuristic.getSyllablesForWord`.
    ///
    /// - Parameter string: The text to analyze.
    /// - Returns: The summed syllable count, or 0 when the dictionary
    ///   resource is missing from the bundle.
    open class func getSyllables(_ string: String) -> Int {
        guard let syllableDict = configureSyllableDict() else { return 0 }

        // Strip apostrophes from words to check against dictionary properly
        let sanitizedString = string
            .replacingOccurrences(of: "'", with: "")
            .replacingOccurrences(of: "’", with: "")

        // Tokenize the string and read from the corpus
        let taggedWords = validWords(
            sanitizedString,
            scheme: convertFromNSLinguisticTagScheme(NSLinguisticTagScheme.tokenType)
        )

        var countSyllables = 0
        for word in taggedWords {
            if let syllables = syllableDict[word.uppercased()] {
                countSyllables += syllables
            } else {
                // Fall back to heuristic algorithm
                countSyllables += SwiftSyllablesHeuristic.getSyllablesForWord(word)
            }
        }
        return countSyllables
    }
}
/// Converts an `NSLinguisticTagScheme` to its underlying string value.
/// (Helper originally inserted by the Swift 4.2 migrator.)
///
/// - Parameter input: The tag scheme to convert.
/// - Returns: The scheme's `rawValue`.
fileprivate func convertFromNSLinguisticTagScheme(_ input: NSLinguisticTagScheme) -> String {
    let rawScheme: String = input.rawValue
    return rawScheme
}