-
Notifications
You must be signed in to change notification settings - Fork 12
/
xlate.rkt
executable file
·179 lines (151 loc) · 4.21 KB
/
xlate.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#lang racket
(require net/url
json)
(module+ test (require rackunit rackunit/text-ui))
(provide xlate t8)
;; The returned data sometimes has HTML entities in it; the functions
;; on this page translate those to regular characters.
;; From http://htmlhelp.com/reference/html40/entities/special.html
;; Maps an HTML entity name (a string, without the surrounding "&" and
;; ";") to the integer code point it stands for.  The table is
;; immutable and keyed with equal?, so string keys can be looked up
;; directly with hash-ref.
(define entity-integers-by-name
  (make-immutable-hash
   '(;; markup-significant characters
     ("quot"   . 34)
     ("amp"    . 38)
     ("lt"     . 60)
     ("gt"     . 62)
     ;; Latin letters
     ("OElig"  . 338)
     ("oelig"  . 339)
     ("Scaron" . 352)
     ("scaron" . 353)
     ("Yuml"   . 376)
     ;; modifier letters
     ("circ"   . 710)
     ("tilde"  . 732)
     ;; spaces and directional/joining controls
     ("ensp"   . 8194)
     ("emsp"   . 8195)
     ("thinsp" . 8201)
     ("zwnj"   . 8204)
     ("zwj"    . 8205)
     ("lrm"    . 8206)
     ("rlm"    . 8207)
     ;; dashes and quotation marks
     ("ndash"  . 8211)
     ("mdash"  . 8212)
     ("lsquo"  . 8216)
     ("rsquo"  . 8217)
     ("sbquo"  . 8218)
     ("ldquo"  . 8220)
     ("rdquo"  . 8221)
     ("bdquo"  . 8222)
     ;; miscellaneous symbols
     ("dagger" . 8224)
     ("Dagger" . 8225)
     ("permil" . 8240)
     ("lsaquo" . 8249)
     ("rsaquo" . 8250)
     ("euro"   . 8364))))
;; Return a copy of STR in which HTML character references have been
;; replaced by the characters they denote.
;;
;; Two forms are decoded:
;;   * named references such as "&amp;", looked up (case-sensitively)
;;     in entity-integers-by-name;
;;   * decimal numeric references such as "&#38;".
;;
;; Anything that cannot be decoded -- an unknown name, or a number that
;; is not a Unicode scalar value -- is left in the output exactly as it
;; appeared in the input.
(define (replace-html-entities str)
  ;; integer->char only accepts Unicode scalar values: it rejects the
  ;; surrogate range #xD800-#xDFFF and anything above #x10FFFF.
  (define (scalar-value? n)
    (or (<= 0 n #xD7FF)
        (<= #xE000 n #x10FFFF)))

  ;; Decode "&#NNN;" references.
  (define (numeric str)
    (regexp-replace*
     #px"&#([0-9]+);"
     str
     (lambda (whole-match digits)
       (define code (string->number digits))
       (if (scalar-value? code)
           (string (integer->char code))
           ;; Not a valid character: keep the reference text as-is
           ;; rather than raising from integer->char.
           whole-match))))

  ;; Rewrite known "&name;" references as "&#NNN;" so that `numeric`
  ;; can finish the job.  The pattern admits upper-case letters because
  ;; the table has mixed-case keys (e.g. "OElig", "Dagger").
  (define (named str)
    (regexp-replace*
     #px"&([A-Za-z]+);"
     str
     (lambda (whole-match word)
       (cond
         [(hash-ref entity-integers-by-name word #f)
          => (lambda (code) (format "&#~a;" code))]
         [else
          ;; Unknown name: put back only the text that matched, not
          ;; the entire input string.
          whole-match]))))

  ;; Named references are rewritten first and numeric ones decoded
  ;; second, each in a single pass, so "&amp;#65;" becomes "&#65;"
  ;; and is not decoded a second time.
  (numeric (named str)))
(module+ test
  ;; Unit tests for replace-html-entities.  The inputs spell out the
  ;; entity references explicitly ("&amp;", "&quot;", "&#77;", ...) so
  ;; that each case actually exercises the decoder.
  (define-test-suite replace-tests
    ;; Text without any references passes through unchanged.
    (check-equal?
     (replace-html-entities "")
     "")
    (check-equal?
     (replace-html-entities "frotz")
     "frotz")
    ;; An unknown entity name is left alone.
    (check-equal?
     (replace-html-entities "&frotz;")
     "&frotz;")
    ;; Named references.
    (check-equal?
     (replace-html-entities "&amp;")
     "&")
    (check-equal?
     (replace-html-entities "&quot;plonk&quot;")
     "\"plonk\"")
    ;; Decimal numeric references.
    (check-equal?
     (replace-html-entities "frotz&#77;")
     "frotzM")
    (check-equal?
     (replace-html-entities "frotz&#123;why not&#65;")
     "frotz{why notA")))
;; Translate text using Google's translation API v2.
;;
;; TEXT is the string to translate; FROM and TO are the source and
;; target language codes.  Performs an HTTPS GET against
;; www.googleapis.com/language/translate/v2 and returns the response
;; body parsed by read-json.
;;
;; The API key is read from the Racket preference 'google-API-key,
;; which may hold either a byte string (decoded as UTF-8) or a string.
;; If the preference is missing or has some other type, an
;; exn:fail:contract is raised that names the preference.
(define (snag text from to)
  (define api-key
    (let ([stored (get-preference 'google-API-key)])
      (cond
        [(bytes? stored) (bytes->string/utf-8 stored)]
        [(string? stored) stored]
        [else
         ;; get-preference yields #f when the preference is unset;
         ;; fail here with a message that says what is missing.
         (raise-arguments-error
          'snag
          "preference 'google-API-key must hold the API key as bytes or a string"
          "stored value" stored)])))
  (call/input-url
   (make-url
    "https"                ;; scheme
    #f                     ;; user
    "www.googleapis.com"   ;; host
    #f                     ;; port
    #t                     ;; path-absolute?
    ;; path: one parameterless path/param per segment
    (for/list ([segment (in-list '("language" "translate" "v2"))])
      (make-path/param segment '()))
    ;; query
    `([key    . ,api-key]
      [q      . ,text]
      [source . ,from]
      [target . ,to])
    #f                     ;; fragment
    )
   get-pure-port
   read-json))
(module+ test
  ;; Calls the live service with language codes it does not recognise
  ;; ("perl" and "java") and checks the message in the error reply.
  (define-test-suite snag-tests
    (check-equal?
     (let* ([reply   (snag "print \"hello, world\\n\"" "perl" "java")]
            [failure (hash-ref reply 'error)])
       (hash-ref failure 'message))
     "Invalid Value")))
;; Translate TEXT from language FROM to language TO and return a string.
;;
;; Language codes accepted for FROM and TO are listed at
;; https://developers.google.com/translate/v2/using_rest#language-params
;;
;; When the reply from `snag` has a 'data entry, the result is the
;; 'translatedText of its first translation, with HTML entities
;; decoded.  Otherwise the result is the 'message of the reply's
;; 'error entry.
(define (xlate from to text)
  (define reply (snag text from to))
  (cond
    [(hash-ref reply 'data #f)
     => (lambda (payload)
          (define first-translation
            (first (hash-ref payload 'translations)))
          (replace-html-entities
           (hash-ref first-translation 'translatedText)))]
    [else
     (define failure (hash-ref reply 'error))
     (hash-ref failure 'message)]))

;; Short alias for xlate.
(define t8 xlate)
(module+ test
  ;; Print a blank line, then the live translation of TEXT, then the
  ;; wording we expect to see, so the two can be compared by eye.
  (define (show-translation from to text expected)
    (displayln "")
    (displayln (xlate from to text))
    (displayln expected))

  (define-test-suite xlate-tests
    ;; TODO -- wrap these in an exception handler, as well as a
    ;; timeout thingy, so that we can run tests without an Internet
    ;; connection.
    (show-translation "en" "it"
                      "forty-five separate amendments"
                      "45 emendamenti separati")
    (show-translation "en" "fr"
                      "fledermaus: have I rubbed this in your face yet?"
                      "fledermaus: je n'ai frotté dans votre visage encore?")
    ;; Unknown language codes: xlate returns the service's error message.
    (show-translation "frotz" "plotz"
                      "I doubt this will get translated properly"
                      "Invalid Value"))

  ;; Every suite in this file, run in definition order.
  (define-test-suite all-tests
    replace-tests
    snag-tests
    xlate-tests)

  (run-tests all-tests 'verbose))