-
-
Notifications
You must be signed in to change notification settings - Fork 4
/
prettifier.go
278 lines (259 loc) · 5.97 KB
/
prettifier.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
package gotestdox
import (
"fmt"
"io"
"os"
"strings"
"unicode"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// Prettify takes a string input representing the name of a Go test, and
// attempts to turn it into a readable sentence, by replacing camel-case
// transitions and underscores with spaces.
//
// input is expected to be a valid Go test name, as produced by 'go test
// -json'. For example, input might be the string:
//
// TestFoo/has_well-formed_output
//
// Here, the parent test is TestFoo, and this data is about a subtest whose
// name is 'has well-formed output'. Go's [testing] package replaces spaces in
// subtest names with underscores, and unprintable characters with the
// equivalent Go literal.
//
// Prettify does its best to reverse this transformation, yielding (something
// close to) the original subtest name. For example:
//
// Foo has well-formed output
//
// # Multiword function names
//
// Because Go function names are often in camel-case, there's an ambiguity in
// parsing a test name like this:
//
// TestHandleInputClosesInputAfterReading
//
// We can see that this is about a function named HandleInput, but Prettify has
// no way of knowing that. Without this information, it would produce:
//
// Handle input closes input after reading
//
// To give it a hint, we can add an underscore after the name of the function:
//
// TestHandleInput_ClosesInputAfterReading
//
// This will be interpreted as marking the end of a multiword function name:
//
// HandleInput closes input after reading
//
// # Debugging
//
// If the GOTESTDOX_DEBUG environment variable is set, Prettify will output
// (copious) debug information to the [DebugWriter] stream, elaborating on its
// decisions.
func Prettify(input string) string {
var prefix string
p := &prettifier{
words: []string{},
debug: io.Discard,
}
if os.Getenv("GOTESTDOX_DEBUG") != "" {
p.debug = DebugWriter
}
p.log("input:", input)
if strings.HasPrefix(input, "Fuzz") {
input = strings.TrimPrefix(input, "Fuzz")
prefix = "[fuzz] "
}
p.input = []rune(strings.TrimPrefix(input, "Test"))
for state := betweenWords; state != nil; {
state = state(p)
}
result := prefix + strings.Join(p.words, " ")
p.log(fmt.Sprintf("result: %q", result))
return result
}
// prettifier holds the scanning state used while turning a test name into
// words.
//
// Heavily inspired by Rob Pike's talk on 'Lexical Scanning in Go':
// https://www.youtube.com/watch?v=HxaD_trXwRE
type prettifier struct {
	// debug receives everything written by log.
	debug io.Writer
	// input is the test name being scanned, as runes.
	input []rune
	// start is the index in input where the pending word begins.
	start int
	// pos is the index in input of the next rune to be read.
	pos int
	// words collects the words emitted so far.
	words []string
	// inSubTest is set once a '/' has been scanned.
	inSubTest bool
	// seenUnderscore is set once an underscore has been taken as the
	// end-of-function-name marker, so later underscores are not.
	seenUnderscore bool
}
// backup moves the read position back by one rune.
func (p *prettifier) backup() {
	p.pos -= 1
}
// skip discards the pending span by moving start up to the current read
// position, so the next word begins at pos.
func (p *prettifier) skip() {
p.start = p.pos
}
// prev returns the rune immediately before the read position. It indexes
// input directly, so it must only be called once at least one rune has been
// consumed.
func (p *prettifier) prev() rune {
	last := p.pos - 1
	return p.input[last]
}
// next returns the rune at the read position (eof when the input is
// exhausted) and then advances the position by one. The position advances
// even when eof is returned.
func (p *prettifier) next() rune {
	current := p.peek()
	p.pos += 1
	return current
}
// peek returns the rune at the read position without consuming it, or eof
// if the position is at or beyond the end of the input.
func (p *prettifier) peek() rune {
	if p.pos < len(p.input) {
		return p.input[p.pos]
	}
	return eof
}
// inInitialism reports whether the pending span input[start:pos] looks like
// part of an initialism: it contains no lower-case letter other than 's'.
// An empty span counts as an initialism.
//
// A span whose second rune is 's' is never an initialism; this keeps words
// such as 'Is' and 'As' from being mistaken for one.
func (p *prettifier) inInitialism() bool {
	second := p.start + 1
	if second < len(p.input) && p.input[second] == 's' {
		return false
	}
	for _, r := range p.input[p.start:p.pos] {
		switch {
		case r == 's':
			// A lower-case 's' is tolerated.
		case unicode.IsLower(r):
			return false
		}
	}
	return true
}
// emit turns the pending span input[start:pos] into a word, appends it to
// words, and then moves start up to pos ready for the next word.
//
// Capitalisation rules, in order of precedence:
//   - the first word is title-cased (with NoLower, so the remaining letters
//     are not forced to lower case);
//   - a word consisting of a single rune is lower-cased;
//   - a word that looks like an initialism is left untouched;
//   - anything else is lower-cased.
func (p *prettifier) emit() {
	word := string(p.input[p.start:p.pos])
	switch {
	case len(p.words) == 0:
		// This is the first word, capitalise it
		word = cases.Title(language.Und, cases.NoLower).String(word)
	case p.pos-p.start == 1:
		// Single letter word such as A. The length is counted in runes,
		// not bytes, so that a lone non-ASCII letter is treated the same
		// way as an ASCII one.
		word = cases.Lower(language.Und).String(word)
	case p.inInitialism():
		// leave capitalisation as is
	default:
		word = cases.Lower(language.Und).String(word)
	}
	p.log(fmt.Sprintf("emit %q", word))
	p.words = append(p.words, word)
	p.skip()
}
// multiWordFunction collapses every word emitted so far into one camel-case
// identifier (each word title-cased, then concatenated) and makes that the
// only word. It also sets seenUnderscore.
func (p *prettifier) multiWordFunction() {
	titler := cases.Title(language.Und, cases.NoLower)
	var name strings.Builder
	for _, word := range p.words {
		name.WriteString(titler.String(word))
	}
	fname := name.String()
	p.log("multiword function", fname)
	p.words = []string{fname}
	p.seenUnderscore = true
}
// log writes args to the debug writer as one space-separated line.
func (p *prettifier) log(args ...interface{}) {
	// Debug output is best-effort, so any write error is ignored.
	_, _ = fmt.Fprintln(p.debug, args...)
}
// logState logs a one-line trace for the named state: the pending span
// input[start:pos] in brackets, followed by the rune about to be read, or
// "EOF" when the input is exhausted.
func (p *prettifier) logState(stateName string) {
	upcoming := "EOF"
	if p.pos < len(p.input) {
		upcoming = string(p.input[p.pos])
	}
	pending := string(p.input[p.start:p.pos])
	p.log(fmt.Sprintf("%s: [%s] -> %s", stateName, pending, upcoming))
}
// stateFunc is a single state of the scanner. It does its work on the
// prettifier and returns the state to run next, or nil to stop.
type stateFunc func(*prettifier) stateFunc
// betweenWords is the state for being outside a word. It consumes '_' and
// '/' separators, discarding them, until it reads some other rune, which
// becomes the first rune of the next word and is handed over to inWord.
// It returns nil when the input runs out.
func betweenWords(p *prettifier) stateFunc {
	for {
		p.logState("betweenWords")
		r := p.next()
		if r == eof {
			return nil
		}
		if r != '_' && r != '/' {
			return inWord
		}
		// A separator: drop it and keep looking.
		p.skip()
	}
}
// inWord is the state for being inside a word. It looks at the next rune
// without consuming it and decides whether that rune extends the pending
// word or ends it.
//
// It returns betweenWords after emitting a word, or nil once the input is
// exhausted. In one case (a lower-case letter following an initialism) it
// emits the word and carries on scanning without leaving the state.
func inWord(p *prettifier) stateFunc {
	for {
		p.logState("inWord")
		r := p.peek()
		switch {
		case r == eof:
			p.emit()
			return nil

		case r == '_':
			p.emit()
			// The first underscore in the parent test name is the special
			// 'end of function name' marker.
			if !p.seenUnderscore && !p.inSubTest {
				p.multiWordFunction()
			}
			return betweenWords

		case r == '/':
			p.emit()
			p.inSubTest = true
			return betweenWords

		case unicode.IsUpper(r):
			// A capital extends the word inside a hyphenated word or an
			// initialism; otherwise it starts a new word.
			extends := p.prev() == '-' || p.inInitialism()
			if !extends {
				p.emit()
				return betweenWords
			}
			p.next()

		case unicode.IsDigit(r):
			// A digit extends the word in a multi-digit number, in a
			// negative number, in a phrase like 'n=3', or in an initialism.
			last := p.prev()
			extends := unicode.IsDigit(last) || last == '-' || last == '=' || p.inInitialism()
			if !extends {
				p.emit()
				return betweenWords
			}
			p.next()

		default:
			// Keep going if the word is still too short, if it is a quoted
			// word, or if it is not an initialism.
			tooShort := p.pos-p.start <= 1
			if tooShort || p.input[p.start] == '\'' || !p.inInitialism() {
				p.next()
				continue
			}
			// The pending word is an initialism from here on.
			if r == 's' {
				// A trailing 's' belongs to the initialism.
				p.next()
				p.emit()
				return betweenWords
			}
			// The last capital read starts a new word: give it back and
			// emit what came before it.
			p.backup()
			p.emit()
		}
	}
}
// eof is the sentinel rune that peek returns when the read position is at
// or past the end of the input.
const eof = rune(0)
// DebugWriter is the stream that receives debug information when debugging
// is enabled. It defaults to [os.Stderr] and may be reassigned to redirect
// the output.
var DebugWriter = io.Writer(os.Stderr)