-
Notifications
You must be signed in to change notification settings - Fork 2
/
tester.lisp
402 lines (373 loc) · 16.5 KB
/
tester.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
(in-package :lisp)
;;; *********************************************
;;; An engine to test interpreters or compilers
;;; Christian Queinnec
;;; \'Ecole Polytechnique & INRIA-Rocquencourt
;;; *********************************************
;;; This file contains the basic facility to easily run an interpreter
;;; defining a language. It can ask the user for expressions to be
;;; evaluated and print their results or take expressions from a file,
;;; evaluate them and compare their value to the expected value. If an
;;; error occurs then the test suite is aborted.
;;; A validation suite is a sequence of expressions followed by their
;;; expected results. The expression is evaluated in the language being
;;; tested then its result is compared to the expected result.
;;; An example of a test suite is:
;;; /------------------------
;;; | (car '(a b))
;;; | a
;;; | (car (list))
;;; | *** ; means that an error is expected
;;; | (oblist)
;;; | --- ; means that no error is expected but the value is unimportant
;;; It is an error of course to expect an error if no error occurrs.
;;; This package may use the tester-error function in case of an
;;; internal error. It is not defined here since errors are not
;;; portable. You must have a location to define that variable for
;;; some compilers. Two cases may trigger this error: a suite test
;;; missing an expecting result or a (toplevel) form returning an
;;; unprinted value. Other errors are caught within engine-tester.
;;; The testing engine can be parameterized in various ways.
;;; Apart the engine itself, two functions are offered that ease its use.
;;; These are the `interpreter' and `suite-test' functions:
;;; This is the read function to use. It is possible to tailor this
;;; function to suit particular needs (for instance when generating
;;; tests instead of reading them out of a file).
(defparameter +eof+ (cons "end of" "file"))
(alias0 tester-read (lambda (&optional (stream *standard-input*))
(read stream nil +eof+)))
(defun eof-object? (obj)
(eq obj +eof+))
;;; This is an internal variable that, in real Scheme, may be true or
;;; false without perceivable difference. It does make a difference
;;; however in non compliant Scheme systems. If your Scheme only
;;; offers dynamic-extent continuations, you must set it to false (in
;;; that case, all continuations created by tester.scm will be used in
;;; their dynamic extent). Alternatively, in Scheme->C which seems to
;;; have problems to garbage collect continuations, set it to true to
;;; only take one continuation and use it out of its dynamic extent.
(defparameter tester-take-only-one-continuation +false+)
;;; This is the call/cc to use. It is possible to use dynamic-extent
;;; continuations instead. For instance, in Bigloo, you may use:
;;; (set! tester-call/cc (lambda (f)
;;; (bind-exit (k) (f k))))
;;; But don't forget to define tester-take-only-one-continuation to false.
(alias0 tester-call/cc call/cc)
;;; The interpreter function takes four arguments:
;;; -- an input prompt. This string will be printed whenever the
;;; interpreter wants to read an expression to evaluate.
;;; -- an output prompt. Values are printed preceded by this string.
;;; -- an error handler which is called whenever an error is detected.
;;; -- a make-toplevel function that will return a thunk implementing
;;; one interpreting step (read-eval-print).
;;; make-toplevel may be roughly defined as
;;; (lambda (read-exression display-value error-catcher)
;;; (lambda () (display-value (tested-eval (read-expression)))))
;;; The toplevel function will be repeatedly invoked from the
;;; interpreter. make-toplevel is invoked only once
;;; by the testing-engine with
;;; == (read-expression): a function that reads an expression and echoes it
;;; after printing the input-prompt. It returns the read expression.
;;; == (display-value v): the function that prints a value preceded by
;;; the output prompt. A new toplevel step is performed right after.
;;; == (error-catcher message . culprits): a function that reports the error,
;;; aborts the current computation and restarts a new cycle.
;;; You must write your interpreter so that when it detects errors,
;;; it calls this error-catcher function. You can have single error
;;; function or trap the native error function of your particular
;;; system.
(defun interpreter (prompt-in ; prompt to read expression
prompt-out ; prompt preceding results
continue? ; continue after unexpected error
make-toplevel) ; toplevel generator
;; display the result of an evaluation
(labels ((display-status (status expected v)
(case status
((unexpected-error)
(newline)
(display v)
(display " an unexpected ERROR occurs !!!")
(newline)
continue?)
((correct-result)
(display prompt-out)
(display v)
(newline)
+true+) ; continue iteration
(otherwise +false+)))) ; stop iteration
;; starts toplevel
(tester-call/cc
(lambda (exit) ; exit when test suite is finished
(engine-tester
(lambda () ; read expression
(display prompt-in)
(let ((e (tester-read)))
(if (eof-object? e)
(funcall exit 'end))
e))
(lambda () 'nothing) ; read expected result (useless)
(lambda (expected obtained) ; compare expected and obtained results
(cond ((eq? obtained '***) +false+)
((eq? obtained '---) +false+)
(t +true+)))
(function display-status)
make-toplevel)))))
;;; suite-test is similar to the preceding one except that tests are taken
;;; from a file, possibly echoed on the console and checked to be correct.
;;; The suite contains expressions followed by their expected result.
;;; The result of the evaluation is compared to this result, the
;;; suite is aborted if an error occurs.
;;; suite-test takes six arguments:
;;; -- a file-name: The file contains the expressions to be evaluated and
;;; the expected results.
;;; -- an input prompt
;;; -- an output prompt
;;; -- a boolean flag which governs if read expressions from the file are
;;; echoed on the console.
;;; -- a make-toplevel function that will return the toplevel function
;;; make-toplevel may be roughly defined as
;;; (lambda (test-read test-checker wrong)
;;; (lambda () (test-checker (tested-eval (test-read)))))
;;; The toplevel function will be repeatedly invoked from the
;;; interpreter. The arguments of make-toplevel are
;;; == (test-read): the function that reads an expression, echoing it
;;; after printing the input-prompt.
;;; == (test-checker v): this function takes a value, reads the expected
;;; result, compares them and if according, prints the value preceded by
;;; the output prompt. A new toplevel is started after that.
;;; == (wrong message . culprits): a function that reports the error,
;;; aborts the current computation and restart a new cycle.
;;; -- a (comparator) function that takes the obtained result and the
;;; expected result and compares them yielding a boolean.
;;; Very often, result-eval is just `equal?' but must
;;; recognize the *** and --- items which meaning is "an error is
;;; expected" or "an unimportant value (but no error)".
(defun suite-test (file ; the test suite
prompt-in ; the prompt to read
prompt-out ; the prompt to display
echo? ; echo expressions ?
make-toplevel ; a toplevel generator
compare) ; how to compare results
(let ((in (open-input-file file))
(native-display (function display))
(native-newline (function newline)))
(labels (
;; Two small utilities to display things
(display (exp)
(if echo? (funcall native-display exp)))
(newline ()
(if echo? (funcall native-newline)))
;; Display the result of the test, return a boolean to indicate
;; whether the tests should continue or not.
(display-status (status expected v)
(case status
((expected-error)
(set! echo? +true+)
(display prompt-out)
(display v)
(display " an ERROR was expected !!! ")
(newline)
+false+) ; stop iteration
((error-occurred)
(display " OK OK")
(newline)
+true+) ; continue iteration
((unexpected-error)
(newline)
(display v)
(set! echo? +true+)
(display " an unexpected ERROR occured !!!")
(newline)
(display " value expected: ")
(display expected)
(newline)
+false+) ; stop iteration
((correct-result)
(display prompt-out)
(display v)
(display " OK")
(newline)
+true+) ; continue iteration
((incorrect-result)
(set! echo? +true+)
(display prompt-out)
(display v)
(display " ERROR !!!")
(newline)
(display "value expected:")
(display expected)
(newline)
+false+) ; stop iteration
((uninteresting-result)
(display " OK")
(newline)
+true+) ; continue iteration
(else (display "No such status")
(newline)
+false+)))) ; stop iteration
(tester-call/cc
(lambda (exit) ; exit when test suite is finished
(engine-tester
(lambda () ; read test
(let ((e (tester-read in)))
(if (eof-object? e)
(begin (close-input-port in)
(funcall exit 'done)))
(display prompt-in)
(display e)
(newline)
e))
(lambda () ; read result
(let ((expected (tester-read in)))
(if (eof-object? expected)
(tester-error "Missing expected result" expected)
expected)))
compare
(function display-status)
make-toplevel))))))
;;; A test engine on top of which the two previous are written:
;;; (read-test) reads an expression to evaluate
;;; (read-result) reads the expected result
;;; (compare expected obtained) compares what was obtained from what
;;; was expected. The value of `expected' can also
;;; be *** or ---
;;; (display-status message expected obtained) displays the result of the
;;; test. It usually prints the result and a comment like `OK'.
;;; Testing is abandoned if display-status returns +false+.
;;; (make-toplevel read print error) returns a thunk implementing one step
;;; of the intepreter.
(defun engine-tester (read-test ; read a test
read-result ; read the expected result
compare ; compare the two
display-status ; display the comparison
make-toplevel) ; make a toplevel
(tester-call/cc
(lambda (abort) ; exit all tests
(let ((resume +false+)) ; will be initialized below.
;; compare the result V with what was expected. If that
;; function is called then no error ocurred (unless *** is
;; given to it simulating an error internally caught).
(labels ((check-result (v)
(let ((expected (funcall read-result)))
(if (cond ((funcall compare expected v)
(funcall display-status 'correct-result expected v))
((eq? expected '***)
(funcall display-status 'expected-error expected v))
((eq? expected '---)
(funcall display-status 'uninteresting-result expected v))
(t
(funcall display-status 'incorrect-result expected v)))
(funcall resume +true+)
(funcall abort +false+))))
;; This function is called whenever an error is detected.
(handle-exception (msg &rest culprits)
;;(write `(handle-exception called))(newline) ; DEBUG
(let ((expected (funcall read-result))
(v (cons msg culprits)))
(if (cond ((eq? expected '***)
(funcall display-status 'error-occurred expected v))
(t
(funcall display-status 'unexpected-error expected v)))
(funcall resume +true+)
(funcall abort +false+)))))
(let ((toplevel (funcall make-toplevel
read-test
(function check-result)
(function handle-exception))))
;; The goal is to call (toplevel) ever and ever but to ensure
;; that the continuation is correctly reset.
(named-let named-loop ()
(tester-call/cc
(lambda (k)
(if (and tester-take-only-one-continuation resume)
'nothing
(set! resume k))
(let ((r (funcall toplevel)))
;; if this error is triggered, see note below.
(tester-error "(toplevel) should not return!" r))))
(named-loop))))))))
(defun tester-error (msg &rest other)
(print other)
(error msg))
;;; Examples:
;;; Suppose you have written an interpreter called `evaluate', then the
;;; following will start a toplevel loop. Errors detected in evaluate
;;; are supposed to call the `wrong' function.
;;;(define (scheme)
;;; (interpreter "?? " " == " +true+
;;; (lambda (read print error)
;;; (set! wrong error) ;; Errors in the interpreter calls wrong
;;; (lambda () (print (evaluate (read)))))))
;;; The problem is that errors in the underlying system are not caught.
;;; Suppose at that time to have something to trap errors, say catch-error
;;; as in Mac-Lisp (it returns the result in a pair or the string that names
;;; the error if any), then you can write:
;;;(define (scheme)
;;; (interpreter "?? " " == " +true+
;;; (lambda (read print error)
;;; (set! wrong error) ;; Errors in the interpreter calls wrong
;;; (lambda () (let ((r (catch-error (evaluate (read)))))
;;; (if (pair? r) (print (car r))
;;; (error r)))))))
;;; NOTE: Both the print and error functions (in interpreter and
;;; suite-test) have a control effect. They restart a new toplevel
;;; iteration. So it is *important* not to forget to call them to
;;; reiterate the toplevel. If you return a value from toplevel
;;; without calling print or error, you'll get an internal error (ie
;;; an invocation of tester-error). [The reason lies with toplevel
;;; returning more than once in some concurrent interpreter I wrote].
;;; If you have a file containing a test suite, say suite.tst, then you
;;; can try it with:
;;;(define (test-scheme)
;;; (suite-test "suite.tst"
;;; "?? " "== " +true+
;;; (lambda (read print error)
;;; (set! wrong error)
;;; (lambda ()
;;; (print (eval (read)))))
;;; equal?))
;;; Another comparison function could be:
;;; (lambda (expected obtained)
;;; (cond ((or (eq? obtained '---)(eq? obtained '***))
;;; (equal? expected obtained))
;;; (else (member obtained expected))))
;;; Other suggestions: tests and results can be read from two different files.
;;; You can use other compare functions such as member, set-equal? or even
;;; use pattern-matching. Here are the two lastly mentioned comparators.
;;; Compares if sets X and Y have the same (with equal?) elements.
(defun set-equal? (x y)
(labels ((remove-one (item list)
(if (pair? list)
(if (equal? item (car list))
(cdr list)
(cons (car list) (remove-one item (cdr list))))
'())))
(if (pair? x)
(and (member (car x) y)
(set-equal? (cdr x) (remove-one (car x) y)))
(null? y))))
;;; Compares if the expression fits the pattern. Two special patterns exist:
;;; ?- which accepts anything
;;; ??- which accepts a (possibly empty) sequence of anything.
;;; Otherwise comparisons are performed with equal?.
(defun naive-match (pattern expression)
(labels ((naive-match-list (patterns expressions)
(if (pair? patterns)
(if (eq? (car patterns) '??-) ; accepts any sequence of things
(or (naive-match-list (cdr patterns) expressions)
(and (pair? expressions)
(naive-match-list patterns (cdr expressions))))
(and (pair? expressions)
(naive-match (car patterns) (car expressions))
(naive-match-list (cdr patterns) (cdr expressions))))
(naive-match patterns expressions))))
(or (eq? pattern '?-) ; accepts anything
(if (pair? pattern)
(naive-match-list pattern expression)
(equal? pattern expression)))))
;;; AGAIN A NOTE:
;;; To catch the errors of the underlying Scheme is difficult.
;;; This tester engine has been used since 1992 on a wide variety of
;;; interpreters, some of which are concurrent and/or return multiple
;;; results.
;;; end of tester.scm