-
Notifications
You must be signed in to change notification settings - Fork 7
/
pico_minify.py
498 lines (406 loc) · 21.3 KB
/
pico_minify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
from utils import *
from pico_defs import fixnum_is_negative, float_is_negative, Language
from pico_tokenize import Token, TokenType, StopTraverse, k_skip_children
from pico_parse import Node, NodeType, VarKind
from pico_parse import k_unary_ops_prec, get_precedence, is_right_assoc, can_replace_with_unary
from pico_parse import is_vararg_expr, is_short_block_stmt, is_root_global_or_builtin_local
from pico_output import format_luanum, format_fixnum, format_string_literal
class Focus(Bitmask):
chars = compressed = tokens = ...
none = 0
def minify_string_literal(ctxt, token, focus, value=None):
if value is None:
value = token.parsed_value
if focus.chars:
return format_string_literal(value, use_complex_long=ctxt.version >= 40)
else:
# haven't found a good balanced heuristic for 'long' yet
return format_string_literal(value, long=token.value.startswith('['))
def minify_needs_comments(minify):
# returns whether minify_code makes use of the tokens' comments
return not minify.get("wspace", True)
def get_node_bodies(node):
if node.type in (NodeType.if_, NodeType.elseif):
yield node.then
if node.else_:
yield from get_node_bodies(node.else_)
else:
yield node.body
def analyze_code_for_minify(root, focus, ctxt, safe_builtins):
shorthands_can_nest = ctxt.version >= 42
shorts = CounterDictionary()
longs = CounterDictionary()
allowed_shorts = set()
def allow_shorthand_nesting(parent, body, child):
# at this point, both parent and child are either shorthands or may become shorthands
if shorthands_can_nest:
# the then of a short if-else cannot contain anything except nested if-elses
if ((parent.type == NodeType.if_ and parent.else_ and body is parent.then) and
not (child.type == NodeType.if_ and child.else_)):
return False
# note - pico8 now supports shorthands terminated by 'end', but we don't generate those yet
if body.next_token() is not child.next_token():
return False
return True
else:
# in theory, could've supported nesting print even here, but never did
return False
def allow_parent_shorthands(node):
allowed = True
prev_parent = node
def check_parent_shorthand(parent):
nonlocal prev_parent, allowed
if (parent.short or parent in allowed_shorts) and not allow_shorthand_nesting(parent, prev_parent, node):
allowed = False
prev_parent = parent
node.traverse_parents(check_parent_shorthand)
return allowed
def allow_child_shorthands(node, body):
allowed = True
def check_child_shorthand(child):
nonlocal allowed
if (child.short or child in allowed_shorts) and not allow_shorthand_nesting(node, body, child):
allowed = False
body.traverse_nodes(post=check_child_shorthand)
return allowed
def analyze_node_post(node):
if node.type in (NodeType.if_, NodeType.while_):
to_short = node.short
weight = 1
if node.type == NodeType.if_:
else_ = node.else_
while else_ and else_.type == NodeType.elseif:
weight += 1
else_ = else_.else_
has_elseif = weight > 1
# can the node be converted to shorthand?
if not to_short and not has_elseif:
has_empties, starts_with_do = False, False
allow_shorthand = allow_parent_shorthands(node)
# now check the children
if not allow_child_shorthands(node, node.cond):
allow_shorthand = False
for i, body in enumerate(get_node_bodies(node)):
if not allow_child_shorthands(node, body):
allow_shorthand = False
if not body.children:
has_empties = True
if i == 0:
# a shorthand cannot begin with a do block. (would be a longhand)
starts_with_do = body.first_token().value == "do"
# empty bodies require extra ';'s to shorten, which worsens compression
to_short = allow_shorthand and not (has_empties and not focus.chars) and not starts_with_do
if to_short:
allowed_shorts.add(node)
if to_short:
shorts[node.type] += weight
else:
longs[node.type] += weight
# print shorthands save tokens, so we don't currently undo them
# note: could support calls to something other than the print builtin, but not sure it's safe...
elif (node.type == NodeType.call and node.func.type == NodeType.var and is_root_global_or_builtin_local(node.func) and
node.func.name == "print" and node.func.var.name == "print" and # both before and after any rename
not node.func.var.reassigned and (not root.has_env or not safe_builtins)):
# note: could support expressions, but only in non-vararg contexts, which mostly defeats the purpose
if not node.short and node.parent.type == NodeType.block and allow_parent_shorthands(node) and allow_child_shorthands(node, node):
allowed_shorts.add(node)
root.traverse_nodes(post=analyze_node_post)
new_shorts = {}
for type in (NodeType.if_, NodeType.while_):
# if everything can be made short, that's always best.
# else, consistency is better for compression while more shorts are better for chars
if focus.chars or not longs[type] or (not focus.compressed and longs[type] * 1.4 <= shorts[type]):
new_shorts[type] = True
elif focus.compressed:
new_shorts[type] = False
else:
new_shorts[type] = None # leave alone
if focus.compressed and not (new_shorts[NodeType.if_] and new_shorts[NodeType.while_]):
new_shorts[NodeType.call] = None
else:
new_shorts[NodeType.call] = True
return Dynamic(new_shorts=new_shorts, allowed_shorts=allowed_shorts)
def on_enable_shorthand(node):
# remove line breaks originally in the source
vline = node.first_token().vline
def fix_vlines(token):
token.vline = vline
node.traverse_tokens(fix_vlines)
def minify_change_block_shorthand(node, new_short):
if new_short:
node.short = True
node.remove_token(2, ("then", "do"))
if node.type == NodeType.if_ and node.else_:
node.else_.short = True
node.else_.remove_token(-1, "end")
else:
node.remove_token(-1, "end")
# we can assume node.cond is not wrapped in parens, since we're in a post-visit
# wrap it in parens ourselves (TODO: eww...)
node.cond.replace_with(Node(NodeType.group, [], child=node.cond.move()))
node.cond.children.append(node.cond.child)
node.cond.insert_token(0, TokenType.punct, "(", near_next=True)
node.cond.append_token(TokenType.punct, ")")
# fixup empty bodies
for body in get_node_bodies(node):
if not body.children:
body.append_token(TokenType.punct, ";")
on_enable_shorthand(node)
else:
node.short = False
node.insert_token(2, TokenType.keyword, "then" if node.type == NodeType.if_ else "do")
if node.type == NodeType.if_ and node.else_:
node.else_.short = False
node.else_.append_token(TokenType.keyword, "end", near_next=True)
else:
node.append_token(TokenType.keyword, "end", near_next=True)
def minify_change_print_shorthand(node, new_short):
assert new_short
node.short = True
node.func.var.implicit = True
node.add_extra_child(node.func)
end_paren = node.children[-1]
if isinstance(end_paren, Token) and end_paren.value == ")": # else, this is a paren-less call
node.remove_token(1, "(")
node.remove_token(-1, ")")
node.remove_child(0)
node.insert_token(0, TokenType.punct, "?", near_next=True)
node.func = node.children[0]
on_enable_shorthand(node)
def node_contains_vars(root, vars):
def visitor(node):
if node.type == NodeType.var and node.var in vars:
raise StopTraverse()
try:
root.traverse_nodes(visitor, extra=True)
return False
except StopTraverse:
return True
def expr_is_trivial(root, ctxt, safe_only, allow_member=True, allow_index=True, allow_call=True):
def visitor(expr):
# nodes that cannot call user-defined code in any case
if expr.type in (NodeType.const, NodeType.varargs, NodeType.group,
NodeType.table, NodeType.table_member, NodeType.table_index): # (since new tables have no metatable)
pass
elif expr.type == NodeType.var and expr.kind != VarKind.global_:
pass
elif expr.type == NodeType.unary_op and expr.op == "not":
pass
elif expr.type == NodeType.binary_op and expr.op in ("and", "or"):
pass
elif expr.type == NodeType.function:
assert not expr.target # we only traverse expressions!
return k_skip_children
# nodes that may call user-defined code
elif expr.type == NodeType.call:
func = expr.func
if (safe_only or not allow_call or
not (func.type == NodeType.var and is_root_global_or_builtin_local(func) and not func.var.reassigned and func.name not in ctxt.builtins_with_callbacks)):
raise StopTraverse()
elif expr.type == NodeType.member and not allow_member:
raise StopTraverse()
elif expr.type == NodeType.index and not allow_index:
raise StopTraverse()
# nodes that may call user-defined code via metatables (E.g. member access, operators)
elif safe_only:
raise StopTraverse()
try:
root.traverse_nodes(visitor)
return True
except StopTraverse:
return False
def minify_merge_assignments(prev, next, ctxt, safe_only):
if len(prev.targets) < len(prev.sources):
return
if (len(prev.targets) > len(prev.sources) and
((prev.sources and is_vararg_expr(prev.sources[-1])) or (next.sources and is_vararg_expr(next.sources[-1])) or len(next.targets) < len(next.sources))):
return
merge_prev = getattr(next.first_token(), "merge_prev", None)
if merge_prev is False:
return
# check if prev's targets are used in next's sources or targets
require_trivial = False # True when prev.targets may be accessed indirectly from functions that may be called by next.soources
allow_index = allow_member = True
target_vars = []
for target in prev.targets:
if target.type == NodeType.var:
target_vars.append(target.var)
# is it possible for 'next' to access 'target' without refering to it directly? (via function call)
if target.kind == VarKind.global_ or (prev.type == NodeType.assign and target.var.captured):
require_trivial = True
elif target.type == NodeType.member:
target_vars.append(target.key.var)
require_trivial = True
allow_index = False # TODO: could rely on rename's preserve logic
elif target.type == NodeType.index:
require_trivial = True
allow_member = False # TODO: could rely on rename's preserve logic
allow_index = False
else: # just in case...
return
for node in next.sources:
if target_vars and node_contains_vars(node, target_vars):
return
if require_trivial and not expr_is_trivial(node, ctxt, safe_only, allow_member, allow_index):
return
for node in next.targets:
if target_vars and node_contains_vars(node, target_vars):
return
if require_trivial and not expr_is_trivial(node, ctxt, safe_only, allow_member, allow_index, allow_call=False):
return
# when reordering local declarations, ensure we don't change which local wins out among identically-named locals
# (this relies on rename being done already!)
if len(prev.targets) > len(prev.sources) and prev.type == NodeType.local:
for target in prev.targets[len(prev.sources):]:
for next_target in next.targets:
if target.name == next_target.name:
return
# do the merge: (TODO: eww...)
def insert_array_items(dst_node, dst_arr, dst_arr_i, src_arr, src_arr_i, count):
count = default(count, len(src_arr) - src_arr_i)
if not count:
return
need_end_comma = False
if dst_arr_i < len(dst_arr):
dst_i = dst_node.children.index(dst_arr[dst_arr_i])
need_end_comma = True
elif len(dst_arr):
dst_i = dst_node.children.index(dst_arr[dst_arr_i - 1]) + 1
dst_node.insert_token(dst_i, TokenType.punct, ",")
dst_i += 1
else:
assert dst_arr is dst_node.sources
dst_node.append_token(TokenType.punct, "=")
dst_i = len(dst_node.children)
for i in range(count):
src_elem = src_arr[src_arr_i + i]
dst_arr.insert(dst_arr_i + i, src_elem)
dst_node.insert_existing(dst_i, src_elem)
dst_i += 1
if i < count - 1 or need_end_comma:
dst_node.insert_token(dst_i, TokenType.punct, ",")
dst_i += 1
insert_array_items(prev, prev.targets, len(prev.sources), next.targets, 0, None)
insert_array_items(prev, prev.sources, len(prev.sources), next.sources, 0, None)
next.erase()
def value_is_negative(ctxt, value):
if ctxt.lang == Language.pico8:
return fixnum_is_negative(value)
elif isinstance(value, int):
return value < 0
else:
return float_is_negative(value)
def minify_code(ctxt, root, minify_opts):
safe_reorder = minify_opts.get("safe-reorder", False)
safe_builtins = minify_opts.get("safe-builtins", False)
minify_tokens = minify_opts.get("tokens", True)
minify_reorder = minify_opts.get("reorder", True)
focus = Focus(minify_opts.get("focus"))
if not focus.tokens:
safe_reorder = True # nothing gained with False here, so set it to True just in case.
analysis = analyze_code_for_minify(root, focus, ctxt, safe_builtins)
def fixup_nodes_pre(node):
if minify_tokens:
# remove shorthands
if node.type in (NodeType.if_, NodeType.while_) and node.short and (analysis.new_shorts[node.type] == False):
minify_change_block_shorthand(node, False)
# remove unneeded groups
while node.type == NodeType.group:
inner, outer = node.child, node.parent
inner_prec, outer_prec = get_precedence(inner), get_precedence(outer)
needed = True
if e(inner_prec) and e(outer_prec) and (inner_prec > outer_prec or (inner_prec == outer_prec and
(outer_prec == k_unary_ops_prec or is_right_assoc(outer) == (outer.right == node)))):
needed = False
elif e(outer_prec) and inner.type in (NodeType.var, NodeType.index, NodeType.member, NodeType.call, NodeType.varargs):
needed = False
elif e(outer_prec) and inner.type == NodeType.const and (focus.tokens or can_replace_with_unary(node) or
not (inner.token.type == TokenType.number and value_is_negative(ctxt, inner.token.parsed_value))):
needed = False
elif outer.type in (NodeType.group, NodeType.table_member, NodeType.table_index, NodeType.op_assign):
needed = False
elif outer.type == NodeType.call and (node in outer.args[:-1] or
(outer.args and node == outer.args[-1] and not is_vararg_expr(inner))):
needed = False
elif outer.type in (NodeType.assign, NodeType.local) and (node in outer.sources[:-1] or
(outer.sources and node == outer.sources[-1] and (not is_vararg_expr(inner) or len(outer.targets) <= len(outer.sources)))):
needed = False
elif outer.type in (NodeType.return_, NodeType.table) and (node in outer.items[:-1] or
(outer.items and node == outer.items[-1] and not is_vararg_expr(inner))):
needed = False
elif outer.type in (NodeType.if_, NodeType.elseif, NodeType.while_, NodeType.until) and not outer.short:
needed = False
if needed:
break
else:
node.replace_with(node.child.move())
# node may now be another group, so loop
def fixup_nodes_post(node):
if minify_tokens:
# create shorthands
if (node.type in (NodeType.if_, NodeType.while_) and not node.short and
analysis.new_shorts[node.type] and node in analysis.allowed_shorts):
minify_change_block_shorthand(node, True)
if (node.type == NodeType.call and not node.short and
analysis.new_shorts[node.type] and node in analysis.allowed_shorts):
minify_change_print_shorthand(node, True)
if minify_reorder:
# merge assignments
if node.type == NodeType.local or (focus.tokens and node.type == NodeType.assign):
prev = node.prev_sibling()
while prev and prev.type == None: # skip erased
prev = prev.prev_sibling()
if prev and prev.type == node.type:
minify_merge_assignments(prev, node, ctxt, safe_reorder)
def fixup_tokens(token):
# minify sublangs
sublang = getattr(token, "sublang", None)
if sublang and sublang.minify:
token.modify(minify_string_literal(ctxt, token, focus, value=sublang.minify()))
if minify_tokens:
# remove unneeded tokens
if token.value == ";" and token.parent.type == NodeType.block and token.next_token().value != "(":
gparent = token.parent.parent
if not (gparent and is_short_block_stmt(gparent) and not token.parent.stmts):
token.erase()
return
if token.value in (",", ";") and token.parent.type == NodeType.table and token.next_sibling().value == "}":
token.erase()
return
if token.value == "(" and token.parent.type == NodeType.call and len(token.parent.args) == 1:
arg = token.parent.args[0]
if arg.type == NodeType.table or (arg.type == NodeType.const and arg.token.type == TokenType.string):
token.erase("(")
token.parent.erase_token(-1, ")")
return
# replace tokens for higher consistency
if token.value == ";" and token.parent.type == NodeType.table:
token.modify(",")
if token.value == "!=":
token.modify("~=")
if token.value == "^^" and ctxt.version >= 37:
token.modify("~")
if token.value == "then" and ctxt.lang == Language.pico8 and ctxt.version >= 42:
token.modify("do") # I HATE THIS :(
if token.value == "//" and ctxt.lang == Language.picotron:
token.modify("\\")
if token.value == "//=" and ctxt.lang == Language.picotron:
token.modify("\\=")
if token.type == TokenType.string:
token.modify(minify_string_literal(ctxt, token, focus))
if token.type == TokenType.number:
allow_unary = can_replace_with_unary(token.parent)
format_num = format_luanum if ctxt.lang == Language.picotron else format_fixnum
token.modify(format_num(token.parsed_value, sign=None if allow_unary else ''))
if token.type == TokenType.number:
if token.value.startswith("-") or token.value.startswith("~"): # either due to format_fixnum above, or due to ConstToken.value
# insert synthetic unary token, so that output_tokens's tokenize and root.get_tokens() won't get confused
token.parent.insert_token(0, TokenType.punct, token.value[0], near_next=True)
token.modify(token.value[1:])
elif token.value.startswith("("): # special case where a unary was forced on us
# insert synthetic unary tokens, as needed
token.parent.insert_token(0, TokenType.punct, token.value[0], near_next=True)
token.parent.insert_token(1, TokenType.punct, token.value[1], near_next=True)
token.parent.append_token(TokenType.punct, token.value[-1])
token.modify(token.value[2:-1])
root.traverse_nodes(fixup_nodes_pre, fixup_nodes_post, tokens=fixup_tokens)