Skip to content

Commit

Permalink
mostly comment improvements, improve node printing as suggested by pa…
Browse files Browse the repository at this point in the history
…ncelor, fixed bbs test reporting
  • Loading branch information
thisismypassport committed Oct 17, 2024
1 parent 8196bdd commit e34ae92
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 40 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ You can disable parts of the minification process via additional command-line op
* `--no-minify-lines` : Disable removal of line breaks
* `--no-minify-comments` : Disable removal of comments (requires `--no-minify-spaces`)
* `--no-minify-tokens` : Disable removal and alteration of tokens (not including identifier renaming)
* `--no-minify-reoder` : Disable reordering of statements
* `--no-minify-reorder` : Disable reordering of statements

You can control how safe the minification is (see [details about unsafe minifications](#pitfalls-of-full-minification)):
* `--minify-safe-only` : Do only safe minification. Equivalent to specifying all of the below.
Expand Down Expand Up @@ -964,7 +964,10 @@ def preprocess_syntax_main(cart, root, on_error, args, **_):

# the syntax tree format isn't really documented anywhere yet. you can:
# - check examples of use in pico_lint.py
# - search for the NodeType you're interested in, in pico_parse.py to see what it contains
# - print() nodes to see what they contain (ignores some attributes for better readability)
# - search for the NodeType you're interested in, in pico_parse.py, to see what it contains

# print(node)

def post_visit(node):
pass # just here as an example
Expand Down
66 changes: 44 additions & 22 deletions pico_compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def uncompress_code(r, size_handler=None, debug_handler=None, **_):
header = r.bytes(4, allow_eof=True)

if header == k_new_compressed_code_header:
# the new compression format - uses bit encoding & mtf (move-to-front)
unc_size = r.u16()
com_size = r.u16()

Expand Down Expand Up @@ -102,6 +103,7 @@ def uncompress_code(r, size_handler=None, debug_handler=None, **_):
assert len(code) == unc_size

elif header == k_old_compressed_code_header:
# the old compression format - byte-based and allows very limited matching
unc_size = r.u16()
assert r.u16() == 0 # ?
if debug_handler: debug_handler.init(r, str)
Expand Down Expand Up @@ -172,15 +174,34 @@ class Lz77Entry(Tuple):
offset = count = ...

class Lz77Advance(Tuple):
"""A strategy that spans positions 'i' up to 'next_i' with a given 'cost', using a linked list of lz77/literal/etc items."""
"""A strategy that advances from 'i' to 'next_i' using 'item' (a literal/lz77/etc) with cost 'cost',
it is preceded by Lz77Advance 'prev'. (so Lz77Advance-s form a linked list)
('ctxt' is the opaque context for measure)"""
i = next_i = cost = ctxt = item = prev = ...

class SubMatchDict(Struct):
"""When the list of previous matches gets too long, a SubMatchDict can be used as a perf optimization,
grouping the list by an extra code character"""
dict = best_j = ...

def get_lz77(code, min_c=3, max_c=0x7fff, max_o=0x7fff, measure=None, min_cost=None,
get_cheaper_c=None, max_o_steps=None, fast_c=None, no_repeat=False, litblock_idxs=None):
min_matches = defaultdict(list)
"""a generic helper that transforms code (either bytes or str) into a sequence of items:
literals (bytes/chars), Lz77Entry-ies, and literal blocks.
min_c/max_c - min/max allowed counts in Lz77Entry-ies
max_o - max allowed offsets (distances) in Lz77Entry-ies
measure - measures the cost of a particular item. (any cost unit can be used)
also returns a context, an opaque value to be passed when measuring subsequent items
min_cost - the absolute minimum cost that the given number of bytes can correspond to
anywhere in any code. (used for perf. optimization)
get_cheaper_c - given a count, return a smaller count that results in less cost
(but may be a better choice if the next match catches up without increasing cost)
max_o_steps - (turn this to get_shorter_o?) - offset values that may be shorter but
result in less cost.
litblock_idxs - indices of where to enter or exit a literal block (should be precomputed)
"""
min_matches = defaultdict(list) # for each min_c-sized sequence in the code - its previous matches
# as either a list or a SubMatchDict
next_litblock = litblock_idxs.popleft() if litblock_idxs else len(code)
empty_code = type(code)() # e.g. "" if code is a str

Expand Down Expand Up @@ -234,6 +255,7 @@ def find_match(i, min_c=min_c, max_o=max_o, matches_dict=min_matches):
return best_c, best_j

def convert_to_matches_dict(matches, min_i, c):
"""list -> SubMatchDict"""
matches_dict = defaultdict(list)
best_j = -1

Expand All @@ -246,11 +268,11 @@ def convert_to_matches_dict(matches, min_i, c):

return SubMatchDict(matches_dict, best_j)

def mktuple(i, j, count):
def mklz77(i, j, count):
return Lz77Entry(i - j, count)

i = 0
prev_i = 0
i = 0 # current position in the code
prev_i = 0 # previous position in the code
advances = deque() if measure else None # potentially worthwhile ways to go from the current or past positions
curr_adv = None

Expand Down Expand Up @@ -283,6 +305,11 @@ def add_advance(cost, ctxt, c, item):
advances.insert(insert_idx, next_adv)
else:
advances.append(next_adv)

def add_lz77_advance(j, c):
item = mklz77(i, j, c)
cost, ctxt = measure(curr_ctxt, item)
add_advance(curr_cost + cost, ctxt, c, item)

def get_advance_items(adv):
while adv:
Expand Down Expand Up @@ -316,17 +343,13 @@ def get_advance_items(adv):
# try using a match
best_c, best_j = find_match(i)
if best_c > 0:
lz_item = mktuple(i, best_j, best_c)
lz_cost, lz_ctxt = measure(curr_ctxt, lz_item)
add_advance(curr_cost + lz_cost, lz_ctxt, best_c, lz_item)
add_lz77_advance(best_j, best_c)

if get_cheaper_c:
# try a shorter yet cheaper match
cheap_c = get_cheaper_c(best_c)
if best_c > cheap_c >= min_c:
nr_item = mktuple(i, best_j, cheap_c)
nr_cost, nr_ctxt = measure(curr_ctxt, nr_item)
add_advance(curr_cost + nr_cost, nr_ctxt, cheap_c, nr_item)
add_lz77_advance(best_j, cheap_c)

if max_o_steps:
# try a shorter yet closer match
Expand All @@ -336,17 +359,13 @@ def get_advance_items(adv):

sh_best_c, sh_best_j = find_match(i, max_o=step)
if sh_best_c > 0:
sh_item = mktuple(i, sh_best_j, sh_best_c)
sh_cost, sh_ctxt = measure(curr_ctxt, sh_item)
add_advance(curr_cost + sh_cost, sh_ctxt, sh_best_c, sh_item)
add_lz77_advance(sh_best_j, sh_best_c)

if get_cheaper_c:
# try a shorter yet cheaper match
sh_cheap_c = get_cheaper_c(sh_best_c)
if sh_best_c > sh_cheap_c >= min_c:
shnr_item = mktuple(i, sh_best_j, sh_cheap_c)
shnr_cost, shnr_ctxt = measure(curr_ctxt, shnr_item)
add_advance(curr_cost + shnr_cost, shnr_ctxt, sh_cheap_c, shnr_item)
add_lz77_advance(sh_best_j, sh_cheap_c)

curr_adv = advances.popleft()
i = curr_adv.next_i
Expand All @@ -356,7 +375,7 @@ def get_advance_items(adv):
yield from reversed(tuple(get_advance_items(curr_adv)))
curr_adv = None

else:
else: # case when no 'measure' is provided - result is far less optimal
best_c, best_j = find_match(i)
if best_c > 0:
# check for obvious wins of not using matches
Expand All @@ -365,13 +384,15 @@ def get_advance_items(adv):
yield i, code[i]
i += 1
else:
yield i, mktuple(i, best_j, best_c)
yield i, mklz77(i, best_j, best_c)
i += best_c
else:
yield i, code[i]
i += 1

if not (fast_c != None and best_c >= fast_c):
if fast_c is None or best_c < fast_c:
# add the matches we just passed through to min_matches

for j in range(prev_i, i):
matches_dict = min_matches
c = min_c
Expand All @@ -384,7 +405,8 @@ def get_advance_items(adv):
matches = matches_dict[code[j:j+c]]

matches.append(j)
if len(matches) > 200 and len(matches_dict) > 5 and c < min_c * 6: # ?

if len(matches) > 200 and len(matches_dict) > 5 and c < min_c * 6: # shaky perf. heuristic
matches_dict[code[j:j+c]] = convert_to_matches_dict(matches, i, c + 1)

prev_i = i
Expand Down Expand Up @@ -445,7 +467,7 @@ def preprocess_litblock_idxs():
pre_min_c = 4 # ignore questionable lz77s
last_cost_len = 0x20
last_cost_mask = last_cost_len - 1
last_costs = [0 for i in range(last_cost_len)]
last_costs = [0 for i in range(last_cost_len)] # cost deltas of using compression vs literal blocks
sum_costs = 0
litblock_idxs = deque()

Expand Down
6 changes: 5 additions & 1 deletion pico_constfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
from pico_output import format_fixnum, format_luanum

class LuaType(Enum):
"""A lua (or pico8) type for the purpose of constant folding"""
nil = boolean = fixnum = integer = float = string = ...

class LuaValue:
"""A lua (or pico8) value for the purpose of constant folding"""

def __init__(m, type, value):
m.type, m.value = type, value

Expand Down Expand Up @@ -83,7 +86,8 @@ def __init__(m, value):
k_lua_false = LuaBoolean(False)
k_lua_maxint = 0x7fff

# ops
# lua ops - given the language and the parameters, return the result,
# or None if unable or unwilling to do the computation

def lua_neg(lang, a):
if a.is_fixnum:
Expand Down
2 changes: 1 addition & 1 deletion pico_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class Language(Enum):
pico8 = picotron = ...

class Memory(bytearray):
"""A block pico8 memory - a bytearray with some convenience functions like get/set16, get/set4, etc."""
"""A pico8 memory block - a bytearray with some convenience functions like get/set16, get/set4, etc."""

def copy(m):
return Memory(m)
Expand Down
2 changes: 2 additions & 0 deletions pico_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ def dump_file(m, dest, fmt, misc, i, name, content):
else:
file_write(path_join(dest, filename_fixup(name)), content)

# (lz4 is used in both pico8 pods and in picotron)

def lz4_uncompress(data, debug=None):
def read_u8_sum(r):
sum = 0
Expand Down
5 changes: 4 additions & 1 deletion pico_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class VarKind(Enum):
local = global_ = member = label = ...

class VarBase():
"""A variable (a Local, Global, table Member key, or Label)"""
def __init__(m, kind, name):
m.kind = kind
m.name = name
Expand All @@ -21,7 +22,7 @@ def __init__(m, kind, name):
constval = None

def __repr__(m):
return f"{m.kind} {m.name}"
return f"{typename(m)}({m.name})"

class Local(VarBase):
def __init__(m, name, scope, builtin=False):
Expand All @@ -44,6 +45,7 @@ def __init__(m, name, scope):
m.scope = scope

class Scope:
"""A scope that defines new Locals. Note that in lua, every 'local' statement creates a new Scope"""
def __init__(m, parent=None, depth=0, funcdepth=0):
m.parent = parent
m.depth = depth
Expand Down Expand Up @@ -87,6 +89,7 @@ def has_used_members(m):
return lazy_property.is_set(m, "used_members")

class LabelScope:
"""A scope that defines the labels in a block"""
def __init__(m, parent=None, funcdepth=0):
m.parent = parent
m.funcdepth = funcdepth
Expand Down
4 changes: 2 additions & 2 deletions pico_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def flush_output():
elif ch != '#':
i += 1

elif list_get(code, i + 1) == '[' and list_get(code, i + 2) != '[': # #[...] inline directive (not used by pico preprocessor)
elif list_get(code, i + 1) == '[' and list_get(code, i + 2) != '[': # DEPRECATED
flush_output()
i, start_i, out_i = preprocessor.handle_inline(path, code, i, start_i, out_i, outparts, outmappings)

Expand Down Expand Up @@ -176,7 +176,7 @@ def trim_cart_to_tab(cart, target_tab):
k_custom_pp_inline_delims = k_wspace + "[]"

class CustomPreprocessor(PicoPreprocessor):
"""A custom preprocessor that isn't enabled by default (and is quite quirky & weird)"""
"""DEPRECATED - has no relation to pico8's preprocessor"""

def __init__(m, defines=None, pp_handler=None, **kwargs):
super().__init__(**kwargs)
Expand Down
13 changes: 11 additions & 2 deletions pico_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,23 @@ class StopTraverse(BaseException):
k_skip_children = True # value returnable from traverse's pre-function

class TokenNodeBase:
"""Baseclass for both pico8 Tokens and pico8 Nodes.
"""Baseclass for syntax Tokens, syntax Nodes, and Comments.
The syntax tree is comprised of these and can be traversed via traverse_nodes or traverse_tokens"""

def __init__(m):
m.parent, m.children = None, ()

def __repr__(m):
return repr(m.__dict__)
return "%s(type=%s)" % (typename(m), m.type)

def __str__(m):
reprlist = []
for key, val in m.__dict__.items():
if key in ("parent", "children", "idx", "endidx", "vline", "lang", "modified", "source",
"scope", "extra_i", "extra_children"):
continue
reprlist.append("%s=%r" % (key, val))
return "%s(%s)" % (typename(m), ", ".join(reprlist))

@property
def source_text(m):
Expand Down
25 changes: 19 additions & 6 deletions run_bbs_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from test_utils import *
from pico_cart import get_bbs_cart_url
from threading import Thread
import argparse
import argparse, atexit
import multiprocessing as mp
import multiprocessing.dummy as mt

Expand Down Expand Up @@ -129,10 +129,11 @@ def check_run(name, result, parse_meta=False):
meta["version"] = str_after_first(line, ":").strip()
return meta

def init_for_process(opts):
def init_for_process(opts, endpipe):
global g_opts
g_opts = opts
init_tests(opts)
atexit.register(lambda: endpipe.send(get_test_results()))

def run_for_cart(args):
(cart, cart_input, cart_output, cart_compare, cart_unfocused, focus) = args
Expand Down Expand Up @@ -225,7 +226,7 @@ def process_output(kind, output):
unsafe_minify_results = run_code(g_opts.target, uncompress_path, unsafe_minify_path, "--minify", "--count", "--parsable-count", *minify_opts)
process_output("unsafe_minify", check_run(f"{cart}:unsafe_minify", unsafe_minify_results, parse_meta=True))

return (cart, get_test_results(), new_cart_input, cart_output, deltas, best_path_for_pico8)
return (cart, new_cart_input, cart_output, deltas, best_path_for_pico8)

def run(focus):
filename = str(focus) if focus else "normal"
Expand All @@ -245,7 +246,18 @@ def run(focus):
if g_opts.pico8_interact:
Thread(target=interact_with_pico8s, args=(g_opts.pico8, g_opts.pico8_time), daemon=True).start()

with mp.Pool(g_opts.parallel_jobs, init_for_process, (g_opts,)) as mp_pool, \
endpipe_recv, endpipe_send = mp.Pipe(duplex=False)

def handle_endpipe():
try:
while True:
add_test_results(endpipe_recv.recv())
except EOFError:
pass

Thread(target=handle_endpipe).start()

with mp.Pool(g_opts.parallel_jobs, init_for_process, (g_opts, endpipe_send)) as mp_pool, \
mt.Pool(g_opts.parallel_jobs) as mt_pool:

p8_results = []
Expand All @@ -254,8 +266,7 @@ def run(focus):
if not mp_result:
continue

(cart, test_results, new_cart_input, cart_output, cart_deltas, cart_pico8_path) = mp_result
add_test_results(test_results)
(cart, new_cart_input, cart_output, cart_deltas, cart_pico8_path) = mp_result
if new_cart_input:
inputs[cart] = new_cart_input
outputs[cart] = cart_output
Expand All @@ -274,6 +285,8 @@ def run(pico8=pico8, path=cart_pico8_path):
for p8_result in p8_results:
check_run(f"p8-run", p8_result.get())

endpipe_send.close()

file_write_json(input_json, inputs, sort_keys=True, indent=4)
file_write_json(output_json, outputs, sort_keys=True, indent=4)

Expand Down
5 changes: 4 additions & 1 deletion test_input/my_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def pre_visit(node):

# the syntax tree format isn't really documented anywhere yet. you can:
# - check examples of use in pico_lint.py
# - search for the NodeType you're interested in, in pico_parse.py to see what it contains
# - print() nodes to see what they contain (ignores some attributes for better readability)
# - search for the NodeType you're interested in, in pico_parse.py, to see what it contains

# print(node)

def post_visit(node):
pass # just here as an example
Expand Down
Loading

0 comments on commit e34ae92

Please sign in to comment.