Skip to content

Commit

Permalink
mostly comment improvements, improve node printing as suggested by pa…
Browse files Browse the repository at this point in the history
…ncelor, fixed bbs test reporting
  • Loading branch information
thisismypassport committed Oct 17, 2024
1 parent 8196bdd commit e34ae92
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 40 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ You can disable parts of the minification process via additional command-line op
* `--no-minify-lines` : Disable removal of line breaks
* `--no-minify-comments` : Disable removal of comments (requires `--no-minify-spaces`)
* `--no-minify-tokens` : Disable removal and alteration of tokens (not including identifier renaming)
* `--no-minify-reoder` : Disable reordering of statements
* `--no-minify-reorder` : Disable reordering of statements

You can control how safe the minification is (see [details about unsafe minifications](#pitfalls-of-full-minification)):
* `--minify-safe-only` : Do only safe minification. Equivalent to specifying all of the below.
Expand Down Expand Up @@ -964,7 +964,10 @@ def preprocess_syntax_main(cart, root, on_error, args, **_):

# the syntax tree format isn't really documented anywhere yet. you can:
# - check examples of use in pico_lint.py
# - search for the NodeType you're interested in, in pico_parse.py to see what it contains
# - print() nodes to see what they contain (ignores some attributes for better readability)
# - search for the NodeType you're interested in, in pico_parse.py, to see what it contains

# print(node)

def post_visit(node):
pass # just here as an example
Expand Down
66 changes: 44 additions & 22 deletions pico_compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def uncompress_code(r, size_handler=None, debug_handler=None, **_):
header = r.bytes(4, allow_eof=True)

if header == k_new_compressed_code_header:
# the new compression format - uses bit encoding & mtf (move-to-front)
unc_size = r.u16()
com_size = r.u16()

Expand Down Expand Up @@ -102,6 +103,7 @@ def uncompress_code(r, size_handler=None, debug_handler=None, **_):
assert len(code) == unc_size

elif header == k_old_compressed_code_header:
# the old compression format - byte-based and allows very limited matching
unc_size = r.u16()
assert r.u16() == 0 # ?
if debug_handler: debug_handler.init(r, str)
Expand Down Expand Up @@ -172,15 +174,34 @@ class Lz77Entry(Tuple):
offset = count = ...

class Lz77Advance(Tuple):
"""A strategy that spans positions 'i' up to 'next_i' with a given 'cost', using a linked list of lz77/literal/etc items."""
"""A strategy that advances from 'i' to 'next_i' using 'item' (a literal/lz77/etc) with cost 'cost',
it is preceded by Lz77Advance 'prev'. (so Lz77Advance-s form a linked list)
('ctxt' is the opaque context for measure)"""
i = next_i = cost = ctxt = item = prev = ...

class SubMatchDict(Struct):
"""When the list of previous matches gets too long, a SubMatchDict can be used as a perf optimization,
grouping the list by an extra code character"""
dict = best_j = ...

def get_lz77(code, min_c=3, max_c=0x7fff, max_o=0x7fff, measure=None, min_cost=None,
get_cheaper_c=None, max_o_steps=None, fast_c=None, no_repeat=False, litblock_idxs=None):
min_matches = defaultdict(list)
"""a generic helper that transforms code (either bytes or str) into a sequence of items:
literals (bytes/chars), Lz77Entry-ies, and literal blocks.
min_c/max_c - min/max allowed counts in Lz77Entry-ies
max_o - max allowed offsets (distances) in Lz77Entry-ies
measure - measures the cost of a particular item. (any cost unit can be used)
also returns a context, an opaque value to be passed when measuring subsequent items
min_cost - the absolute minimum cost that the given number of bytes can correspond to
anywhere in any code. (used for perf. optimization)
get_cheaper_c - given a count, return a smaller count that results in less cost
(but may be a better choice if the next match catches up without increasing cost)
max_o_steps - (turn this to get_shorter_o?) - offset values that may be shorter but
result in less cost.
litblock_idxs - indices of where to enter or exit a literal block (should be precomputed)
"""
min_matches = defaultdict(list) # for each min_c-sized sequence in the code - its previous matches
# as either a list or a SubMatchDict
next_litblock = litblock_idxs.popleft() if litblock_idxs else len(code)
empty_code = type(code)() # e.g. "" if code is a str

Expand Down Expand Up @@ -234,6 +255,7 @@ def find_match(i, min_c=min_c, max_o=max_o, matches_dict=min_matches):
return best_c, best_j

def convert_to_matches_dict(matches, min_i, c):
"""list -> SubMatchDict"""
matches_dict = defaultdict(list)
best_j = -1

Expand All @@ -246,11 +268,11 @@ def convert_to_matches_dict(matches, min_i, c):

return SubMatchDict(matches_dict, best_j)

def mktuple(i, j, count):
def mklz77(i, j, count):
return Lz77Entry(i - j, count)

i = 0
prev_i = 0
i = 0 # current position in the code
prev_i = 0 # previous position in the code
advances = deque() if measure else None # potentially worthwhile ways to go from the current or past positions
curr_adv = None

Expand Down Expand Up @@ -283,6 +305,11 @@ def add_advance(cost, ctxt, c, item):
advances.insert(insert_idx, next_adv)
else:
advances.append(next_adv)

def add_lz77_advance(j, c):
item = mklz77(i, j, c)
cost, ctxt = measure(curr_ctxt, item)
add_advance(curr_cost + cost, ctxt, c, item)

def get_advance_items(adv):
while adv:
Expand Down Expand Up @@ -316,17 +343,13 @@ def get_advance_items(adv):
# try using a match
best_c, best_j = find_match(i)
if best_c > 0:
lz_item = mktuple(i, best_j, best_c)
lz_cost, lz_ctxt = measure(curr_ctxt, lz_item)
add_advance(curr_cost + lz_cost, lz_ctxt, best_c, lz_item)
add_lz77_advance(best_j, best_c)

if get_cheaper_c:
# try a shorter yet cheaper match
cheap_c = get_cheaper_c(best_c)
if best_c > cheap_c >= min_c:
nr_item = mktuple(i, best_j, cheap_c)
nr_cost, nr_ctxt = measure(curr_ctxt, nr_item)
add_advance(curr_cost + nr_cost, nr_ctxt, cheap_c, nr_item)
add_lz77_advance(best_j, cheap_c)

if max_o_steps:
# try a shorter yet closer match
Expand All @@ -336,17 +359,13 @@ def get_advance_items(adv):

sh_best_c, sh_best_j = find_match(i, max_o=step)
if sh_best_c > 0:
sh_item = mktuple(i, sh_best_j, sh_best_c)
sh_cost, sh_ctxt = measure(curr_ctxt, sh_item)
add_advance(curr_cost + sh_cost, sh_ctxt, sh_best_c, sh_item)
add_lz77_advance(sh_best_j, sh_best_c)

if get_cheaper_c:
# try a shorter yet cheaper match
sh_cheap_c = get_cheaper_c(sh_best_c)
if sh_best_c > sh_cheap_c >= min_c:
shnr_item = mktuple(i, sh_best_j, sh_cheap_c)
shnr_cost, shnr_ctxt = measure(curr_ctxt, shnr_item)
add_advance(curr_cost + shnr_cost, shnr_ctxt, sh_cheap_c, shnr_item)
add_lz77_advance(sh_best_j, sh_cheap_c)

curr_adv = advances.popleft()
i = curr_adv.next_i
Expand All @@ -356,7 +375,7 @@ def get_advance_items(adv):
yield from reversed(tuple(get_advance_items(curr_adv)))
curr_adv = None

else:
else: # case when no 'measure' is provided - result is far less optimal
best_c, best_j = find_match(i)
if best_c > 0:
# check for obvious wins of not using matches
Expand All @@ -365,13 +384,15 @@ def get_advance_items(adv):
yield i, code[i]
i += 1
else:
yield i, mktuple(i, best_j, best_c)
yield i, mklz77(i, best_j, best_c)
i += best_c
else:
yield i, code[i]
i += 1

if not (fast_c != None and best_c >= fast_c):
if fast_c is None or best_c < fast_c:
# add the matches we just passed through to min_matches

for j in range(prev_i, i):
matches_dict = min_matches
c = min_c
Expand All @@ -384,7 +405,8 @@ def get_advance_items(adv):
matches = matches_dict[code[j:j+c]]

matches.append(j)
if len(matches) > 200 and len(matches_dict) > 5 and c < min_c * 6: # ?

if len(matches) > 200 and len(matches_dict) > 5 and c < min_c * 6: # shaky perf. heuristic
matches_dict[code[j:j+c]] = convert_to_matches_dict(matches, i, c + 1)

prev_i = i
Expand Down Expand Up @@ -445,7 +467,7 @@ def preprocess_litblock_idxs():
pre_min_c = 4 # ignore questionable lz77s
last_cost_len = 0x20
last_cost_mask = last_cost_len - 1
last_costs = [0 for i in range(last_cost_len)]
last_costs = [0 for i in range(last_cost_len)] # cost deltas of using compression vs literal blocks
sum_costs = 0
litblock_idxs = deque()

Expand Down
6 changes: 5 additions & 1 deletion pico_constfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
from pico_output import format_fixnum, format_luanum

class LuaType(Enum):
"""A lua (or pico8) type for the purpose of constant folding"""
nil = boolean = fixnum = integer = float = string = ...

class LuaValue:
"""A lua (or pico8) value for the purpose of constant folding"""

def __init__(m, type, value):
m.type, m.value = type, value

Expand Down Expand Up @@ -83,7 +86,8 @@ def __init__(m, value):
k_lua_false = LuaBoolean(False)
k_lua_maxint = 0x7fff

# ops
# lua ops - given the language and the parameters, return the result,
# or None if unable or unwilling to do the computation

def lua_neg(lang, a):
if a.is_fixnum:
Expand Down
2 changes: 1 addition & 1 deletion pico_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class Language(Enum):
pico8 = picotron = ...

class Memory(bytearray):
"""A block pico8 memory - a bytearray with some convenience functions like get/set16, get/set4, etc."""
"""A pico8 memory block - a bytearray with some convenience functions like get/set16, get/set4, etc."""

def copy(m):
return Memory(m)
Expand Down
2 changes: 2 additions & 0 deletions pico_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ def dump_file(m, dest, fmt, misc, i, name, content):
else:
file_write(path_join(dest, filename_fixup(name)), content)

# (lz4 is used in both pico8 pods and in picotron)

def lz4_uncompress(data, debug=None):
def read_u8_sum(r):
sum = 0
Expand Down
5 changes: 4 additions & 1 deletion pico_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class VarKind(Enum):
local = global_ = member = label = ...

class VarBase():
"""A variable (a Local, Global, table Member key, or Label)"""
def __init__(m, kind, name):
m.kind = kind
m.name = name
Expand All @@ -21,7 +22,7 @@ def __init__(m, kind, name):
constval = None

def __repr__(m):
return f"{m.kind} {m.name}"
return f"{typename(m)}({m.name})"

class Local(VarBase):
def __init__(m, name, scope, builtin=False):
Expand All @@ -44,6 +45,7 @@ def __init__(m, name, scope):
m.scope = scope

class Scope:
"""A scope that defines new Locals. Note that in lua, every 'local' statement creates a new Scope"""
def __init__(m, parent=None, depth=0, funcdepth=0):
m.parent = parent
m.depth = depth
Expand Down Expand Up @@ -87,6 +89,7 @@ def has_used_members(m):
return lazy_property.is_set(m, "used_members")

class LabelScope:
"""A scope that defines the labels in a block"""
def __init__(m, parent=None, funcdepth=0):
m.parent = parent
m.funcdepth = funcdepth
Expand Down
4 changes: 2 additions & 2 deletions pico_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def flush_output():
elif ch != '#':
i += 1

elif list_get(code, i + 1) == '[' and list_get(code, i + 2) != '[': # #[...] inline directive (not used by pico preprocessor)
elif list_get(code, i + 1) == '[' and list_get(code, i + 2) != '[': # DEPRECATED
flush_output()
i, start_i, out_i = preprocessor.handle_inline(path, code, i, start_i, out_i, outparts, outmappings)

Expand Down Expand Up @@ -176,7 +176,7 @@ def trim_cart_to_tab(cart, target_tab):
k_custom_pp_inline_delims = k_wspace + "[]"

class CustomPreprocessor(PicoPreprocessor):
"""A custom preprocessor that isn't enabled by default (and is quite quirky & weird)"""
"""DEPRECATED - has no relation to pico8's preprocessor"""

def __init__(m, defines=None, pp_handler=None, **kwargs):
super().__init__(**kwargs)
Expand Down
13 changes: 11 additions & 2 deletions pico_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,23 @@ class StopTraverse(BaseException):
k_skip_children = True # value returnable from traverse's pre-function

class TokenNodeBase:
"""Baseclass for both pico8 Tokens and pico8 Nodes.
"""Baseclass for syntax Tokens, syntax Nodes, and Comments.
The syntax tree is comprised of these and can be traversed via traverse_nodes or traverse_tokens"""

def __init__(m):
m.parent, m.children = None, ()

def __repr__(m):
return repr(m.__dict__)
return "%s(type=%s)" % (typename(m), m.type)

def __str__(m):
reprlist = []
for key, val in m.__dict__.items():
if key in ("parent", "children", "idx", "endidx", "vline", "lang", "modified", "source",
"scope", "extra_i", "extra_children"):
continue
reprlist.append("%s=%r" % (key, val))
return "%s(%s)" % (typename(m), ", ".join(reprlist))

@property
def source_text(m):
Expand Down
25 changes: 19 additions & 6 deletions run_bbs_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from test_utils import *
from pico_cart import get_bbs_cart_url
from threading import Thread
import argparse
import argparse, atexit
import multiprocessing as mp
import multiprocessing.dummy as mt

Expand Down Expand Up @@ -129,10 +129,11 @@ def check_run(name, result, parse_meta=False):
meta["version"] = str_after_first(line, ":").strip()
return meta

def init_for_process(opts):
def init_for_process(opts, endpipe):
global g_opts
g_opts = opts
init_tests(opts)
atexit.register(lambda: endpipe.send(get_test_results()))

def run_for_cart(args):
(cart, cart_input, cart_output, cart_compare, cart_unfocused, focus) = args
Expand Down Expand Up @@ -225,7 +226,7 @@ def process_output(kind, output):
unsafe_minify_results = run_code(g_opts.target, uncompress_path, unsafe_minify_path, "--minify", "--count", "--parsable-count", *minify_opts)
process_output("unsafe_minify", check_run(f"{cart}:unsafe_minify", unsafe_minify_results, parse_meta=True))

return (cart, get_test_results(), new_cart_input, cart_output, deltas, best_path_for_pico8)
return (cart, new_cart_input, cart_output, deltas, best_path_for_pico8)

def run(focus):
filename = str(focus) if focus else "normal"
Expand All @@ -245,7 +246,18 @@ def run(focus):
if g_opts.pico8_interact:
Thread(target=interact_with_pico8s, args=(g_opts.pico8, g_opts.pico8_time), daemon=True).start()

with mp.Pool(g_opts.parallel_jobs, init_for_process, (g_opts,)) as mp_pool, \
endpipe_recv, endpipe_send = mp.Pipe(duplex=False)

def handle_endpipe():
try:
while True:
add_test_results(endpipe_recv.recv())
except EOFError:
pass

Thread(target=handle_endpipe).start()

with mp.Pool(g_opts.parallel_jobs, init_for_process, (g_opts, endpipe_send)) as mp_pool, \
mt.Pool(g_opts.parallel_jobs) as mt_pool:

p8_results = []
Expand All @@ -254,8 +266,7 @@ def run(focus):
if not mp_result:
continue

(cart, test_results, new_cart_input, cart_output, cart_deltas, cart_pico8_path) = mp_result
add_test_results(test_results)
(cart, new_cart_input, cart_output, cart_deltas, cart_pico8_path) = mp_result
if new_cart_input:
inputs[cart] = new_cart_input
outputs[cart] = cart_output
Expand All @@ -274,6 +285,8 @@ def run(pico8=pico8, path=cart_pico8_path):
for p8_result in p8_results:
check_run(f"p8-run", p8_result.get())

endpipe_send.close()

file_write_json(input_json, inputs, sort_keys=True, indent=4)
file_write_json(output_json, outputs, sort_keys=True, indent=4)

Expand Down
5 changes: 4 additions & 1 deletion test_input/my_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def pre_visit(node):

# the syntax tree format isn't really documented anywhere yet. you can:
# - check examples of use in pico_lint.py
# - search for the NodeType you're interested in, in pico_parse.py to see what it contains
# - print() nodes to see what they contain (ignores some attributes for better readability)
# - search for the NodeType you're interested in, in pico_parse.py, to see what it contains

# print(node)

def post_visit(node):
pass # just here as an example
Expand Down
Loading

0 comments on commit e34ae92

Please sign in to comment.