From 0729051b803b54ee3cfd15be15abc47a8c04ded2 Mon Sep 17 00:00:00 2001 From: thisismypassport Date: Wed, 12 Feb 2025 12:29:57 +0200 Subject: [PATCH] fix and improve shrinkotron support - improving compression rates and allowing dumping uncompressed pods, etc. (contains algorithms to read/write the pxu format) --- README.md | 2 +- picotron_cart.py | 26 +++-- picotron_fs.py | 285 ++++++++++++++++++++++++++++++++++++++++++----- shrinko.py | 14 ++- 4 files changed, 289 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index e04d7ce..d46c54c 100644 --- a/README.md +++ b/README.md @@ -1014,5 +1014,5 @@ Cart manipulation features: Notes: * Shrinkotron assumes calls to `include` are used to include other unmodified lua files. If this is not the case, minify may break even under `--minify-safe-only` -* Currently, Shrinkotron does not touch data files (gfx/sfx/etc). It may play with their compression in the future, however. +* Shrinkotron repacks all POD files for better compression. (There are options to change this - `--uncompress-pods` and `--keep-pod-compression`) * As Picotron evolves, there might be new globals or table keys that Shrinkotron isn't aware of. You can report such cases and use [`--preserve`](#preserving-identifiers-across-the-entire-cart) meanwhile. diff --git a/picotron_cart.py b/picotron_cart.py index d209621..38c8f0d 100644 --- a/picotron_cart.py +++ b/picotron_cart.py @@ -369,16 +369,18 @@ def preview_order_key(pair): # we prefer to sort p64 files for better visibility of code, e.g. in the webapp's preview # (this is NOT what picotron does currently, hopefully doesn't matter) dirname, filename = str_split_last(pair[0], "/") - if filename == k_p64_main_path: - order = 0 + if filename == "": # directory itself must be first + order = -3 + elif filename == k_p64_main_path: + order = -2 elif filename.endswith(".lua"): - order = 1 + order = -1 elif filename == k_label_file: - order = 3 + order = 1 elif filename.startswith("."): - order = 4 - else: order = 2 + else: + order = 0 return (dirname, order, filename) def write_cart64_to_source(cart, avoid_base64=False, **opts): @@ -530,7 +532,7 @@ def filter_cart64(cart, sections): for path in to_delete: del cart.files[path] -def preproc_cart64(cart, delete_meta): +def preproc_cart64(cart, delete_meta=None, uncompress_pods=False, keep_pod_compression=False, need_pod_compression=False): if delete_meta: to_delete = [] @@ -546,6 +548,16 @@ def preproc_cart64(cart, delete_meta): for path in to_delete: del cart.files[path] + + if not keep_pod_compression: + for path, file in cart.files.items(): + if not file.is_raw and not file.is_dir: + if uncompress_pods: + file.set_payload(file.payload, compress=False, use_pxu=False) + elif need_pod_compression: + file.set_payload(file.payload, compress=True, use_pxu=True) + else: + file.set_payload(file.payload, compress=False, use_pxu=True) def merge_cart64(dest, src, sections): glob = Cart64Glob(sections) if e(sections) else None diff --git a/picotron_fs.py b/picotron_fs.py index 961f349..25be0f6 100644 --- a/picotron_fs.py +++ b/picotron_fs.py @@ -1,24 +1,33 @@ from utils import * from pico_defs import Language, encode_luastr, decode_luastr +from pico_export import lz4_compress, lz4_uncompress +from pico_compress import update_mtf + +# (note - picotron pods have nothing to do with pico8 pods) k_pod = b"pod" k_pod_str = k_pod.decode() k_pod_prefix_str = k_pod_str + "," k_pod_format = b"pod_format" -k_pod_format_str = k_pod_format.decode() +k_pod_prefix_strs = (k_pod_format.decode(), "pod_type") k_pod_raw_format = k_pod_format + b"=\"raw\"" k_meta_prefix = b"--[[" k_meta_pod_prefix = k_meta_prefix + k_pod k_meta_pod_raw_prefix = k_meta_prefix + k_pod_raw_format k_meta_suffix = b"]]" -class UserData(Struct): +class UserData(Tuple): + """Represents a picotron userdata""" type = width = height = data = ... -def parse_pod(pod): +def parse_pod(pod, ud_handler=None): + """Parses a picotron pod from a readable string""" src = Source("", pod) tokens, token_errors = tokenize(src, lang=Language.picotron) - root, parse_errors = parse(src, tokens, lang=Language.picotron, for_expr=True) + if tokens: + root, parse_errors = parse(src, tokens, lang=Language.picotron, for_expr=True) + else: + root, parse_errors = None, [] value_errors = [] def add_error(msg, node): @@ -36,15 +45,21 @@ def node_to_value(node): return None elif node.type == NodeType.unary_op and node.op == "-" and node.child.type == NodeType.const and node.child.token.type == TokenType.number: return -node.child.token.parsed_value - elif node.type == NodeType.call and node.func.type == NodeType.var and node.func.name == "userdata" and len(node.args) in (3, 4): - type = node_to_value(node.args[0]) - width = node_to_value(node.args[1]) - height = node_to_value(node.args[2]) if len(node.args) == 4 else 0 - data = node_to_value(node.args[-1]) - if isinstance(type, str) and isinstance(width, int) and isinstance(height, int) and isinstance(data, str): - return UserData(type, width, height, data) - else: - add_error(f"unknown userdata params: {type}, {width}, {height}, {data}") + + elif node.type == NodeType.call and node.func.type == NodeType.var and node.func.name == "userdata": + ud_args = tuple(node_to_value(arg) for arg in node.args) + + if len(ud_args) in (3, 4): + type, width, data = ud_args[0], ud_args[1], ud_args[-1] + height = ud_args[2] if len(ud_args) == 4 else 0 + if isinstance(type, str) and isinstance(width, int) and isinstance(height, int) and isinstance(data, str): + return UserData(type, width, height, data) + + if len(ud_args) == 0 and ud_handler and (userdata := ud_handler()): + return userdata + + add_error(f"unknown userdata params: {ud_args}") + elif node.type == NodeType.table: table = {} index = 1 @@ -75,13 +90,15 @@ def node_to_value(node): return value def parse_meta_pod(pod): + """Parses a picotron pod as it appears in a file's metadata""" if pod == k_pod_str: return {} pod = str_remove_prefix(pod, k_pod_prefix_str) return parse_pod("{" + pod + "}") -def format_pod(value): +def format_pod(value, ud_handler=None): + """Formats a picotron pod into a readable string""" if value is None: return "nil" elif value is False: @@ -92,24 +109,29 @@ def format_pod(value): return format_luanum(value, base=10) elif isinstance(value, str): return format_string_literal(value, long=False, quote='"') + elif isinstance(value, UserData): - # TODO: pxu (though not usable in meta, anyway) type, width, height, data = format_pod(value.type), format_pod(value.width), format_pod(value.height), format_pod(value.data) if value.height: - return f"userdata({type},{width},{height},{data})" + result = f"userdata({type},{width},{height},{data})" else: - return f"userdata({type},{width},{data})" + result = f"userdata({type},{width},{data})" + + if ud_handler and ud_handler(value, result): + result = "\0" # to allow unambiguously finding it in the result + return result + elif isinstance(value, dict): index = 1 parts = [] for key, child in value.items(): if key == index: - parts.append(format_pod(child)) + parts.append(format_pod(child, ud_handler)) index += 1 - elif is_identifier(key, Language.picotron): - parts.append(f"{key}={format_pod(child)}") + elif isinstance(key, str) and is_identifier(key, Language.picotron): + parts.append(f"{key}={format_pod(child, ud_handler)}") else: - parts.append(f"[{format_pod(key)}]={format_pod(child)}") + parts.append(f"[{format_pod(key, ud_handler)}]={format_pod(child, ud_handler)}") return "{" + ",".join(parts) + "}" else: throw(f"invalid pod value {value}") @@ -125,17 +147,209 @@ def escape_meta(pod): return pod def format_meta_pod(value): - if k_pod_format_str in value: # put it first - prefix = f"{k_pod_format_str}={format_pod(value[k_pod_format_str])}" - value = value.copy() - del value[k_pod_format_str] + """Formats a picotron pod as it should appear in a file's metadata""" + for pod_prefix_str in k_pod_prefix_strs: + if pod_prefix_str in value: # put it first + prefix = f"{pod_prefix_str}={format_pod(value[pod_prefix_str])}" + value = value.copy() + del value[pod_prefix_str] + break else: prefix = k_pod_str - rest = escape_meta(format_pod(value)[1:-1]) - return f"{prefix},{rest}" if rest else prefix + rest = format_pod(value)[1:-1] + return escape_meta(f"{prefix},{rest}" if rest else prefix) + +k_lz4_prefix = b"lz4\0" +k_pxu_prefix = b"pxu\0" + +class PxuFlags(Bitmask): + unk_type = 0x3 + has_height = 0x40 + long_size = 0x800 + compress = 0x2000 + +def read_pxu(data, idx): + """Reads the picotron userdata compression format 'pxu' into a UserData.""" + + with BinaryReader(BytesIO(data)) as r: + r.setpos(idx) + check(r.bytes(4) == k_pxu_prefix, "wrong pxu header") + flags = PxuFlags(r.u16()) + if not flags.compress or flags.unk_type != 3: + throw(f"unsupported pxu flags: {flags}") + + width = r.u32() if flags.long_size else r.u8() + height = (r.u32() if flags.long_size else r.u8()) if flags.has_height else 1 + size = width * height + + bits = r.u8() + check(bits == 4, "unsupported pxu bits") # TODO - allow more? + mask = (1 << bits) - 1 + ext_count = 1 << (8 - bits) + + # the general idea behind the complexity is that repeated pixels can + # take up spots from low-valued pixels. + + data = bytearray() + mapping = [i for i in range(mask)] + mtf = [i for i in range(mask)] + + while len(data) < size: + b = r.u8() + + index = b & mask + if index == mask: + value = r.u8() + mapping[mtf[-1]] = value + + else: + update_mtf(mtf, mtf.index(index), index) + value = mapping[index] + + count = 1 + (b >> bits) + if count == ext_count: + while True: + c = r.u8() + count += c + if c != 0xff: + break + + for i in range(count): + data.append(value) + + hexdata = "".join(f"{b:02x}" for b in data) + return UserData("u8", width, height if flags.has_height else 0, hexdata), r.pos() + +def read_pod(value): + """Reads a picotron pod from possibly compressed bytes""" + + if value.startswith(k_lz4_prefix): + with BinaryReader(BytesIO(value)) as r: + r.addpos(4) + size = r.u32() + _unc_size = r.u32() + value = lz4_uncompress(r.bytes(size)) + + pxu_i = 0 + userdatas = None + while True: + pxu_i = value.find(k_pxu_prefix, pxu_i) + if pxu_i < 0: + break + + userdatas = userdatas or deque() + userdata, end_i = read_pxu(value, pxu_i) + value = str_replace_between(value, pxu_i, end_i, b"userdata()") + userdatas.append(userdata) + + def handle_userdata(): + if userdatas: + return userdatas.popleft() + + return parse_pod(decode_luastr(value), handle_userdata) + +def write_pxu(ud): + """Writes userdata via the picotron userdata compression format 'pxu'""" + if ud.type != "u8": + return None + + with BinaryWriter() as w: + flags = PxuFlags.unk_type | PxuFlags.compress + if ud.height: + flags |= PxuFlags.has_height + if ud.width >= 0x100 or ud.height >= 0x100: + flags |= PxuFlags.long_size + + w.bytes(k_pxu_prefix) + w.u16(int(flags)) + (w.u32 if flags.long_size else w.u8)(ud.width) + if flags.has_height: + (w.u32 if flags.long_size else w.u8)(ud.height) + + data = bytearray() + try: + for i in range(0, len(ud.data), 2): + data.append(int(ud.data[i:i+2], 16)) + except ValueError: + throw("invalid userdata encountered") + + bits = 4 # could try other values, but picotron itself never does? + w.u8(bits) + mask = (1 << bits) - 1 + ext_count = 1 << (8 - bits) + + mapping = [i for i in range(mask)] + mtf = [i for i in range(mask)] + + i = 0 + while i < len(data): + count = 1 + value = data[i] + i += 1 + while i < len(data) and data[i] == value: + count += 1 + i += 1 + + index = list_find(mapping, value) + if index < 0: + index = mask + mapping[mtf[-1]] = value + + else: + update_mtf(mtf, mtf.index(index), index) + + w.u8(index | ((min(count, ext_count) - 1) << bits)) + if index == mask: + w.u8(value) + + if count >= ext_count: + count -= ext_count + while count >= 0xff: + w.u8(0xff) + count -= 0xff + w.u8(count) + + return w.f.getvalue() + +def write_pod(pod, compress=True, use_pxu=True): + """Writes a picotron pod into optionally compressed bytes""" + + pxu_datas = None + def handle_userdata(ud, str_data): + nonlocal pxu_datas + if use_pxu: + pxu_data = write_pxu(ud) + if pxu_data and len(pxu_data) < len(str_data): + pxu_datas = pxu_datas or deque() + pxu_datas.append(pxu_data) + return True + + value = encode_luastr(format_pod(pod, handle_userdata)) + + pxu_i = 0 + while pxu_datas: + pxu_i = value.find(0, pxu_i) + assert pxu_i >= 0 + + pxu_data = pxu_datas.popleft() + value = str_replace_at(value, pxu_i, 1, pxu_data) + pxu_i += len(pxu_data) + + if compress: + with BinaryWriter() as w: + compressed = lz4_compress(value) + w.bytes(k_lz4_prefix) + w.u32(len(compressed)) + w.u32(len(value)) + w.bytes(compressed) + value = w.f.getvalue() + + return value class PicotronFile: + """A picotron file or directory in its filesystem - files contain metadata & payload""" + def __init__(m, data, line=0): m.data = data m.line = line @@ -196,6 +410,23 @@ def raw_payload(m, value): else: m.data = value + @property + def payload(m): + if m.is_raw: + return m.raw_payload + else: + return read_pod(m.raw_payload) + + @payload.setter + def payload(m, value): + m.set_payload(value) + + def set_payload(m, value, compress=True, use_pxu=True): + if m.is_raw: + m.raw_payload = value + else: + m.raw_payload = write_pod(value, compress=compress, use_pxu=use_pxu) + is_dir = False class PicotronDir(PicotronFile): diff --git a/shrinko.py b/shrinko.py index 1f41e20..d168b85 100644 --- a/shrinko.py +++ b/shrinko.py @@ -168,6 +168,8 @@ def create_parser(): help=f"specify a {sections_desc} that contain lua code to process (default: *.lua)") pgroup.add_argument("--delete-meta", type=SplitBySeps, action="extend", help=f"specify a {sections_desc} to delete metadata of (default: * if minifying unsafely, else none)") + pgroup.add_argument("--keep-pod-compression", action="store_true", help="keep compression of all pod files as-is") + pgroup.add_argument("--uncompress-pods", action="store_true", help="uncompress all pod files to plain text") pgroup.add_argument("--list", action="store_true", help="list all files inside the cart") pgroup.add_argument("--filter", type=SplitBySeps, action="extend", help=f"specify a {sections_desc} to keep in the output") pgroup.add_argument("--insert", nargs='+', action="append", metavar=(f"INPUT [FSPATH] [FILES_FILTER]", ""), @@ -175,7 +177,8 @@ def create_parser(): pgroup.add_argument("--extract", nargs='+', action="append", metavar=(f"FSPATH [OUTPUT]", ""), help=f"extract the specified file or directory from FSPATH to OUTPUT ") else: - pgroup.set_defaults(code_sections=None, delete_meta=None, filter=None, insert=None, extract=None) + pgroup.set_defaults(code_sections=None, delete_meta=None, uncompress_pods=None, keep_pod_compression=None, + filter=None, insert=None, extract=None) pgroup.add_argument("--merge", nargs='+', action="append", metavar=(f"INPUT {sections_meta} [FORMAT]", ""), help=f"merge {sections_str} from the specified INPUT file, where {sections_meta} is a {sections_desc}") @@ -511,8 +514,13 @@ def handle_processing(args, main_cart, extra_carts): if args.filter: filter_cart_func(cart, args.filter) - if args.delete_meta: - preproc_cart_func(cart, delete_meta=args.delete_meta) + + if is_picotron: + preproc_cart_func(cart, delete_meta=args.delete_meta, + keep_pod_compression=args.keep_pod_compression, + uncompress_pods=args.uncompress_pods, + # binary formats are already compressed, so pod compression just hurts + need_pod_compression=args.format and args.format.is_src) src = CartSourceCls(cart, args.code_sections)