Skip to content

Commit

Permalink
added preprocess_syntax_main, to allow scripts to access the syntax t…
Browse files Browse the repository at this point in the history
…ree (not fully documented nor guaranteed to be stable, but still seems useful to expose)
  • Loading branch information
thisismypassport committed Aug 11, 2024
1 parent 510b5f8 commit b94383f
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 35 deletions.
91 changes: 64 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ def postprocess_main(cart, **_):
new_cart = Cart(code=to_p8str("-- rom-only cart 🐱"), rom=cart.rom)
write_cart("new_cart2.rom", new_cart, CartFormat.rom)
```

## Advanced - custom sub-language

For **really** advanced use cases, if you're embedding a custom language inside the strings of your pico-8 code, you can let Shrinko8 know how to lint & minify it.
Expand Down Expand Up @@ -813,32 +814,6 @@ class MySubLanguage(SubLanguageBase):
# returns whether a statement (a sequence of tokens) is an assignment,
# i.e. whether it has a second token and that token is our lang's assignment token
def is_assignment(self, stmt):
    return len(stmt) > 1 and stmt[1] == "<-" # our lang's assignment token

# for --lint:

# called to get globals defined (aka assigned to) within the sub-language's code
def get_defined_globals(self, **_):
for stmt in self.stmts:
if self.is_assignment(stmt):
yield stmt[0]

# called to get globals used (aka read from) within the sub-language's code
def get_used_globals(self, **_):
for stmt in self.stmts:
if self.is_assignment(stmt):
stmt = stmt[2:] # ignore assigned to globals

for token in stmt:
if self.is_global(token):
yield token

# called to lint the sub-language's code
def lint(self, builtins, globals, on_error, **_):
for stmt in self.stmts:
for token in stmt:
if self.is_global(token) and token not in builtins and token not in globals:
on_error("Identifier '%s' not found" % token)
# could do custom lints too

# for --minify:

# called to get all characters that won't get removed or renamed by the minifier
Expand Down Expand Up @@ -884,6 +859,34 @@ class MySubLanguage(SubLanguageBase):
def minify(self, **_):
    # tokens of a statement are separated by a single space, statements by a newline
    return "\n".join(" ".join(stmt) for stmt in self.stmts)

# for --lint:

# called to get globals defined within the sub-language's code
# such globals can be used outside the sub-language too.
def get_defined_globals(self, **_):
    for stmt in self.stmts:
        # our language only allows assignment to globals, so any assignment defines a global
        if self.is_assignment(stmt):
            yield stmt[0] # the assignment target is the statement's first token

# called to get globals used within the sub-language's code
def get_used_globals(self, **_):
    for stmt in self.stmts:
        if self.is_assignment(stmt):
            # skip the first two tokens (the assignment target and the assignment token):
            # don't return the assignment target, to get warnings if it isn't used
            stmt = stmt[2:]

        for token in stmt:
            if self.is_global(token):
                yield token

# called to lint the sub-language's code
# - builtins, globals: collections of known identifiers; a global token found in neither is reported
# - on_error: callback that receives the error message
def lint(self, builtins, globals, on_error, **_):
    for stmt in self.stmts:
        for token in stmt:
            if self.is_global(token) and token not in builtins and token not in globals:
                on_error("Identifier '%s' not found" % token)
    # could do custom lints too

# this is called to get a sub-language class by name
def sublanguage_main(lang, **_):
if lang == "evally":
Expand Down Expand Up @@ -940,7 +943,41 @@ local table = splitkeys(--[[language::splitkeys]]"key1=val1,key2=val2,val3,val4"
?table[1] -- "val3"
```

To run, use `--script <path>` as before.
To run, use `--script <path>` as described [before](#custom-python-script).

## Advanced - access to the Syntax Tree

For **really** advanced use cases, you may want to access the Syntax Tree of your code (from a Python script) in order to, e.g., do custom linting and analysis.

Keep in mind that the syntax tree and its associated APIs are not fully documented here, and are not guaranteed to remain stable in future versions.

```python
# this is called after your cart is parsed into a syntax tree, but before it is transformed for minification
# - root: the root node of the syntax tree
# - on_error: call as on_error(message, node) to report an error at the given node
# - args: the parsed command line arguments
def preprocess_syntax_main(cart, root, on_error, args, **_):
    from pico_parse import NodeType

    if args.lint: # do some custom linting, if linting was requested in the command line
        def pre_visit(node):
            # just as an example, add a lint error on any use of 'goto'
            if node.type == NodeType.goto:
                on_error("goto used", node)

            # the syntax tree format isn't really documented anywhere yet. you can:
            # - check examples of use in pico_lint.py
            # - search for the NodeType you're interested in, in pico_parse.py to see what it contains

        def post_visit(node):
            pass # just here as an example

        # visit the entire syntax tree, calling pre_visit before each node, and post_visit after each node
        # extra=True allows you to visit things not apparent in the source itself, such as:
        # implicit parameters, implicit _ENV when accessing globals, etc.
        root.traverse_nodes(pre=pre_visit, post=post_visit, extra=True)
```

To run, use `--script <path>` as described [before](#custom-python-script).

You can check `pico_lint.py` for examples of how to use the syntax tree.

# Picotron Support

Expand Down
11 changes: 8 additions & 3 deletions pico_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,16 +245,16 @@ def fixup_process_args(args):
return args_set, args

def process_code(ctxt, source, input_count=False, count=False, lint=False, minify=False, rename=False, unminify=False,
stop_on_lint=True, count_is_optional=False):
stop_on_lint=True, count_is_optional=False, preproc=None):
need_lint, lint = fixup_process_args(lint)
need_minify, minify = fixup_process_args(minify)
need_rename, rename = fixup_process_args(rename)
need_unminify, unminify = fixup_process_args(unminify)

if not need_lint and not need_minify and not need_unminify and not ((count or input_count) and not count_is_optional):
if not need_lint and not need_minify and not need_unminify and not ((count or input_count) and not count_is_optional) and not preproc:
return True, ()

need_parse = need_lint or need_minify or need_unminify
need_parse = need_lint or need_minify or need_unminify or preproc
need_all_comments = need_unminify or (need_minify and minify_needs_comments(minify))

errors = ()
Expand All @@ -275,6 +275,11 @@ def process_code(ctxt, source, input_count=False, count=False, lint=False, minif

if need_lint:
errors = lint_code(ctxt, root, lint)

if preproc: # can do linting and - theoretically - early transformations
def add_error(msg, node):
errors.append(Error(msg, node))
preproc(root, add_error)

if not errors or not stop_on_lint:
if need_minify:
Expand Down
3 changes: 2 additions & 1 deletion run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ def run():
run_test("countminus", "minus.p8", None, "--count", stdout_output="minuscount.txt")
run_test("error", "worse.p8", None, "--lint", stdout_output="worse.txt", norm_stdout=norm_paths, exit_code=1)
run_test("script", "script.p8", "script.p8", "--script", path_join("test_input", "my_script.py"),
"--update-version", "--script-args", "my-script-arg", "--my-script-opt", "123", update_version=False)
"--lint", "--no-lint-fail", "--update-version", "--script-args", "my-script-arg", "--my-script-opt", "123",
stdout_output="script.txt", norm_stdout=norm_paths, update_version=False)
run_test("sublang.lint", "sublang.p8", None, "--lint",
"--script", path_join("test_input", "sublang.py"), stdout_output="sublang.txt", norm_stdout=norm_paths, exit_code=2)
run_test("sublang", "sublang.p8", "sublang.p8", "--minify",
Expand Down
12 changes: 9 additions & 3 deletions shrinko.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,14 @@ def main_inner(args):
args.const = args.const or {}
args.const.update({name: parse_constant(val, lang, as_str=True) for name, val in args.str_const})

args.preproc_cb, args.postproc_cb, args.sublang_cb = None, None, None
args.preproc_cb, args.postproc_cb, args.preproc_syntax_cb, args.sublang_cb = None, None, None, None
if args.script:
for script in args.script:
preproc_main, postproc_main, sublang_main = import_from_script_by_path(script, "preprocess_main", "postprocess_main", "sublanguage_main")
preproc_main, postproc_main, preproc_syntax_main, sublang_main = import_from_script_by_path(script,
"preprocess_main", "postprocess_main", "preprocess_syntax_main", "sublanguage_main")
args.preproc_cb = func_union(args.preproc_cb, preproc_main)
args.postproc_cb = func_union(postproc_main, args.postproc_cb) # (reverse order)
args.preproc_syntax_cb = func_union(args.preproc_syntax_cb, preproc_syntax_main)
args.sublang_cb = func_union(args.sublang_cb, sublang_main, return_early=e)

base_count_handler = ParsableCountHandler if args.parsable_count else True
Expand Down Expand Up @@ -516,12 +518,16 @@ def handle_processing(args, main_cart, extra_carts):
if args.preproc_cb:
args.preproc_cb(cart=cart, src=src, ctxt=ctxt, args=args)

def preproc_syntax_call(root, on_error):
args.preproc_syntax_cb(cart=cart, src=src, root=root, on_error=on_error, ctxt=ctxt, args=args)

ok, errors = process_code(ctxt, src,
input_count=is_pico8 and args.input_count,
count=is_pico8 and args.count,
lint=args.lint, minify=args.minify, rename=args.rename,
unminify=args.unminify, stop_on_lint=not args.no_lint_fail,
count_is_optional=args.no_count_tokenize)
count_is_optional=args.no_count_tokenize,
preproc=preproc_syntax_call if args.preproc_syntax_cb else None)
if errors:
had_warns = True

Expand Down
1 change: 1 addition & 0 deletions test_compare/script.p8

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions test_compare/script.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
hello from preprocess_main!
Received args: my-script-arg 123
hello from postprocess_syntax_main!
Lint warnings:
test_input/script.p8:3:8596: goto used
hello from postprocess_main!
27 changes: 27 additions & 0 deletions test_input/my_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,30 @@ def postprocess_main(cart, **_):
assert(from_p8str(to_p8str(test_str)) == test_str)
test_p8str = to_p8str(test_str)
assert(decode_p8str(encode_p8str(test_p8str)) == test_p8str)

# this is called after your cart is parsed into a syntax tree, but before it is transformed for minification
# - root: the root node of the syntax tree
# - on_error: call as on_error(message, node) to report an error at the given node
# - args: the parsed command line arguments
def preprocess_syntax_main(cart, root, on_error, args, **_):
    # NOTE: the message says "postprocess" although this is the preprocess hook; the text is kept
    # as is because the expected test output (test_compare/script.txt) contains this exact line
    print("hello from postprocess_syntax_main!")

    from pico_parse import NodeType

    if args.lint: # do some custom linting, if linting was requested in the command line
        def pre_visit(node):
            # just as an example, add a lint error on any use of 'goto'
            if node.type == NodeType.goto:
                on_error("goto used", node)

            # the syntax tree format isn't really documented anywhere yet. you can:
            # - check examples of use in pico_lint.py
            # - search for the NodeType you're interested in, in pico_parse.py to see what it contains

        def post_visit(node):
            pass # just here as an example

        # visit the entire syntax tree, calling pre_visit before each node, and post_visit after each node
        # extra=True allows you to visit things not apparent in the source itself, such as:
        # implicit parameters, implicit _ENV when accessing globals, etc.
        root.traverse_nodes(pre=pre_visit, post=post_visit, extra=True)

# internal note - yes, the test's output is slightly wrong: preprocess_main changes the code,
# but that change isn't reflected in CodeMapping.src_code; will fix if reported.
3 changes: 2 additions & 1 deletion test_input/script.p8
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__lua__
print("$$DATA$$")
print("$$DATA$$")
goto skip ::skip::

0 comments on commit b94383f

Please sign in to comment.