Skip to content

Commit

Permalink
Merge pull request #38 from jmanuel1/handle-indent-err
Browse files Browse the repository at this point in the history
Display nice errors from Python tokenizer exceptions
  • Loading branch information
jmanuel1 authored Nov 9, 2024
2 parents 849f5af + 20e4ede commit 737c840
Show file tree
Hide file tree
Showing 9 changed files with 354 additions and 158 deletions.
75 changes: 57 additions & 18 deletions concat/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

import argparse
from concat.transpile import parse, transpile_ast, typecheck
from concat.error_reporting import get_line_at, create_parsing_failure_message
from concat.error_reporting import (
get_line_at,
create_indentation_error_message,
create_lexical_error_message,
create_parsing_failure_message,
)
import concat.execute
import concat.lex
import concat.parser_combinators
Expand All @@ -11,7 +16,7 @@
import json
import os.path
import sys
from typing import Callable, IO, AnyStr
from typing import Callable, IO, AnyStr, assert_never


filename = '<stdin>'
Expand Down Expand Up @@ -52,28 +57,39 @@ def func(name: str) -> IO[AnyStr]:
'--tokenize',
action='store_true',
default=False,
help='tokenize input from the given file and print the tokens as a JSON array',
help=(
'tokenize input from the given file and print the tokens as a JSON '
'array'
),
)

# We should pass any unknown args onto the program we're about to run.
# FIXME: There might be a better way to go about this, but I think this is fine
# for now.
args, rest = arg_parser.parse_known_args()
sys.argv = [sys.argv[0], *rest]

def tokenize_printing_errors() -> list[concat.lex.Token]:
    """Tokenize the input file, printing a diagnostic for each error result.

    Successful tokens are collected and returned; indentation and lexical
    errors are reported on stdout as they are encountered.
    """
    results = concat.lex.tokenize(args.file.read())
    collected: list[concat.lex.Token] = []
    for result in results:
        if result.type == 'token':
            collected.append(result.token)
            continue
        if result.type == 'indent-err':
            # The tokenizer's IndentationError fields may be None; fall back
            # to the start of the file.
            where = (result.err.lineno or 1, result.err.offset or 0)
            print('Indentation error:')
            print(
                create_indentation_error_message(
                    args.file, where, result.err.msg
                )
            )
        elif result.type == 'token-err':
            print('Lexical error:')
            print(
                create_lexical_error_message(
                    args.file, result.location, str(result.err)
                )
            )
        else:
            # Exhaustiveness check: fails type checking if a new result
            # variant is added without handling it here.
            assert_never(result)
    return collected

if args.tokenize:
code = args.file.read()
tokens = concat.lex.tokenize(code, should_preserve_comments=True)
json.dump(tokens, sys.stdout, cls=concat.lex.TokenEncoder)
sys.exit()

# interactive mode
if args.file.isatty():
concat.stdlib.repl.repl([], [], args.debug)
else:
def batch_main():
try:
tokens = concat.lex.tokenize(args.file.read())
tokens = tokenize_printing_errors()
concat_ast = parse(tokens)
recovered_parsing_failures = concat_ast.parsing_failures
for failure in recovered_parsing_failures:
Expand Down Expand Up @@ -121,3 +137,26 @@ def func(name: str) -> IO[AnyStr]:
sys.exit(1)
finally:
args.file.close()


def main():
    """Entry point: start a REPL on a terminal, otherwise run in batch mode."""
    # A non-TTY input (file or pipe) is processed as a batch program.
    if not args.file.isatty():
        batch_main()
        return
    # interactive mode
    concat.stdlib.repl.repl([], [], args.debug)


# We should pass any unknown args onto the program we're about to run.
# FIXME: There might be a better way to go about this, but I think this is fine
# for now.
args, rest = arg_parser.parse_known_args()
# Rewrite argv so the executed Concat program sees only its own arguments.
sys.argv = [sys.argv[0], *rest]

# --tokenize mode: dump the token stream as JSON and exit without running
# the program. Handled before main() so it bypasses the REPL/batch dispatch.
if args.tokenize:
    code = args.file.read()
    tokens = concat.lex.tokenize(code, should_preserve_comments=True)
    json.dump(tokens, sys.stdout, cls=concat.lex.TokenEncoder)
    sys.exit()

main()
32 changes: 31 additions & 1 deletion concat/error_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@ def create_parsing_failure_message(
stream: Sequence[concat.lex.Token],
failure: concat.parser_combinators.FailureTree,
) -> str:
location = stream[failure.furthest_index].start
if failure.furthest_index < len(stream):
location = stream[failure.furthest_index].start
elif stream:
location = stream[-1].start
else:
location = (1, 0)
line = get_line_at(file, location)
message = f'Expected {failure.expected} at line {location[0]}, column {location[1] + 1}:\n{line.rstrip()}\n{" " * location[1] + "^"}'
if failure.children:
Expand All @@ -26,3 +31,28 @@ def create_parsing_failure_message(
create_parsing_failure_message(file, stream, f), ' '
)
return message


def create_lexical_error_message(
    file: TextIO, location: concat.astutils.Location, message: str
) -> str:
    """Format a lexical error for display.

    :param file: the source file, used to quote the offending line
    :param location: (1-based line, 0-based column) of the error
    :param message: the tokenizer's own description of the error
    :return: a multi-line message quoting the line, a caret marking the
        column, and the tokenizer's message
    """
    line = get_line_at(file, location)
    # NOTE(review): the original accepted ``message`` but never used it, so
    # the tokenizer's explanation was silently dropped; include it here.
    # Also avoid reusing the outer quote character inside the f-string
    # replacement field, which requires Python >= 3.12 (PEP 701).
    return (
        f'Cannot tokenize file at line {location[0]}, '
        f'column {location[1] + 1}:\n'
        f'{line.rstrip()}\n'
        f'{" " * location[1]}^\n'
        f'{message}\n'
    )


def create_indentation_error_message(
    file: TextIO, location: concat.astutils.Location, message: str
) -> str:
    """Format an indentation error for display.

    :param file: the source file, used to quote the offending line
    :param location: (1-based line, 0-based column) of the error
    :param message: the tokenizer's own description of the error
    :return: a multi-line message quoting the offending line followed by the
        tokenizer's message
    """
    line = get_line_at(file, location)
    # NOTE(review): the original accepted ``message`` but only reassigned it,
    # so the indentation diagnostic (e.g. from IndentationError.msg) never
    # reached the user; include it in the output.
    return (
        f'Malformed indentation at line {location[0]}, '
        f'column {location[1] + 1}:\n'
        f'{line.rstrip()}\n'
        f'{message}\n'
    )
Loading

0 comments on commit 737c840

Please sign in to comment.