Skip to content

Commit

Permalink
Add hybrid parsing support
Browse files Browse the repository at this point in the history
  • Loading branch information
evhub committed May 28, 2024
1 parent 33e8671 commit b3c887a
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ test-mypy-tests: clean-no-tests
python ./coconut/tests/dest/extras.py

# same as test-univ but includes verbose output for better debugging
# regex for getting non-timing lines: ^(?!\s*(Time|Packrat|Loaded|Saving|Adaptive|Errorless|Grammar|Failed|Incremental|Pruned)\s)[^\n]*\n*
# regex for getting non-timing lines: ^(?!'|\s*(Time|Packrat|Loaded|Saving|Adaptive|Errorless|Grammar|Failed|Incremental|Pruned|Compiled)\s)[^\n]*\n*
.PHONY: test-verbose
test-verbose: export COCONUT_USE_COLOR=TRUE
test-verbose: clean
Expand Down
7 changes: 6 additions & 1 deletion coconut/_pyparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
num_displayed_timing_items,
use_cache_file,
use_line_by_line_parser,
incremental_use_hybrid,
)
from coconut.util import get_clock_time # NOQA
from coconut.util import (
Expand Down Expand Up @@ -276,7 +277,11 @@ def enableIncremental(*args, **kwargs):
if MODERN_PYPARSING and use_left_recursion_if_available:
ParserElement.enable_left_recursion()
elif SUPPORTS_INCREMENTAL and use_incremental_if_available:
ParserElement.enableIncremental(default_incremental_cache_size, still_reset_cache=not never_clear_incremental_cache)
ParserElement.enableIncremental(
default_incremental_cache_size,
still_reset_cache=not never_clear_incremental_cache,
hybrid_mode=incremental_use_hybrid,
)
elif use_packrat_parser:
ParserElement.enablePackrat(packrat_cache_size)

Expand Down
27 changes: 12 additions & 15 deletions coconut/command/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,21 +328,18 @@ def execute_args(self, args, interact=True, original_args=None):
no_tco=args.no_tco,
no_wrap=args.no_wrap_types,
)
self.comp.warm_up(
streamline=(
not self.using_jobs
and (args.watch or args.profile)
),
enable_incremental_mode=(
not self.using_jobs
and args.watch
),
set_debug_names=(
args.verbose
or args.trace
or args.profile
),
)
if not self.using_jobs:
self.comp.warm_up(
streamline=(
args.watch
or args.profile
),
set_debug_names=(
args.verbose
or args.trace
or args.profile
),
)

# process mypy args and print timing info (must come after compiler setup)
if args.mypy is not None:
Expand Down
3 changes: 1 addition & 2 deletions coconut/compiler/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
Literal,
OneOrMore,
Optional,
ParserElement,
StringEnd,
Word,
ZeroOrMore,
Expand Down Expand Up @@ -2907,7 +2906,7 @@ def add_to_grammar_init_time(cls):
def set_grammar_names():
"""Set names of grammar elements to their variable names."""
for varname, val in vars(Grammar).items():
if isinstance(val, ParserElement):
if hasattr(val, "setName"):
val.setName(varname)


Expand Down
71 changes: 64 additions & 7 deletions coconut/compiler/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
all_keywords,
always_keep_parse_name_prefix,
keep_if_unchanged_parse_name_prefix,
incremental_use_hybrid,
)
from coconut.exceptions import (
CoconutException,
Expand Down Expand Up @@ -558,7 +559,10 @@ def force_reset_packrat_cache():
"""Forcibly reset the packrat cache and all packrat stats."""
if ParserElement._incrementalEnabled:
ParserElement._incrementalEnabled = False
ParserElement.enableIncremental(incremental_mode_cache_size if in_incremental_mode() else default_incremental_cache_size, still_reset_cache=False)
ParserElement.enableIncremental(
incremental_mode_cache_size if in_incremental_mode() else default_incremental_cache_size,
**ParserElement.getIncrementalInfo(),
)
else:
ParserElement._packratEnabled = False
ParserElement.enablePackrat(packrat_cache_size)
Expand Down Expand Up @@ -590,6 +594,7 @@ def parsing_context(inner_parse=None):
yield
finally:
ParserElement._incrementalWithResets = incrementalWithResets
dehybridize_cache()
elif (
current_cache_matters
and will_clear_cache
Expand All @@ -607,6 +612,11 @@ def parsing_context(inner_parse=None):
if logger.verbose:
ParserElement.packrat_cache_stats[0] += old_cache_stats[0]
ParserElement.packrat_cache_stats[1] += old_cache_stats[1]
elif not will_clear_cache:
try:
yield
finally:
dehybridize_cache()
else:
yield

Expand All @@ -632,6 +642,10 @@ def apply(self, grammar_transformer):
def name(self):
return get_name(self.grammar)

def setName(self, *args, **kwargs):
    """Forward all arguments to the wrapped grammar's setName and return its result."""
    wrapped = self.grammar
    return wrapped.setName(*args, **kwargs)


def prep_grammar(grammar, for_scan, streamline=False, add_unpack=False):
"""Prepare a grammar item to be used as the root of a parse."""
Expand Down Expand Up @@ -795,6 +809,22 @@ def get_target_info_smart(target, mode="lowest"):
# PARSING INTROSPECTION:
# -----------------------------------------------------------------------------------------------------------------------

# positions of the fields inside an incremental cache lookup key
_lookup_elem, _lookup_orig, _lookup_loc = 0, 1, 2
# further positions, kept for reference only:
# _lookup_bools = 3
# _lookup_context = 4
assert _lookup_elem == 0, "lookup must start with elem"

# positions of the fields inside an incremental cache value
_value_exc_loc_or_ret, _value_useful = 0, -1
# further positions, kept for reference only:
# _value_furthest_loc = 1
assert _value_exc_loc_or_ret == 0, "value must start with exc loc / ret"
assert _value_useful == -1, "value must end with usefullness obj"


def maybe_copy_elem(item, name):
"""Copy the given grammar element if it's referenced somewhere else."""
item_ref_count = sys.getrefcount(item) if CPYTHON and not on_new_python else float("inf")
Expand Down Expand Up @@ -927,7 +957,7 @@ def execute_clear_strat(clear_cache):
if clear_cache == "useless":
keys_to_del = []
for lookup, value in cache.items():
if not value[-1][0]:
if not value[_value_useful][0]:
keys_to_del.append(lookup)
for del_key in keys_to_del:
del cache[del_key]
Expand All @@ -940,6 +970,24 @@ def execute_clear_strat(clear_cache):
return orig_cache_len


def dehybridize_cache():
    """Turn every hybrid entry of the incremental parsing cache into a plain one.

    Only acts when running on cPyparsing, in incremental mode, with hybrid
    mode reported as enabled by ParserElement.getIncrementalInfo().
    """
    # outside of incremental mode the cache is just thrown away after
    # every parse, so there is no need to dehybridize it
    if not (
        CPYPARSING
        and in_incremental_mode()
        and ParserElement.getIncrementalInfo()["hybrid_mode"]
    ):
        return

    cache = get_pyparsing_cache()
    # gather the replacements first so the cache is not modified while iterating over it
    replacements = {}
    for key, entry in cache.items():
        head = entry[0]
        # entries whose first slot is True or an int are left as they are
        if head is True or isinstance(head, int):
            continue
        replacements[key] = (True,) + entry[1:]
    cache.update(replacements)


def clear_packrat_cache(force=False):
"""Clear the packrat cache if applicable.
Very performance-sensitive for incremental parsing mode."""
Expand All @@ -948,6 +996,8 @@ def clear_packrat_cache(force=False):
if DEVELOP:
start_time = get_clock_time()
orig_cache_len = execute_clear_strat(clear_cache)
# always dehybridize after cache clear so we're dehybridizing the fewest items
dehybridize_cache()
if DEVELOP and orig_cache_len is not None:
logger.log("Pruned packrat cache from {orig_len} items to {new_len} items using {strat!r} strategy ({time} secs).".format(
orig_len=orig_cache_len,
Expand All @@ -962,10 +1012,10 @@ def get_cache_items_for(original, only_useful=False, exclude_stale=True):
"""Get items from the pyparsing cache filtered to only be from parsing original."""
cache = get_pyparsing_cache()
for lookup, value in cache.items():
got_orig = lookup[1]
got_orig = lookup[_lookup_orig]
internal_assert(lambda: isinstance(got_orig, (bytes, str)), "failed to look up original in pyparsing cache item", (lookup, value))
if ParserElement._incrementalEnabled:
(is_useful,) = value[-1]
(is_useful,) = value[_value_useful]
if only_useful and not is_useful:
continue
if exclude_stale and is_useful >= 2:
Expand All @@ -979,7 +1029,7 @@ def get_highest_parse_loc(original):
Note that there's no point in filtering for successes/failures, since we always see both at the same locations."""
highest_loc = 0
for lookup, _ in get_cache_items_for(original):
loc = lookup[2]
loc = lookup[_lookup_loc]
if loc > highest_loc:
highest_loc = loc
return highest_loc
Expand All @@ -993,7 +1043,12 @@ def enable_incremental_parsing():
return True
ParserElement._incrementalEnabled = False
try:
ParserElement.enableIncremental(incremental_mode_cache_size, still_reset_cache=False, cache_successes=incremental_mode_cache_successes)
ParserElement.enableIncremental(
incremental_mode_cache_size,
still_reset_cache=False,
cache_successes=incremental_mode_cache_successes,
hybrid_mode=incremental_mode_cache_successes and incremental_use_hybrid,
)
except ImportError as err:
raise CoconutException(str(err))
logger.log("Incremental parsing mode enabled.")
Expand Down Expand Up @@ -1022,7 +1077,7 @@ def pickle_cache(original, cache_path, include_incremental=True, protocol=pickle
break
if len(pickleable_cache_items) >= incremental_cache_limit:
break
loc = lookup[2]
loc = lookup[_lookup_loc]
# only include cache items that aren't at the start or end, since those
# are the only ones that parseIncremental will reuse
if 0 < loc < len(original) - 1:
Expand All @@ -1032,6 +1087,7 @@ def pickle_cache(original, cache_path, include_incremental=True, protocol=pickle
if validation_dict is not None:
validation_dict[identifier] = elem.__class__.__name__
pickleable_lookup = (identifier,) + lookup[1:]
internal_assert(value[_value_exc_loc_or_ret] is True or isinstance(value[_value_exc_loc_or_ret], int), "cache must be dehybridized before pickling", value[_value_exc_loc_or_ret])
pickleable_cache_items.append((pickleable_lookup, value))

all_adaptive_stats = {}
Expand Down Expand Up @@ -1120,6 +1176,7 @@ def unpickle_cache(cache_path):
if maybe_elem is not None:
if validation_dict is not None:
internal_assert(maybe_elem.__class__.__name__ == validation_dict[identifier], "incremental cache pickle-unpickle inconsistency", (maybe_elem, validation_dict[identifier]))
internal_assert(value[_value_exc_loc_or_ret] is True or isinstance(value[_value_exc_loc_or_ret], int), "attempting to unpickle hybrid cache item", value[_value_exc_loc_or_ret])
lookup = (maybe_elem,) + pickleable_lookup[1:]
usefullness = value[-1][0]
internal_assert(usefullness, "loaded useless cache item", (lookup, value))
Expand Down
17 changes: 10 additions & 7 deletions coconut/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,26 +137,29 @@ def get_path_env_var(env_var, default):

use_cache_file = True

# 0 for always disabled; float("inf") for always enabled
disable_incremental_for_len = 20480

adaptive_any_of_env_var = "COCONUT_ADAPTIVE_ANY_OF"
use_adaptive_any_of = get_bool_env_var(adaptive_any_of_env_var, True)

use_line_by_line_parser = False

# 0 for always disabled; float("inf") for always enabled
# (this determines when compiler.util.enable_incremental_parsing() is used)
disable_incremental_for_len = 20480

# note that _parseIncremental produces much smaller caches
use_incremental_if_available = False

use_line_by_line_parser = False

# these only apply to use_incremental_if_available, not compiler.util.enable_incremental_parsing()
default_incremental_cache_size = None
repeatedly_clear_incremental_cache = True
never_clear_incremental_cache = False
# also applies to compiler.util.enable_incremental_parsing() if incremental_mode_cache_successes is True
incremental_use_hybrid = True

# this is what gets used in compiler.util.enable_incremental_parsing()
incremental_mode_cache_size = None
incremental_cache_limit = 2097152 # clear cache when it gets this large
incremental_mode_cache_successes = False
incremental_mode_cache_successes = False # if False, also disables hybrid mode
require_cache_clear_frac = 0.3125 # require that at least this much of the cache must be cleared on each cache clear

use_left_recursion_if_available = False
Expand Down Expand Up @@ -1020,7 +1023,7 @@ def get_path_env_var(env_var, default):

# min versions are inclusive
unpinned_min_versions = {
"cPyparsing": (2, 4, 7, 2, 3, 3),
"cPyparsing": (2, 4, 7, 2, 4, 0),
("pre-commit", "py3"): (3,),
("psutil", "py>=27"): (5,),
"jupyter": (1, 0),
Expand Down
2 changes: 1 addition & 1 deletion coconut/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
VERSION = "3.1.0"
VERSION_NAME = None
# False for release, int >= 1 for develop
DEVELOP = 13
DEVELOP = 14
ALPHA = False # for pre releases rather than post releases

assert DEVELOP is False or DEVELOP >= 1, "DEVELOP must be False or an int >= 1"
Expand Down

0 comments on commit b3c887a

Please sign in to comment.