diff --git a/Makefile b/Makefile index c6329734..afa7695a 100644 --- a/Makefile +++ b/Makefile @@ -162,7 +162,7 @@ test-mypy-tests: clean-no-tests python ./coconut/tests/dest/extras.py # same as test-univ but includes verbose output for better debugging -# regex for getting non-timing lines: ^(?!\s*(Time|Packrat|Loaded|Saving|Adaptive|Errorless|Grammar|Failed|Incremental|Pruned)\s)[^\n]*\n* +# regex for getting non-timing lines: ^(?!'|\s*(Time|Packrat|Loaded|Saving|Adaptive|Errorless|Grammar|Failed|Incremental|Pruned|Compiled)\s)[^\n]*\n* .PHONY: test-verbose test-verbose: export COCONUT_USE_COLOR=TRUE test-verbose: clean diff --git a/coconut/_pyparsing.py b/coconut/_pyparsing.py index 28cef40b..f3101d42 100644 --- a/coconut/_pyparsing.py +++ b/coconut/_pyparsing.py @@ -48,6 +48,7 @@ num_displayed_timing_items, use_cache_file, use_line_by_line_parser, + incremental_use_hybrid, ) from coconut.util import get_clock_time # NOQA from coconut.util import ( @@ -276,7 +277,11 @@ def enableIncremental(*args, **kwargs): if MODERN_PYPARSING and use_left_recursion_if_available: ParserElement.enable_left_recursion() elif SUPPORTS_INCREMENTAL and use_incremental_if_available: - ParserElement.enableIncremental(default_incremental_cache_size, still_reset_cache=not never_clear_incremental_cache) + ParserElement.enableIncremental( + default_incremental_cache_size, + still_reset_cache=not never_clear_incremental_cache, + hybrid_mode=incremental_use_hybrid, + ) elif use_packrat_parser: ParserElement.enablePackrat(packrat_cache_size) diff --git a/coconut/command/command.py b/coconut/command/command.py index 48726cab..f6596be1 100644 --- a/coconut/command/command.py +++ b/coconut/command/command.py @@ -328,21 +328,18 @@ def execute_args(self, args, interact=True, original_args=None): no_tco=args.no_tco, no_wrap=args.no_wrap_types, ) - self.comp.warm_up( - streamline=( - not self.using_jobs - and (args.watch or args.profile) - ), - enable_incremental_mode=( - not self.using_jobs - and args.watch - ), - set_debug_names=( - args.verbose - or args.trace - or args.profile - ), - ) + if not self.using_jobs: + self.comp.warm_up( + streamline=( + args.watch + or args.profile + ), + set_debug_names=( + args.verbose + or args.trace + or args.profile + ), + ) # process mypy args and print timing info (must come after compiler setup) if args.mypy is not None: diff --git a/coconut/compiler/grammar.py b/coconut/compiler/grammar.py index 5e17c24f..7e5bd8e6 100644 --- a/coconut/compiler/grammar.py +++ b/coconut/compiler/grammar.py @@ -38,7 +38,6 @@ Literal, OneOrMore, Optional, - ParserElement, StringEnd, Word, ZeroOrMore, @@ -2907,7 +2906,7 @@ def add_to_grammar_init_time(cls): def set_grammar_names(): """Set names of grammar elements to their variable names.""" for varname, val in vars(Grammar).items(): - if isinstance(val, ParserElement): + if hasattr(val, "setName"): val.setName(varname) diff --git a/coconut/compiler/util.py b/coconut/compiler/util.py index 21a00aed..326f185e 100644 --- a/coconut/compiler/util.py +++ b/coconut/compiler/util.py @@ -136,6 +136,7 @@ all_keywords, always_keep_parse_name_prefix, keep_if_unchanged_parse_name_prefix, + incremental_use_hybrid, ) from coconut.exceptions import ( CoconutException, @@ -558,7 +559,10 @@ def force_reset_packrat_cache(): """Forcibly reset the packrat cache and all packrat stats.""" if ParserElement._incrementalEnabled: ParserElement._incrementalEnabled = False - ParserElement.enableIncremental(incremental_mode_cache_size if in_incremental_mode() else default_incremental_cache_size, still_reset_cache=False) + ParserElement.enableIncremental( + incremental_mode_cache_size if in_incremental_mode() else default_incremental_cache_size, + **ParserElement.getIncrementalInfo(), + ) else: ParserElement._packratEnabled = False ParserElement.enablePackrat(packrat_cache_size) @@ -590,6 +594,7 @@ def parsing_context(inner_parse=None): yield finally: ParserElement._incrementalWithResets = incrementalWithResets + dehybridize_cache() elif ( current_cache_matters and will_clear_cache @@ -607,6 +612,11 @@ def parsing_context(inner_parse=None): if logger.verbose: ParserElement.packrat_cache_stats[0] += old_cache_stats[0] ParserElement.packrat_cache_stats[1] += old_cache_stats[1] + elif not will_clear_cache: + try: + yield + finally: + dehybridize_cache() else: yield @@ -632,6 +642,10 @@ def apply(self, grammar_transformer): def name(self): return get_name(self.grammar) + def setName(self, *args, **kwargs): + """Equivalent to .grammar.setName.""" + return self.grammar.setName(*args, **kwargs) + def prep_grammar(grammar, for_scan, streamline=False, add_unpack=False): """Prepare a grammar item to be used as the root of a parse.""" @@ -795,6 +809,22 @@ def get_target_info_smart(target, mode="lowest"): # PARSING INTROSPECTION: # ----------------------------------------------------------------------------------------------------------------------- +# incremental lookup indices +_lookup_elem = 0 +_lookup_orig = 1 +_lookup_loc = 2 +# _lookup_bools = 3 +# _lookup_context = 4 +assert _lookup_elem == 0, "lookup must start with elem" + +# incremental value indices +_value_exc_loc_or_ret = 0 +# _value_furthest_loc = 1 +_value_useful = -1 +assert _value_exc_loc_or_ret == 0, "value must start with exc loc / ret" +assert _value_useful == -1, "value must end with usefullness obj" + + def maybe_copy_elem(item, name): """Copy the given grammar element if it's referenced somewhere else.""" item_ref_count = sys.getrefcount(item) if CPYTHON and not on_new_python else float("inf") @@ -927,7 +957,7 @@ def execute_clear_strat(clear_cache): if clear_cache == "useless": keys_to_del = [] for lookup, value in cache.items(): - if not value[-1][0]: + if not value[_value_useful][0]: keys_to_del.append(lookup) for del_key in keys_to_del: del cache[del_key] @@ -940,6 +970,24 @@ def execute_clear_strat(clear_cache): return orig_cache_len +def dehybridize_cache(): + """Dehybridize any hybrid entries in the incremental parsing cache.""" + if ( + CPYPARSING + # if we're not in incremental mode, we just throw away the cache + # after every parse, so no need to dehybridize it + and in_incremental_mode() + and ParserElement.getIncrementalInfo()["hybrid_mode"] + ): + cache = get_pyparsing_cache() + new_entries = {} + for lookup, value in cache.items(): + cached_item = value[0] + if cached_item is not True and not isinstance(cached_item, int): + new_entries[lookup] = (True,) + value[1:] + cache.update(new_entries) + + def clear_packrat_cache(force=False): """Clear the packrat cache if applicable. Very performance-sensitive for incremental parsing mode.""" @@ -948,6 +996,8 @@ def clear_packrat_cache(force=False): if DEVELOP: start_time = get_clock_time() orig_cache_len = execute_clear_strat(clear_cache) + # always dehybridize after cache clear so we're dehybridizing the fewest items + dehybridize_cache() if DEVELOP and orig_cache_len is not None: logger.log("Pruned packrat cache from {orig_len} items to {new_len} items using {strat!r} strategy ({time} secs).".format( orig_len=orig_cache_len, @@ -962,10 +1012,10 @@ def get_cache_items_for(original, only_useful=False, exclude_stale=True): """Get items from the pyparsing cache filtered to only be from parsing original.""" cache = get_pyparsing_cache() for lookup, value in cache.items(): - got_orig = lookup[1] + got_orig = lookup[_lookup_orig] internal_assert(lambda: isinstance(got_orig, (bytes, str)), "failed to look up original in pyparsing cache item", (lookup, value)) if ParserElement._incrementalEnabled: - (is_useful,) = value[-1] + (is_useful,) = value[_value_useful] if only_useful and not is_useful: continue if exclude_stale and is_useful >= 2: @@ -979,7 +1029,7 @@ def get_highest_parse_loc(original): Note that there's no point in filtering for successes/failures, since we always see both at the same locations.""" highest_loc = 0 for lookup, _ in get_cache_items_for(original): - loc = lookup[2] + loc = lookup[_lookup_loc] if loc > highest_loc: highest_loc = loc return highest_loc @@ -993,7 +1043,12 @@ def enable_incremental_parsing(): return True ParserElement._incrementalEnabled = False try: - ParserElement.enableIncremental(incremental_mode_cache_size, still_reset_cache=False, cache_successes=incremental_mode_cache_successes) + ParserElement.enableIncremental( + incremental_mode_cache_size, + still_reset_cache=False, + cache_successes=incremental_mode_cache_successes, + hybrid_mode=incremental_mode_cache_successes and incremental_use_hybrid, + ) except ImportError as err: raise CoconutException(str(err)) logger.log("Incremental parsing mode enabled.") @@ -1022,7 +1077,7 @@ def pickle_cache(original, cache_path, include_incremental=True, protocol=pickle break if len(pickleable_cache_items) >= incremental_cache_limit: break - loc = lookup[2] + loc = lookup[_lookup_loc] # only include cache items that aren't at the start or end, since those # are the only ones that parseIncremental will reuse if 0 < loc < len(original) - 1: @@ -1032,6 +1087,7 @@ def pickle_cache(original, cache_path, include_incremental=True, protocol=pickle if validation_dict is not None: validation_dict[identifier] = elem.__class__.__name__ pickleable_lookup = (identifier,) + lookup[1:] + internal_assert(value[_value_exc_loc_or_ret] is True or isinstance(value[_value_exc_loc_or_ret], int), "cache must be dehybridized before pickling", value[_value_exc_loc_or_ret]) pickleable_cache_items.append((pickleable_lookup, value)) all_adaptive_stats = {} @@ -1120,6 +1176,7 @@ def unpickle_cache(cache_path): if maybe_elem is not None: if validation_dict is not None: internal_assert(maybe_elem.__class__.__name__ == validation_dict[identifier], "incremental cache pickle-unpickle inconsistency", (maybe_elem, validation_dict[identifier])) + internal_assert(value[_value_exc_loc_or_ret] is True or isinstance(value[_value_exc_loc_or_ret], int), "attempting to unpickle hybrid cache item", value[_value_exc_loc_or_ret]) lookup = (maybe_elem,) + pickleable_lookup[1:] usefullness = value[-1][0] internal_assert(usefullness, "loaded useless cache item", (lookup, value)) diff --git a/coconut/constants.py b/coconut/constants.py index 94510e2d..30439cd7 100644 --- a/coconut/constants.py +++ b/coconut/constants.py @@ -137,26 +137,29 @@ def get_path_env_var(env_var, default): use_cache_file = True -# 0 for always disabled; float("inf") for always enabled -disable_incremental_for_len = 20480 - adaptive_any_of_env_var = "COCONUT_ADAPTIVE_ANY_OF" use_adaptive_any_of = get_bool_env_var(adaptive_any_of_env_var, True) +use_line_by_line_parser = False + +# 0 for always disabled; float("inf") for always enabled +# (this determines when compiler.util.enable_incremental_parsing() is used) +disable_incremental_for_len = 20480 + # note that _parseIncremental produces much smaller caches use_incremental_if_available = False -use_line_by_line_parser = False - # these only apply to use_incremental_if_available, not compiler.util.enable_incremental_parsing() default_incremental_cache_size = None repeatedly_clear_incremental_cache = True never_clear_incremental_cache = False +# also applies to compiler.util.enable_incremental_parsing() if incremental_mode_cache_successes is True +incremental_use_hybrid = True # this is what gets used in compiler.util.enable_incremental_parsing() incremental_mode_cache_size = None incremental_cache_limit = 2097152 # clear cache when it gets this large -incremental_mode_cache_successes = False +incremental_mode_cache_successes = False # if False, also disables hybrid mode require_cache_clear_frac = 0.3125 # require that at least this much of the cache must be cleared on each cache clear use_left_recursion_if_available = False @@ -1020,7 +1023,7 @@ def get_path_env_var(env_var, default): # min versions are inclusive unpinned_min_versions = { - "cPyparsing": (2, 4, 7, 2, 3, 3), + "cPyparsing": (2, 4, 7, 2, 4, 0), ("pre-commit", "py3"): (3,), ("psutil", "py>=27"): (5,), "jupyter": (1, 0), diff --git a/coconut/root.py b/coconut/root.py index 5060cca3..dbb23838 100644 --- a/coconut/root.py +++ b/coconut/root.py @@ -26,7 +26,7 @@ VERSION = "3.1.0" VERSION_NAME = None # False for release, int >= 1 for develop -DEVELOP = 13 +DEVELOP = 14 ALPHA = False # for pre releases rather than post releases assert DEVELOP is False or DEVELOP >= 1, "DEVELOP must be False or an int >= 1"