Skip to content

Commit

Permalink
Merge pull request #12 from nberth/remove-weak-fold
Browse files Browse the repository at this point in the history
Remove fold on ephemerons
  • Loading branch information
nberth authored Sep 25, 2023
2 parents f482bc9 + 10628f2 commit f4d5b99
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 52 deletions.
18 changes: 16 additions & 2 deletions src/lsp/cobol_common/srcloc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ let pp_file_loc ppf loc =
let raw ?(in_area_a = false) ((s, e): lexloc) : srcloc =
assert Lexing.(s.pos_cnum <= e.pos_cnum); (* ensure proper use *)
let loc = Raw (s, e, in_area_a) in
if Lexing.(s.pos_fname != e.pos_fname) then
if Lexing.(s.pos_fname <> e.pos_fname) then
Pretty.error
"%a@\n>> Internal warning in `%s.raw`: file names mismatch (`%s` != `%s`)\
" pp_srcloc loc __MODULE__ s.pos_fname e.pos_fname;
Expand Down Expand Up @@ -402,13 +402,27 @@ let replacement ~old ~new_ ~in_area_a ~replloc : srcloc =
(* end *)
(* | Cpy {copyloc = {filename; _}; _} -> Some filename *)

(** [may_join_as_single_raw a b] checks whether a lexloc {i l{_ a}} with a a
left-hand lexing position [a] and a lexloc {i l{_ b}} with a right-hand
position [b], may be joined to form a single raw source location
(internal). *)
let may_join_as_single_raw (a: Lexing.position) (b: Lexing.position) =
a.pos_fname = b.pos_fname &&
a.pos_lnum == b.pos_lnum && (* ensure we are stay on a single line *)
a.pos_cnum >= b.pos_cnum - 1

(** [concat l1 l2] concatenates two adjacent source locations [l1] and [l2]. *)
let rec concat: srcloc -> srcloc -> srcloc = fun l1 l2 -> match l1, l2 with
| Raw (s1, e1, in_area_a),
Raw (s2, e2, _)
when e1.pos_fname = s2.pos_fname && e1.pos_cnum = s2.pos_cnum - 1 ->
when may_join_as_single_raw e1 s2 ->
Raw (s1, e2, in_area_a)

| Cat { left; right = Raw (s1, e1, in_area_a) },
Raw (s2, e2, _)
when may_join_as_single_raw e1 s2 ->
Cat { left; right = Raw (s1, e2, in_area_a) }

| Cpy { copied = l1; copyloc = c1 },
Cpy { copied = l2; copyloc = c2 }
when same_copyloc c1 c2 ->
Expand Down
19 changes: 12 additions & 7 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ module Make (Config: Cobol_config.T) = struct

(* --- *)

let do_parse: type m. m state -> _ -> _ * m state =
let do_parse: type m. m state -> _ -> _ -> _ * m state =

let rec next_tokens ({ preproc = { tokzr; _ }; _ } as ps) tokens =
match Tokzr.next_token tokzr tokens with
Expand Down Expand Up @@ -424,16 +424,22 @@ module Make (Config: Cobol_config.T) = struct
Some v, ps

in
fun ps c -> normal ps [] c
fun ps tokens c -> normal ps tokens c

let parse ?verbose ?show ~recovery
(type m) ~(memory: m memory) pp checkpoint
(type m) ~(memory: m memory) pp make_checkpoint
: ('a option, m) output * _ =
let ps = init_parser ?verbose ?show ~recovery
~tokenizer_memory:memory pp in
let res, ps =
(* TODO: catch in a deeper context to grab parsed tokens *)
try do_parse ps checkpoint with e -> None, add_diag (DIAGS.of_exn e) ps
let ps, tokens = produce_tokens ps in
let first_pos = match tokens with
| [] -> Cobol_preproc.position ps.preproc.pp
| t :: _ -> Cobol_common.Srcloc.start_pos ~@t
in
try do_parse ps tokens (make_checkpoint first_pos)
with e -> None, add_diag (DIAGS.of_exn e) ps
in
match memory with
| Amnesic ->
Expand Down Expand Up @@ -480,9 +486,8 @@ let parse
init_source_format = source_format}
in
let module P = Make (val config) in
P.parse ?verbose ?show ~memory ~recovery pp @@
Grammar.Incremental.compilation_group @@
Cobol_preproc.position pp
P.parse ?verbose ?show ~memory ~recovery pp
Grammar.Incremental.compilation_group
end
in
{
Expand Down
99 changes: 56 additions & 43 deletions src/lsp/cobol_preproc/src_overlay.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,26 @@ module Limit = struct
type t = limit

let make_virtual: unit -> t =
let id = ref 0 in
let id = ref (-1) in (* Actually start at -2 (-1 is used in Lexing.dummy) *)
fun () ->
decr id;
Lexing.{ dummy_pos with pos_lnum = !id }
Lexing.{ dummy_pos with pos_cnum = !id }

let is_virtual: t -> bool =
fun l -> l.Lexing.pos_lnum < 0
let is_virtual (l: t) : bool =
l.pos_cnum < (-1)

(* Structural equality is required below, to deal with cases where we
construct the limits of a token several times, for instance at the
beginning of recovery. *)
let equal = (=)
let hash = Hashtbl.hash
let equal (l1: t) (l2: t) =
l1.pos_cnum == l2.pos_cnum &&
l1.pos_fname = l2.pos_fname

let hash (l: limit) = l.pos_cnum

(* l1 = l2, or l1 was emitted before l2 *)
let surely_predates (l1: limit) (l2: limit) = (* or equals *)
if l1.pos_cnum < (-1)
then l2.pos_cnum <= l1.pos_cnum
else l1.pos_cnum > 0 && l1.pos_cnum <= l2.pos_cnum &&
l1.pos_fname = l2.pos_fname
end

(** Weak hashtable where keys are overlay limits (internal) *)
Expand All @@ -59,6 +66,7 @@ type manager =
corresponding right limit. *)
over_right_gap: limit Links.t; (** associates the right limit of a token to
the left limit of the next *)
cache: (srcloc * limit) Links.t;
id: string; (** manager identifier (for logging/debugging) *)
}

Expand All @@ -70,11 +78,13 @@ let new_manager: string -> manager =
{
right_of = Links.create 42;
over_right_gap = Links.create 42;
cache = Links.create 42;
id = Pretty.to_string "%s-%u" manager_name !id;
}

(** Returns left and right (potentially fresh) limits for the given source
location *)
location; for any given file, must be called with the leftmost location
first. *)
let limits: manager -> srcloc -> limit * limit = fun ctx loc ->
let s, e = match Cobol_common.Srcloc.as_unique_lexloc loc with
| Some lexloc -> lexloc
Expand All @@ -88,17 +98,6 @@ let link_limits ctx left right =
(* Replace to deal with overriding of limits during recovery. *)
Links.replace ctx.over_right_gap left right

(** [leftmost_limit_in ~filename ctx] finds the leftmost limit from a location
in [filename] that is registered in [ctx] (internal). Use with moderation
as this is quite inefficient. *)
let leftmost_limit_in ~filename ctx =
Links.fold begin fun l _ -> function
| None when l.Lexing.pos_fname = filename -> Some l
| Some l' when l.Lexing.pos_cnum < l'.pos_cnum &&
l.Lexing.pos_fname = filename -> Some l
| res -> res
end ctx.right_of None

(** Returns a source location that spans between two given limits; returns a
valid pointwise location if the two given limits are physically equal. *)
let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
Expand All @@ -112,19 +111,37 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
in
Cobol_common.Srcloc.raw (pos, pos)
in
let try_limits (s, e) =
let rec jump_right loc e' =
let s' = Links.find ctx.over_right_gap e' in
let loc', e' = Links.find ctx.right_of s' in
check (Cobol_common.Srcloc.concat loc loc') e'
and check loc e' =
if e == e' (* physical comparison *)
then loc
else jump_right loc e'
let try_limits (s, e: limit * limit) =

let rec proceed_from ?loc s = (* start search from left limit [s] *)
check ?loc @@ Links.find ctx.right_of s

and check ?loc (loc', e') =
(* continue search with ([loc] concatenated with) [loc'] if [e'] is not
the sought after right limit; raises {!Not_found} when reaching an
unknown gap or limit *)
let loc = match loc with
| None -> loc'
| Some loc -> Cobol_common.Srcloc.concat loc loc'
in
if e.pos_cnum = e'.pos_cnum && (* compare only the fields that matter *)
e.pos_fname = e'.pos_fname
then (Links.replace ctx.cache s (loc, e); loc)
else try_cache_from ~loc @@ Links.find ctx.over_right_gap e'

and try_cache_from ?loc s =
(* attempt with cache first; proceed via small-step upon miss or
failure *)
match Links.find_opt ctx.cache s with
| Some ((_, e') as hit) when Limit.surely_predates e' e ->
(try check ?loc hit with Not_found -> proceed_from ?loc s)
| Some _ | None ->
proceed_from ?loc s
in

if s == e
then pointwise s
else let loc, e' = Links.find ctx.right_of s in check loc e'
else try_cache_from s
in
let join_failure (s, e) =
let loc = Cobol_common.Srcloc.raw (s, e) in
Expand All @@ -134,18 +151,14 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
(* Printexc.(print_raw_backtrace Stdlib.stderr @@ get_callstack 10); *)
loc
in
(* first attempt assumes proper token limits: `s` is a left and `e` is a right
of tokens *)
try try_limits (s, e) with Not_found ->
(* try assuming `s` is an end of token *)
try try_limits (Links.find ctx.over_right_gap s, e) with Not_found ->
if s.pos_cnum = 0 (* potential special case with left-position forged by the
parser: retry with leftmost limit if it differs from
s *)
then match leftmost_limit_in ~filename:s.pos_fname ctx with
| Some l when l != s -> try_limits (l, e) (* physical equality is enough *)
| Some _ | None -> join_failure (s, e)
else join_failure (s, e)
try (* first attempt assumes proper token limits: `s` is a left and `e` is a
right of tokens *)
try_limits (s, e)
with Not_found ->
try (* otherwise try assuming `s` is an end of token *)
try_limits (Links.find ctx.over_right_gap s, e)
with Not_found ->
join_failure (s, e)

module New_manager (Id: sig val name: string end) : MANAGER = struct
let ctx = new_manager Id.name
Expand Down

0 comments on commit f4d5b99

Please sign in to comment.