Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove fold on ephemerons #12

Merged
merged 4 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions src/lsp/cobol_common/srcloc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ let pp_file_loc ppf loc =
let raw ?(in_area_a = false) ((s, e): lexloc) : srcloc =
assert Lexing.(s.pos_cnum <= e.pos_cnum); (* ensure proper use *)
let loc = Raw (s, e, in_area_a) in
if Lexing.(s.pos_fname != e.pos_fname) then
if Lexing.(s.pos_fname <> e.pos_fname) then
Pretty.error
"%a@\n>> Internal warning in `%s.raw`: file names mismatch (`%s` != `%s`)\
" pp_srcloc loc __MODULE__ s.pos_fname e.pos_fname;
Expand Down Expand Up @@ -402,13 +402,27 @@ let replacement ~old ~new_ ~in_area_a ~replloc : srcloc =
(* end *)
(* | Cpy {copyloc = {filename; _}; _} -> Some filename *)

(** [may_join_as_single_raw a b] checks whether a lexloc {i l{_ a}} with a a
left-hand lexing position [a] and a lexloc {i l{_ b}} with a right-hand
position [b], may be joined to form a single raw source location
(internal). *)
let may_join_as_single_raw (a: Lexing.position) (b: Lexing.position) =
a.pos_fname = b.pos_fname &&
a.pos_lnum == b.pos_lnum && (* ensure we are stay on a single line *)
a.pos_cnum >= b.pos_cnum - 1

(** [concat l1 l2] concatenates two adjacent source locations [l1] and [l2]. *)
let rec concat: srcloc -> srcloc -> srcloc = fun l1 l2 -> match l1, l2 with
| Raw (s1, e1, in_area_a),
Raw (s2, e2, _)
when e1.pos_fname = s2.pos_fname && e1.pos_cnum = s2.pos_cnum - 1 ->
when may_join_as_single_raw e1 s2 ->
Raw (s1, e2, in_area_a)

| Cat { left; right = Raw (s1, e1, in_area_a) },
Raw (s2, e2, _)
when may_join_as_single_raw e1 s2 ->
Cat { left; right = Raw (s1, e2, in_area_a) }

| Cpy { copied = l1; copyloc = c1 },
Cpy { copied = l2; copyloc = c2 }
when same_copyloc c1 c2 ->
Expand Down
19 changes: 12 additions & 7 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ module Make (Config: Cobol_config.T) = struct

(* --- *)

let do_parse: type m. m state -> _ -> _ * m state =
let do_parse: type m. m state -> _ -> _ -> _ * m state =

let rec next_tokens ({ preproc = { tokzr; _ }; _ } as ps) tokens =
match Tokzr.next_token tokzr tokens with
Expand Down Expand Up @@ -424,16 +424,22 @@ module Make (Config: Cobol_config.T) = struct
Some v, ps

in
fun ps c -> normal ps [] c
fun ps tokens c -> normal ps tokens c

let parse ?verbose ?show ~recovery
(type m) ~(memory: m memory) pp checkpoint
(type m) ~(memory: m memory) pp make_checkpoint
: ('a option, m) output * _ =
let ps = init_parser ?verbose ?show ~recovery
~tokenizer_memory:memory pp in
let res, ps =
(* TODO: catch in a deeper context to grab parsed tokens *)
try do_parse ps checkpoint with e -> None, add_diag (DIAGS.of_exn e) ps
let ps, tokens = produce_tokens ps in
let first_pos = match tokens with
| [] -> Cobol_preproc.position ps.preproc.pp
| t :: _ -> Cobol_common.Srcloc.start_pos ~@t
in
try do_parse ps tokens (make_checkpoint first_pos)
with e -> None, add_diag (DIAGS.of_exn e) ps
in
match memory with
| Amnesic ->
Expand Down Expand Up @@ -480,9 +486,8 @@ let parse
init_source_format = source_format}
in
let module P = Make (val config) in
P.parse ?verbose ?show ~memory ~recovery pp @@
Grammar.Incremental.compilation_group @@
Cobol_preproc.position pp
P.parse ?verbose ?show ~memory ~recovery pp
Grammar.Incremental.compilation_group
end
in
{
Expand Down
99 changes: 56 additions & 43 deletions src/lsp/cobol_preproc/src_overlay.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,26 @@ module Limit = struct
type t = limit

let make_virtual: unit -> t =
let id = ref 0 in
let id = ref (-1) in (* Actually start at -2 (-1 is used in Lexing.dummy) *)
fun () ->
decr id;
Lexing.{ dummy_pos with pos_lnum = !id }
Lexing.{ dummy_pos with pos_cnum = !id }

let is_virtual: t -> bool =
fun l -> l.Lexing.pos_lnum < 0
let is_virtual (l: t) : bool =
l.pos_cnum < (-1)

(* Structural equality is required below, to deal with cases where we
construct the limits of a token several times, for instance at the
beginning of recovery. *)
let equal = (=)
let hash = Hashtbl.hash
let equal (l1: t) (l2: t) =
l1.pos_cnum == l2.pos_cnum &&
l1.pos_fname = l2.pos_fname

let hash (l: limit) = l.pos_cnum

(* l1 = l2, or l1 was emitted before l2 *)
let surely_predates (l1: limit) (l2: limit) = (* or equals *)
if l1.pos_cnum < (-1)
then l2.pos_cnum <= l1.pos_cnum
else l1.pos_cnum > 0 && l1.pos_cnum <= l2.pos_cnum &&
l1.pos_fname = l2.pos_fname
end

(** Weak hashtable where keys are overlay limits (internal) *)
Expand All @@ -59,6 +66,7 @@ type manager =
corresponding right limit. *)
over_right_gap: limit Links.t; (** associates the right limit of a token to
the left limit of the next *)
cache: (srcloc * limit) Links.t;
id: string; (** manager identifier (for logging/debugging) *)
}

Expand All @@ -70,11 +78,13 @@ let new_manager: string -> manager =
{
right_of = Links.create 42;
over_right_gap = Links.create 42;
cache = Links.create 42;
id = Pretty.to_string "%s-%u" manager_name !id;
}

(** Returns left and right (potentially fresh) limits for the given source
location *)
location; for any given file, must be called with the leftmost location
first. *)
let limits: manager -> srcloc -> limit * limit = fun ctx loc ->
let s, e = match Cobol_common.Srcloc.as_unique_lexloc loc with
| Some lexloc -> lexloc
Expand All @@ -88,17 +98,6 @@ let link_limits ctx left right =
(* Replace to deal with overriding of limits during recovery. *)
Links.replace ctx.over_right_gap left right

(** [leftmost_limit_in ~filename ctx] finds the leftmost limit from a location
in [filename] that is registered in [ctx] (internal). Use with moderation
as this is quite inefficient. *)
let leftmost_limit_in ~filename ctx =
Links.fold begin fun l _ -> function
| None when l.Lexing.pos_fname = filename -> Some l
| Some l' when l.Lexing.pos_cnum < l'.pos_cnum &&
l.Lexing.pos_fname = filename -> Some l
| res -> res
end ctx.right_of None

(** Returns a source location that spans between two given limits; returns a
valid pointwise location if the two given limits are physically equal. *)
let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
Expand All @@ -112,19 +111,37 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
in
Cobol_common.Srcloc.raw (pos, pos)
in
let try_limits (s, e) =
let rec jump_right loc e' =
let s' = Links.find ctx.over_right_gap e' in
let loc', e' = Links.find ctx.right_of s' in
check (Cobol_common.Srcloc.concat loc loc') e'
and check loc e' =
if e == e' (* physical comparison *)
then loc
else jump_right loc e'
let try_limits (s, e: limit * limit) =

let rec proceed_from ?loc s = (* start search from left limit [s] *)
check ?loc @@ Links.find ctx.right_of s

and check ?loc (loc', e') =
(* continue search with ([loc] concatenated with) [loc'] if [e'] is not
the sought after right limit; raises {!Not_found} when reaching an
unknown gap or limit *)
let loc = match loc with
| None -> loc'
| Some loc -> Cobol_common.Srcloc.concat loc loc'
in
if e.pos_cnum = e'.pos_cnum && (* compare only the fields that matter *)
e.pos_fname = e'.pos_fname
then (Links.replace ctx.cache s (loc, e); loc)
else try_cache_from ~loc @@ Links.find ctx.over_right_gap e'

and try_cache_from ?loc s =
(* attempt with cache first; proceed via small-step upon miss or
failure *)
match Links.find_opt ctx.cache s with
| Some ((_, e') as hit) when Limit.surely_predates e' e ->
(try check ?loc hit with Not_found -> proceed_from ?loc s)
| Some _ | None ->
proceed_from ?loc s
in

if s == e
then pointwise s
else let loc, e' = Links.find ctx.right_of s in check loc e'
else try_cache_from s
in
let join_failure (s, e) =
let loc = Cobol_common.Srcloc.raw (s, e) in
Expand All @@ -134,18 +151,14 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
(* Printexc.(print_raw_backtrace Stdlib.stderr @@ get_callstack 10); *)
loc
in
(* first attempt assumes proper token limits: `s` is a left and `e` is a right
of tokens *)
try try_limits (s, e) with Not_found ->
(* try assuming `s` is an end of token *)
try try_limits (Links.find ctx.over_right_gap s, e) with Not_found ->
if s.pos_cnum = 0 (* potential special case with left-position forged by the
parser: retry with leftmost limit if it differs from
s *)
then match leftmost_limit_in ~filename:s.pos_fname ctx with
| Some l when l != s -> try_limits (l, e) (* physical equality is enough *)
| Some _ | None -> join_failure (s, e)
else join_failure (s, e)
try (* first attempt assumes proper token limits: `s` is a left and `e` is a
right of tokens *)
try_limits (s, e)
with Not_found ->
try (* otherwise try assuming `s` is an end of token *)
try_limits (Links.find ctx.over_right_gap s, e)
with Not_found ->
join_failure (s, e)

module New_manager (Id: sig val name: string end) : MANAGER = struct
let ctx = new_manager Id.name
Expand Down