Skip to content

Commit

Permalink
Finishing touches to the rewindable parser, with some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nberth committed Oct 5, 2023
1 parent b5ec9a5 commit a2441c8
Show file tree
Hide file tree
Showing 17 changed files with 1,626 additions and 106 deletions.
15 changes: 3 additions & 12 deletions src/lsp/cobol_lsp/lsp_document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -169,18 +169,9 @@ let reparse_and_analyze ?position ({ copybook; rewinder; textdoc; _ } as doc) =
{ doc with artifacts = no_artifacts; rewinder = None; parsed = None }
| Some position, Some rewinder ->
extract_parsed_infos doc @@
Cobol_parser.rewind_and_parse rewinder ~position
begin fun ?new_position pp ->
let contents = Lsp.Text_document.text textdoc in
let contents = match new_position with
| None -> contents
| Some (Lexing.{ pos_cnum; _ } as _pos) ->
EzString.after contents (pos_cnum - 1)
in
(* Pretty.error "contents = %S@." contents; *)
Cobol_preproc.reset_preprocessor ?new_position pp
(String { contents; filename = Lsp.Uri.to_path (uri doc) })
end
Cobol_parser.rewind_and_parse rewinder ~position @@
Cobol_preproc.reset_preprocessor_for_string @@
Lsp.Text_document.text textdoc

(** Creates a record for a document that is not yet parsed or analyzed. *)
let blank ~project ?copybook textdoc =
Expand Down
48 changes: 33 additions & 15 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -519,9 +519,12 @@ module Make (Config: Cobol_config.T) = struct
and ('a, 'm) rewindable_history_event =
{
preproc_position: Lexing.position;
event_stage: ('a, 'm) interim_stage;
event_stage: ('a, 'm) interim_stage_without_tokens;
}

and ('a, 'm) interim_stage_without_tokens =
'm state * 'a Grammar_interpr.env (* Always valid input_needed env. *)

let init_rewindable_parse ps ~make_checkpoint =
{
init = ps;
Expand All @@ -530,17 +533,23 @@ module Make (Config: Cobol_config.T) = struct
}

(** Stores a stage as part of the memorized rewindable history events. *)
let save_history_event ((ps, _, _) as stage) (store: _ rewindable_history) =
let save_interim_stage (ps, _, env) (store: _ rewindable_history) =
let preproc_position = Cobol_preproc.position ps.preproc.pp in
match store with
| store'
when preproc_position.pos_cnum <> preproc_position.pos_bol ->
(* We must only save positions that correspond to beginning of lines;
this should only make us skip recording events at the end of
inputs. *)
store'
| { preproc_position = prev_pos; _ } :: store'
when prev_pos.pos_cnum = preproc_position.pos_cnum &&
prev_pos.pos_fname = preproc_position.pos_fname ->
(* Preprocessor did not advance further since last save: replace event
with new parser state: *)
{ preproc_position; event_stage = stage } :: store'
{ preproc_position; event_stage = (ps, env) } :: store'
| store' ->
{ preproc_position; event_stage = stage } :: store'
{ preproc_position; event_stage = (ps, env) } :: store'

let rewindable_parser_state = function
| { stage = Final (_, ps) | Trans (ps, _, _); _ } -> ps
Expand All @@ -560,7 +569,7 @@ module Make (Config: Cobol_config.T) = struct
| Trans ((ps, _, _) as state) ->
let store, count =
if count = save_stage then store, succ count
else save_history_event state store, 0
else save_interim_stage state store, 0
and stage =
try on_interim_stage state with e -> on_exn ps e
in
Expand All @@ -574,15 +583,23 @@ module Make (Config: Cobol_config.T) = struct
pos
| Indexed { line; char } ->
let ps = rewindable_parser_state rwps in
let lexpos = Cobol_preproc.position ps.preproc.pp in
let newline_cnums = Cobol_preproc.newline_cnums ps.preproc.pp in
let pos_bol =
try List.nth newline_cnums (line - 1)
with Not_found | Invalid_argument _ -> 0
in
Lexing.{ lexpos with pos_bol;
pos_cnum = pos_bol + char;
pos_lnum = line + 1 }
if newline_cnums = []
then raise Not_found (* no complete line was processed yet; just skip *)
else
let lexpos = Cobol_preproc.position ps.preproc.pp in
try
let pos_bol =
try List.nth newline_cnums (line - 1)
with Not_found | Invalid_argument _ -> 0
in
Lexing.{ lexpos with pos_bol;
pos_cnum = pos_bol + char;
pos_lnum = line + 1 }
with Failure _ ->
(* The given line exceeds what was already processed, so we restart
from the current preprocessor position. *)
lexpos

let find_history_event_preceding ~position ({ store; _ } as rwps) =
let lexpos = lexing_postion_of ~position rwps in
Expand All @@ -608,10 +625,11 @@ module Make (Config: Cobol_config.T) = struct
let rwps =
try
let event, store = find_history_event_preceding ~position rwps in
let ps, tokens, env = event.event_stage in
let ps, env = event.event_stage in
let pp = ps.preproc.pp in
let pp = pp_rewind ?new_position:(Some event.preproc_position) pp in
let pp = pp_rewind ~new_position:event.preproc_position pp in
let ps = { ps with preproc = { ps.preproc with pp } } in
let ps, tokens = produce_tokens ps in
{ rwps with stage = Trans (ps, tokens, env); store }
with Not_found -> (* rewinding before first checkpoint *)
let pp = pp_rewind rwps.init.preproc.pp in
Expand Down
13 changes: 7 additions & 6 deletions src/lsp/cobol_parser/text_lexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,16 @@ module Make (Words: module type of Text_keywords) = struct
let silenced_keywords =
StringSet.of_list Words.silenced_keywords

let reserve_words: Cobol_config.words_spec -> unit =
let reserve_words: Cobol_config.words_spec -> DIAGS.Set.t =
let on_token_handle_of kwd descr ~f =
try f @@ handle_of_keyword kwd with
try f @@ handle_of_keyword kwd; DIAGS.Set.none with
| Not_found when StringSet.mem kwd silenced_keywords ->
() (* Ignore silently? Warn? *)
DIAGS.Set.none (* Ignore silently? Warn? *)
| Not_found ->
Pretty.error "@[Unable@ to@ %s@ keyword:@ %s@]@." descr kwd
DIAGS.Set.error "@[Unable@ to@ %s@ keyword:@ %s@]@." descr kwd
in
List.iter begin fun (w, word_spec) -> match word_spec with
List.fold_left begin fun diags (w, word_spec) ->
DIAGS.Set.union diags @@ match word_spec with
| Cobol_config.ReserveWord { preserve_context_sensitivity } ->
on_token_handle_of w "reserve" ~f:begin fun h ->
if preserve_context_sensitivity
Expand All @@ -132,7 +133,7 @@ module Make (Words: module type of Text_keywords) = struct
end
| NotReserved ->
on_token_handle_of w "unreserve" ~f:unreserve_token
end
end DIAGS.Set.none

let enable_tokens tokens =
TokenHandles.iter enable_token tokens
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_parser/text_lexer.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ val show_token_of_handle: token_handle -> string
(* --- *)

val handle_of_token: Grammar_tokens.token -> token_handle
val reserve_words: Cobol_config.words_spec -> unit
val reserve_words: Cobol_config.words_spec -> Cobol_common.Diagnostics.Set.t
val enable_tokens: TokenHandles.t -> unit
val disable_tokens: TokenHandles.t -> unit

Expand Down
3 changes: 1 addition & 2 deletions src/lsp/cobol_parser/text_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -303,12 +303,11 @@ module Make (Config: Cobol_config.T) = struct
let amnesic = Amnesic
let eidetic = Eidetic []
let init memory ~context_sensitive_tokens =
init_text_lexer ~context_sensitive_tokens;
{
expect_picture_string = false;
leftover_tokens = [];
memory;
diags = DIAGS.Set.none;
diags = init_text_lexer ~context_sensitive_tokens;
lexing_options = Text_lexer.default_lexing_options;
}

Expand Down
6 changes: 3 additions & 3 deletions src/lsp/cobol_preproc/preproc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ let srclex_from_channel = make_srclex Lexing.from_channel
let srclex_from_file ~source_format filename : any_srclexer =
srclex_from_string ~source_format ~filename (EzFile.read_file filename)

(** Note: If given, assumes [position] corresponds to the begining of the
input. If absent, restarts from first position. File name is kept from the
previous input. *)
(** Note: If given, assumes [position] corresponds to the beginning of the
input, which {e must} also be at the beginning of a line. If absent,
restarts from first position. File name is kept from the previous input. *)
let srclex_restart make_lexing ?position input (Plx (s, prev_lexbuf)) =
let lexbuf = make_lexing ?with_positions:(Some true) input in
let pos_fname = match position with
Expand Down
7 changes: 7 additions & 0 deletions src/lsp/cobol_preproc/preproc_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,13 @@ let pp_pptokens: pptokens Pretty.printer =
let reset_preprocessor ?new_position pp input =
preprocessor input (`ResetPosition (pp, new_position))

let reset_preprocessor_for_string string ?new_position pp =
let contents = match new_position with
| Some Lexing.{ pos_cnum; _ } -> EzString.after string (pos_cnum - 1)
| None -> string
in (* filename is ignored *)
reset_preprocessor ?new_position pp @@ String { contents; filename = "" }

(* --- *)

let preprocessor ?(options = Preproc_options.default) input =
Expand Down
6 changes: 6 additions & 0 deletions src/lsp/cobol_preproc/preproc_engine.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ val reset_preprocessor
-> preprocessor
-> input
-> preprocessor
val reset_preprocessor_for_string
: string
-> ?new_position:Lexing.position
-> preprocessor
-> preprocessor


(* --- *)

Expand Down
20 changes: 18 additions & 2 deletions test/cobol_parsing/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,28 @@
(modules test_picture_parsing test_combined_relations_parsing)
(libraries alcotest cobol_parser cobol_data))

(library
(name parser_testing)
(modules Parser_testing)
(libraries cobol_parser)
)

(library
(name test_cobol_parser)
(modules cS_tokens decimal_point tokens parser_testing)
(modules cS_tokens decimal_point tokens)
(preprocess
(pps ppx_expect))
(inline_tests
(modes best)) ; add js for testing with nodejs
(libraries cobol_parser)
(libraries parser_testing)
)

(library
(name test_cobol_parser_rewind)
(modules test_appending test_appending_large test_cutnpaste_large)
(preprocess
(pps ppx_expect))
(inline_tests
(modes best)) ; add js for testing with nodejs
(libraries parser_testing testsuite_utils)
)
Loading

0 comments on commit a2441c8

Please sign in to comment.