Skip to content

Commit

Permalink
Rewindable parsing
Browse files Browse the repository at this point in the history
This commit adds the ability to restart parsing only a few tokens
before any document change.  This should greatly improve performance,
especially when editing the middle or end of large files.
  • Loading branch information
nberth committed Sep 27, 2023
1 parent 684a25e commit bb7d073
Show file tree
Hide file tree
Showing 33 changed files with 851 additions and 454 deletions.
2 changes: 2 additions & 0 deletions src/lsp/cobol_common/diagnostics.ml
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ let simple_result r = result r
(* [some_result ?diags r] wraps [r] in [Some] and hands it to [result],
   forwarding the optional [diags] argument as is. *)
let some_result ?diags r = result ?diags (Some r)
(* [no_result ~diags] is a record with no payload ([None]) that only carries
   the given diagnostics. *)
let no_result ~diags = { result = None; diags }
(* [map_result f r] applies [f] to the payload of [r]; the diagnostics of [r]
   are carried over unchanged. *)
let map_result f { result; diags } = { result = f result; diags }
(* [more_result f r] applies [f] to the payload of [r] (which yields a new
   payload-with-diagnostics), and passes the outcome to [with_more_diags]
   together with the diagnostics already held by [r]. *)
let more_result f { result; diags } = with_more_diags ~diags (f result)
(* [forget_result r] drops the payload of [r] and returns its diagnostics. *)
let forget_result { diags; _ } = diags

let hint_result r = Cont.khint (with_diag r)
let note_result r = Cont.knote (with_diag r)
Expand Down
2 changes: 2 additions & 0 deletions src/lsp/cobol_common/diagnostics.mli
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ val simple_result: 'a -> 'a with_diags
(* Wraps a value in [Some], with optional diagnostics attached. *)
val some_result: ?diags:diagnostics -> 'a -> 'a option with_diags
(* Payload-less result ([None]) that only carries the given diagnostics. *)
val no_result: diags:diagnostics -> _ option with_diags
(* Transforms the payload; diagnostics are carried over unchanged. *)
val map_result: ('a -> 'b) -> 'a with_diags -> 'b with_diags
(* Transforms the payload with a function that may itself report diagnostics;
   the outcome is combined with the diagnostics of the input. *)
val more_result: ('a -> 'b with_diags) -> 'a with_diags -> 'b with_diags
(* Drops the payload and returns the diagnostics only. *)
val forget_result: _ with_diags -> diagnostics

val hint_result: 'a -> ?loc:Srcloc.srcloc -> ('b, 'a with_diags) Pretty.func
val note_result: 'a -> ?loc:Srcloc.srcloc -> ('b, 'a with_diags) Pretty.func
Expand Down
3 changes: 1 addition & 2 deletions src/lsp/cobol_lsp/lsp_completion_keywords.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

(* open Cobol_common.Basics *)

let keywords_all = fst @@ List.split Cobol_parser.Text_keywords.keywords
let keywords_all = fst @@ List.split Cobol_parser.Keywords.keywords


(* TODO: Too many keywords, hard to classify *)
Expand Down Expand Up @@ -326,4 +326,3 @@ let keywords_data = [
let keywords_data = StringSet.elements @@ StringSet.of_list keywords_data
let keywords_proc = StringSet.elements @@ StringSet.of_list (keywords_proc @ keywords_proc_other)
*)

190 changes: 141 additions & 49 deletions src/lsp/cobol_lsp/lsp_document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ module TYPES = struct
project: Lsp_project.t;
textdoc: Lsp.Text_document.t;
copybook: bool;
artifacts: Cobol_parser.parsing_artifacts;
artifacts: Cobol_parser.Outputs.artifacts;
parsed: parsed_data option;
rewinder: rewinder option;
(* Used for caching, when loading a cache file as the file is not reparsed,
then diagnostics are not sent. *)
diags: DIAGS.Set.t;
Expand All @@ -40,19 +41,27 @@ module TYPES = struct
definitions: name_definitions_in_compilation_unit CUMap.t Lazy.t;
references: name_references_in_compilation_unit CUMap.t Lazy.t;
}
and rewinder =
(PTREE.compilation_group option,
Cobol_common.Behaviors.eidetic) Cobol_parser.Outputs.output
Cobol_parser.rewinder

(** Raised by {!retrieve_parsed_data}. *)
exception Unparseable of Lsp.Types.DocumentUri.t
exception Copybook of Lsp.Types.DocumentUri.t

(** Raised by {!load} and {!update}; allows keeping consistent document
contents. *)
exception Internal_error of document * exn

type cached = (** Persistent representation (for caching) *)
{
doc_cache_filename: string; (* relative to project rootdir *)
doc_cache_checksum: Digest.t; (* checked against file on disk on reload *)
doc_cache_langid: string;
doc_cache_version: int;
doc_cache_pplog: Cobol_preproc.log;
doc_cache_tokens: Cobol_parser.tokens_with_locs;
doc_cache_tokens: Cobol_parser.Outputs.tokens_with_locs;
doc_cache_comments: Cobol_preproc.comments;
doc_cache_parsed: (PTREE.compilation_group * CUs.t) option;
doc_cache_diags: DIAGS.Set.serializable;
Expand All @@ -64,17 +73,33 @@ include TYPES
type t = document
(** [uri doc] is the URI of the text document held by [doc]. *)
let uri doc = Lsp.Text_document.documentUri doc.textdoc

(** [parse ~project text] parses the contents of the text document [text] with
    {!Cobol_parser.parse_with_tokens}, using the source format and COBOL
    configuration of [project], and the library path that
    {!Lsp_project.libpath_for} reports for the document's URI.  The input is
    given to the parser as an in-memory string; the filename attached to it is
    derived from the URI. *)
let parse ~project text =
let uri = Lsp.Text_document.documentUri text in
let libpath = Lsp_project.libpath_for ~uri project in
Cobol_parser.parse_with_tokens
(* Recovery policy for the parser: *)
~recovery:(EnableRecovery { silence_benign_recoveries = true })
~source_format:project.source_format
~config:project.cobol_config
~libpath
(String { contents = Lsp.Text_document.text text;
filename = Lsp.Uri.to_path uri })
(* let simple_parse ({ project; textdoc; _ } as doc) = *)
(* Cobol_parser.parse_with_artifacts *)
(* ~options:Cobol_parser.Options.{ *)
(* default with *)
(* recovery = EnableRecovery { silence_benign_recoveries = true }; *)
(* } *)
(* ~config:project.cobol_config @@ *)
(* Cobol_preproc.preprocessor *)
(* { init_libpath = Lsp_project.libpath_for ~uri:(uri doc) project; *)
(* init_config = project.cobol_config; *)
(* init_source_format = project.source_format } @@ *)
(* String { contents = Lsp.Text_document.text textdoc; *)
(* filename = Lsp.Uri.to_path (uri doc) } *)

(** [rewindable_parse doc] parses the current text of [doc] via
    {!Cobol_parser.rewindable_parse_with_artifacts}.

    Parser options are the defaults, except that error recovery is enabled with
    benign recoveries silenced.  The parser is fed by a preprocessor that is
    initialized with the project's library path (as computed for the URI of
    [doc]), COBOL configuration, and source format; its input is the document's
    text as an in-memory string, with a filename derived from the document's
    URI. *)
let rewindable_parse ({ project; textdoc; _ } as doc) =
Cobol_parser.rewindable_parse_with_artifacts
~options:Cobol_parser.Options.{
default with
recovery = EnableRecovery { silence_benign_recoveries = true };
}
~config:project.cobol_config @@
Cobol_preproc.preprocessor
{ init_libpath = Lsp_project.libpath_for ~uri:(uri doc) project;
init_config = project.cobol_config;
init_source_format = project.source_format } @@
(* The text is parsed from memory, not from disk: *)
String { contents = Lsp.Text_document.text textdoc;
filename = Lsp.Uri.to_path (uri doc) }

let lazy_definitions ast cus =
lazy begin cus |>
Expand All @@ -99,28 +124,61 @@ let lazy_references ast cus defs =
end CUMap.empty ast
end

(** Empty parsing artifacts: no tokens, an empty preprocessor log, and no
    comments. *)
let no_parsing_artifacts =
Cobol_parser.{ tokens = lazy [];
pplog = Cobol_preproc.Trace.empty;
comments = [] }

let analyze ({ project; textdoc; copybook; _ } as doc) =
let artifacts, (parsed, diags) =
if copybook then
no_parsing_artifacts, (None, DIAGS.Set.none)
else
let ptree = parse ~project textdoc in
Cobol_parser.parsing_artifacts ptree,
match Cobol_typeck.analyze_compilation_group ptree with
| Ok (cus, ast, diags) ->
let definitions = lazy_definitions ast cus in
let references = lazy_references ast cus definitions in
Some { ast; cus; definitions; references}, diags
| Error diags ->
None, diags (* NB: no token if unrecoverable error (e.g, wrong
indicator) *)
(** Artifacts for a document on which no parsing was performed: there are no
    tokens, no comments, no recorded newline offsets, and the preprocessor log
    is empty. *)
let no_artifacts =
  let open Cobol_parser.Outputs in
  {
    newline_cnums = [];
    comments = [];
    pplog = Cobol_preproc.Trace.empty;
    tokens = lazy [];
  }

(** [gather_parsed_data ptree] runs the type-checker on [ptree] and, when that
    succeeds, packs the resulting compilation units and AST into a
    {!parsed_data} record along with lazily computed definitions and
    references.  The payload is [None] when the type-checker reports an error;
    diagnostics are carried along in both cases. *)
let gather_parsed_data ptree =
  let to_parsed_data = function
    | Error () ->
        None
    | Ok (cus, ast) ->
        (* References are computed against the (lazy) definitions. *)
        let definitions = lazy_definitions ast cus in
        let references = lazy_references ast cus definitions in
        Some { ast; cus; definitions; references }
  in
  DIAGS.map_result to_parsed_data
    (Cobol_typeck.analyze_compilation_group ptree)

(** [extract_parsed_infos doc ptree] analyzes the outcome of a rewindable parse
    and stores the results in [doc].

    Despite its name, [ptree] is a value with diagnostics whose payload is a
    pair made of a parser output and a rewinder.  The parser output is given to
    {!gather_parsed_data}; the parsing artifacts, the rewinder, the parsed data
    (if any), and the accumulated diagnostics are then recorded in the returned
    document. *)
let extract_parsed_infos doc ptree =
let DIAGS.{ result = artifacts, rewinder, parsed; diags} =
(* Presumably [more_result] merges the diagnostics of the parse with those
   reported by the analysis below -- confirm against [DIAGS]. *)
DIAGS.more_result begin fun (ptree, rewinder) ->
gather_parsed_data ptree |>
DIAGS.map_result begin fun parsed ->
Cobol_parser.artifacts ptree, Some rewinder, parsed
end
end ptree
in
{ doc with artifacts; diags; parsed }
{ doc with artifacts; rewinder; diags; parsed }

(** [parse_and_analyze doc] parses the whole text of [doc] from scratch and
    records the results in the returned document.  Copybooks are not parsed:
    for them, artifacts are emptied, and both the rewinder and the parsed data
    are cleared. *)
let parse_and_analyze ({ copybook; _ } as doc) =
  match copybook with
  | true ->                                                         (* skip *)
      { doc with artifacts = no_artifacts; rewinder = None; parsed = None }
  | false ->
      (* Non-rewindable alternative: [extract_parsed_infos doc (simple_parse
         doc)] *)
      extract_parsed_infos doc (rewindable_parse doc)

(** [reparse_and_analyze ?position doc] re-analyzes [doc] after its text has
    changed.

    When both [position] and a rewinder are available (and [doc] is not a
    copybook), the parser is rewound to [position] and restarted on the new
    text.  A full parse via {!parse_and_analyze} is performed when either
    [position] or the rewinder is missing.  Copybooks are never parsed. *)
let reparse_and_analyze ?position ({ copybook; rewinder; textdoc; _ } as doc) =
  match position, rewinder with
  | Some position, Some rewinder when not copybook ->
      let reparsed =
        Cobol_parser.rewind_and_parse rewinder ~position
          (fun ?new_position pp ->
             let full_text = Lsp.Text_document.text textdoc in
             (* Only feed the preprocessor with the text that follows the
                position the parser was actually rewound to, if any. *)
             let contents = match new_position with
               | Some Lexing.{ pos_cnum; _ } ->
                   EzString.after full_text (pos_cnum - 1)
               | None ->
                   full_text
             in
             Cobol_preproc.reset_preprocessor ?new_position pp
               (String { contents; filename = Lsp.Uri.to_path (uri doc) }))
      in
      extract_parsed_infos doc reparsed
  | Some _, Some _ ->                                   (* copybook: skip *)
      { doc with artifacts = no_artifacts; rewinder = None; parsed = None }
  | None, _ | _, None ->
      parse_and_analyze doc

(** Creates a record for a document that is not yet parsed or analyzed. *)
let blank ~project ?copybook textdoc =
Expand All @@ -132,7 +190,8 @@ let blank ~project ?copybook textdoc =
{
project;
textdoc;
artifacts = no_parsing_artifacts;
artifacts = no_artifacts;
rewinder = None;
diags = DIAGS.Set.none;
parsed = None;
copybook;
Expand All @@ -141,17 +200,46 @@ let blank ~project ?copybook textdoc =
let position_encoding = `UTF8

(** [load ~project ?copybook doc] creates a text document from [doc] (using
    {!position_encoding}), wraps it in a blank document record for [project],
    and then parses and analyzes it.

    @raise Internal_error with the blank (unanalyzed) document record and the
    original exception, whenever parsing or analysis raises. *)
let load ~project ?copybook doc =
Lsp.Text_document.make ~position_encoding doc
|> blank ~project ?copybook
|> analyze
let textdoc = Lsp.Text_document.make ~position_encoding doc in
let doc = blank ~project ?copybook textdoc in
try parse_and_analyze doc
with e -> raise @@ Internal_error (doc, e)

(** [first_change_pos doc changes] computes the earliest position that is
    touched by any change in [changes], as a lexing position (with 1-based
    line number) in the file of [doc].

    Change ranges are located using the newline offsets recorded in the
    artifacts of [doc].  A change without range (full text change) maps to
    line 0, offset 0.

    Returns [None] if [doc] holds no newline offsets (e.g., it comes straight
    out of the cache), or if [changes] is empty. *)
let first_change_pos ({ artifacts = { newline_cnums; _ }; _ } as doc) changes =
  (* Offset of the first character of the given 0-based line.  Falls back to 0
     for the first line, and for lines beyond the recorded newlines: the
     resulting offset then precedes the actual change, which is the safe
     direction for a rewind point.  ([List.nth] would raise [Failure] in the
     latter case.) *)
  let bol_of_line line =
    if line <= 0 then 0
    else match List.nth_opt newline_cnums (line - 1) with
      | Some bol -> bol
      | None -> 0
  in
  match newline_cnums with
  | [] ->
      None                        (* straight out of cache: missing info *)
  | _ :: _ ->
      (* NOTE(review): ranges of all changes are interpreted w.r.t. the text
         as it was before any change is applied -- confirm this is acceptable
         for multi-change events. *)
      match
        List.fold_left begin fun pos -> function
          | Lsp.Types.TextDocumentContentChangeEvent.{ range = None; _ } ->
              Some (0, 0, 0)                  (* meaning: full text change *)
          | { range = Some { start = { line; character }; _ }; _ } ->
              let bol = bol_of_line line in
              let cnum = bol + character in
              (* Keep the smallest offset seen so far: parsing must restart
                 before the first change, not the last one. *)
              match pos with
              | Some (_, _, cnum') when cnum' <= cnum -> pos
              | _ -> Some (line + 1, bol, cnum)
        end None changes
      with
      | Some (pos_lnum, pos_bol, pos_cnum) ->
          Some Lexing.{ pos_fname = Lsp.Uri.to_path (uri doc);
                        pos_bol; pos_cnum; pos_lnum }
      | None ->                    (* Humm... can |changes|=0 really happen? *)
          None

(** [update doc changes] applies [changes] to the text of the document, then
    builds a fresh blank document record for the same project and analyzes it
    in full.  Note the [copybook] flag is not forwarded to [blank]. *)
let update { project; textdoc; _ } changes =
(* TODO: Make it not reparse everything when a change occurs. *)
Lsp.Text_document.apply_content_changes textdoc changes
|> blank ~project
|> analyze
(** [update doc changes] applies [changes] to the text of [doc], and then
    re-analyzes the document, restarting from the position computed by
    {!first_change_pos} when there is one.

    @raise Internal_error with the document holding the updated text (but
    stale analysis results) and the original exception, whenever re-parsing or
    analysis raises. *)
let update ({ textdoc; _ } as doc) changes =
  (* The position must be computed before the changes are applied. *)
  let position = first_change_pos doc changes in
  let textdoc = Lsp.Text_document.apply_content_changes textdoc changes in
  let updated = { doc with textdoc } in
  match reparse_and_analyze ?position updated with
  | analyzed -> analyzed
  | exception e -> raise (Internal_error (updated, e))

(** Raises {!Unparseable} in case the document cannot be parsed entirely. *)
(** Raises {!Unparseable} in case the document cannot be parsed entirely, or
{!Copybook} in case the document is not a main program. *)
let retrieve_parsed_data: document -> parsed_data = function
| { parsed = Some p; _ } -> p
| { copybook = false; _ } as doc -> raise @@ Unparseable (uri doc)
Expand All @@ -160,7 +248,7 @@ let retrieve_parsed_data: document -> parsed_data = function
(** Caching utilities *)

let to_cache ({ project; textdoc; parsed; diags;
artifacts = { pplog; tokens; comments }; _ } as doc) =
artifacts = { pplog; tokens; comments; _ }; _ } as doc) =
{
doc_cache_filename = Lsp_project.relative_path_for ~uri:(uri doc) project;
doc_cache_checksum = Digest.string (Lsp.Text_document.text textdoc);
Expand Down Expand Up @@ -199,12 +287,16 @@ let of_cache ~project
let parsed =
Option.map
(fun (ast, cus) ->
let definitions = lazy_definitions ast cus in
let references = lazy_references ast cus definitions in
{ ast; cus; definitions; references})
let definitions = lazy_definitions ast cus in
let references = lazy_references ast cus definitions in
{ ast; cus; definitions; references })
parsed
in
{ doc with artifacts = { pplog; tokens = lazy tokens; comments };
{ doc with artifacts = { pplog; tokens = lazy tokens; comments;
(* We leave the following out of the cache: only
used upon document update, which should only
happen after a full parse in each session. *)
newline_cnums = [] };
diags = DIAGS.Set.of_serializable diags;
parsed }

Expand Down
6 changes: 3 additions & 3 deletions src/lsp/cobol_lsp/lsp_notif.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ let on_notification state notif =
| Initialized config, Initialized ->
Running (Lsp_server.init ~config)
| Running registry, TextDocumentDidOpen params ->
Running (Lsp_server.add params registry)
Running (Lsp_server.did_open params registry)
| Running registry, TextDocumentDidChange params ->
Running (Lsp_server.update params registry)
Running (Lsp_server.did_change params registry)
| Running registry, TextDocumentDidClose params ->
Running (Lsp_server.remove params registry)
Running (Lsp_server.did_close params registry)
| Running _, Exit ->
Exit (Error "Received premature 'exit' notification")
| _ ->
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_lsp/lsp_request.ml
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ let handle_semtoks_full,
handle_semtoks_range =
let handle registry ?range (doc: TextDocumentIdentifier.t) =
try_with_document_data registry doc
~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments };
~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments; _ };
_ } Lsp_document.{ ast; _ } ->
let data =
Lsp_semtoks.data ~filename:(Lsp.Uri.to_path doc.uri) ~range
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_lsp/lsp_semtoks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

open Cobol_common (* Srcloc, Visitor *)
open Cobol_common.Srcloc.INFIX
open Cobol_parser.Grammar_tokens
open Cobol_parser.Tokens

module TOKTYP = struct
type t = { index: int; name: string }
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_lsp/lsp_semtoks.mli
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ val token_modifiers: string list
val data
: filename: string
-> range: Lsp.Types.Range.t option
-> tokens: Cobol_parser.tokens_with_locs
-> tokens: Cobol_parser.Outputs.tokens_with_locs
-> pplog: Cobol_preproc.log
-> comments: Cobol_preproc.comments
-> ptree: Lsp_imports.PTREE.compilation_group
Expand Down
Loading

0 comments on commit bb7d073

Please sign in to comment.