From 6d5540cc451ede4c98eeff240a1e13f33903e658 Mon Sep 17 00:00:00 2001 From: Nicolas Berthier Date: Fri, 22 Sep 2023 09:36:47 +0200 Subject: [PATCH] Rewindable parsing This commit adds the ability to restart parsing only a few tokens before any document change. This should greatly improve performance, especially when editing the middle or end of large files. --- src/lsp/cobol_common/diagnostics.ml | 2 + src/lsp/cobol_common/diagnostics.mli | 2 + src/lsp/cobol_lsp/lsp_completion_keywords.ml | 3 +- src/lsp/cobol_lsp/lsp_document.ml | 190 ++++-- src/lsp/cobol_lsp/lsp_notif.ml | 6 +- src/lsp/cobol_lsp/lsp_request.ml | 2 +- src/lsp/cobol_lsp/lsp_semtoks.ml | 2 +- src/lsp/cobol_lsp/lsp_semtoks.mli | 2 +- src/lsp/cobol_lsp/lsp_server.ml | 42 +- src/lsp/cobol_lsp/lsp_server.mli | 6 +- src/lsp/cobol_parser/cobol_parser.ml | 37 +- src/lsp/cobol_parser/context.ml | 3 + src/lsp/cobol_parser/context.mli | 1 + src/lsp/cobol_parser/parser_engine.ml | 664 +++++++++++-------- src/lsp/cobol_parser/parser_engine.mli | 65 +- src/lsp/cobol_parser/parser_options.ml | 29 +- src/lsp/cobol_parser/parser_outputs.ml | 33 + src/lsp/cobol_parser/text_tokenizer.ml | 2 +- src/lsp/cobol_preproc/preproc.ml | 28 +- src/lsp/cobol_preproc/preproc.mli | 31 + src/lsp/cobol_preproc/preproc_engine.ml | 38 +- src/lsp/cobol_preproc/preproc_engine.mli | 19 +- src/lsp/cobol_preproc/src_lexing.ml | 13 +- src/lsp/cobol_preproc/src_lexing.mli | 1 + src/lsp/cobol_preproc/src_overlay.ml | 4 +- src/lsp/cobol_typeck/cobol_typeck.ml | 14 +- src/lsp/cobol_typeck/cobol_typeck.mli | 6 +- src/lsp/superbol_free_lib/command_pp.ml | 2 +- src/lsp/superbol_free_lib/common_args.ml | 14 +- src/lsp/superbol_free_lib/common_args.mli | 4 +- test/cobol_parsing/parser_testing.ml | 19 +- test/lsp/lsp_testing.ml | 2 +- test/output-tests/gnucobol.ml | 19 +- 33 files changed, 851 insertions(+), 454 deletions(-) create mode 100644 src/lsp/cobol_parser/parser_outputs.ml diff --git a/src/lsp/cobol_common/diagnostics.ml b/src/lsp/cobol_common/diagnostics.ml index 50dc7f212..1b87234bb 100644 --- a/src/lsp/cobol_common/diagnostics.ml +++ b/src/lsp/cobol_common/diagnostics.ml @@ -199,6 +199,8 @@ let simple_result r = result r let some_result ?diags r = result ?diags (Some r) let no_result ~diags = { result = None; diags } let map_result f { result; diags } = { result = f result; diags } +let more_result f { result; diags } = with_more_diags ~diags (f result) +let forget_result { diags; _ } = diags let hint_result r = Cont.khint (with_diag r) let note_result r = Cont.knote (with_diag r) diff --git a/src/lsp/cobol_common/diagnostics.mli b/src/lsp/cobol_common/diagnostics.mli index cd1eac3f7..6ed202ca4 100644 --- a/src/lsp/cobol_common/diagnostics.mli +++ b/src/lsp/cobol_common/diagnostics.mli @@ -108,6 +108,8 @@ val simple_result: 'a -> 'a with_diags val some_result: ?diags:diagnostics -> 'a -> 'a option with_diags val no_result: diags:diagnostics -> _ option with_diags val map_result: ('a -> 'b) -> 'a with_diags -> 'b with_diags +val more_result: ('a -> 'b with_diags) -> 'a with_diags -> 'b with_diags +val forget_result: _ with_diags -> diagnostics val hint_result: 'a -> ?loc:Srcloc.srcloc -> ('b, 'a with_diags) Pretty.func val note_result: 'a -> ?loc:Srcloc.srcloc -> ('b, 'a with_diags) Pretty.func diff --git a/src/lsp/cobol_lsp/lsp_completion_keywords.ml b/src/lsp/cobol_lsp/lsp_completion_keywords.ml index 93744e549..87af7db46 100644 --- a/src/lsp/cobol_lsp/lsp_completion_keywords.ml +++ b/src/lsp/cobol_lsp/lsp_completion_keywords.ml @@ -13,7 +13,7 @@ (* open Cobol_common.Basics *) -let keywords_all = fst @@ List.split Cobol_parser.Text_keywords.keywords +let keywords_all = fst @@ List.split Cobol_parser.Keywords.keywords (* TODO: Too many keywords, hard to classification *) @@ -326,4 +326,3 @@ let keywords_data = [ let keywords_data = StringSet.elements @@ StringSet.of_list keywords_data let keywords_proc = StringSet.elements @@ StringSet.of_list (keywords_proc @ keywords_proc_other) *) - diff --git a/src/lsp/cobol_lsp/lsp_document.ml b/src/lsp/cobol_lsp/lsp_document.ml index 9c93566f4..47ae1fbb0 100644 --- a/src/lsp/cobol_lsp/lsp_document.ml +++ b/src/lsp/cobol_lsp/lsp_document.ml @@ -25,8 +25,9 @@ module TYPES = struct project: Lsp_project.t; textdoc: Lsp.Text_document.t; copybook: bool; - artifacts: Cobol_parser.parsing_artifacts; + artifacts: Cobol_parser.Outputs.artifacts; parsed: parsed_data option; + rewinder: rewinder option; (* Used for caching, when loading a cache file as the file is not reparsed, then diagnostics are not sent. *) diags: DIAGS.Set.t; @@ -40,11 +41,19 @@ module TYPES = struct definitions: name_definitions_in_compilation_unit CUMap.t Lazy.t; references: name_references_in_compilation_unit CUMap.t Lazy.t; } + and rewinder = + (PTREE.compilation_group option, + Cobol_common.Behaviors.eidetic) Cobol_parser.Outputs.output + Cobol_parser.rewinder (** Raised by {!retrieve_parsed_data}. *) exception Unparseable of Lsp.Types.DocumentUri.t exception Copybook of Lsp.Types.DocumentUri.t + (** Raised by {!load} and {!update}; allows keeping consistent document + contents. *) + exception Internal_error of document * exn + type cached = (** Persistent representation (for caching) *) { doc_cache_filename: string; (* relative to project rootdir *) @@ -52,7 +61,7 @@ module TYPES = struct doc_cache_langid: string; doc_cache_version: int; doc_cache_pplog: Cobol_preproc.log; - doc_cache_tokens: Cobol_parser.tokens_with_locs; + doc_cache_tokens: Cobol_parser.Outputs.tokens_with_locs; doc_cache_comments: Cobol_preproc.comments; doc_cache_parsed: (PTREE.compilation_group * CUs.t) option; doc_cache_diags: DIAGS.Set.serializable; @@ -64,17 +73,33 @@ include TYPES type t = document let uri { textdoc; _ } = Lsp.Text_document.documentUri textdoc -let parse ~project text = - let uri = Lsp.Text_document.documentUri text in - let libpath = Lsp_project.libpath_for ~uri project in - Cobol_parser.parse_with_tokens - (* Recovery policy for the parser: *) - ~recovery:(EnableRecovery { silence_benign_recoveries = true }) - ~source_format:project.source_format - ~config:project.cobol_config - ~libpath - (String { contents = Lsp.Text_document.text text; - filename = Lsp.Uri.to_path uri }) +(* let simple_parse ({ project; textdoc; _ } as doc) = *) +(* Cobol_parser.parse_with_artifacts *) +(* ~options:Cobol_parser.Options.{ *) +(* default with *) +(* recovery = EnableRecovery { silence_benign_recoveries = true }; *) +(* } *) +(* ~config:project.cobol_config @@ *) +(* Cobol_preproc.preprocessor *) +(* { init_libpath = Lsp_project.libpath_for ~uri:(uri doc) project; *) +(* init_config = project.cobol_config; *) +(* init_source_format = project.source_format } @@ *) +(* String { contents = Lsp.Text_document.text textdoc; *) +(* filename = Lsp.Uri.to_path (uri doc) } *) + +let rewindable_parse ({ project; textdoc; _ } as doc) = + Cobol_parser.rewindable_parse_with_artifacts + ~options:Cobol_parser.Options.{ + default with + recovery = EnableRecovery { silence_benign_recoveries = true }; + } + ~config:project.cobol_config @@ + Cobol_preproc.preprocessor + { init_libpath = Lsp_project.libpath_for ~uri:(uri doc) project; + init_config = project.cobol_config; + init_source_format = project.source_format } @@ + String { contents = Lsp.Text_document.text textdoc; + filename = Lsp.Uri.to_path (uri doc) } let lazy_definitions ast cus = lazy begin cus |> @@ -99,28 +124,61 @@ let lazy_references ast cus defs = end CUMap.empty ast end -let no_parsing_artifacts = - Cobol_parser.{ tokens = lazy []; - pplog = Cobol_preproc.Trace.empty; - comments = [] } - -let analyze ({ project; textdoc; copybook; _ } as doc) = - let artifacts, (parsed, diags) = - if copybook then - no_parsing_artifacts, (None, DIAGS.Set.none) - else - let ptree = parse ~project textdoc in - Cobol_parser.parsing_artifacts ptree, - match Cobol_typeck.analyze_compilation_group ptree with - | Ok (cus, ast, diags) -> - let definitions = lazy_definitions ast cus in - let references = lazy_references ast cus definitions in - Some { ast; cus; definitions; references}, diags - | Error diags -> - None, diags (* NB: no token if unrecoverable error (e.g, wrong - indicator) *) +let no_artifacts = + Cobol_parser.Outputs.{ tokens = lazy []; + pplog = Cobol_preproc.Trace.empty; + comments = []; + newline_cnums = [] } + +let gather_parsed_data ptree = + Cobol_typeck.analyze_compilation_group ptree |> + DIAGS.map_result begin function + | Ok (cus, ast) -> + let definitions = lazy_definitions ast cus in + let references = lazy_references ast cus definitions in + Some { ast; cus; definitions; references} + | Error () -> + None + end + +let extract_parsed_infos doc ptree = + let DIAGS.{ result = artifacts, rewinder, parsed; diags} = + DIAGS.more_result begin fun (ptree, rewinder) -> + gather_parsed_data ptree |> + DIAGS.map_result begin fun parsed -> + Cobol_parser.artifacts ptree, Some rewinder, parsed + end + end ptree in - { doc with artifacts; diags; parsed } + { doc with artifacts; rewinder; diags; parsed } + +let parse_and_analyze ({ copybook; _ } as doc) = + if copybook then (* skip *) + { doc with artifacts = no_artifacts; rewinder = None; parsed = None } + else + (* extract_parsed_infos doc @@ simple_parse doc *) + extract_parsed_infos doc @@ rewindable_parse doc + +let reparse_and_analyze ?position ({ copybook; rewinder; textdoc; _ } as doc) = + match position, rewinder with + | None, _ | _, None -> + parse_and_analyze doc + | _, Some _ when copybook -> (* skip *) + { doc with artifacts = no_artifacts; rewinder = None; parsed = None } + | Some position, Some rewinder -> + extract_parsed_infos doc @@ + Cobol_parser.rewind_and_parse rewinder ~position + begin fun ?new_position pp -> + let contents = Lsp.Text_document.text textdoc in + let contents = match new_position with + | None -> contents + | Some (Lexing.{ pos_cnum; _ } as _pos) -> + EzString.after contents (pos_cnum - 1) + in + (* Pretty.error "contents = %S@." contents; *) + Cobol_preproc.reset_preprocessor ?new_position pp + (String { contents; filename = Lsp.Uri.to_path (uri doc) }) + end (** Creates a record for a document that is not yet parsed or analyzed. *) let blank ~project ?copybook textdoc = @@ -132,7 +190,8 @@ let blank ~project ?copybook textdoc = { project; textdoc; - artifacts = no_parsing_artifacts; + artifacts = no_artifacts; + rewinder = None; diags = DIAGS.Set.none; parsed = None; copybook; @@ -141,17 +200,46 @@ let blank ~project ?copybook textdoc = let position_encoding = `UTF8 let load ~project ?copybook doc = - Lsp.Text_document.make ~position_encoding doc - |> blank ~project ?copybook - |> analyze + let textdoc = Lsp.Text_document.make ~position_encoding doc in + let doc = blank ~project ?copybook textdoc in + try parse_and_analyze doc + with e -> raise @@ Internal_error (doc, e) + +let first_change_pos ({ artifacts = { newline_cnums; _ }; _ } as doc) changes = + if newline_cnums = [] then None (* straight out of cache: missing info *) + else + match + List.fold_left begin fun pos -> function + | Lsp.Types.TextDocumentContentChangeEvent.{ range = None; _ } -> + Some (0, 0, 0) (* meaning: full text change *) + | { range = Some { start = { line; character }; _ }; _ } -> + let bol = + try List.nth newline_cnums (line - 1) + with Not_found | Invalid_argument _ -> 0 + in + let cnum = bol + character in + match pos with + | Some (_, _, cnum') when cnum' > cnum -> pos + | _ -> Some (line + 1, bol, cnum) + end None changes + with + | Some (pos_lnum, pos_bol, pos_cnum) -> + Some Lexing.{ pos_fname = Lsp.Uri.to_path (uri doc); + pos_bol; pos_cnum; pos_lnum } + | None -> (* Humm... can |changes|=0 really happen? *) + None -let update { project; textdoc; _ } changes = - (* TODO: Make it not reparse everything when a change occurs. *) - Lsp.Text_document.apply_content_changes textdoc changes - |> blank ~project - |> analyze +let update ({ textdoc; _ } as doc) changes = + let position = first_change_pos doc changes in + let doc = + { doc with + textdoc = Lsp.Text_document.apply_content_changes textdoc changes } + in + try reparse_and_analyze ?position doc + with e -> raise @@ Internal_error (doc, e) -(** Raises {!Unparseable} in case the document cannot be parsed entierely. *) +(** Raises {!Unparseable} in case the document cannot be parsed entierely, or + {!Copybook} in case the document is not a main program. *) let retrieve_parsed_data: document -> parsed_data = function | { parsed = Some p; _ } -> p | { copybook = false; _ } as doc -> raise @@ Unparseable (uri doc) @@ -160,7 +248,7 @@ let retrieve_parsed_data: document -> parsed_data = function (** Caching utilities *) let to_cache ({ project; textdoc; parsed; diags; - artifacts = { pplog; tokens; comments }; _ } as doc) = + artifacts = { pplog; tokens; comments; _ }; _ } as doc) = { doc_cache_filename = Lsp_project.relative_path_for ~uri:(uri doc) project; doc_cache_checksum = Digest.string (Lsp.Text_document.text textdoc); @@ -199,12 +287,16 @@ let of_cache ~project let parsed = Option.map (fun (ast, cus) -> - let definitions = lazy_definitions ast cus in - let references = lazy_references ast cus definitions in - { ast; cus; definitions; references}) + let definitions = lazy_definitions ast cus in + let references = lazy_references ast cus definitions in + { ast; cus; definitions; references }) parsed in - { doc with artifacts = { pplog; tokens = lazy tokens; comments }; + { doc with artifacts = { pplog; tokens = lazy tokens; comments; + (* We leave the folloing out of the cache: only + used upon document update, which should only + happen after a full parse in each session. *) + newline_cnums = [] }; diags = DIAGS.Set.of_serializable diags; parsed } diff --git a/src/lsp/cobol_lsp/lsp_notif.ml b/src/lsp/cobol_lsp/lsp_notif.ml index e865ea291..5cfd01ae3 100644 --- a/src/lsp/cobol_lsp/lsp_notif.ml +++ b/src/lsp/cobol_lsp/lsp_notif.ml @@ -24,11 +24,11 @@ let on_notification state notif = | Initialized config, Initialized -> Running (Lsp_server.init ~config) | Running registry, TextDocumentDidOpen params -> - Running (Lsp_server.add params registry) + Running (Lsp_server.did_open params registry) | Running registry, TextDocumentDidChange params -> - Running (Lsp_server.update params registry) + Running (Lsp_server.did_change params registry) | Running registry, TextDocumentDidClose params -> - Running (Lsp_server.remove params registry) + Running (Lsp_server.did_close params registry) | Running _, Exit -> Exit (Error "Received premature 'exit' notification") | _ -> diff --git a/src/lsp/cobol_lsp/lsp_request.ml b/src/lsp/cobol_lsp/lsp_request.ml index 074b95978..c7a8297ea 100644 --- a/src/lsp/cobol_lsp/lsp_request.ml +++ b/src/lsp/cobol_lsp/lsp_request.ml @@ -250,7 +250,7 @@ let handle_semtoks_full, handle_semtoks_range = let handle registry ?range (doc: TextDocumentIdentifier.t) = try_with_document_data registry doc - ~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments }; + ~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments; _ }; _ } Lsp_document.{ ast; _ } -> let data = Lsp_semtoks.data ~filename:(Lsp.Uri.to_path doc.uri) ~range diff --git a/src/lsp/cobol_lsp/lsp_semtoks.ml b/src/lsp/cobol_lsp/lsp_semtoks.ml index 317c5b7d6..3c264f46b 100644 --- a/src/lsp/cobol_lsp/lsp_semtoks.ml +++ b/src/lsp/cobol_lsp/lsp_semtoks.ml @@ -13,7 +13,7 @@ open Cobol_common (* Srcloc, Visitor *) open Cobol_common.Srcloc.INFIX -open Cobol_parser.Grammar_tokens +open Cobol_parser.Tokens module TOKTYP = struct type t = { index: int; name: string } diff --git a/src/lsp/cobol_lsp/lsp_semtoks.mli b/src/lsp/cobol_lsp/lsp_semtoks.mli index e8b35572b..2f719a800 100644 --- a/src/lsp/cobol_lsp/lsp_semtoks.mli +++ b/src/lsp/cobol_lsp/lsp_semtoks.mli @@ -19,7 +19,7 @@ val token_modifiers: string list val data : filename: string -> range: Lsp.Types.Range.t option - -> tokens: Cobol_parser.tokens_with_locs + -> tokens: Cobol_parser.Outputs.tokens_with_locs -> pplog: Cobol_preproc.log -> comments: Cobol_preproc.comments -> ptree: Lsp_imports.PTREE.compilation_group diff --git a/src/lsp/cobol_lsp/lsp_server.ml b/src/lsp/cobol_lsp/lsp_server.ml index 06ab11e8e..37bd846e0 100644 --- a/src/lsp/cobol_lsp/lsp_server.ml +++ b/src/lsp/cobol_lsp/lsp_server.ml @@ -132,8 +132,20 @@ let create_or_retrieve_project ~uri registry = let project = Lsp_project.for_ ~rootdir ~layout in project, add_project project registry -let add (DidOpenTextDocumentParams.{ textDocument = { uri; text; _ }; - _ } as doc) ?copybook registry = +let add (DidOpenTextDocumentParams.{ textDocument = { uri; _ }; _ } as doc) + ?copybook registry = + let project, registry = create_or_retrieve_project ~uri registry in + try + let doc = Lsp_document.load ~project ?copybook doc in + let registry = dispatch_diagnostics doc registry in + add_or_replace_doc doc registry + with Lsp_document.Internal_error (doc, e) -> + Lsp_io.pretty_notification ~log:true ~type_:Error + "Internal error while opening document: %a" Fmt.exn e; + add_or_replace_doc doc registry + +let did_open (DidOpenTextDocumentParams.{ textDocument = { uri; text; _ }; + _ } as doc) ?copybook registry = (* Try first with a lookup for the project in a cache, and then by creating/loading the project. *) let rec aux ~try_cache registry = @@ -152,10 +164,7 @@ let add (DidOpenTextDocumentParams.{ textDocument = { uri; text; _ }; in aux ~try_cache:false registry (* try again without the cache *) | None | Some _ -> - let project, registry = create_or_retrieve_project ~uri registry in - let doc = Lsp_document.load ~project ?copybook doc in - let registry = dispatch_diagnostics doc registry in - add_or_replace_doc doc registry + add doc ?copybook registry in aux ~try_cache:true registry @@ -164,14 +173,19 @@ let find_document (TextDocumentIdentifier.{ uri; _ } as doc) { docs; _ } = try URIMap.find uri docs with Not_found -> raise @@ Document_not_found doc -let update DidChangeTextDocumentParams.{ textDocument = { uri; _ }; - contentChanges; _ } registry = - let doc = find_document TextDocumentIdentifier.{ uri } registry in - let doc = Lsp_document.update doc contentChanges in - let registry = dispatch_diagnostics doc registry in - add_or_replace_doc doc registry - -let remove DidCloseTextDocumentParams.{ textDocument = { uri } } registry = +let did_change DidChangeTextDocumentParams.{ textDocument = { uri; _ }; + contentChanges; _ } registry = + try + let doc = find_document TextDocumentIdentifier.{ uri } registry in + let doc = Lsp_document.update doc contentChanges in + let registry = dispatch_diagnostics doc registry in + add_or_replace_doc doc registry + with Lsp_document.Internal_error (doc, e) -> + Lsp_io.pretty_notification ~log:true ~type_:Error + "Internal error while updating document: %a" Fmt.exn e; + add_or_replace_doc doc registry + +let did_close DidCloseTextDocumentParams.{ textDocument = { uri } } registry = { registry with docs = URIMap.remove uri registry.docs } (** {2 Miscellaneous} *) diff --git a/src/lsp/cobol_lsp/lsp_server.mli b/src/lsp/cobol_lsp/lsp_server.mli index d8f46dc8f..55511d022 100644 --- a/src/lsp/cobol_lsp/lsp_server.mli +++ b/src/lsp/cobol_lsp/lsp_server.mli @@ -63,13 +63,13 @@ val init which case it is not parsed directly as a normal program). When absent, copybook detection is performed via project configuration (see {!Lsp_project.detect_copybook}). *) -val add +val did_open : Lsp.Types.DidOpenTextDocumentParams.t -> ?copybook: bool -> t -> t -val update +val did_change : Lsp.Types.DidChangeTextDocumentParams.t -> t -> t -val remove +val did_close : Lsp.Types.DidCloseTextDocumentParams.t -> t -> t val find_document diff --git a/src/lsp/cobol_parser/cobol_parser.ml b/src/lsp/cobol_parser/cobol_parser.ml index 908e23b9d..660868858 100644 --- a/src/lsp/cobol_parser/cobol_parser.ml +++ b/src/lsp/cobol_parser/cobol_parser.ml @@ -18,33 +18,18 @@ module PTree = PTree module PTree_visitor = PTree_visitor (** Options to tune the parser engine *) -include Parser_options +module Options = Parser_options -type ('a, 'm) parsed_result = ('a, 'm) Parser_engine.parsed_result = - { - parsed_input: Cobol_preproc.input; - parsed_diags: Cobol_common.Diagnostics.Set.t; - parsed_output: ('a, 'm) Parser_options.output; - } +(** Output types for the engine *) +module Outputs = Parser_outputs -type 'm parsed_compilation_group = - (PTree.compilation_group option, 'm) parsed_result - -(** {1 Exported modules} *) -(*TODO: remove these extra modules once the parser provides the proper tokens.*) -module Grammar_contexts = Grammar_contexts -module Grammar_tokens = Grammar_tokens -module Text_keywords = Text_keywords +module Contexts = Grammar_contexts +module Tokens = Grammar_tokens +module Keywords = Text_keywords (** {1 Exported functions} *) -type 'x source_handling = ?source_format:Cobol_config.source_format_spec -> 'x - -let parse_simple: _ source_handling = Parser_engine.parse_simple -let parse_with_tokens: _ source_handling = Parser_engine.parse_with_tokens -let parsing_artifacts = Parser_engine.parsing_artifacts - -(* --- *) +include Parser_engine (** {1 Modules and functions exported for testing purposes} @@ -53,16 +38,20 @@ let parsing_artifacts = Parser_engine.parsing_artifacts module INTERNAL = struct (** {2 COBOL tokens} *) + module Tokens = Grammar_tokens let pp_token = Text_tokenizer.pp_token let pp_tokens = Text_tokenizer.pp_tokens (** {2 COBOL grammar} *) + module Grammar (* : Grammar_sig.S *) = Grammar - (** {2 Parser with dummy source locations, that can be fed directly with a - list of tokens} *) + (** {2 Dummy parser} *) + + (** Parser with dummy source locations, that can be fed directly with a + list of tokens *) module Dummy = struct module Tags: Cobol_ast.Helpers.TAGS = struct let loc = Cobol_common.Srcloc.raw Lexing.(dummy_pos, dummy_pos) diff --git a/src/lsp/cobol_parser/context.ml b/src/lsp/cobol_parser/context.ml index 4da82f4a6..cb6abfd09 100644 --- a/src/lsp/cobol_parser/context.ml +++ b/src/lsp/cobol_parser/context.ml @@ -85,3 +85,6 @@ let top_tokens: stack -> TH.t = function let pop: stack -> stack * TH.t = function | [] -> Pretty.invalid_arg "Unable to pop on an empty context stack" | { diff; _ } :: tl -> tl, diff + +let all_tokens: stack -> TH.t = + List.fold_left (fun th { diff; _ } -> TH.union th diff) TH.empty diff --git a/src/lsp/cobol_parser/context.mli b/src/lsp/cobol_parser/context.mli index 5d6f9c4d9..4341b11c0 100644 --- a/src/lsp/cobol_parser/context.mli +++ b/src/lsp/cobol_parser/context.mli @@ -30,4 +30,5 @@ val top: stack -> context option (** {3 Context-specific operations} *) val top_tokens: stack -> Text_lexer.TokenHandles.t +val all_tokens: stack -> Text_lexer.TokenHandles.t val pop: stack -> stack * Text_lexer.TokenHandles.t diff --git a/src/lsp/cobol_parser/parser_engine.ml b/src/lsp/cobol_parser/parser_engine.ml index 88042c163..6528ab452 100644 --- a/src/lsp/cobol_parser/parser_engine.ml +++ b/src/lsp/cobol_parser/parser_engine.ml @@ -15,11 +15,40 @@ module DIAGS = Cobol_common.Diagnostics open Cobol_common.Types open Cobol_common.Srcloc.INFIX -include Parser_options (* import types for options *) +open Parser_options (* import types for options *) +open Parser_outputs (* import types for outputs *) +(* Main type definitions *) + +type 'x rewinder = + { + rewind_n_parse: preprocessor_rewind -> position: Lexing.position -> + ('x * ('x rewinder)) with_diags; + } +and preprocessor_rewind = + ?new_position:Lexing.position -> (Cobol_preproc.preprocessor as 'r) -> 'r + +type 'm simple_parsing + = ?options:parser_options + -> ?config:Cobol_config.t + -> Cobol_preproc.preprocessor + -> (PTree.compilation_group option, 'm) output DIAGS.with_diags + +type 'm rewindable_parsing + = ?options:parser_options + -> ?config:Cobol_config.t + -> Cobol_preproc.preprocessor + -> (((PTree.compilation_group option, 'm) output as 'x) * + 'x rewinder) DIAGS.with_diags + +(* --- *) + +(** Parser configuration is mostly to deal with reserved tokens and grammar + post-actions. *) module Make (Config: Cobol_config.T) = struct module Tokzr = Text_tokenizer.Make (Config) + module Post = Grammar_post_actions.Make (Config) module Overlay_manager = Grammar_utils.Overlay_manager module Grammar_interpr = Grammar.MenhirInterpreter module Grammar_recovery = @@ -31,7 +60,6 @@ module Make (Config: Cobol_config.T) = struct | _ -> false end) - module Post = Grammar_post_actions.Make (Config) (** State of the parser. @@ -64,8 +92,8 @@ module Make (Config: Cobol_config.T) = struct (** Part of the parser state that changes very rarely, if at all. *) and 'm persist = { - recovery: recovery; tokenizer_memory: 'm memory; + recovery: recovery; verbose: bool; show_if_verbose: [`Tks | `Ctx] list; show: [`Pending] list; @@ -75,13 +103,9 @@ module Make (Config: Cobol_config.T) = struct context-stack will be needed when we want a persistent parser state. Best place for this is probaly in the tokenizer.*) - let init_parser - ?(verbose = false) - ?(show_if_verbose = [`Tks; `Ctx]) - ?(show = [`Pending]) - (type m) ~(tokenizer_memory: m memory) - ~(recovery: recovery) - pp = + let make_parser + (type m) Parser_options.{ verbose; show; recovery } + ?(show_if_verbose = [`Tks; `Ctx]) ~(tokenizer_memory: m memory) pp = let tokzr: m Tokzr.state = let memory: m Tokzr.memory = match tokenizer_memory with | Parser_options.Amnesic -> Tokzr.amnesic @@ -100,8 +124,8 @@ module Make (Config: Cobol_config.T) = struct context_stack = Context.empty_stack; persist = { - recovery; tokenizer_memory; + recovery; verbose; show_if_verbose; show; @@ -145,8 +169,6 @@ module Make (Config: Cobol_config.T) = struct Pretty.error "Tks: %a@." Text_tokenizer.pp_tokens tokens; update_tokzr ps tokzr, tokens - let state_num env = Grammar_interpr.current_state_number env - let update_context_stack ~stack_update ~tokenizer_update ({ preproc; _ } as ps) tokens : Context.t list -> 's * 'a = function @@ -218,287 +240,397 @@ module Make (Config: Cobol_config.T) = struct (* --- *) - let do_parse: type m. m state -> _ -> _ -> _ * m state = + let rec next_token ({ preproc = { tokzr; _ }; _ } as ps) tokens = + match Tokzr.next_token tokzr tokens with + | Some (tokzr, token, tokens) -> + (update_tokzr ps tokzr, token, tokens) + | None -> + let ps, tokens = produce_tokens ps in + next_token ps tokens + + let token_n_srcloc_limits ?prev_limit token = + let s, e = Overlay_manager.limits ~@token in + Option.iter (fun e -> Overlay_manager.link_limits e s) prev_limit; + ~&token, s, e + + let put_token_back ({ preproc; _ } as ps) token tokens = + let tokzr, tokens = Tokzr.put_token_back preproc.tokzr token tokens in + { ps with prev_limit = ps.prev_limit'; + preproc = { ps.preproc with tokzr } }, tokens + + let leaving_context ps prod = + match Context.top ps.preproc.context_stack with + | None -> false (* first filter *) + | Some top_ctx -> + match Grammar_interpr.lhs prod with + | X T _ -> false + | X N nt -> match Grammar_context.nonterminal_context nt with + | Some ctx -> ctx == top_ctx + | _ -> false + + let pop_context ({ preproc = { tokzr; context_stack; _ }; _ } as ps) + tokens = + let context_stack, tokens_set = Context.pop context_stack in + if show `Ctx ps then + Pretty.error "Outgoing: %a@." Context.pp_context tokens_set; + let tokzr, tokens = Tokzr.disable_tokens tokzr tokens tokens_set in + { ps with preproc = { ps.preproc with tokzr; context_stack }}, + tokens + + let push_incoming_contexts ps tokens env = + + let push context_stack ctx = + if show `Ctx ps then + Pretty.error "Incoming: %a@." Context.pp_context ctx; - let rec next_tokens ({ preproc = { tokzr; _ }; _ } as ps) tokens = - match Tokzr.next_token tokzr tokens with - | Some (tokzr, token, tokens) -> - update_tokzr ps tokzr, (token, tokens) - | None -> - let ps, tokens = produce_tokens ps in - next_tokens ps tokens - in + (* Push the new context on top of the stack *) + let context_stack = Context.push ctx context_stack in - let token_n_srcloc_limits ?prev_limit token = - let s, e = Overlay_manager.limits ~@token in - Option.iter (fun e -> Overlay_manager.link_limits e s) prev_limit; - ~&token, s, e + (* ... and retrieve newly reserved tokens *) + context_stack, Context.top_tokens context_stack in - let put_token_back ({ preproc; _ } as ps) token tokens = - let tokzr, tokens = Tokzr.put_token_back preproc.tokzr token tokens in - { ps with prev_limit = ps.prev_limit'; - preproc = { ps.preproc with tokzr } }, tokens + (* Retrieve and enable all incoming contexts *) + update_context_stack ps tokens + (Grammar_context.contexts_for_state_num @@ + Grammar_interpr.current_state_number env) + ~stack_update:push + ~tokenizer_update:Tokzr.enable_tokens + + let pop_outgoing_context ps tokens prod = + if leaving_context ps prod + then pop_context ps tokens + else ps, tokens + + (** Traverses a path (sequence of parser states or productions) that starts + with the state that matches the current context stack, and applies the + induced changes to the context stack. *) + let seesaw_context_stack ps tokens operations = + List.fold_left begin fun (ps, tokens) -> function + | Grammar_recovery.Env e -> push_incoming_contexts ps tokens e + | Grammar_recovery.Prod p -> pop_outgoing_context ps tokens p + end (ps, tokens) operations + + let env_loc env = + match Grammar_interpr.top env with + | None -> None + | Some (Element (_, _, s, e)) -> Some (Overlay_manager.join_limits (s, e)) + + let pending ?(severity = DIAGS.Warn) descr ps env = + if List.mem `Pending ps.preproc.persist.show then + let diag = + DIAGS.One.diag severity "Ignored@ %a@ (implementation@ pending)" + Pretty.text descr ?loc:(env_loc env) + in + add_diag diag ps + else ps + + let post_production ({ preproc = { tokzr; _ }; _ } as ps) + token tokens prod env = + match Post.post_production prod env with + | Post_diagnostic action -> + let ps = match action ~loc:(env_loc env) with + | Ok ((), Some diag) | Error Some diag -> add_diag diag ps + | Ok ((), None) | Error None -> ps + in + ps, token, tokens + | Post_special_names DecimalPointIsComma -> + let tokzr, token, tokens = + Tokzr.decimal_point_is_comma tokzr token tokens in + if show `Tks ps then + Pretty.error "Tks': %a@." Text_tokenizer.pp_tokens tokens; + update_tokzr ps tokzr, token, tokens + | Post_pending descr -> + pending descr ps env, token, tokens + | Post_special_names _ + | NoPost -> + ps, token, tokens + + let on_reduction ps token tokens prod = function + | Grammar_interpr.HandlingError env + | AboutToReduce (env, _) + | Shifting (_, env, _) -> + post_production ps token tokens prod env + | _ -> + ps, token, tokens + + (* Main code for driving the parser with recovery and lexical contexts: *) + + type ('a, 'm) step = + | OnTok of ('a, 'm) new_token_step + | Final of ('a option * 'm state) + and ('a, 'm) new_token_step = + (('m state * Text_tokenizer.token * Text_tokenizer.tokens) * + 'a Grammar_interpr.env) (* Always valid input_needed env. *) + + let rec normal ps tokens = function + | Grammar_interpr.InputNeeded env -> + OnTok (next_token ps tokens, env) + | Shifting (_e1, e2, _) as c -> + let ps, tokens = push_incoming_contexts ps tokens e2 in + normal ps tokens @@ Grammar_interpr.resume c + | Accepted v -> + accept ps v + | AboutToReduce _ (* may only happen upon `check` (or empty language) *) + | Rejected | HandlingError _ -> + assert false (* should never happen *) + + and on_new_token (({ prev_limit; _ } as ps, token, tokens), env) = + let c = Grammar_interpr.input_needed env in + let _t, _, e as tok = token_n_srcloc_limits ?prev_limit token in + let ps = { ps with prev_limit = Some e; prev_limit' = prev_limit } in + check ps token tokens env @@ Grammar_interpr.offer c tok + + and check ps token tokens env = function + | Grammar_interpr.HandlingError env -> + error ps token tokens env + | AboutToReduce (_, prod) when leaving_context ps prod -> + (* Reoffer token *) + let ps, tokens = put_token_back ps token tokens in + let ps, tokens = pop_context ps tokens in + normal ps tokens @@ Grammar_interpr.input_needed env + | AboutToReduce (_, prod) as c -> + (* NB: Here, we assume semantic actions do not raise any exception; + maybe that's a tad too optimistic; if they did we may need to report + that. *) + let c = Grammar_interpr.resume c in + let ps, token, tokens = on_reduction ps token tokens prod c in + check ps token tokens env c + | Shifting (_e1, e2, _) as c -> + let ps, tokens = push_incoming_contexts ps tokens e2 in + check ps token tokens env @@ Grammar_interpr.resume c + | c -> + normal ps tokens c + + and error ps token tokens env = + let report_invalid_syntax = + let loc_limits = Grammar_interpr.positions env in + let loc = Overlay_manager.join_limits loc_limits in + fun severity -> add_diag (DIAGS.One.diag severity ~loc "Invalid@ syntax") in + match ps.preproc.persist.recovery with + | EnableRecovery recovery_options -> + (* The limits of the re-submitted token will be re-constructed in + `token_n_srcloc_limits`, so `prev_limit` needs to be re-adjusted to + the second-to-last right-limit. *) + let ps, tokens = put_token_back ps token tokens in + recover ps tokens (Grammar_recovery.generate env) + ~report_syntax_hints_n_error:(report_syntax_hints_n_error + ~report_invalid_syntax + ~recovery_options) + | DisableRecovery -> + Final (None, report_invalid_syntax Error ps) + + and recover ps tokens candidates ~report_syntax_hints_n_error = + let { prev_limit; _ } as ps, token, tokens = next_token ps tokens in + let _, _, e as tok = token_n_srcloc_limits ?prev_limit token in + let ps = { ps with prev_limit = Some e; prev_limit' = prev_limit } in + match Grammar_recovery.attempt candidates tok with + | `Fail when ~&token <> Grammar_tokens.EOF -> (* ignore one token *) + recover ps tokens candidates ~report_syntax_hints_n_error + | `Fail when Option.is_none candidates.final -> + Final (None, report_syntax_hints_n_error ps []) (* unable to recover *) + | `Fail -> + let v, assumed = Option.get candidates.final in + accept (report_syntax_hints_n_error ps assumed) v + | `Accept (v, assumed) -> + accept (report_syntax_hints_n_error ps assumed) v + | `Ok (c, _, visited, assumed) -> + let ps, tokens = seesaw_context_stack ps tokens visited in + normal (report_syntax_hints_n_error ps assumed) tokens c + + and accept ps v = + Final (Some v, ps) + + let on_exn ps e = + Final (None, add_diag (DIAGS.of_exn e) ps) - let leaving_context ps prod = - match Context.top ps.preproc.context_stack with - | None -> false (* first filter *) - | Some top_ctx -> - match Grammar_interpr.lhs prod with - | X T _ -> false - | X N nt -> match Grammar_context.nonterminal_context nt with - | Some ctx -> ctx == top_ctx - | _ -> false - in + (* --- *) - let pop_context ({ preproc = { tokzr; context_stack; _ }; _ } as ps) - tokens = - let context_stack, tokens_set = Context.pop context_stack in - if show `Ctx ps then - Pretty.error "Outgoing: %a@." Context.pp_context tokens_set; - let tokzr, tokens = Tokzr.disable_tokens tokzr tokens tokens_set in - { ps with preproc = { ps.preproc with tokzr; context_stack }}, - tokens + let init_parse ps ~make_checkpoint = + let ps, tokens = produce_tokens ps in + let first_pos = match tokens with + | [] -> Cobol_preproc.position ps.preproc.pp + | t :: _ -> Cobol_common.Srcloc.start_pos ~@t in + normal ps tokens (make_checkpoint first_pos) - let push_incoming_contexts ps tokens env = - - let push context_stack ctx = - if show `Ctx ps then - Pretty.error "Incoming: %a@." Context.pp_context ctx; + let rec full_parse = function + | Final (res, ps) -> + res, ps + | OnTok (((ps, _, _), _) as state) -> + full_parse @@ try on_new_token state with e -> on_exn ps e - (* Push the new context on top of the stack *) - let context_stack = Context.push ctx context_stack in - - (* ... and retrieve newly reserved tokens *) - context_stack, Context.top_tokens context_stack - in - - (* Retrieve and enable all incoming contexts *) - update_context_stack ps tokens - (Grammar_context.contexts_for_state_num (state_num env)) - ~stack_update:push - ~tokenizer_update:Tokzr.enable_tokens - - and pop_outgoing_context ps tokens prod = - if leaving_context ps prod - then pop_context ps tokens - else ps, tokens - in - - (** Traverses a path (sequence of parser states or productions) that starts - with the state that matches the current context stack, and applies the - induced changes to the context stack. *) - let seesaw_context_stack ps tokens = - List.fold_left begin fun (ps, tokens) -> function - | Grammar_recovery.Env e -> push_incoming_contexts ps tokens e - | Grammar_recovery.Prod p -> pop_outgoing_context ps tokens p - end (ps, tokens) - in + (* --- *) - let env_loc env = - match Grammar_interpr.top env with - | None -> None - | Some (Element (_, _, s, e)) -> Some (Overlay_manager.join_limits (s, e)) - in + type ('a, 'm) rewindable_parsing_state = + { + init: 'm state; + step: ('a, 'm) step; + store: ('a, 'm) rewindable_history; + } + and ('a, 'm) rewindable_history = ('a, 'm) rewindable_history_event list + and ('a, 'm) rewindable_history_event = + { + preproc_position: Lexing.position; + event_step: ('a, 'm) new_token_step; + } - let pending ?(severity = DIAGS.Warn) descr ps env = - if List.mem `Pending ps.preproc.persist.show then - let diag = - DIAGS.One.diag severity "Ignored@ %a@ (implementation@ pending)" - Pretty.text descr ?loc:(env_loc env) - in - add_diag diag ps - else ps - in + let save_history_event + (((ps, _, _), _) as state) (store: _ rewindable_history) = + let preproc_position = Cobol_preproc.position ps.preproc.pp in + match store with + | { preproc_position = prev_pos; _ } :: store' + when prev_pos.pos_cnum = preproc_position.pos_cnum && + prev_pos.pos_fname = preproc_position.pos_fname -> + (* Preprocessor did not advance further since last save: replace event + with new parser state: *) + { preproc_position; event_step = state } :: store' + | store' -> + { preproc_position; event_step = state } :: store' + + let init_rewindable_parse ps ~make_checkpoint = + { + init = ps; + step = init_parse ps ~make_checkpoint; + store = []; + } - let post_production ({ preproc = { tokzr; _ }; _ } as ps) - token tokens prod env = - match Post.post_production prod env with - | Post_diagnostic action -> - let ps = match action ~loc:(env_loc env) with - | Ok ((), Some diag) | Error Some diag -> add_diag diag ps - | Ok ((), None) | Error None -> ps + let rewindable_parser_state = function + | { step = Final (_, ps) | OnTok ((ps, _, _), _); _ } -> ps + + let with_context_sensitive_tokens ~f = function + | { step = Final (_, ps) | OnTok ((ps, _, _), _); _ } -> + f (Context.all_tokens ps.preproc.context_stack) + + let parse_with_trace ?(save_step = 10) rwps = + let rec loop count ({ store; step; _ } as rwps) = match step with + | Final (res, _ps) -> + with_context_sensitive_tokens rwps ~f:Text_lexer.disable_tokens; + res, rwps + | OnTok (((ps, _, _), _) as state) -> + let store, count = + if count = save_step then store, succ count + else save_history_event state store, 0 + and step = + try on_new_token state with e -> on_exn ps e in - ps, token, tokens - | Post_special_names DecimalPointIsComma -> - let tokzr, token, tokens = - Tokzr.decimal_point_is_comma tokzr token tokens in - if show `Tks ps then - Pretty.error "Tks': %a@." Text_tokenizer.pp_tokens tokens; - update_tokzr ps tokzr, token, tokens - | Post_pending descr -> - pending descr ps env, token, tokens - | Post_special_names _ - | NoPost -> - ps, token, tokens + loop count { rwps with store; step } in + with_context_sensitive_tokens rwps ~f:Text_lexer.enable_tokens; + loop 0 rwps - let rec normal ({ prev_limit; _ } as ps) tokens = function - | Grammar_interpr.InputNeeded env as c -> - let ps, (token, tokens) = next_tokens ps tokens in - let _t, _, e as tok = token_n_srcloc_limits ?prev_limit token in - let ps = { ps with prev_limit = Some e; prev_limit' = prev_limit } in - check ps token tokens env (Grammar_interpr.offer c tok) - | Shifting (_e1, e2, _) as c -> - let ps, tokens = push_incoming_contexts ps tokens e2 in - normal ps tokens @@ Grammar_interpr.resume c - | Accepted v -> - accept ps v - | AboutToReduce _ (* may only happen upon `check` (or empty language) *) - | Rejected | HandlingError _ -> - assert false (* should never happen *) - - and on_production ps token tokens prod = function - | Grammar_interpr.HandlingError env - | AboutToReduce (env, _) - | Shifting (_, env, _) -> - post_production ps token tokens prod env - | _ -> - ps, token, tokens - - and check ps token tokens env = function - | Grammar_interpr.HandlingError env -> - error ps token tokens env - | AboutToReduce (_, prod) when leaving_context ps prod -> - (* Reoffer token *) - let ps, tokens = put_token_back ps token tokens in - let ps, tokens = pop_context ps tokens in - normal ps tokens @@ Grammar_interpr.input_needed env - | AboutToReduce (_, prod) as c -> - (* NB: Here, we assume semantic actions do not raise any exception; - maybe that's a tad too optimistic; if they did we may need to - report that. *) - let c = Grammar_interpr.resume c in - let ps, token, tokens = on_production ps token tokens prod c in - check ps token tokens env c - | Shifting (_e1, e2, _) as c -> - let ps, tokens = push_incoming_contexts ps tokens e2 in - check ps token tokens env @@ Grammar_interpr.resume c - | c -> - normal ps tokens c - - and error ps token tokens env = - let report_invalid_syntax = - let loc_limits = Grammar_interpr.positions env in - let loc = Overlay_manager.join_limits loc_limits in - fun severity -> add_diag (DIAGS.One.diag severity ~loc "Invalid@ syntax") - in - match ps.preproc.persist.recovery with - | EnableRecovery recovery_options -> - (* The limits of the re-submitted token will be re-constructed in - `token_n_srcloc_limits`, so `prev_limit` needs to be re-adjusted to - the second-to-last right-limit. *) - let ps, tokens = put_token_back ps token tokens in - recover ps tokens (Grammar_recovery.generate env) - ~report_syntax_hints_n_error:(report_syntax_hints_n_error - ~report_invalid_syntax - ~recovery_options) - | DisableRecovery -> - None, report_invalid_syntax Error ps - - and recover ({ prev_limit; _ } as ps) tokens candidates - ~report_syntax_hints_n_error = - let ps, (token, tokens) = next_tokens ps tokens in - let _, _, e as tok = token_n_srcloc_limits ?prev_limit token in - let ps = { ps with prev_limit = Some e; prev_limit' = prev_limit } in - match Grammar_recovery.attempt candidates tok with - | `Fail when ~&token <> Grammar_tokens.EOF -> (* ignore one token *) - recover ps tokens candidates ~report_syntax_hints_n_error - | `Fail when Option.is_none candidates.final -> - None, report_syntax_hints_n_error ps [] (* unable to recover *) - | `Fail -> - let v, assumed = Option.get candidates.final in - accept (report_syntax_hints_n_error ps assumed) v - | `Accept (v, assumed) -> - accept (report_syntax_hints_n_error ps assumed) v - | `Ok (c, _, visited, assumed) -> - let ps, tokens = seesaw_context_stack ps tokens visited in - normal (report_syntax_hints_n_error ps assumed) tokens c - - and accept ps v = - Some v, ps + (* --- *) - in - fun ps tokens c -> normal ps tokens c - - let parse ?verbose ?show ~recovery - (type m) ~(memory: m memory) pp make_checkpoint - : ('a option, m) output * _ = - let ps = init_parser ?verbose ?show ~recovery - ~tokenizer_memory:memory pp in - let res, ps = - (* TODO: catch in a deeper context to grab parsed tokens *) - let ps, tokens = produce_tokens ps in - let first_pos = match tokens with - | [] -> Cobol_preproc.position ps.preproc.pp - | t :: _ -> Cobol_common.Srcloc.start_pos ~@t - in - try do_parse ps tokens (make_checkpoint first_pos) - with e -> None, add_diag (DIAGS.of_exn e) ps - in - match memory with + let aggregate_output (type m) res (ps: m state) : ('a option, m) output = + match ps.preproc.persist.tokenizer_memory with | Amnesic -> - Only res, all_diags ps + Only res | Eidetic -> - let artifacts = { - tokens = Tokzr.parsed_tokens ps.preproc.tokzr; - pplog = Cobol_preproc.log ps.preproc.pp; - comments = Cobol_preproc.comments ps.preproc.pp; - } in - WithArtifacts (res, artifacts), all_diags ps + let artifacts = + { tokens = Tokzr.parsed_tokens ps.preproc.tokzr; + pplog = Cobol_preproc.log ps.preproc.pp; + comments = Cobol_preproc.comments ps.preproc.pp; + newline_cnums = Cobol_preproc.newline_cnums ps.preproc.pp } in + WithArtifacts (res, artifacts) + + let parse_once + ~options (type m) ~(memory: m memory) ~make_checkpoint pp + : (('a option, m) output) with_diags = + let ps = make_parser options ~tokenizer_memory:memory pp in + let res, ps = full_parse @@ init_parse ~make_checkpoint ps in + DIAGS.with_diags (aggregate_output res ps) (all_diags ps) + + let find_history_event_preceding ~(position: Lexing.position) store = + let rec aux = function + | [] -> + raise Not_found + | { preproc_position; _ } as event :: store + when preproc_position.pos_cnum <= position.pos_cnum && + preproc_position.pos_fname = position.pos_fname -> + event, store + | _ :: store -> + aux store + in + aux store + + let rec rewind_n_parse + : type m. ('a, m) rewindable_parsing_state -> make_checkpoint:_ + -> preprocessor_rewind -> position:Lexing.position + -> ((('a option, m) output as 'x) * 'x rewinder) with_diags = + fun rwps ~make_checkpoint pp_rewind ~position -> + let rwps = + try + let event, store = find_history_event_preceding ~position rwps.store in + let (ps, token, tokens), env = event.event_step in + let pp = ps.preproc.pp in + let pp = pp_rewind ?new_position:(Some event.preproc_position) pp in + let ps = { ps with preproc = { ps.preproc with pp } } in + { rwps with step = OnTok ((ps, token, tokens), env); store } + with Not_found -> (* rewinding before first checkpoint *) + let pp = pp_rewind rwps.init.preproc.pp in + let ps = { rwps.init with preproc = { rwps.init.preproc with pp } } in + init_rewindable_parse ~make_checkpoint ps + in + let res, rwps = parse_with_trace rwps in + let ps = rewindable_parser_state rwps in + let output = aggregate_output res ps in + let rewind_n_parse = rewind_n_parse rwps ~make_checkpoint in + DIAGS.with_diags (output, { rewind_n_parse }) (all_diags ps) + + let rewindable_parse + : options:_ -> memory:'m memory -> make_checkpoint:_ + -> Cobol_preproc.preprocessor + -> ((('a option, 'm) output as 'x) * 'x rewinder) with_diags = + fun ~options ~memory ~make_checkpoint pp -> + let res, rwps = + make_parser options ~tokenizer_memory:memory pp |> + init_rewindable_parse ~make_checkpoint |> + parse_with_trace + in + let ps = rewindable_parser_state rwps in + let output = aggregate_output res ps in + let rewind_n_parse = rewind_n_parse rwps ~make_checkpoint in + DIAGS.with_diags (output, { rewind_n_parse }) (all_diags ps) end -let default_recovery = - EnableRecovery { silence_benign_recoveries = false } +(* --- *) -(* TODO: accept a record instead of many labeled arguments? *) -type 'm parsing_function - = ?source_format:Cobol_config.source_format_spec - -> ?config:Cobol_config.t - -> ?recovery:recovery - -> ?verbose:bool - -> ?show:[`Pending] list - -> libpath:string list - -> Cobol_preproc.input - -> (PTree.compilation_group option, 'm) parsed_result +(* Main exported functions *) let parse (type m) ~(memory: m memory) - ?(source_format = Cobol_config.Auto) + ?(options = Parser_options.default) ?(config = Cobol_config.default) - ?(recovery = default_recovery) - ?verbose - ?show - ~libpath - : Cobol_preproc.input -> (PTree.compilation_group option, m) parsed_result = - let preprocessor = Cobol_preproc.preprocessor ?verbose in - fun input -> - let { result = output, parsed_diags; diags = other_diags } = - Cobol_common.with_stateful_diagnostics input - ~f:begin fun _init_diags input -> - let pp = preprocessor input @@ - `WithLibpath Cobol_preproc.{ init_libpath = libpath; - init_config = config; - init_source_format = source_format} - in - let module P = Make (val config) in - P.parse ?verbose ?show ~memory ~recovery pp - Grammar.Incremental.compilation_group - end - in - { - parsed_input = input; - parsed_output = output; - parsed_diags = DIAGS.Set.union parsed_diags other_diags - } + : Cobol_preproc.preprocessor -> + (PTree.compilation_group option, m) output with_diags = + let module P = Make (val config) in + P.parse_once ~options ~memory + ~make_checkpoint:Grammar.Incremental.compilation_group let parse_simple = parse ~memory:Amnesic -let parse_with_tokens = parse ~memory:Eidetic +let parse_with_artifacts = parse ~memory:Eidetic -let parsing_artifacts - : (_, Cobol_common.Behaviors.eidetic) parsed_result -> _ = function - | { parsed_output = WithArtifacts (_, artifacts); _ } -> artifacts +let rewindable_parse + (type m) + ~(memory: m memory) + ?(options = Parser_options.default) + ?(config = Cobol_config.default) + : Cobol_preproc.preprocessor -> + (((PTree.compilation_group option, m) output as 'x) * 'x rewinder) + with_diags = + let module P = Make (val config) in + P.rewindable_parse ~options ~memory + ~make_checkpoint:Grammar.Incremental.compilation_group + +let rewindable_parse_simple = rewindable_parse ~memory:Amnesic +let rewindable_parse_with_artifacts = rewindable_parse ~memory:Eidetic + +let rewind_and_parse { rewind_n_parse } rewind_preproc ~position = + rewind_n_parse rewind_preproc ~position + +let artifacts + : (_, Cobol_common.Behaviors.eidetic) output -> _ = function + | WithArtifacts (_, artifacts) -> artifacts diff --git a/src/lsp/cobol_parser/parser_engine.mli b/src/lsp/cobol_parser/parser_engine.mli index 0054a1ccf..60cdcedb8 100644 --- a/src/lsp/cobol_parser/parser_engine.mli +++ b/src/lsp/cobol_parser/parser_engine.mli @@ -11,21 +11,54 @@ (* *) (**************************************************************************) -include module type of Parser_options +open Parser_options +open Parser_outputs -type 'm parsing_function - = ?source_format:Cobol_config.source_format_spec +(** {1 Basic (one-shot) parsing} *) + +type 'm simple_parsing + = ?options:Parser_options.parser_options + -> ?config:Cobol_config.t + -> Cobol_preproc.preprocessor + -> (PTree.compilation_group option, 'm) output + Cobol_common.Diagnostics.with_diags + +val parse + : memory: 'm memory -> 'm simple_parsing +val parse_simple + : Cobol_common.Behaviors.amnesic simple_parsing +val parse_with_artifacts + : Cobol_common.Behaviors.eidetic simple_parsing + +(** {1 Rewindable parsing} *) + +type 'm rewindable_parsing + = ?options:parser_options -> ?config:Cobol_config.t - -> ?recovery:recovery - -> ?verbose:bool - -> ?show:[`Pending] list - -> libpath:string list - -> Cobol_preproc.input - -> (PTree.compilation_group option, 'm) parsed_result - -val parse: memory: 'm Parser_options.memory -> 'm parsing_function -val parse_simple: Cobol_common.Behaviors.amnesic parsing_function -val parse_with_tokens: Cobol_common.Behaviors.eidetic parsing_function - -val parsing_artifacts - : (_, Cobol_common.Behaviors.eidetic) parsed_result -> parsing_artifacts + -> Cobol_preproc.preprocessor + -> (((PTree.compilation_group option, 'm) output as 'x) * + 'x rewinder) + Cobol_common.Diagnostics.with_diags +and 'x rewinder +and preprocessor_rewind = + ?new_position:Lexing.position -> (Cobol_preproc.preprocessor as 'r) -> 'r + +val rewindable_parse + : memory:'m memory + -> 'm rewindable_parsing +val rewindable_parse_simple + : Cobol_common.Behaviors.amnesic rewindable_parsing +val rewindable_parse_with_artifacts + : Cobol_common.Behaviors.eidetic rewindable_parsing + +val rewind_and_parse + : 'x rewinder + -> preprocessor_rewind + -> position: Lexing.position + -> (((PTree.compilation_group option, 'm) output as 'x) * 'x rewinder) + Cobol_common.Diagnostics.with_diags + +(** {1 Accessing artifacts} *) + +val artifacts + : (_, Cobol_common.Behaviors.eidetic) output -> artifacts diff --git a/src/lsp/cobol_parser/parser_options.ml b/src/lsp/cobol_parser/parser_options.ml index dc237d101..44a659b88 100644 --- a/src/lsp/cobol_parser/parser_options.ml +++ b/src/lsp/cobol_parser/parser_options.ml @@ -14,8 +14,6 @@ (** Gathers some types used to define options for the parser engine. *) (* We are only defining types here so an MLI would only be redundant. *) -open Cobol_common.Srcloc.TYPES - (** Switch for the recovery mechanism *) type recovery = | DisableRecovery @@ -26,26 +24,23 @@ and recovery_options = missing tokens (e.g, periods). *) } -type 'a memory = +type 'm memory = | Amnesic: Cobol_common.Behaviors.amnesic memory | Eidetic: Cobol_common.Behaviors.eidetic memory -type tokens_with_locs = Grammar_tokens.token with_loc list -type parsing_artifacts = +type parser_options = { - tokens: tokens_with_locs Lazy.t; - pplog: Cobol_preproc.log; - comments: Cobol_preproc.comments + verbose: bool; + show: [`Pending] list; + recovery: recovery; } -type ('a, 'm) output = - | Only: 'a -> - ('a, Cobol_common.Behaviors.amnesic) output - | WithArtifacts: 'a * parsing_artifacts -> - ('a, Cobol_common.Behaviors.eidetic) output -type ('a, 'm) parsed_result = +let default_recovery = + EnableRecovery { silence_benign_recoveries = false } + +let default = { - parsed_input: Cobol_preproc.input; - parsed_diags: Cobol_common.Diagnostics.Set.t; - parsed_output: ('a, 'm) output; + verbose = false; + show = [`Pending]; + recovery = default_recovery; } diff --git a/src/lsp/cobol_parser/parser_outputs.ml b/src/lsp/cobol_parser/parser_outputs.ml new file mode 100644 index 000000000..fc347f9f5 --- /dev/null +++ b/src/lsp/cobol_parser/parser_outputs.ml @@ -0,0 +1,33 @@ +(**************************************************************************) +(* *) +(* SuperBOL OSS Studio *) +(* *) +(* Copyright (c) 2022-2023 OCamlPro SAS *) +(* *) +(* All rights reserved. *) +(* This source code is licensed under the GNU Affero General Public *) +(* License version 3 found in the LICENSE.md file in the root directory *) +(* of this source tree. *) +(* *) +(**************************************************************************) + +(** Gathers some types used to define outputs for the parser engine. *) +(* We are only defining types here so an MLI would only be redundant. *) + +open Cobol_common.Srcloc.TYPES + +type tokens_with_locs = Grammar_tokens.token with_loc list + +type artifacts = + { + tokens: tokens_with_locs Lazy.t; + pplog: Cobol_preproc.log; + comments: Cobol_preproc.comments; + newline_cnums: int list; + } + +type ('a, 'm) output = + | Only: 'a -> ('a, Cobol_common.Behaviors.amnesic) output + | WithArtifacts: 'a * artifacts -> ('a, Cobol_common.Behaviors.eidetic) output + +type 'm parsed_compilation_group = (PTree.compilation_group option, 'm) output diff --git a/src/lsp/cobol_parser/text_tokenizer.ml b/src/lsp/cobol_parser/text_tokenizer.ml index 06dd70ee9..0af619ff7 100644 --- a/src/lsp/cobol_parser/text_tokenizer.ml +++ b/src/lsp/cobol_parser/text_tokenizer.ml @@ -394,7 +394,7 @@ module Make (Config: Cobol_config.T) = struct | Eidetic [] -> Fmt.invalid_arg "put_token_back: unexpected memory state" | Eidetic (_ :: toks) -> { s with memory = Eidetic toks } - let next_token (type m) (s: m state) = + let next_token (s: _ state) = let rec aux = function | { payload = INTERVENING_ ','; _ } :: tokens -> aux tokens diff --git a/src/lsp/cobol_preproc/preproc.ml b/src/lsp/cobol_preproc/preproc.ml index 584c18c0d..7c72bbf06 100644 --- a/src/lsp/cobol_preproc/preproc.ml +++ b/src/lsp/cobol_preproc/preproc.ml @@ -29,12 +29,14 @@ include Preproc_trace (* include log events *) type 'k srclexer = 'k Src_lexing.state * Lexing.lexbuf and any_srclexer = | Plx: 'k srclexer -> any_srclexer [@@unboxed] -let srclex_lexbuf (Plx (_, lexbuf)) = lexbuf -let srclex_pos pl = (srclex_lexbuf pl).Lexing.lex_curr_p +let srclex_pos (Plx (_, lexbuf)) = + lexbuf.Lexing.lex_curr_p let srclex_diags (Plx (pl, _)) = Src_lexing.diagnostics pl let srclex_comments (Plx (pl, _)) = Src_lexing.comments pl +let srclex_newline_cnums (Plx (pl, _)) = + Src_lexing.newline_cnums pl let srclex_source_format (Plx (pl, _)) = Src_lexing.(source_format_spec @@ source_format pl) @@ -99,6 +101,27 @@ let srclex_from_channel = make_srclex Lexing.from_channel let srclex_from_file ~source_format filename : any_srclexer = srclex_from_string ~source_format ~filename (EzFile.read_file filename) +(** Note: If given, assumes [position] corresponds to the begining of the + input. If absent, restarts from first position. File name is kept from the + previous input. *) +let srclex_restart make_lexing ?position input (Plx (s, prev_lexbuf)) = + let lexbuf = make_lexing ?with_positions:(Some true) input in + let pos_fname = match position with + | Some p -> + Lexing.set_position lexbuf p; + p.Lexing.pos_fname + | None -> + prev_lexbuf.Lexing.lex_curr_p.pos_fname + in + Lexing.set_filename lexbuf pos_fname; + Plx (s, lexbuf) + +let srclex_restart_on_string = srclex_restart Lexing.from_string +let srclex_restart_on_channel = srclex_restart Lexing.from_channel +let srclex_restart_on_file ?position filename = + srclex_restart_on_string ?position (EzFile.read_file filename) + + (* --- Compiler Directives -------------------------------------------------- *) (* SOURCE FORMAT *) @@ -474,7 +497,6 @@ let delim left text right = textword_cat (fun w s -> concat_strings s w) Fun.id left let try_replacing_clause: replacing with_loc -> text -> _ result = fun replacing -> - (* Helpers to record replacement operations on source locations: *) let replloc = ~@replacing in match ~&replacing with | ReplaceExact { repl_from; repl_to } -> diff --git a/src/lsp/cobol_preproc/preproc.mli b/src/lsp/cobol_preproc/preproc.mli index 9227c66ae..25b45d6aa 100644 --- a/src/lsp/cobol_preproc/preproc.mli +++ b/src/lsp/cobol_preproc/preproc.mli @@ -101,6 +101,8 @@ val apply_replacing -> text -> 'a +(** {3 Source format} *) + val cdir_source_format : dialect: Cobol_config.dialect -> string with_loc @@ -113,6 +115,8 @@ val with_source_format -> any_srclexer -> any_srclexer +(** {3 Instantiation} *) + val srclex_from_file : source_format:Cobol_config.source_format -> string @@ -127,6 +131,30 @@ val srclex_from_channel -> source_format:Cobol_config.source_format -> in_channel -> any_srclexer + +(** {3 Resetting the input} *) + +(** Note: the functions below assume [position] corresponds to the begining of + the input.} *) + +val srclex_restart_on_file + : ?position: Lexing.position + -> string + -> any_srclexer + -> any_srclexer +val srclex_restart_on_string + : ?position: Lexing.position + -> string + -> any_srclexer + -> any_srclexer +val srclex_restart_on_channel + : ?position: Lexing.position + -> in_channel + -> any_srclexer + -> any_srclexer + +(** {3 Queries} *) + val srclex_diags : any_srclexer -> Cobol_common.Diagnostics.Set.t @@ -136,6 +164,9 @@ val srclex_pos val srclex_comments : any_srclexer -> comments +val srclex_newline_cnums + : any_srclexer + -> int list val next_source_line: any_srclexer -> any_srclexer * text val fold_source_lines: any_srclexer -> (text -> 'a -> 'a) -> 'a -> 'a diff --git a/src/lsp/cobol_preproc/preproc_engine.ml b/src/lsp/cobol_preproc/preproc_engine.ml index d056f8305..2ed9e00c4 100644 --- a/src/lsp/cobol_preproc/preproc_engine.ml +++ b/src/lsp/cobol_preproc/preproc_engine.ml @@ -50,7 +50,6 @@ and preprocessor_persist = { pparser: (module Preproc.PPPARSER); overlay_manager: (module Src_overlay.MANAGER); - config: Cobol_config.t; replacing: Preproc.replacing with_loc list list; copybooks: Cobol_common.Srcloc.copylocs; (* opened copybooks *) libpath: string list; @@ -66,6 +65,7 @@ let log { pplog; _ } = pplog let srclexer { srclex; _ } = srclex let position { srclex; _ } = Preproc.srclex_pos srclex let comments { srclex; _ } = Preproc.srclex_comments srclex +let newline_cnums { srclex; _ } = Preproc.srclex_newline_cnums srclex let with_srclex lp srclex = if lp.srclex == srclex then lp else { lp with srclex } @@ -93,6 +93,14 @@ let make_srclex ~source_format = function | Channel { contents; filename } -> Preproc.srclex_from_channel ~filename ~source_format contents +let rewind_srclex srclex ?position = function + | Filename filename -> + Preproc.srclex_restart_on_file ?position filename srclex + | String { contents; _ } -> (* Let's avoid renaming the file in lexbuf... *) + Preproc.srclex_restart_on_string ?position contents srclex + | Channel { contents; _ } -> (* ditto *) + Preproc.srclex_restart_on_channel ?position contents srclex + type init = { init_libpath: string list; @@ -119,7 +127,6 @@ let preprocessor ?(verbose = false) input = function { pparser = (module Pp); overlay_manager = (module Om); - config = (module Config); replacing = []; copybooks = Cobol_common.Srcloc.no_copy; libpath; @@ -141,6 +148,10 @@ let preprocessor ?(verbose = false) input = function verbose = persist.verbose || verbose; }; } + | `ResetPosition ({ srclex; _ } as pp, position) -> + { + pp with srclex = rewind_srclex srclex ?position input; + } (* --- *) @@ -220,8 +231,7 @@ and try_preproc lp srctext = | Ok (cdir, ppstate) -> Ok (process_preproc_phrase { lp with ppstate } cdir) and process_preproc_phrase ({ persist = { pparser = (module Pp); - overlay_manager = (module Om); - config = (module Config); _ }; + overlay_manager = (module Om); _ }; _ } as lp) = let parse ~stmt parser phrase : _ result = Pp.MenhirInterpreter.loop_handle @@ -385,6 +395,15 @@ let pp_pptokens: pptokens Pretty.printer = (* --- *) +let reset_preprocessor ?new_position pp input = + preprocessor ~verbose:pp.persist.verbose input + (`ResetPosition (pp, new_position)) + +(* --- *) + +let preprocessor ?verbose init input = + preprocessor ?verbose input (`WithLibpath init) + (** Default pretty-printing formatter for {!lex_file}, {!lex_lib}, and {!preprocess_file}. *) let default_oppf = Fmt.stdout @@ -425,10 +444,10 @@ let preprocess_file ~source_format ?verbose ?(config = Cobol_config.default) ~libpath ?(ppf = default_oppf) = let preprocessor = preprocessor ?verbose in Cobol_common.do_unit begin fun _init_diags filename -> - pp_preprocessed ppf @@ preprocessor (Filename filename) @@ - `WithLibpath { init_libpath = libpath; + pp_preprocessed ppf @@ + preprocessor { init_libpath = libpath; init_config = config; - init_source_format = source_format} + init_source_format = source_format } (Filename filename) end let text_of_input ~source_format ?verbose ?(config = Cobol_config.default) @@ -437,10 +456,9 @@ let text_of_input ~source_format ?verbose ?(config = Cobol_config.default) Cobol_common.do_any begin fun _init_diags input -> fst @@ full_text ~item:"file" @@ - preprocessor input @@ - `WithLibpath { init_libpath = libpath; + preprocessor { init_libpath = libpath; init_config = config; - init_source_format = source_format} + init_source_format = source_format } input end ?epf a let text_of_file ~source_format ?verbose ?(config = Cobol_config.default) diff --git a/src/lsp/cobol_preproc/preproc_engine.mli b/src/lsp/cobol_preproc/preproc_engine.mli index 14bceb89b..f28ec513e 100644 --- a/src/lsp/cobol_preproc/preproc_engine.mli +++ b/src/lsp/cobol_preproc/preproc_engine.mli @@ -25,6 +25,17 @@ type init = init_source_format: Cobol_config.source_format_spec; } +val preprocessor + : ?verbose:bool + -> init + -> input + -> preprocessor +val reset_preprocessor + : ?new_position:Lexing.position + -> preprocessor + -> input + -> preprocessor + (* --- *) val diags: preprocessor -> Cobol_common.Diagnostics.Set.t @@ -34,6 +45,8 @@ val log: preprocessor -> Preproc_trace.log val comments: preprocessor -> Text.comments val srclexer: preprocessor -> Preproc.any_srclexer val position: preprocessor -> Lexing.position +val newline_cnums: preprocessor -> int list + val next_sentence: preprocessor -> Text.text * preprocessor (** {2 High-level commands} *) @@ -43,12 +56,6 @@ val decide_source_format -> Cobol_config.source_format_spec -> Cobol_config.source_format Cobol_common.Diagnostics.with_diags -val preprocessor - : ?verbose:bool - -> input - -> [< `WithLibpath of init ] - -> preprocessor - val lex_file : source_format: Cobol_config.source_format_spec -> ?ppf:Format.formatter diff --git a/src/lsp/cobol_preproc/src_lexing.ml b/src/lsp/cobol_preproc/src_lexing.ml index fb9936907..1aa3dc649 100644 --- a/src/lsp/cobol_preproc/src_lexing.ml +++ b/src/lsp/cobol_preproc/src_lexing.ml @@ -146,6 +146,8 @@ type 'k state = comments: comments; cdir_seen: bool; newline: bool; + newline_cnums: int list; (** index of all newline characters encountered + so far (in reverse order) *) diags: DIAGS.Set.t; config: 'k config; } @@ -177,6 +179,7 @@ let init_state : 'k source_format -> 'k state = fun source_format -> comments = []; cdir_seen = false; newline = true; + newline_cnums = []; diags = DIAGS.Set.none; config = { @@ -187,6 +190,7 @@ let init_state : 'k source_format -> 'k state = fun source_format -> let diagnostics { diags; _ } = diags let comments { comments; _ } = List.rev comments +let newline_cnums { newline_cnums; _ } = List.rev newline_cnums let source_format { config = { source_format; _ }; _ } = source_format let allow_debug { config = { debug; _ }; _ } = debug @@ -252,12 +256,17 @@ let append t state = let new_line state lexbuf = Lexing.new_line lexbuf; + let state = + { state with + newline = true; + newline_cnums = Lexing.lexeme_end lexbuf :: state.newline_cnums } + in match state.lex_prods, state.cdir_seen with | { payload = TextWord _ | Alphanum _ | Pseudo _ | Eof; _ } :: _, _ | _, true -> - flush { state with newline = true } + flush state | _ -> - { state with newline = true }, [] + state, [] (* --- *) diff --git a/src/lsp/cobol_preproc/src_lexing.mli b/src/lsp/cobol_preproc/src_lexing.mli index e9e2fe394..c1ce5e1ba 100644 --- a/src/lsp/cobol_preproc/src_lexing.mli +++ b/src/lsp/cobol_preproc/src_lexing.mli @@ -56,6 +56,7 @@ type 'k state val init_state: 'k source_format -> 'k state val diagnostics: _ state -> Cobol_common.Diagnostics.Set.t val comments: _ state -> Text.comments +val newline_cnums: _ state -> int list val source_format: 'k state -> 'k source_format val change_source_format: 'k state -> 'c source_format Cobol_common.Srcloc.with_loc -> ('c state, 'k state) result diff --git a/src/lsp/cobol_preproc/src_overlay.ml b/src/lsp/cobol_preproc/src_overlay.ml index 5106a8668..37716ddcf 100644 --- a/src/lsp/cobol_preproc/src_overlay.ml +++ b/src/lsp/cobol_preproc/src_overlay.ml @@ -90,7 +90,9 @@ let limits: manager -> srcloc -> limit * limit = fun ctx loc -> | Some lexloc -> lexloc | _ -> Limit.make_virtual (), Limit.make_virtual () in - Links.replace ctx.right_of s (loc, e); (* replace to deal with duplicates *) + Links.replace ctx.right_of s (loc, e); (* Replace to deal with duplicates. *) + Links.remove ctx.cache s; (* Manually remove from cache to prevent invalid *) + Links.remove ctx.cache e; (* or even cyclic/infinite search upon rewind. *) s, e (** Links token limits *) diff --git a/src/lsp/cobol_typeck/cobol_typeck.ml b/src/lsp/cobol_typeck/cobol_typeck.ml index 1df6a3424..66301e555 100644 --- a/src/lsp/cobol_typeck/cobol_typeck.ml +++ b/src/lsp/cobol_typeck/cobol_typeck.ml @@ -1156,20 +1156,18 @@ struct end let analyze_compilation_group ?(config = Cobol_config.default) - (type m) : m Cobol_parser.parsed_compilation_group -> _ = + (type m) : m Cobol_parser.Outputs.parsed_compilation_group -> _ = let analyze_cg (module Diags: DIAGS.STATEFUL) cg = let module Typeck = Make (val config) (Diags) in Ok (Typeck.typeck_compilation_group cg, DIAGS.Set.none) in function - | { parsed_output = Only None | WithArtifacts (None, _); - parsed_diags; _ } -> - Error parsed_diags - | { parsed_output = Only Some cg | WithArtifacts (Some cg, _); - parsed_diags; _ } -> + | Only None | WithArtifacts (None, _) -> + DIAGS.result @@ Error () + | Only Some cg | WithArtifacts (Some cg, _) -> match Cobol_common.catch_diagnostics analyze_cg cg with | Ok (res, diags) -> - Ok (res, cg, DIAGS.Set.union parsed_diags diags) + DIAGS.result ~diags @@ Ok (res, cg) | Error diags -> - Error (DIAGS.Set.union parsed_diags diags) + DIAGS.result ~diags @@ Error () diff --git a/src/lsp/cobol_typeck/cobol_typeck.mli b/src/lsp/cobol_typeck/cobol_typeck.mli index 28780f728..997a8016c 100644 --- a/src/lsp/cobol_typeck/cobol_typeck.mli +++ b/src/lsp/cobol_typeck/cobol_typeck.mli @@ -18,10 +18,8 @@ module CUs = Cobol_data.Compilation_unit.SET val analyze_compilation_group : ?config:(module Cobol_config.T) - -> _ Cobol_parser.parsed_compilation_group - -> (CUs.t * Cobol_parser.PTree.compilation_group * DIAGS.diagnostics, - DIAGS.diagnostics) - result + -> _ Cobol_parser.Outputs.parsed_compilation_group + -> (CUs.t * Cobol_parser.PTree.compilation_group, unit) result DIAGS.with_diags module Make (Config: Cobol_config.T) (* for dialect-based checks *) diff --git a/src/lsp/superbol_free_lib/command_pp.ml b/src/lsp/superbol_free_lib/command_pp.ml index a97d9ba73..585e6221b 100644 --- a/src/lsp/superbol_free_lib/command_pp.ml +++ b/src/lsp/superbol_free_lib/command_pp.ml @@ -44,7 +44,7 @@ let cmd = let text = let common = common_get () in Cobol_preproc.text_of_file file - ~verbose: common.verbose + ~verbose: common.parser_options.verbose ~source_format:common.source_format ~libpath:common.libpath in diff --git a/src/lsp/superbol_free_lib/common_args.ml b/src/lsp/superbol_free_lib/common_args.ml index b603bf54b..21f0e45e4 100644 --- a/src/lsp/superbol_free_lib/common_args.ml +++ b/src/lsp/superbol_free_lib/common_args.ml @@ -15,13 +15,13 @@ open EzCompat open Ezcmd.V2 open EZCMD.TYPES +open Cobol_parser.Options + type t = { config: (module Cobol_config.T); source_format: Cobol_config.source_format_spec; libpath: string list; - verbose: bool; - recovery: Cobol_parser.recovery; - show: [`Pending] list; + parser_options: parser_options; } let showable = @@ -130,12 +130,12 @@ let get () = in let recovery = if !recovery - then Cobol_parser.EnableRecovery { silence_benign_recoveries = false } - else Cobol_parser.DisableRecovery + then EnableRecovery { silence_benign_recoveries = false } + else DisableRecovery in let verbose = !Globals.verbosity > 0 in - { config ; source_format ; libpath = !libpath ; recovery; verbose; - show = !show } + { config; source_format; libpath = !libpath; + parser_options = { recovery; verbose; show = !show } } in get, args diff --git a/src/lsp/superbol_free_lib/common_args.mli b/src/lsp/superbol_free_lib/common_args.mli index e94db350a..f3bc30815 100644 --- a/src/lsp/superbol_free_lib/common_args.mli +++ b/src/lsp/superbol_free_lib/common_args.mli @@ -15,9 +15,7 @@ type t = { config: (module Cobol_config.T); source_format: Cobol_config.source_format_spec; libpath: string list; - verbose: bool; - recovery: Cobol_parser.recovery; - show: [`Pending] list; + parser_options: Cobol_parser.Options.parser_options; } val get : unit -> (unit -> t) * Ezcmd.V2.EZCMD.TYPES.arg_list diff --git a/test/cobol_parsing/parser_testing.ml b/test/cobol_parsing/parser_testing.ml index cce65ca4d..0182ce674 100644 --- a/test/cobol_parsing/parser_testing.ml +++ b/test/cobol_parsing/parser_testing.ml @@ -11,14 +11,23 @@ (* *) (**************************************************************************) +module DIAGS = Cobol_common.Diagnostics + let show_parsed_tokens ?(verbose = false) ?(source_format = Cobol_config.(SF SFFixed)) prog = - let { parsed_output = WithArtifacts (_, { tokens; _ }); _ } = - Cobol_parser.parse_with_tokens ~verbose ~source_format - ~recovery:(EnableRecovery { silence_benign_recoveries = false }) - ~libpath:[] @@ - Cobol_preproc.String { filename = "prog.cob"; contents = prog } + let DIAGS.{ result = WithArtifacts (_, { tokens; _ }); _ } = + Cobol_parser.parse_with_artifacts + ~options:Cobol_parser.Options.{ + default with + verbose; + recovery = EnableRecovery { silence_benign_recoveries = true }; + } @@ + Cobol_preproc.preprocessor + { init_libpath = []; + init_config = Cobol_config.default; + init_source_format = source_format } @@ + String { filename = "prog.cob"; contents = prog } in Cobol_parser.INTERNAL.pp_tokens Fmt.stdout (Lazy.force tokens) diff --git a/test/lsp/lsp_testing.ml b/test/lsp/lsp_testing.ml index 6f23fceb6..540569a7e 100644 --- a/test/lsp/lsp_testing.ml +++ b/test/lsp/lsp_testing.ml @@ -51,7 +51,7 @@ let add_cobol_doc server ?copybook ~projdir filename text = let uri = Lsp.Uri.of_path path in EzFile.write_file path text; let server = - LSP.Server.add ?copybook + LSP.Server.did_open ?copybook DidOpenTextDocumentParams.{ textDocument = TextDocumentItem.{ languageId = "cobol"; version = 0; text; uri; diff --git a/test/output-tests/gnucobol.ml b/test/output-tests/gnucobol.ml index be8234551..de5dcfa82 100644 --- a/test/output-tests/gnucobol.ml +++ b/test/output-tests/gnucobol.ml @@ -16,6 +16,8 @@ open Autofonce_lib open Autofonce_config open Autofonce_core.Types +module DIAGS = Cobol_common.Diagnostics + let () = (* Disable backtrace recording so `OCAMLRUNPARAM=b` has no effect on the output of tests that fail expectedly. *) @@ -139,14 +141,21 @@ let do_check_parse (test_filename, contents, _, { check_loc; then Cobol_config.(SF SFFree) else Cobol_config.(SF SFFixed) in + let parse_simple input = + Cobol_parser.parse_simple @@ + Cobol_preproc.preprocessor + { init_source_format = source_format; + init_config = Cobol_config.default; + init_libpath = [] } input + in try let input = setup_input ~filename contents in - match Cobol_parser.parse_simple ~source_format ~libpath:[] input with - | { parsed_diags; parsed_output = Only Some _; _ } -> - Cobol_common.show_diagnostics ~ppf:Fmt.stdout parsed_diags; + match parse_simple input with + | DIAGS.{ diags; result = Only Some _ } -> + Cobol_common.show_diagnostics ~ppf:Fmt.stdout diags; terminate "ok" - | { parsed_diags; _ } -> - Cobol_common.show_diagnostics ~ppf:Fmt.stdout parsed_diags; + | DIAGS.{ diags; _ } -> + Cobol_common.show_diagnostics ~ppf:Fmt.stdout diags; terminate "ok (with errors)" | exception e -> Pretty.out "Failure (%s)@\n%s@\n" (Printexc.to_string e) contents;