diff --git a/src/lsp/cobol_lsp/lsp_completion.ml b/src/lsp/cobol_lsp/lsp_completion.ml index 8449f2f5..feff94f8 100644 --- a/src/lsp/cobol_lsp/lsp_completion.ml +++ b/src/lsp/cobol_lsp/lsp_completion.ml @@ -236,10 +236,9 @@ let string_of_K tokens = | [] -> () | Cobol_parser.Tokens.PERIOD::tl -> Fmt.string ppf ".\n"; inner tl - | hd::tl -> - let token' = hd &@ Srcloc.dummy in + | token::tl -> if space_before then Fmt.sp ppf (); - Cobol_parser.INTERNAL.pp_token ppf token'; (* TODO: Cobol_parser.Tokens.pp *) + Cobol_parser.Tokens.pp ppf token; inner tl in inner ~space_before:false diff --git a/src/lsp/cobol_parser/cobol_parser.ml b/src/lsp/cobol_parser/cobol_parser.ml index 090d5d2e..f8615d33 100644 --- a/src/lsp/cobol_parser/cobol_parser.ml +++ b/src/lsp/cobol_parser/cobol_parser.ml @@ -20,7 +20,13 @@ module Options = Parser_options module Outputs = Parser_outputs module Diagnostics = Parser_diagnostics -module Tokens = Grammar_tokens +module Tokens = struct + include Grammar_tokens + let pp = Text_tokenizer.pp_token + let pp' = Text_tokenizer.pp_token' + let pp'_list = Text_tokenizer.pp_token'_list + let pp'_list_with_loc_info = Text_tokenizer.pp_tokens_with_loc_info +end module Expect = Grammar_expect module Printer = Grammar_printer module Keywords = Text_keywords @@ -36,12 +42,6 @@ include Parser_engine module INTERNAL = struct - (** {2 COBOL tokens} *) - - let pp_token = Text_tokenizer.pp_token - let pp_tokens = Text_tokenizer.pp_tokens - let pp_tokens' = Text_tokenizer.pp_tokens' - (** {2 COBOL grammar} *) module Grammar (* : Grammar_sig.S *) = Grammar diff --git a/src/lsp/cobol_parser/parser_engine.ml b/src/lsp/cobol_parser/parser_engine.ml index 6d94e84c..7b2c814f 100644 --- a/src/lsp/cobol_parser/parser_engine.ml +++ b/src/lsp/cobol_parser/parser_engine.ml @@ -163,7 +163,7 @@ let all_diags { preproc = { pp; diags; tokzr; _ }; _ } = (* --- *) -let rec produce_tokens (ps: _ state as 's) : 's * Text_tokenizer.tokens = +let rec produce_tokens (ps: _ state as 's) : 's * Tokzr.token' list = let text, pp = Cobol_preproc.next_chunk ps.preproc.pp in let { preproc = { pp; tokzr; _ }; _ } as ps = update_pp ps pp in assert (text <> []); @@ -400,7 +400,7 @@ type ('a, 'm) stage = rewound. *) and ('a, 'm) interim_stage = 'm state * - Text_tokenizer.tokens * + Tokzr.token' list * 'a Grammar_interpr.env (* Always valid input_needed env. *) (** Final stage, at which the parser has stopped processing. *) diff --git a/src/lsp/cobol_parser/text_tokenizer.ml b/src/lsp/cobol_parser/text_tokenizer.ml index 70c3e45c..d53bcd90 100644 --- a/src/lsp/cobol_parser/text_tokenizer.ml +++ b/src/lsp/cobol_parser/text_tokenizer.ml @@ -22,8 +22,7 @@ open Parser_diagnostics (* --- *) -type token = Grammar_tokens.token with_loc -type tokens = token list +type token' = Grammar_tokens.token with_loc let combined_tokens = (* /!\ WARNING: None of the constituents of combined tokens may be @@ -93,40 +92,43 @@ let pp_token_string: Grammar_tokens.token Pretty.printer = fun ppf -> try Hashtbl.find combined_tokens t with Not_found -> "" -let pp_token: token Pretty.printer = fun ppf -> +let pp_token: Grammar_tokens.token Pretty.printer = fun ppf -> let string s = Pretty.string ppf s and print format = Pretty.print ppf format in - fun t -> match ~&t with + fun t -> match t with | WORD w -> print "WORD[%s]" w | WORD_IN_AREA_A w -> print "WORD_IN_AREA_A[%s]" w | PICTURE_STRING w -> print "PICTURE_STRING[%s]" w | INFO_WORD s -> print "INFO_WORD[%s]" s - | COMMENT_ENTRY _ -> print "COMMENT_ENTRY[%a]" pp_token_string ~&t - | EXEC_BLOCK _ -> print "EXEC_BLOCK[%a]" pp_token_string ~&t + | COMMENT_ENTRY _ -> print "COMMENT_ENTRY[%a]" pp_token_string t + | EXEC_BLOCK _ -> print "EXEC_BLOCK[%a]" pp_token_string t | DIGITS i -> print "DIGITS[%s]" i | SINTLIT i -> print "SINT[%s]" i | FIXEDLIT (i, sep, d) -> print "FIXED[%s%c%s]" i sep d | FLOATLIT (i, sep, d, e) -> print "FLOAT[%s%c%sE%s]" i sep d e | INTERVENING_ c -> print "<%c>" c | tok when is_intrinsic_token tok -> - print "INTRINSIC_FUNC[%a]" pp_token_string ~&t + print "INTRINSIC_FUNC[%a]" pp_token_string t | EOF -> string "EOF" | t -> pp_token_string ppf t -let pp_tokens = - Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token +let pp_token': token' Pretty.printer = + Cobol_common.Srcloc.pp_with_loc pp_token -let pp_tokens' ?fsep = +let pp_token'_list = + Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token' + +let pp_tokens_with_loc_info ?fsep = Pretty.list ~fopen:"@[" ?fsep ~fclose:"@]" begin fun ppf t -> Pretty.print ppf "%a@@%a" - pp_token t + pp_token' t Cobol_common.Srcloc.pp_srcloc_struct ~@t end (* --- *) let loc_in_area_a: srcloc -> bool = Cobol_common.Srcloc.in_area_a -let token_in_area_a: token -> bool = fun t -> loc_in_area_a ~@t +let token_in_area_a: token' -> bool = fun t -> loc_in_area_a ~@t (* --- *) @@ -338,12 +340,12 @@ let preproc_n_combine_tokens ~intrinsics_enabled ~source_format = type 'a memory = | Amnesic: Cobol_common.Behaviors.amnesic memory - | Eidetic: tokens -> Cobol_common.Behaviors.eidetic memory + | Eidetic: token' list -> Cobol_common.Behaviors.eidetic memory type 'm state = { lexer_state: Text_lexer.lexer_state; - leftover_tokens: tokens; (* non-empty only when [preproc_n_combine_tokens] + leftover_tokens: token' list; (* non-empty only when [preproc_n_combine_tokens] errors out for lack of input tokens. *) memory: 'm memory; context_stack: Context.stack; @@ -542,7 +544,7 @@ let acc_tokens_of_text_word (rev_prefix_tokens, state) { payload = c; loc } = else nominal state -let tokens_of_text: 'a state -> text -> tokens * 'a state = fun state text -> +let tokens_of_text: 'a state -> text -> token' list * 'a state = fun state text -> let tokens, state = List.fold_left acc_tokens_of_text_word ([], state) text in List.rev tokens, state @@ -555,7 +557,7 @@ let tokenize_text ~source_format ({ leftover_tokens; _ } as state) text = match preproc_n_combine_tokens ~intrinsics_enabled ~source_format tokens with | Ok (tokens, diags) -> if show `Tks state then - Pretty.error "Tks: %a@." pp_tokens tokens; + Pretty.error "Tks: %a@." pp_token'_list tokens; let diags = Parser_diagnostics.union diags state.diags in Ok tokens, { state with diags } | Error `MissingInputs -> @@ -604,7 +606,7 @@ let retokenize { lexer_state; persist = { lexer; _ }; _ } w = (handling of DECIMAL POINT). *) fst @@ Text_lexer.read_tokens lexer lexer_state w -let reword_intrinsics s : tokens -> tokens = +let reword_intrinsics s : token' list -> token' list = (* Some intrinsics NOT preceded with FUNCTION may now be words; assumes [Disabled_intrinsics] does not occur on a `FUNCTION` keyword (but that's unlikely). *) @@ -628,7 +630,8 @@ let reword_intrinsics s : tokens -> tokens = {!module:Text_lexer}. *) (* TODO: Find whether everything related to Area A and comma-retokenization could be moved to Text_lexer *) -let retokenize_after: lexer_update -> _ state -> tokens -> tokens = fun update s -> +let retokenize_after: lexer_update -> _ state -> token' list -> token' list = + fun update s -> match update with | Enabled_keywords tokens | Disabled_keywords tokens @@ -735,7 +738,7 @@ let enable_intrinsics state token tokens = let tokens = retokenize_after Enabled_intrinsics state tokens in let token, tokens = List.hd tokens, List.tl tokens in if show `Tks state then - Pretty.error "Tks': %a@." pp_tokens tokens; + Pretty.error "Tks': %a@." pp_token'_list tokens; emit_token state token, token, tokens @@ -747,7 +750,7 @@ let disable_intrinsics state token tokens = let tokens = retokenize_after Disabled_intrinsics state tokens in let token, tokens = List.hd tokens, List.tl tokens in if show `Tks state then - Pretty.error "Tks': %a@." pp_tokens tokens; + Pretty.error "Tks': %a@." pp_token'_list tokens; emit_token state token, token, tokens @@ -782,7 +785,7 @@ let decimal_point_is_comma (type m) (state: m state) token tokens = let tokens = retokenize_after CommaBecomesDecimalPoint state tokens in let token, tokens = List.hd tokens, List.tl tokens in if show `Tks state then - Pretty.error "Tks': %a@." pp_tokens tokens; + Pretty.error "Tks': %a@." pp_token'_list tokens; emit_token state token, token, tokens @@ -816,7 +819,7 @@ let push_contexts state tokens : Context.t list -> 's * 'a = function (* Update tokenizer state *) let state, tokens = enable_tokens state tokens tokens_set in if show `Tks state then - Pretty.error "Tks': %a@." pp_tokens tokens; + Pretty.error "Tks': %a@." pp_token'_list tokens; with_context_stack state context_stack, tokens diff --git a/src/lsp/cobol_parser/text_tokenizer.mli b/src/lsp/cobol_parser/text_tokenizer.mli index 3111e02d..4c225fea 100644 --- a/src/lsp/cobol_parser/text_tokenizer.mli +++ b/src/lsp/cobol_parser/text_tokenizer.mli @@ -18,11 +18,12 @@ open EzCompat (** {2 Compilation group tokens} *) (** Tokens passed to {!Parser}; can be obtained via {!tokenize_text}. *) -type token = Grammar_tokens.token Cobol_ptree.with_loc -type tokens = token list -val pp_token: token Pretty.printer -val pp_tokens: tokens Pretty.printer -val pp_tokens': ?fsep:Pretty.simple -> tokens Pretty.printer +type token' = Grammar_tokens.token Cobol_ptree.with_loc + +val pp_token: Grammar_tokens.token Pretty.printer +val pp_token': token' Pretty.printer +val pp_token'_list: token' list Pretty.printer +val pp_tokens_with_loc_info: ?fsep:Pretty.simple -> token' list Pretty.printer (* --- *) @@ -62,44 +63,44 @@ val diagnostics val parsed_tokens : Cobol_common.Behaviors.eidetic state - -> tokens Lazy.t + -> token' list Lazy.t val tokenize_text : source_format: Cobol_preproc.Src_format.any -> 'a state -> Cobol_preproc.Text.t - -> (tokens, [>`MissingInputs | `ReachedEOF of tokens]) result * 'a state + -> (token' list, [>`MissingInputs | `ReachedEOF of token' list]) result * 'a state val next_token : 'a state - -> tokens - -> ('a state * token * tokens) option + -> token' list + -> ('a state * token' * token' list) option val put_token_back : 'a state - -> token - -> tokens - -> 'a state * tokens + -> token' + -> token' list + -> 'a state * token' list (* --- *) val enable_intrinsics : 'a state - -> token - -> tokens - -> 'a state * token * tokens + -> token' + -> token' list + -> 'a state * token' * token' list val disable_intrinsics : 'a state - -> token - -> tokens - -> 'a state * token * tokens + -> token' + -> token' list + -> 'a state * token' * token' list val reset_intrinsics : 'a state - -> token - -> tokens - -> 'a state * token * tokens + -> token' + -> token' list + -> 'a state * token' * token' list val replace_intrinsics : 'a state @@ -108,17 +109,17 @@ val replace_intrinsics val decimal_point_is_comma : 'a state - -> token - -> tokens - -> 'a state * token * tokens + -> token' + -> token' list + -> 'a state * token' * token' list (* --- *) val push_contexts : 'a state - -> tokens + -> token' list -> Grammar_contexts.context list - -> 'a state * tokens + -> 'a state * token' list val top_context : _ state @@ -126,8 +127,8 @@ val top_context val pop_context : 'a state - -> tokens - -> 'a state * tokens + -> token' list + -> 'a state * token' list val enable_context_sensitive_tokens: _ state -> unit val disable_context_sensitive_tokens: _ state -> unit diff --git a/test/cobol_parsing/parser_testing.ml b/test/cobol_parsing/parser_testing.ml index 0ffe0ccd..4379ac63 100644 --- a/test/cobol_parsing/parser_testing.ml +++ b/test/cobol_parsing/parser_testing.ml @@ -43,8 +43,8 @@ let show_parsed_tokens ?(parser_options = options ()) Cobol_parser.parse_with_artifacts ~options:parser_options in (if with_locations - then Cobol_parser.INTERNAL.pp_tokens' ~fsep:"@\n" - else Cobol_parser.INTERNAL.pp_tokens) Fmt.stdout (Lazy.force tokens) + then Cobol_parser.Tokens.pp'_list_with_loc_info ~fsep:"@\n" + else Cobol_parser.Tokens.pp'_list) Fmt.stdout (Lazy.force tokens) let show_diagnostics ?(parser_options = options ()) ?source_format ?filename contents =