Skip to content

Commit

Permalink
Merge pull request #346 from NeoKaios/refactor/cleanup-pp-token
Browse files Browse the repository at this point in the history
Unify `pp_token` of Text_tokenizer with other pp
  • Loading branch information
nberth authored Aug 9, 2024
2 parents 4b3926a + 1f88e82 commit 8b26d76
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 119 deletions.
4 changes: 2 additions & 2 deletions .drom

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions src/lsp/cobol_lsp/lsp_completion.ml
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,9 @@ let string_of_K tokens =
| [] -> ()
| Cobol_parser.Tokens.PERIOD::tl ->
Fmt.string ppf ".\n"; inner tl
| hd::tl ->
let token' = hd &@ Srcloc.dummy in
| token::tl ->
if space_before then Fmt.sp ppf ();
Cobol_parser.INTERNAL.pp_token ppf token'; (* TODO: Cobol_parser.Tokens.pp *)
Cobol_parser.Tokens.pp ppf token;
inner tl
in
inner ~space_before:false
Expand Down
14 changes: 7 additions & 7 deletions src/lsp/cobol_parser/cobol_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ module Options = Parser_options
module Outputs = Parser_outputs
module Diagnostics = Parser_diagnostics

module Tokens = Grammar_tokens
module Tokens = struct
include Grammar_tokens
let pp = Grammar_tokens_printer.pp_token
let pp' = Grammar_tokens_printer.pp_token'
let pp'_list = Grammar_tokens_printer.pp_tokens
let pp'_list_with_loc_info = Grammar_tokens_printer.pp_tokens_with_loc_info
end
module Expect = Grammar_expect
module Printer = Grammar_printer
module Keywords = Text_keywords
Expand All @@ -36,12 +42,6 @@ include Parser_engine

module INTERNAL = struct

(** {2 COBOL tokens} *)

let pp_token = Text_tokenizer.pp_token
let pp_tokens = Text_tokenizer.pp_tokens
let pp_tokens' = Text_tokenizer.pp_tokens'

(** {2 COBOL grammar} *)

module Grammar (* : Grammar_sig.S *) = Grammar
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_parser/dune

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

98 changes: 98 additions & 0 deletions src/lsp/cobol_parser/grammar_tokens_printer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
(**************************************************************************)
(* *)
(* SuperBOL OSS Studio *)
(* *)
(* Copyright (c) 2022-2023 OCamlPro SAS *)
(* *)
(* All rights reserved. *)
(* This source code is licensed under the GNU Affero General Public *)
(* License version 3 found in the LICENSE.md file in the root directory *)
(* of this source tree. *)
(* *)
(**************************************************************************)

open Cobol_common.Srcloc.INFIX
open Cobol_common.Srcloc.TYPES
open Grammar_tokens (* import token constructors *)

(* --- *)

let combined_tokens =
(* /!\ WARNING: None of the constituents of combined tokens may be
context-sensitive.
Rationale: this would considerably complicate retokenization (which is
necessary with the current solution to handle context-sensitive
keywords) *)
Hashtbl.of_seq @@
Seq.map (fun (a, b) -> b, a) @@
List.to_seq Text_keywords.combined_keywords

let pp_alphanum_string_prefix ppf Cobol_ptree.{ hexadecimal; quotation; str;
runtime_repr } =
if runtime_repr = Null_terminated_bytes then Fmt.char ppf 'Z';
if hexadecimal then Fmt.char ppf 'X';
match quotation with
| Simple_quote -> Fmt.pf ppf "'%s" str
| Double_quote -> Fmt.pf ppf "\"%s" str

let pp_token_string: token Pretty.printer = fun ppf ->
let string s = Pretty.string ppf s
and print format = Pretty.print ppf format in
function
| WORD w
| WORD_IN_AREA_A w
| PICTURE_STRING w
| INFO_WORD w
| DIGITS w
| SINTLIT w -> string w
| EIGHTY_EIGHT -> string "88"
| FIXEDLIT (i, sep, d) -> print "%s%c%s" i sep d
| FLOATLIT (i, sep, d, e) -> print "%s%c%sE%s" i sep d e
| ALPHANUM a -> Cobol_ptree.pp_alphanum ppf a
| ALPHANUM_PREFIX a -> pp_alphanum_string_prefix ppf a
| NATLIT s -> print "N\"%s\"" s
| BOOLIT b -> print "B\"%a\"" Cobol_ptree.pp_boolean b
| COMMENT_ENTRY e -> print "%a" Fmt.(list ~sep:sp string) e
| EXEC_BLOCK b -> Cobol_common.Exec_block.pp ppf b
| INTERVENING_ c -> print "%c" c
| t -> string @@
try Text_lexer.show_token t
with Not_found ->
try Hashtbl.find combined_tokens t
with Not_found -> "<unknown/unexpected token>"

let pp_token: token Pretty.printer = fun ppf ->
let string s = Pretty.string ppf s
and print format = Pretty.print ppf format in
function
| WORD w -> print "WORD[%s]" w
| WORD_IN_AREA_A w -> print "WORD_IN_AREA_A[%s]" w
| PICTURE_STRING w -> print "PICTURE_STRING[%s]" w
| INFO_WORD s -> print "INFO_WORD[%s]" s
| COMMENT_ENTRY _ as t -> print "COMMENT_ENTRY[%a]" pp_token_string t
| EXEC_BLOCK _ as t -> print "EXEC_BLOCK[%a]" pp_token_string t
| DIGITS i -> print "DIGITS[%s]" i
| SINTLIT i -> print "SINT[%s]" i
| FIXEDLIT (i, sep, d) -> print "FIXED[%s%c%s]" i sep d
| FLOATLIT (i, sep, d, e) -> print "FLOAT[%s%c%sE%s]" i sep d e
| INTERVENING_ c -> print "<%c>" c
| INTRINSIC_FUNC _ as tok ->
print "INTRINSIC_FUNC[%a]" pp_token_string tok
| tok when Text_keywords.is_known_intrinsic_token tok ->
print "SPECIALIZED_INTRINSIC_FUNC[%a]" pp_token_string tok
| EOF -> string "EOF"
| t -> pp_token_string ppf t

let pp_token': token with_loc Pretty.printer =
Cobol_common.Srcloc.pp_with_loc pp_token

let pp_tokens : token with_loc list Pretty.printer =
Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token'

let pp_tokens_with_loc_info ?fsep : token with_loc list Pretty.printer =
Pretty.list ~fopen:"@[" ?fsep ~fclose:"@]" begin fun ppf t ->
Pretty.print ppf "%a@@%a"
pp_token' t
Cobol_common.Srcloc.pp_srcloc_struct ~@t
end
18 changes: 18 additions & 0 deletions src/lsp/cobol_parser/keywords/gen_keywords.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

let cmlyname = ref None
let external_tokens = ref ""
let with_is_intrinsic = ref false

let usage_msg = Fmt.str "%s [OPTIONS] file.cmly" Sys.argv.(0)
let anon str = match !cmlyname with
Expand All @@ -24,6 +25,8 @@ let () =
Arg.[
("--external-tokens", Set_string external_tokens,
"<module> Import token type definition from <module>");
("--with-is-intrinsic", Set with_is_intrinsic,
"<module> Import token type definition from <module>");
]
anon usage_msg

Expand Down Expand Up @@ -174,6 +177,19 @@ let emit_intrinsic_functions_list ppf =
Terminal.iter (emit_custom_intrinsics ppf);
Fmt.pf ppf "@]@\n]@."

let emit_is_intrinsic ppf =
if !with_is_intrinsic then
let is_intrinsic t =
intrinsic (Terminal.attributes t) |> Option.is_some
in
Fmt.pf ppf "@[<2>let is_known_intrinsic_token = %s.(function@." tokens_module;
Terminal.iter begin fun t ->
if is_intrinsic t
then Fmt.pf ppf "| %a@." pp_terminal t
end;
Fmt.pf ppf "| _ -> false@]\n)@."


let emit ppf =
Fmt.pf ppf
"(* Caution: this file was automatically generated from %s; do not edit *)\
Expand All @@ -184,6 +200,7 @@ let emit ppf =
@\n%t\
@\n%t\
@\n%t\
@\n%t\
@\n"
cmlyname
emit_prelude
Expand All @@ -192,6 +209,7 @@ let emit ppf =
emit_intrinsic_functions_list
emit_puncts_list
emit_silenced_keywords_list
emit_is_intrinsic

let () =
emit Fmt.stdout
2 changes: 1 addition & 1 deletion src/lsp/cobol_parser/package.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ dune-trailer = """
(action
(with-stdout-to %{targets}
(run %{exe:./keywords/gen_keywords.exe} %{deps}
--external-tokens Grammar_tokens))))
--external-tokens Grammar_tokens --with-is-intrinsic))))
(rule
(targets grammar_expect.ml)
Expand Down
2 changes: 2 additions & 0 deletions src/lsp/cobol_parser/text_keywords.mli
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ val silenced_keywords: string list

(** Mapping from punctuations to their respective tokens *)
val puncts: (string * Grammar_tokens.token) list

val is_known_intrinsic_token: Grammar_tokens.token -> bool
105 changes: 5 additions & 100 deletions src/lsp/cobol_parser/text_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,113 +18,14 @@ open Cobol_common.Srcloc.INFIX
open Cobol_common.Srcloc.TYPES
open Cobol_preproc.Text.TYPES
open Grammar_tokens (* import token constructors *)
open Grammar_tokens_printer
open Parser_diagnostics

(* --- *)

type token = Grammar_tokens.token with_loc
type tokens = token list

let combined_tokens =
(* /!\ WARNING: None of the constituents of combined tokens may be
context-sensitive.
Rationale: this would considerably complicate retokenization (which is
necessary with the current solution to handle context-sensitive
keywords) *)
Hashtbl.of_seq @@
Seq.map (fun (a, b) -> b, a) @@
List.to_seq Text_keywords.combined_keywords

let is_intrinsic_token = function
| BYTE_LENGTH_FUNC
| CHAR_FUNC
| CONTENT_OF_FUNC
| CONVERT_FUNC
| CURRENT_DATE_FUNC
| FORMATTED_DATETIME_FUNC
| FORMATTED_TIME_FUNC
| LENGTH_FUNC
| LOCALE_DATE_FUNC
| LOCALE_TIME_FROM_SECONDS_FUNC
| LOCALE_TIME_FUNC
| NUMVAL_C_FUNC
| RANDOM_FUNC
| RANGE_FUNC
| REVERSE_FUNC
| SIGN_FUNC
| SUM_FUNC
| TRIM_FUNC
| WHEN_COMPILED_FUNC
| INTRINSIC_FUNC _ -> true
| _ -> false

let pp_alphanum_string_prefix ppf Cobol_ptree.{ hexadecimal; quotation; str;
runtime_repr } =
if runtime_repr = Null_terminated_bytes then Fmt.char ppf 'Z';
if hexadecimal then Fmt.char ppf 'X';
match quotation with
| Simple_quote -> Fmt.pf ppf "'%s" str
| Double_quote -> Fmt.pf ppf "\"%s" str

let pp_token_string: Grammar_tokens.token Pretty.printer = fun ppf ->
let string s = Pretty.string ppf s
and print format = Pretty.print ppf format in
function
| WORD w
| WORD_IN_AREA_A w
| PICTURE_STRING w
| INFO_WORD w
| DIGITS w
| SINTLIT w -> string w
| EIGHTY_EIGHT -> string "88"
| FIXEDLIT (i, sep, d) -> print "%s%c%s" i sep d
| FLOATLIT (i, sep, d, e) -> print "%s%c%sE%s" i sep d e
| ALPHANUM a -> Cobol_ptree.pp_alphanum ppf a
| ALPHANUM_PREFIX a -> pp_alphanum_string_prefix ppf a
| NATLIT s -> print "N\"%s\"" s
| BOOLIT b -> print "B\"%a\"" Cobol_ptree.pp_boolean b
| COMMENT_ENTRY e -> print "%a" Fmt.(list ~sep:sp string) e
| EXEC_BLOCK b -> Cobol_common.Exec_block.pp ppf b
| INTERVENING_ c -> print "%c" c
| t -> string @@
try Text_lexer.show_token t
with Not_found ->
try Hashtbl.find combined_tokens t
with Not_found -> "<unknown/unexpected token>"

let pp_token: token Pretty.printer = fun ppf ->
let string s = Pretty.string ppf s
and print format = Pretty.print ppf format in
fun t -> match ~&t with
| WORD w -> print "WORD[%s]" w
| WORD_IN_AREA_A w -> print "WORD_IN_AREA_A[%s]" w
| PICTURE_STRING w -> print "PICTURE_STRING[%s]" w
| INFO_WORD s -> print "INFO_WORD[%s]" s
| COMMENT_ENTRY _ -> print "COMMENT_ENTRY[%a]" pp_token_string ~&t
| EXEC_BLOCK _ -> print "EXEC_BLOCK[%a]" pp_token_string ~&t
| DIGITS i -> print "DIGITS[%s]" i
| SINTLIT i -> print "SINT[%s]" i
| FIXEDLIT (i, sep, d) -> print "FIXED[%s%c%s]" i sep d
| FLOATLIT (i, sep, d, e) -> print "FLOAT[%s%c%sE%s]" i sep d e
| INTERVENING_ c -> print "<%c>" c
| tok when is_intrinsic_token tok ->
print "INTRINSIC_FUNC[%a]" pp_token_string ~&t
| EOF -> string "EOF"
| t -> pp_token_string ppf t

let pp_tokens =
Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token

let pp_tokens' ?fsep =
Pretty.list ~fopen:"@[" ?fsep ~fclose:"@]" begin fun ppf t ->
Pretty.print ppf "%a@@%a"
pp_token t
Cobol_common.Srcloc.pp_srcloc_struct ~@t
end

(* --- *)

let loc_in_area_a: srcloc -> bool = Cobol_common.Srcloc.in_area_a
let token_in_area_a: token -> bool = fun t -> loc_in_area_a ~@t

Expand Down Expand Up @@ -609,6 +510,10 @@ let reword_intrinsics s : tokens -> tokens =
[Disabled_intrinsics] does not occur on a `FUNCTION` keyword (but that's
unlikely). *)
let keyword_of_token = Hashtbl.find Text_lexer.word_of_token in
let is_intrinsic_token = function
| INTRINSIC_FUNC _ -> true
| t when Text_keywords.is_known_intrinsic_token t -> true
| _ -> false in
let rec aux rev_prefix suffix =
match suffix with
| [] ->
Expand Down
3 changes: 0 additions & 3 deletions src/lsp/cobol_parser/text_tokenizer.mli
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ open EzCompat
(** Tokens passed to {!Parser}; can be obtained via {!tokenize_text}. *)
type token = Grammar_tokens.token Cobol_ptree.with_loc
type tokens = token list
val pp_token: token Pretty.printer
val pp_tokens: tokens Pretty.printer
val pp_tokens': ?fsep:Pretty.simple -> tokens Pretty.printer

(* --- *)

Expand Down
4 changes: 2 additions & 2 deletions test/cobol_parsing/parser_testing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ let show_parsed_tokens ?(parser_options = options ())
Cobol_parser.parse_with_artifacts ~options:parser_options
in
(if with_locations
then Cobol_parser.INTERNAL.pp_tokens' ~fsep:"@\n"
else Cobol_parser.INTERNAL.pp_tokens) Fmt.stdout (Lazy.force tokens)
then Cobol_parser.Tokens.pp'_list_with_loc_info ~fsep:"@\n"
else Cobol_parser.Tokens.pp'_list) Fmt.stdout (Lazy.force tokens)

let show_diagnostics ?(parser_options = options ())
?source_format ?filename contents =
Expand Down

0 comments on commit 8b26d76

Please sign in to comment.