Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of text-word continuations #33

Merged
merged 4 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions src/lsp/cobol_indent/indenter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,6 @@ let indenter ~source_format (str:string) (rdl:indent_record list) range =
(*indent a range of file, with the default indent_config*)
let indent_range' ~source_format ~range ~file =
let file_content = Ez_file.V1.EzFile.read_file file in
let check_indent = Indent_check.check_indentation in
let state = {
scope = BEGIN;
context = [];
acc = [];
range;
}
in
(*
Not satisfied with the `Cobol_preproc.fold_text_lines`,
this function has an argument which is the name of file,
Expand All @@ -80,8 +72,9 @@ let indent_range' ~source_format ~range ~file =
(* NB: not anymore. *)
*)
let state =
Cobol_preproc.fold_text_lines ~source_format check_indent
(Filename file) state
Cobol_preproc.fold_source_lines ~source_format
~f:(fun _lnum line acc -> Indent_check.check_indentation line acc)
(Filename file) { scope = BEGIN; context = []; acc = []; range }
in
let ind_recds = state.acc in
indenter ~source_format file_content ind_recds state.range
Expand Down
2 changes: 1 addition & 1 deletion src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ module Make (Config: Cobol_config.T) = struct
(* --- *)

let rec produce_tokens (ps: _ state as 's) : 's * Text_tokenizer.tokens =
let text, pp = Cobol_preproc.next_sentence ps.preproc.pp in
let text, pp = Cobol_preproc.next_chunk ps.preproc.pp in
let { preproc = { pp; tokzr; _ }; _ } as ps = update_pp ps pp in
assert (text <> []);
(* Note: this is the source format in use at the end of the sentence. *)
Expand Down
62 changes: 50 additions & 12 deletions src/lsp/cobol_preproc/preproc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ let srclex_newline_cnums (Plx (pl, _)) =
type 'k source_line =
| Line: 'k srclexer * text -> 'k source_line

let source_lines_reader lexer =
let source_chunks_reader lexer =
let rec next_source_line (state, lexbuf) =
let state, pseutoks = lexer state lexbuf in
match pseutoks with
Expand All @@ -53,32 +53,70 @@ let source_lines_reader lexer =
in
next_source_line

let fold_source_lines lexer f pl =
let next_source_line = source_lines_reader lexer in
let rec aux pl acc = match next_source_line pl with
let fold_source_chunks lexer f pl =
let next_source_chunk = source_chunks_reader lexer in
let rec aux pl acc = match next_source_chunk pl with
| Line (_, [{ payload = Eof; _}]) -> acc
| Line (pl, text) -> f text acc |> aux pl
in
aux pl

let print_source_lines lexer ppf pl =
fold_source_lines lexer (fun t () -> Pretty.print ppf "%a@." Text.pp_text t)
let print_source lexer ppf pl =
fold_source_chunks lexer (fun t () -> Pretty.print ppf "%a@." Text.pp_text t)
pl ()

let next_source_line (Plx pl) =
let Line (pl, text) = source_lines_reader Src_lexer.line pl in
let next_source_chunk (Plx pl) =
let Line (pl, text) = source_chunks_reader Src_lexer.line pl in
Plx pl, text

let print_source_lines ppf (Plx pl) =
print_source_lines Src_lexer.line ppf pl
let print_source ppf (Plx pl) =
print_source Src_lexer.line ppf pl

let fold_source_lines pl f acc =
let rec aux pl acc = match next_source_line pl with
let fold_source_chunks pl f acc =
let rec aux pl acc = match next_source_chunk pl with
| _, [{ payload = Eof; _}] -> acc
| pl, text -> aux pl (f text acc)
in
aux pl acc

(** [fold_source_lines pl ~f acc] applies [f line_number line acc] for each
successive line [line] of the input lexed by [pl]. [line_number] gives the
line number for [line] (starting at [1]). [line] is given empty to [f] if
it corresponds to empty line in the input, or was a line continuation. *)
let fold_source_lines pl ~f acc =
let tok_lnum tok =
(* On source text, which is NOT manipulated, we only have lexical locations,
so using [start_pos] is enough. *)
(Cobol_common.Srcloc.start_pos ~@tok).pos_lnum
in
let spit_empty_lines ~until_lnum cur_lnum acc =
let rec aux cur_lnum acc =
if cur_lnum < until_lnum
then aux (succ cur_lnum) (f cur_lnum [] acc)
else acc
in
aux cur_lnum acc
in
let rec spit_chunk chunk (acc, cur_lnum, cur_prefix) =
match
Cobol_common.Basics.LIST.split_at_first ~prefix:`Same ~where:`Before
(fun tok -> tok_lnum tok > cur_lnum) chunk
with
| Error () -> (* still on the same line *)
(acc, cur_lnum, cur_prefix @ chunk)
| Ok (prefix, []) -> (* should not happen (in case, just append) *)
(acc, cur_lnum, cur_prefix @ prefix)
| Ok (prefix, (tok :: _ as suffix)) -> (* terminating a line *)
let acc = f cur_lnum (cur_prefix @ prefix) acc in
let new_lnum = tok_lnum tok in
let acc = spit_empty_lines ~until_lnum:new_lnum (succ cur_lnum) acc in
spit_chunk suffix (acc, new_lnum, [])
in
let acc, last_lnum, tail = fold_source_chunks pl spit_chunk (acc, 1, []) in
match tail with (* fold on the last line upon exit... *)
| [] | { payload = Eof; _ } :: _ -> acc (* ... if non-empty *)
| _ -> f last_lnum tail acc

(* --- *)

let with_source_format
Expand Down
11 changes: 8 additions & 3 deletions src/lsp/cobol_preproc/preproc.mli
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,20 @@ val srclex_comments
val srclex_newline_cnums
: any_srclexer
-> int list
val next_source_line
val next_source_chunk
: any_srclexer
-> any_srclexer * text
val fold_source_lines
val fold_source_chunks
: any_srclexer
-> (text -> 'a -> 'a)
-> 'a
-> 'a
val print_source_lines
val fold_source_lines
: any_srclexer
-> f:(int -> text -> 'a -> 'a)
-> 'a
-> 'a
val print_source
: Format.formatter
-> any_srclexer
-> unit
Expand Down
112 changes: 64 additions & 48 deletions src/lsp/cobol_preproc/preproc_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -155,53 +155,71 @@ let apply_active_replacing_full { pplog; persist; _ } = match persist with
| { replacing = r :: _; _ } -> Preproc.apply_replacing OnFullText r pplog
| _ -> fun text -> text, pplog

(** [next_sentence lp] reads the next sentence from [lp], handling lexical and
compiler directives along the way. It never returns an empty sentence: the
output text always terminates with a period or {!Eof}. *)
let rec next_sentence ({ srclex; buff; _ } as lp) =
match Preproc.next_source_line srclex with
(** [lookup_compiler_directive chunk] searches for a compiler-directive
text-word (that starts with either `{v >> v}' or `{v $ v}') in the given
chunk of source text.

Returns [Ok (prefix, cdir_text)] if a compiler directive is recognised,
where [cdir_text] is guaranteed to start with a compiler-directive word on a
line [l] and terminates at the end [l]. *)
(* Note: {!Preproc.next_source_chunk} never outputs compiler-directive
text-words in positions other than the first two. Such a chunk also
terminates at the end of the source line as it cannot be continued (contrary
to normal source lines). *)
let lookup_compiler_directive: Text.text -> _ = function
| t :: _ as text when Text.cdirp t -> Ok ([ ], text)
| p :: (t :: _ as text) when Text.cdirp t -> Ok ([p], text)
| _ -> Error `NotCDir

(* --- *)

(** [next_chunk lp] reads the next chunk from [lp], handling lexical and
compiler directives along the way. It never returns an empty result: the
output text always containts at least {!Eof}. *)
let rec next_chunk ({ srclex; buff; _ } as lp) =
match Preproc.next_source_chunk srclex with
| srclex, ([{ payload = Eof; _}] as eof) ->
let text, pplog = apply_active_replacing_full lp (buff @ eof) in
text, { lp with srclex; pplog; buff = [] }
| srclex, text ->
if show `Src lp then
Pretty.error "Src: %a@." Text.pp_text text;
match try_lexing_directive (with_srclex lp srclex) text with
| Ok lp ->
next_sentence lp
| Error `NotLexDir ->
match lookup_compiler_directive text with
| Error `NotCDir ->
preprocess_line { lp with srclex; buff = [] } (buff @ text)

and try_lexing_directive ({ persist = { pparser = (module Pp);
overlay_manager = om; _ };
srclex; _ } as lp) srctext =
match Text_supplier.supply_text_if_compiler_directive om srctext with
| Error `NotCDir ->
Error `NotLexDir
| Ok supplier ->
(* Here, [srctext] is never empty as it's known to start with a compiler
directive marker `>>` (or `$` for MF-style directives), so we should
always have a loc: *)
let loc = Option.get @@ Cobol_common.Srcloc.concat_locs srctext in
let parser = Pp.Incremental.lexing_directive (position lp) in
match ~&(Pp.MenhirInterpreter.loop supplier parser) with
| { result = Some Preproc_directives.LexDirSource sf as lexdir; diags } ->
let pplog = Preproc_trace.new_lexdir ~loc ?lexdir lp.pplog in
let lp = add_diags lp diags in
let lp = with_pplog lp pplog in
Ok (with_srclex lp (Preproc.with_source_format sf srclex))
| { result = None; diags } -> (* valid lexdir with erroneous semantics *)
let pplog = Preproc_trace.new_lexdir ~loc lp.pplog in
let lp = with_pplog lp pplog in
Ok (add_diags lp diags)
| exception Pp.Error ->
Ok (DIAGS.Cont.kerror (add_diag lp) ~loc
"Malformed@ or@ unknown@ compiler@ directive")
| Ok ([], lexdir_text) ->
next_chunk @@ on_lexing_directive { lp with srclex } lexdir_text
| Ok (text, lexdir_text) ->
let lp = { lp with srclex; buff = [] } in
preprocess_line (on_lexing_directive lp lexdir_text) (buff @ text)

and on_lexing_directive ({ persist = { pparser = (module Pp);
overlay_manager = om; _ };
srclex; _ } as lp) lexdir_text =
(* Here, [lexdir_text] is never empty as it's known to start with a compiler
directive marker `>>` (or `$` for MF-style directives), so we should always
have a loc: *)
let supplier = Text_supplier.pptoks_of_text_supplier om lexdir_text in
let loc = Option.get @@ Cobol_common.Srcloc.concat_locs lexdir_text in
let parser = Pp.Incremental.lexing_directive (position lp) in
match ~&(Pp.MenhirInterpreter.loop supplier parser) with
| { result = Some Preproc_directives.LexDirSource sf as lexdir; diags } ->
let pplog = Preproc_trace.new_lexdir ~loc ?lexdir lp.pplog in
let lp = add_diags lp diags in
let lp = with_pplog lp pplog in
with_srclex lp (Preproc.with_source_format sf srclex)
| { result = None; diags } -> (* valid lexdir with erroneous semantics *)
let pplog = Preproc_trace.new_lexdir ~loc lp.pplog in
let lp = with_pplog lp pplog in
add_diags lp diags
| exception Pp.Error ->
DIAGS.Cont.kerror (add_diag lp) ~loc
"Malformed@ or@ unknown@ compiler@ directive"

and preprocess_line lp srctext =
match try_preproc lp srctext with
| Ok (`CDirNone (lp, [])) -> (* Never return empty: skip to next sentence *)
next_sentence lp
next_chunk lp
| Ok (`CDirNone (lp, text)) ->
do_replacing lp text
| Ok (`CopyDone (lp, srctext))
Expand All @@ -210,16 +228,16 @@ and preprocess_line lp srctext =
be a compiler directive. *)
preprocess_line lp srctext
| Ok (`ReplaceDone (lp, text, srctext)) ->
text, with_buff lp srctext
text, with_buff lp @@ Text.strip_eof srctext
| Error (`MissingPeriod | `MissingText) ->
next_sentence (with_buff lp srctext)
next_chunk (with_buff lp srctext)

and do_replacing lp text =
match apply_active_replacing lp text with
| Ok (text, pplog) ->
text, with_pplog lp pplog
| Error (`MissingText ([], pplog, buff)) ->
next_sentence (with_buff_n_pplog lp buff pplog)
next_chunk (with_buff_n_pplog lp buff pplog)
| Error (`MissingText (text, pplog, buff)) ->
text, with_buff_n_pplog lp buff pplog

Expand Down Expand Up @@ -336,7 +354,7 @@ and read_lib ({ persist = { libpath; copybooks; verbose; _ }; _ } as lp)
and full_text ?(item = "library") ?postproc lp : Text.text * preprocessor =
let eofp p = ~&p = Text.Eof in
let rec aux acc lp =
let text, lp = next_sentence lp in
let text, lp = next_chunk lp in
let text = match postproc with
| None -> text
| Some p -> List.(rev @@ rev_map p text)
Expand All @@ -351,8 +369,8 @@ and full_text ?(item = "library") ?postproc lp : Text.text * preprocessor =
in
aux [] lp

let next_sentence lp =
let text, lp = next_sentence lp in
let next_chunk lp =
let text, lp = next_chunk lp in
if show `Txt lp then
Pretty.error "Txt: %a@." Text.pp_text text;
text, lp
Expand Down Expand Up @@ -417,8 +435,7 @@ let lex_file ~source_format ?(ppf = default_oppf) =
Cobol_common.do_unit begin fun (module DIAGS) input ->
let source_format =
DIAGS.grab_diags @@ decide_source_format input source_format in
let pl = make_srclex ~source_format input in
Preproc.print_source_lines ppf pl
Preproc.print_source ppf (make_srclex ~source_format input)
end

let lex_lib ~source_format ~libpath ?(ppf = default_oppf) =
Expand All @@ -429,17 +446,16 @@ let lex_lib ~source_format ~libpath ?(ppf = default_oppf) =
DIAGS.grab_diags @@
decide_source_format (Filename filename) source_format in
let pl = Preproc.srclex_from_file ~source_format filename in
Preproc.print_source_lines ppf pl
Preproc.print_source ppf pl
| Error lnf ->
Copybook.lib_not_found_error (DIAGS.error "%t") lnf
end

let fold_text_lines ~source_format ?epf f =
let fold_source_lines ~source_format ?epf ~f =
Cobol_common.do_any ?epf begin fun (module DIAGS) input ->
let source_format =
DIAGS.grab_diags @@ decide_source_format input source_format in
let pl = make_srclex ~source_format input in
Preproc.fold_source_lines pl f
Preproc.fold_source_lines (make_srclex ~source_format input) ~f
end

let pp_preprocessed ppf lp =
Expand Down
18 changes: 9 additions & 9 deletions src/lsp/cobol_preproc/preproc_engine.mli
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ val position: preprocessor -> Lexing.position
val source_format: preprocessor -> Src_format.any
val newline_cnums: preprocessor -> int list

val next_sentence: preprocessor -> Text.text * preprocessor
val next_chunk: preprocessor -> Text.text * preprocessor

(** {2 High-level commands} *)

Expand All @@ -61,14 +61,6 @@ val lex_file
-> input
-> unit

val fold_text_lines
: source_format: Cobol_config.source_format_spec
-> ?epf:Format.formatter
-> (Text.text -> 'a -> 'a)
-> input
-> 'a
-> 'a

val lex_lib
: source_format: Cobol_config.source_format_spec
-> libpath:string list
Expand All @@ -77,6 +69,14 @@ val lex_lib
-> [< `Alphanum | `Word ] * string
-> unit

val fold_source_lines
: source_format: Cobol_config.source_format_spec
-> ?epf:Format.formatter
-> f:(int -> Text.text -> 'a -> 'a)
-> input
-> 'a
-> 'a

val preprocess_file
: ?options: Preproc_options.preproc_options
-> ?ppf:Format.formatter
Expand Down
Loading