-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
IRJ files parser as an Mlang sublibrary (#222)
- Loading branch information
Showing
13 changed files
with
467 additions
and
204 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# This file is generated by dune, edit dune-project instead | ||
opam-version: "2.0" | ||
version: "1.1.0" | ||
synopsis: "Parser for the IRJ tests" | ||
description: | ||
"This parser is aimed for the tests used by la DGFiP to test the calculation of the French income tax" | ||
maintainer: ["[email protected]"] | ||
authors: ["Denis Merigoux" "Raphaël Monat"] | ||
license: "GPL-3.0-or-later" | ||
homepage: "https://gitlab.inria.fr/verifisc/mlang" | ||
bug-reports: "https://gitlab.inria.fr/verifisc/mlang/issues" | ||
depends: [ | ||
"ocaml" {>= "4.11.2"} | ||
"dune" {build} | ||
"odoc" {= "1.5.3"} | ||
"ocamlformat" {= "0.19.0"} | ||
] | ||
build: [ | ||
["dune" "subst"] {pinned} | ||
[ | ||
"dune" | ||
"build" | ||
"-p" | ||
name | ||
"-j" | ||
jobs | ||
"@install" | ||
"@runtest" {with-test} | ||
"@doc" {with-doc} | ||
] | ||
] | ||
dev-repo: "git+https://gitlab.inria.fr/verifisc/mlang.git" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
(env | ||
(static | ||
(ocamlopt_flags | ||
(-O3 -ccopt -static)))) | ||
|
||
(library | ||
(name irj_include) | ||
(public_name irj_parser)) | ||
|
||
(ocamllex irj_lexer) | ||
|
||
(menhir | ||
(modules irj_parser) | ||
(flags --strict --explain)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]>, David Declerck (2023) | ||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
type pos = { | ||
pos_filename : string; | ||
pos_loc : Lexing.position * Lexing.position; | ||
} | ||
|
||
let mk_position sloc = | ||
{ pos_filename = (fst sloc).Lexing.pos_fname; pos_loc = sloc } | ||
|
||
exception TestParsingError of (string * pos) | ||
(* duplication of some of the utils *) | ||
|
||
type literal = I of int | F of float | ||
|
||
type var_value = string * literal * pos | ||
|
||
(* type var_values = var_value list *) | ||
|
||
type calc_error = string * pos | ||
|
||
(* type calc_errors = calc_error list *) | ||
|
||
(* type rappel = string * string * var_value * string * string * string * string | ||
* string *) | ||
type rappel = { | ||
event_nb : int; | ||
rappel_nb : int; | ||
variable_code : string; | ||
change_value : int; | ||
direction : string; | ||
(* R, C, M, P *) | ||
penalty_code : int option; | ||
(* 0 - 99 *) | ||
base_tolerance_legale : int option; | ||
month_year : int; | ||
(* MMYYYY *) | ||
decl_2042_rect : int option; | ||
(* 0 or 1 *) | ||
pos : pos; | ||
} | ||
|
||
type prim_data_block = { | ||
entrees : var_value list; | ||
controles_attendus : calc_error list; | ||
resultats_attendus : var_value list; | ||
} | ||
|
||
type corr_data_block = { | ||
entrees_rappels : rappel list; | ||
controles_attendus : calc_error list; | ||
resultats_attendus : var_value list; | ||
} | ||
|
||
type irj_file = { | ||
nom : string; | ||
prim : prim_data_block; | ||
rapp : corr_data_block option; | ||
(* corr : prim_data_block option; *) | ||
(*corr is for old correctif form from primitif files, rapp is for the | ||
actual one in correctif files*) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]> (2023) | ||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
open Irj_ast | ||
|
||
let parse_file (test_name : string) : irj_file = | ||
let input = open_in test_name in | ||
let filebuf = Lexing.from_channel input in | ||
let filebuf = | ||
{ | ||
filebuf with | ||
lex_curr_p = { filebuf.lex_curr_p with pos_fname = test_name }; | ||
} | ||
in | ||
let f = | ||
try Irj_parser.irj_file Irj_lexer.token filebuf with | ||
| TestParsingError e -> | ||
close_in input; | ||
raise (TestParsingError e) | ||
| Irj_parser.Error -> | ||
close_in input; | ||
raise | ||
(TestParsingError | ||
( "Test syntax error", | ||
mk_position (filebuf.lex_start_p, filebuf.lex_curr_p) )) | ||
in | ||
close_in input; | ||
f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]> (2023) | ||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
|
@@ -13,14 +14,6 @@ | |
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
type literal = I of int | F of float | ||
|
||
type var_values = (string * literal * Pos.t) list | ||
|
||
type test_file = { | ||
nom : string; | ||
ep : var_values; | ||
cp : var_values; | ||
rp : var_values; | ||
corr : (var_values * var_values * var_values) option; | ||
} | ||
val parse_file : string -> Irj_ast.irj_file | ||
(** [parse_file file] loads the content of a given IRJ [file] in a simple | ||
datastructure. *) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
David Declerck (2023) | ||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
{ | ||
open Lexing | ||
open Irj_parser | ||
open Irj_ast | ||
|
||
let error lb msg = | ||
raise (TestParsingError ("Lexing error : " ^ msg, | ||
mk_position (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb))) | ||
|
||
module StrMap = Map.Make (String) | ||
|
||
let keywords = | ||
List.fold_left (fun map (kw, tok) -> | ||
StrMap.add kw tok map | ||
) StrMap.empty [ | ||
"#NOM", NOM; | ||
"#FIP", FIP; | ||
"#ENTREES-PRIMITIF", ENTREESPRIM; | ||
"#CONTROLES-PRIMITIF", CONTROLESPRIM; | ||
"#RESULTATS-PRIMITIF", RESULTATSPRIM; | ||
"#ENTREES-CORRECTIF", ENTREESCORR; | ||
"#CONTROLES-CORRECTIF", CONTROLESCORR; | ||
"#RESULTATS-CORRECTIF", RESULTATSCORR; | ||
"#ENTREES-RAPPELS", ENTREESRAPP; | ||
"#CONTROLES-RAPPELS", CONTROLESRAPP; | ||
"#RESULTATS-RAPPELS", RESULTATSRAPP; | ||
"#DATES", DATES; | ||
"#AVIS_IR", AVISIR; | ||
"#AVIS_CSG", AVISCSG; | ||
] | ||
|
||
let is_bol lb = | ||
(* bol = beginning of line *) | ||
lb.lex_start_p.pos_cnum - lb.lex_start_p.pos_bol = 0 | ||
|
||
let check_cr lb = | ||
if String.contains (lexeme lb) '\r' then | ||
error lb ("Carriage return detected") | ||
(* No more supposed to be an error in autotests. Keeping it to enforce it later? *) | ||
} | ||
|
||
let blank = [' ' '\t'] | ||
let any = [^ '\n'] | ||
let nl = ['\n'] | ||
|
||
rule token = parse | ||
|
||
| '\n' | "\r\n" | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then token lexbuf | ||
else NL } | ||
|
||
| '*' any* nl | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then token lexbuf | ||
else error lexbuf "Comment with * must start in the first column" } | ||
|
||
| blank any* nl | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then error lexbuf "Line can not start with a blank" | ||
else NL } | ||
|
||
| '-'? ['0' - '9']+ as i | ||
{ INTEGER (int_of_string i) } | ||
|
||
| '-'? ['0' - '9']+ '.' ['0' - '9']* as f | ||
{ FLOAT (float_of_string f) } (* DONT KEEP THAT *) | ||
(* Probably in order to write a specific function for our number format *) | ||
|
||
| ['a'-'z' 'A'-'Z' '0'-'9' '_']+ as s | ||
{ SYMBOL s } | ||
|
||
| ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' '.' ';' (*' '*)]+ as s | ||
{ NAME s } | ||
(* Compared to the old lexer, adds _ and . removes space *) | ||
|
||
| "/" | ||
{ SLASH } | ||
|
||
| "##" | ||
{ ENDSHARP } | ||
|
||
| "#" ['a'-'z' 'A'-'Z' '0'-'9' '_' '-']+ as s | ||
{ match StrMap.find_opt s keywords with | ||
| None -> error lexbuf (Printf.sprintf "Unknown section name: '#%s'" s) | ||
| Some t -> t } | ||
|
||
| eof | ||
{ EOF } | ||
|
||
| _ as c | ||
{ error lexbuf (Printf.sprintf | ||
"Unexpected character '%c' (%d)" c (Char.code c)) } |
Oops, something went wrong.