-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
IRJ files parser as an Mlang sublibrary #222
Changes from 18 commits
6b00947
68ea878
3d3aec5
e633129
1874466
7d7c8ab
9938401
6cb06e1
fc3ce45
9d59616
c8896bf
cc5f4d6
debeadf
d78259f
4114440
e492f35
900d4c7
906230c
9ca3ac1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# This file is generated by dune, edit dune-project instead | ||
opam-version: "2.0" | ||
version: "1.1.0" | ||
synopsis: "Parser for the IRJ tests" | ||
description: | ||
"This parser is aimed for the tests used by la DGFiP to test the calculation of the French income tax" | ||
maintainer: ["[email protected]"] | ||
authors: ["Denis Merigoux" "Raphaël Monat"] | ||
license: "GPL-3.0-or-later" | ||
homepage: "https://gitlab.inria.fr/verifisc/mlang" | ||
bug-reports: "https://gitlab.inria.fr/verifisc/mlang/issues" | ||
depends: [ | ||
"ocaml" {>= "4.11.2"} | ||
"dune" {build} | ||
"odoc" {= "1.5.3"} | ||
"ocamlformat" {= "0.19.0"} | ||
] | ||
build: [ | ||
["dune" "subst"] {pinned} | ||
[ | ||
"dune" | ||
"build" | ||
"-p" | ||
name | ||
"-j" | ||
jobs | ||
"@install" | ||
"@runtest" {with-test} | ||
"@doc" {with-doc} | ||
] | ||
] | ||
dev-repo: "git+https://gitlab.inria.fr/verifisc/mlang.git" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
(env | ||
(static | ||
(ocamlopt_flags | ||
(-O3 -ccopt -static)))) | ||
|
||
(executable | ||
(name main) | ||
(package irj_parser) | ||
(public_name irj_parser.exe) | ||
(libraries irj_parser)) |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is there this empty file here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thought we might want an executable at first and didn't delete it afterwards ^^' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
(env | ||
(static | ||
(ocamlopt_flags | ||
(-O3 -ccopt -static)))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto |
||
|
||
(library | ||
(name irj_file) | ||
(public_name irj_parser) | ||
(wrapped false)) | ||
|
||
(ocamllex irj_lexer) | ||
|
||
(menhir | ||
(modules irj_parser) | ||
(flags --strict --explain)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]>, David Declerck (2023) | ||
|
||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
|
||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
|
||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
type t = { pos_filename : string; pos_loc : Lexing.position * Lexing.position } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That type should not be named |
||
|
||
let mk_position sloc = | ||
{ pos_filename = (fst sloc).Lexing.pos_fname; pos_loc = sloc } | ||
|
||
exception | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need to duplicate the full type here as you won't use all its components in exception TestParsingError of string * pos |
||
StructuredError of (string * (string option * t) list * (unit -> unit) option) | ||
(* duplication of some of the utils *) | ||
|
||
type literal = I of int | F of float | ||
|
||
type var_value = string * literal * t | ||
|
||
(* type var_values = var_value list *) | ||
|
||
type calc_error = string * t | ||
|
||
(* type calc_errors = calc_error list *) | ||
|
||
(* type rappel = string * string * var_value * string * string * string * string | ||
* string *) | ||
type rappel = { | ||
event_nb : int; | ||
rappel_nb : int; | ||
variable_code : string; | ||
change_value : int; | ||
direction : string; | ||
(* R, C, M, P *) | ||
penalty_code : int option; | ||
(* 0 - 99 *) | ||
base_tolerance_legale : int option; | ||
month_year : int; | ||
(* MMYYYY *) | ||
decl_2042_rect : int option; | ||
(* 0 or 1 *) | ||
pos : t; | ||
} | ||
|
||
type prim_data_block = { | ||
entrees : var_value list; | ||
controles_attendus : calc_error list; | ||
resultats_attendus : var_value list; | ||
} | ||
|
||
type corr_data_block = { | ||
entrees_rappels : rappel list; | ||
controles_attendus : calc_error list; | ||
resultats_attendus : var_value list; | ||
} | ||
|
||
type irj_file = { | ||
nom : string; | ||
prim : prim_data_block; | ||
rapp : corr_data_block option; | ||
(* corr : prim_data_block option; *) | ||
(*corr is for old correctif form from primitif files, rapp is for the | ||
actual one in correctif files*) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]> (2023) | ||
|
||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
|
||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
|
||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
open Irj_ast | ||
|
||
let parse_file (test_name : string) : irj_file = | ||
let input = open_in test_name in | ||
let filebuf = Lexing.from_channel input in | ||
let filebuf = | ||
{ | ||
filebuf with | ||
lex_curr_p = { filebuf.lex_curr_p with pos_fname = test_name }; | ||
} | ||
in | ||
let f = | ||
try Irj_parser.irj_file Irj_lexer.token filebuf with | ||
| StructuredError e -> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you can catch |
||
close_in input; | ||
raise (StructuredError e) | ||
| Irj_parser.Error -> | ||
close_in input; | ||
raise | ||
(StructuredError | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here raise a |
||
( "Test syntax error", | ||
[ (None, mk_position (filebuf.lex_start_p, filebuf.lex_curr_p)) ], | ||
None )) | ||
in | ||
close_in input; | ||
f |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
Mathieu Durero <[email protected]> (2023) | ||
|
||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
|
@@ -13,14 +14,6 @@ | |
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
type literal = I of int | F of float | ||
|
||
type var_values = (string * literal * Pos.t) list | ||
|
||
type test_file = { | ||
nom : string; | ||
ep : var_values; | ||
cp : var_values; | ||
rp : var_values; | ||
corr : (var_values * var_values * var_values) option; | ||
} | ||
val parse_file : string -> Irj_ast.irj_file | ||
(** [parse_file file] loads the content of a given IRJ [file] in a simple | ||
datastructure. *) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019) | ||
David Declerck (2023) | ||
|
||
This program is free software: you can redistribute it and/or modify it under | ||
the terms of the GNU General Public License as published by the Free Software | ||
Foundation, either version 3 of the License, or (at your option) any later | ||
version. | ||
|
||
This program is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
details. | ||
|
||
You should have received a copy of the GNU General Public License along with | ||
this program. If not, see <https://www.gnu.org/licenses/>. *) | ||
|
||
{ | ||
open Lexing | ||
open Irj_parser | ||
open Irj_ast | ||
|
||
let error lb msg = | ||
raise (StructuredError ("Lexing error : " ^ msg, | ||
[ (None, mk_position (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb)) ], | ||
None)) | ||
|
||
module StrMap = Map.Make (String) | ||
|
||
let keywords = | ||
List.fold_left (fun map (kw, tok) -> | ||
StrMap.add kw tok map | ||
) StrMap.empty [ | ||
"#NOM", NOM; | ||
"#FIP", FIP; | ||
"#ENTREES-PRIMITIF", ENTREESPRIM; | ||
"#CONTROLES-PRIMITIF", CONTROLESPRIM; | ||
"#RESULTATS-PRIMITIF", RESULTATSPRIM; | ||
"#ENTREES-CORRECTIF", ENTREESCORR; | ||
"#CONTROLES-CORRECTIF", CONTROLESCORR; | ||
"#RESULTATS-CORRECTIF", RESULTATSCORR; | ||
"#ENTREES-RAPPELS", ENTREESRAPP; | ||
"#CONTROLES-RAPPELS", CONTROLESRAPP; | ||
"#RESULTATS-RAPPELS", RESULTATSRAPP; | ||
"#DATES", DATES; | ||
"#AVIS_IR", AVISIR; | ||
"#AVIS_CSG", AVISCSG; | ||
] | ||
|
||
let is_bol lb = | ||
(* bol = beginning of line *) | ||
lb.lex_start_p.pos_cnum - lb.lex_start_p.pos_bol = 0 | ||
|
||
let check_cr lb = | ||
if String.contains (lexeme lb) '\r' then | ||
error lb ("Carriage return detected") | ||
(* No more supposed to be an error in autotests. Keeping it to enforce it later? *) | ||
} | ||
|
||
let blank = [' ' '\t'] | ||
let any = [^ '\n'] | ||
let nl = ['\n'] | ||
|
||
rule token = parse | ||
|
||
| '\n' | "\r\n" | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then token lexbuf | ||
else NL } | ||
|
||
| '*' any* nl | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then token lexbuf | ||
else error lexbuf "Comment with * must start in the first column" } | ||
|
||
| blank any* nl | ||
{ check_cr lexbuf; new_line lexbuf; | ||
if is_bol lexbuf then error lexbuf "Line can not start with a blank" | ||
else NL } | ||
|
||
| '-'? ['0' - '9']+ as i | ||
{ INTEGER (int_of_string i) } | ||
|
||
| '-'? ['0' - '9']+ '.' ['0' - '9']* as f | ||
{ FLOAT (float_of_string f) } (* DONT KEEP THAT *) | ||
(* Probably in order to write a specific function for our number format *) | ||
|
||
| ['a'-'z' 'A'-'Z' '0'-'9' '_']+ as s | ||
{ SYMBOL s } | ||
|
||
| ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' '.' ';' (*' '*)]+ as s | ||
{ NAME s } | ||
(* Compared to the old lexer, adds _ and . removes space *) | ||
|
||
| "/" | ||
{ SLASH } | ||
|
||
| "##" | ||
{ ENDSHARP } | ||
|
||
| "#" ['a'-'z' 'A'-'Z' '0'-'9' '_' '-']+ as s | ||
{ match StrMap.find_opt s keywords with | ||
| None -> error lexbuf (Printf.sprintf "Unknown section name: '#%s'" s) | ||
| Some t -> t } | ||
|
||
| eof | ||
{ EOF } | ||
|
||
| _ as c | ||
{ error lexbuf (Printf.sprintf | ||
"Unexpected character '%c' (%d)" c (Char.code c)) } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we sure we want
-O3
? That is pretty agressiveThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This stanza is there in a lot of places and only affect static profile builds (only uses for releases IIRC).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just checking, are we leaving this as is? I changed the rest following your remarks (thanks for those!) and it's ready to be pushed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes you can leave this as is :) Please push!