Skip to content

Commit

Permalink
IRJ files parser as an Mlang sublibrary (#222)
Browse files Browse the repository at this point in the history
  • Loading branch information
denismerigoux authored Nov 29, 2023
2 parents 4c12d21 + 9ca3ac1 commit d7fa9bb
Show file tree
Hide file tree
Showing 13 changed files with 467 additions and 204 deletions.
14 changes: 14 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,17 @@
(= 0.19.0))
(parmap
(= 1.2.3))))

(package
(name irj_parser)
(synopsis "Parser for the IRJ tests")
(description "This parser is aimed for the tests used by la DGFiP to test the calculation of the French income tax")
(depends
(ocaml
(and(>= "4.11.2")))
(dune
(and :build))
(odoc
(= 1.5.3))
(ocamlformat
(= 0.19.0))))
32 changes: 32 additions & 0 deletions irj_parser.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "1.1.0"
synopsis: "Parser for the IRJ tests"
description:
"This parser is aimed for the tests used by la DGFiP to test the calculation of the French income tax"
maintainer: ["[email protected]"]
authors: ["Denis Merigoux" "Raphaël Monat"]
license: "GPL-3.0-or-later"
homepage: "https://gitlab.inria.fr/verifisc/mlang"
bug-reports: "https://gitlab.inria.fr/verifisc/mlang/issues"
depends: [
"ocaml" {>= "4.11.2"}
"dune" {build}
"odoc" {= "1.5.3"}
"ocamlformat" {= "0.19.0"}
]
build: [
["dune" "subst"] {pinned}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://gitlab.inria.fr/verifisc/mlang.git"
14 changes: 14 additions & 0 deletions src/irj_parser/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
(env
(static
(ocamlopt_flags
(-O3 -ccopt -static))))

(library
(name irj_include)
(public_name irj_parser))

(ocamllex irj_lexer)

(menhir
(modules irj_parser)
(flags --strict --explain))
76 changes: 76 additions & 0 deletions src/irj_parser/irj_ast.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019)
Mathieu Durero <[email protected]>, David Declerck (2023)
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>. *)

type pos = {
pos_filename : string;
pos_loc : Lexing.position * Lexing.position;
}

let mk_position sloc =
{ pos_filename = (fst sloc).Lexing.pos_fname; pos_loc = sloc }

exception TestParsingError of (string * pos)
(* duplication of some of the utils *)

type literal = I of int | F of float

type var_value = string * literal * pos

(* type var_values = var_value list *)

type calc_error = string * pos

(* type calc_errors = calc_error list *)

(* type rappel = string * string * var_value * string * string * string * string
* string *)
type rappel = {
event_nb : int;
rappel_nb : int;
variable_code : string;
change_value : int;
direction : string;
(* R, C, M, P *)
penalty_code : int option;
(* 0 - 99 *)
base_tolerance_legale : int option;
month_year : int;
(* MMYYYY *)
decl_2042_rect : int option;
(* 0 or 1 *)
pos : pos;
}

type prim_data_block = {
entrees : var_value list;
controles_attendus : calc_error list;
resultats_attendus : var_value list;
}

type corr_data_block = {
entrees_rappels : rappel list;
controles_attendus : calc_error list;
resultats_attendus : var_value list;
}

type irj_file = {
nom : string;
prim : prim_data_block;
rapp : corr_data_block option;
(* corr : prim_data_block option; *)
(*corr is for old correctif form from primitif files, rapp is for the
actual one in correctif files*)
}
41 changes: 41 additions & 0 deletions src/irj_parser/irj_file.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019)
Mathieu Durero <[email protected]> (2023)
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>. *)

open Irj_ast

let parse_file (test_name : string) : irj_file =
let input = open_in test_name in
let filebuf = Lexing.from_channel input in
let filebuf =
{
filebuf with
lex_curr_p = { filebuf.lex_curr_p with pos_fname = test_name };
}
in
let f =
try Irj_parser.irj_file Irj_lexer.token filebuf with
| TestParsingError e ->
close_in input;
raise (TestParsingError e)
| Irj_parser.Error ->
close_in input;
raise
(TestParsingError
( "Test syntax error",
mk_position (filebuf.lex_start_p, filebuf.lex_curr_p) ))
in
close_in input;
f
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019)
Mathieu Durero <[email protected]> (2023)
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Expand All @@ -13,14 +14,6 @@
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>. *)

type literal = I of int | F of float

type var_values = (string * literal * Pos.t) list

type test_file = {
nom : string;
ep : var_values;
cp : var_values;
rp : var_values;
corr : (var_values * var_values * var_values) option;
}
val parse_file : string -> Irj_ast.irj_file
(** [parse_file file] loads the content of a given IRJ [file] in a simple
datastructure. *)
109 changes: 109 additions & 0 deletions src/irj_parser/irj_lexer.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
(* Copyright Inria, contributors: Raphaël Monat <[email protected]> (2019)
David Declerck (2023)
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>. *)

{
open Lexing
open Irj_parser
open Irj_ast

let error lb msg =
raise (TestParsingError ("Lexing error : " ^ msg,
mk_position (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb)))

module StrMap = Map.Make (String)

let keywords =
List.fold_left (fun map (kw, tok) ->
StrMap.add kw tok map
) StrMap.empty [
"#NOM", NOM;
"#FIP", FIP;
"#ENTREES-PRIMITIF", ENTREESPRIM;
"#CONTROLES-PRIMITIF", CONTROLESPRIM;
"#RESULTATS-PRIMITIF", RESULTATSPRIM;
"#ENTREES-CORRECTIF", ENTREESCORR;
"#CONTROLES-CORRECTIF", CONTROLESCORR;
"#RESULTATS-CORRECTIF", RESULTATSCORR;
"#ENTREES-RAPPELS", ENTREESRAPP;
"#CONTROLES-RAPPELS", CONTROLESRAPP;
"#RESULTATS-RAPPELS", RESULTATSRAPP;
"#DATES", DATES;
"#AVIS_IR", AVISIR;
"#AVIS_CSG", AVISCSG;
]

let is_bol lb =
(* bol = beginning of line *)
lb.lex_start_p.pos_cnum - lb.lex_start_p.pos_bol = 0

let check_cr lb =
if String.contains (lexeme lb) '\r' then
error lb ("Carriage return detected")
(* No more supposed to be an error in autotests. Keeping it to enforce it later? *)
}

let blank = [' ' '\t']
let any = [^ '\n']
let nl = ['\n']

rule token = parse

| '\n' | "\r\n"
{ check_cr lexbuf; new_line lexbuf;
if is_bol lexbuf then token lexbuf
else NL }

| '*' any* nl
{ check_cr lexbuf; new_line lexbuf;
if is_bol lexbuf then token lexbuf
else error lexbuf "Comment with * must start in the first column" }

| blank any* nl
{ check_cr lexbuf; new_line lexbuf;
if is_bol lexbuf then error lexbuf "Line can not start with a blank"
else NL }

| '-'? ['0' - '9']+ as i
{ INTEGER (int_of_string i) }

| '-'? ['0' - '9']+ '.' ['0' - '9']* as f
{ FLOAT (float_of_string f) } (* DONT KEEP THAT *)
(* Probably in order to write a specific function for our number format *)

| ['a'-'z' 'A'-'Z' '0'-'9' '_']+ as s
{ SYMBOL s }

| ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' '.' ';' (*' '*)]+ as s
{ NAME s }
(* Compared to the old lexer, adds _ and . removes space *)

| "/"
{ SLASH }

| "##"
{ ENDSHARP }

| "#" ['a'-'z' 'A'-'Z' '0'-'9' '_' '-']+ as s
{ match StrMap.find_opt s keywords with
| None -> error lexbuf (Printf.sprintf "Unknown section name: '#%s'" s)
| Some t -> t }

| eof
{ EOF }

| _ as c
{ error lexbuf (Printf.sprintf
"Unexpected character '%c' (%d)" c (Char.code c)) }
Loading

0 comments on commit d7fa9bb

Please sign in to comment.