Skip to content

Commit

Permalink
Fix #20
Browse files Browse the repository at this point in the history
Signed-off-by: Marcello Seri <[email protected]>
  • Loading branch information
mseri committed Oct 17, 2022
1 parent 1cd2c2d commit 925dd36
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
37 changes: 37 additions & 0 deletions bin/doi2bib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,46 @@ open Doi2bib

(** [err msg] builds the polymorphic variant [`Error (false, msg)].
    NOTE(review): presumably consumed by Cmdliner's [Term.ret], where the
    boolean selects whether usage is printed — confirm at the call site. *)
let err msg = `Error (false, msg)

(* Matches one BibTeX field line of the exact shape
   [<blanks>url = {<content>},], anchored at the beginning of a line.
   Group 1 is the leading blanks together with the literal [url = {];
   group 2 is the field content, i.e. every character before the first ['}'].
   The field is assumed to be well formed: other spellings (different
   spacing around [=], no trailing comma) are not matched. *)
let url_re =
  let open Re in
  let field_start = group (seq [ rep blank; str "url = {" ]) in
  let field_body = group (rep (compl [ char '}' ])) in
  compile (seq [ bol; field_start; field_body; str "}," ])

(* Association list pairing a compiled regex for each supported
   percent-encoded sequence with the literal character it stands for.
   Each code sits next to its replacement so the two cannot drift apart. *)
let escape_table =
  let compile_entry (code, literal) = (Re.compile (Re.str code), literal) in
  List.map compile_entry
    [
      ("%2F", "/");
      ("%28", "(");
      ("%29", ")");
      ("%3C", "<");
      ("%3E", ">");
      ("%3A", ":");
      ("%3B", ";");
    ]

(** [unescape s] returns [s] with every occurrence of each percent-code
    listed in [escape_table] replaced by its literal character. The
    entries are applied one after another, in table order. *)
let unescape s =
  let decode_one acc (code_re, literal) =
    Re.replace_string ~all:true code_re ~by:literal acc
  in
  List.fold_left decode_one s escape_table

let process_id outfile id =
let open Lwt.Syntax in
let* bibtex = Http.get_bib_entry @@ Parser.parse_id id in
let f grp =
let prefix = Re.Group.get grp 1 in
let url = Re.Group.get grp 2 in
String.concat "" [ prefix; unescape url; "}," ]
in
(* The doi produced by crossref contains %-escaped urls,
but %s are comments in bibtex/latex. This is perhaps a
brute way of dealing with it but it has been working
fine for me on a huge number of examples.
See https://github.com/mseri/doi2bib/issues/20 for context*)
let bibtex = Re.replace ~all:true url_re ~f bibtex in
match outfile with
| "stdout" -> Lwt_io.print bibtex
| outfile ->
Expand Down
2 changes: 1 addition & 1 deletion bin/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(executable
(name doi2bib)
(public_name doi2bib)
(libraries cmdliner doi2bib unix)
(libraries cmdliner doi2bib re unix)
(preprocess future_syntax))

0 comments on commit 925dd36

Please sign in to comment.