Skip to content

Commit

Permalink
Merge pull request #21 from mseri/fix-url
Browse files Browse the repository at this point in the history
Fix #20
  • Loading branch information
mseri authored Oct 17, 2022
2 parents 1cd2c2d + 7bfd4b5 commit 8de99f1
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 6 deletions.
37 changes: 37 additions & 0 deletions bin/doi2bib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,46 @@ open Doi2bib

(* [err msg] wraps [msg] as [`Error (false, msg)].
   NOTE(review): presumably the error form expected by Cmdliner's [Term.ret],
   where [false] suppresses the usage text -- confirm against the caller. *)
let err msg = `Error (false, msg)

(* Compiled pattern for a single BibTeX [url] field line.

   Starting at the beginning of a line it matches optional blanks followed by
   the literal "url = {", then any run of characters other than '}', then the
   literal "},".

   Group 1 is the prefix up to and including the opening brace; group 2 is
   the URL text between the braces. The field is assumed to always be well
   formed in exactly this shape. *)
let url_re =
  let open Re in
  let prefix = group (seq [ rep blank; str "url = {" ]) in
  let url = group (rep (compl [ char '}' ])) in
  compile (seq [ bol; prefix; url; str "}," ])

(* Association list pairing a compiled pattern for each supported
   percent-escape with the literal character it decodes to.

   Each escape is written next to its replacement, so the two can never get
   out of step (the previous parallel lists relied on [List.combine], which
   raises [Invalid_argument] if their lengths differ).

   The hex digits of a percent-escape are case-insensitive (RFC 3986,
   section 2.1), so every pattern is compiled with [Re.no_case]: "%2f" is
   decoded exactly like "%2F". *)
let escape_table =
  List.map
    (fun (code, chr) -> (Re.compile (Re.no_case (Re.str code)), chr))
    [
      ("%2F", "/");
      ("%28", "(");
      ("%29", ")");
      ("%3C", "<");
      ("%3E", ">");
      ("%3A", ":");
      ("%3B", ";");
    ]

(* [unescape s] returns [s] with every percent-escape listed in
   [escape_table] replaced by its literal character. The table entries are
   applied one after another, each replacing all of its occurrences. *)
let unescape s =
  let decode_one acc (pattern, literal) =
    Re.replace_string ~all:true pattern ~by:literal acc
  in
  List.fold_left decode_one s escape_table

let process_id outfile id =
let open Lwt.Syntax in
let* bibtex = Http.get_bib_entry @@ Parser.parse_id id in
let f grp =
let prefix = Re.Group.get grp 1 in
let url = Re.Group.get grp 2 in
String.concat "" [ prefix; unescape url; "}," ]
in
(* The entry produced by Crossref contains %-escaped URLs,
but % starts a comment in BibTeX/LaTeX. This is perhaps a
crude way of dealing with it, but it has been working
fine for me on a huge number of examples.
See https://github.com/mseri/doi2bib/issues/20 for context. *)
let bibtex = Re.replace ~all:true url_re ~f bibtex in
match outfile with
| "stdout" -> Lwt_io.print bibtex
| outfile ->
Expand Down
2 changes: 1 addition & 1 deletion bin/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(executable
(name doi2bib)
(public_name doi2bib)
(libraries cmdliner doi2bib unix)
(libraries cmdliner doi2bib re unix)
(preprocess future_syntax))
4 changes: 2 additions & 2 deletions tests/arxiv.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Arxiv entry (with DOI entry) executed with prefix
$ doi2bib arXiv:1609.01724
@article{Prandi_2018,
doi = {10.4171/jst/226},
url = {https://doi.org/10.4171%2Fjst%2F226},
url = {https://doi.org/10.4171/jst/226},
year = 2018,
month = {jul},
publisher = {European Mathematical Society - {EMS} - Publishing House {GmbH}},
Expand All @@ -17,7 +17,7 @@ Arxiv Entry (with DOI entry) executed without prefix
$ doi2bib 1902.00436
@article{Vermeeren_2019,
doi = {10.1088/1751-8121/ab4767},
url = {https://doi.org/10.1088%2F1751-8121%2Fab4767},
url = {https://doi.org/10.1088/1751-8121/ab4767},
year = 2019,
month = {oct},
publisher = {{IOP} Publishing},
Expand Down
18 changes: 16 additions & 2 deletions tests/doi.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ DOI entry executed without prefix
$ doi2bib 10.1007/s10569-019-9946-9
@article{Bravetti_2020,
doi = {10.1007/s10569-019-9946-9},
url = {https://doi.org/10.1007%2Fs10569-019-9946-9},
url = {https://doi.org/10.1007/s10569-019-9946-9},
year = 2020,
month = {jan},
publisher = {Springer Science and Business Media {LLC}},
Expand All @@ -16,7 +16,7 @@ DOI entry executed with prefix
$ doi2bib doi:10.4171/JST/226
@article{Prandi_2018,
doi = {10.4171/jst/226},
url = {https://doi.org/10.4171%2Fjst%2F226},
url = {https://doi.org/10.4171/jst/226},
year = 2018,
month = {jul},
publisher = {European Mathematical Society - {EMS} - Publishing House {GmbH}},
Expand All @@ -27,3 +27,17 @@ DOI entry executed with prefix
title = {Quantum confinement on non-complete Riemannian manifolds},
journal = {Journal of Spectral Theory}
}
DOI entry containing parentheses
$ doi2bib "doi:10.1016/0393-0440(89)90029-6"
@article{Albert_1989,
doi = {10.1016/0393-0440(89)90029-6},
url = {https://doi.org/10.1016/0393-0440(89)90029-6},
year = 1989,
publisher = {Elsevier {BV}},
volume = {6},
number = {4},
pages = {627--649},
author = {Claude Albert},
title = {Le th{\'{e}}or{\`{e}}me de r{\'{e}}duction de Marsden-Weinstein en g{\'{e}}om{\'{e}}trie cosymplectique et de contact},
journal = {Journal of Geometry and Physics}
}
2 changes: 1 addition & 1 deletion tests/pubmed.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Pubmed entry with 'PMC' prefix.
$ doi2bib PMC2883744
@article{Comas_2010,
doi = {10.1038/ng.590},
url = {https://doi.org/10.1038%2Fng.590},
url = {https://doi.org/10.1038/ng.590},
year = 2010,
month = {may},
publisher = {Springer Science and Business Media {LLC}},
Expand Down

0 comments on commit 8de99f1

Please sign in to comment.