Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions bibfmt/bin/bibfmt.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ let bibfmt out strict single_line quiet verbose force files =
"Warning: No valid BibTeX entries found in the file.\n%!";
combined_content)
else
let options =
let options =
{ Bibtex.default_options with strict; single_line }
in
Bibtex.pretty_print_bibtex ~options parse_result.items)
Expand Down Expand Up @@ -114,7 +114,8 @@ let () =
in
let single_line =
let doc =
"Force field values onto a single line by replacing newlines with a space."
"Force field values onto a single line by replacing newlines with a \
space."
in
Arg.(value & flag & info [ "l"; "single-line" ] ~doc)
in
Expand All @@ -141,7 +142,10 @@ let () =
Arg.(value & pos_all string [] & info [] ~docv:"FILES" ~doc)
in
let bibfmt_t =
Term.(ret (const bibfmt $ out $ strict $ single_line $ quiet $ verbose $ force $ files))
Term.(
ret
(const bibfmt $ out $ strict $ single_line $ quiet $ verbose $ force
$ files))
in
let info =
let doc = "A little CLI tool to pretty print bibtex files." in
Expand Down
9 changes: 5 additions & 4 deletions bibfmt/lib/bibtex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -677,9 +677,8 @@ let format_entry options entry =
|> List.filter (function
| Field f -> (
match f.value with
| QuotedStringValue s
| BracedStringValue s
| UnquotedStringValue s ->
| QuotedStringValue s | BracedStringValue s | UnquotedStringValue s
->
String.length (String.trim s) > 0
| NumberValue _ -> true)
| EntryComment _ -> true)
Expand All @@ -706,7 +705,9 @@ let format_entry options entry =
let contents_str =
if filtered_contents = [] then ""
else
let formatted_contents = List.map format_entry_content' filtered_contents in
let formatted_contents =
List.map format_entry_content' filtered_contents
in
let rec add_commas_except_last = function
| [] -> []
| [ last ] -> [ last ] (* No comma for the last item *)
Expand Down
13 changes: 8 additions & 5 deletions bibfmt/lib/bibtex.mli
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,14 @@ val format_field_value_with_url_unescaping :
@return String representation with URLs unescaped if applicable *)

val format_field : bool -> bool -> field -> string
(** [format_field capitalized single_line field] formats a complete field (name = value).
(** [format_field capitalized single_line field] formats a complete field (name
= value).
@param field The field to format
@return String representation of the field *)

val format_entry_content : bool -> bool -> entry_content -> string
(** [format_entry_content capitalized single_line content] formats entry content (field or comment).
(** [format_entry_content capitalized single_line content] formats entry content
(field or comment).
@param content The entry content to format
@return String representation of the content *)

Expand Down Expand Up @@ -201,13 +203,14 @@ val find_duplicate_groups :
Each group contains at least 2 entries that match on the specified keys. *)

val string_of_field_value : field_value -> string
(** [string_of_field_value fv] converts a field value to its string representation.
(** [string_of_field_value fv] converts a field value to its string
representation.
@param fv The field value to convert
@return String representation of the field value *)

val make_field : string -> string -> entry_content
(** [make_field name value] creates a BibTeX field with the given name and value.
The value is wrapped in braces.
(** [make_field name value] creates a BibTeX field with the given name and
value. The value is wrapped in braces.
@param name Field name
@param value Field value as a string
@return An entry_content Field with a BracedStringValue *)
3 changes: 1 addition & 2 deletions doi2bib.opam
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ build: [
dev-repo: "git+https://github.com/mseri/doi2bib.git"
depends: [
"dune" {>= "3.0"}
"ocaml" {>= "4.08"}
"ocaml" {>= "4.14.0"}
"bibfmt" {= version}
"astring" {>= "0.8.0"}
"cohttp-lwt-unix" {>= "2.5.0"}
"cmdliner" {>= "1.1.0"}
"clz" {>= "0.1.0"}
Expand Down
5 changes: 2 additions & 3 deletions doi2bib.opam.template
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
depends: [
"dune" {>= "2.8"}
"ocaml" {>= "4.08"}
"dune" {>= "3.0"}
"ocaml" {>= "4.14.0"}
"bibfmt" {= version}
"astring" {>= "0.8.0"}
"cohttp-lwt-unix" {>= "2.5.0"}
"cmdliner" {>= "1.1.0"}
"clz" {>= "0.1.0"}
Expand Down
6 changes: 4 additions & 2 deletions doi2bib/bin/doi2bib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ let process_id outfile id =
let open Lwt.Syntax in
let* bibtex = Http.get_bib_entry @@ Parser.parse_id id in

let parsed_items = Bibtex.parse_bibtex bibtex in
let parsed_items =
Bibtex.parse_bibtex bibtex |> List.map Helpers.clean_item
in
let formatted =
if List.length parsed_items = 0 then (
Printf.eprintf
Expand Down Expand Up @@ -88,7 +90,7 @@ let process_file outfile infile =
let write_out () =
let bibtex_out = Buffer.contents bibtex_buffer in
let open Bibtex in
let parsed_items = parse_bibtex bibtex_out in
let parsed_items = parse_bibtex bibtex_out |> List.map Helpers.clean_item in
let options = { default_options with strict = true } in
let formatted = pretty_print_bibtex ~options parsed_items in

Expand Down
1 change: 1 addition & 0 deletions doi2bib/lib/doi2bib.ml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
module Http = Http
module Parser = Parser
module Helpers = Helpers
2 changes: 1 addition & 1 deletion doi2bib/lib/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(library
(name doi2bib)
(libraries astring cohttp-lwt-unix clz.cohttp ezxmlm lwt re unix)
(libraries cohttp-lwt-unix clz.cohttp ezxmlm lwt re unix bibfmt)
(preprocess future_syntax)
(package doi2bib))
60 changes: 60 additions & 0 deletions doi2bib/lib/helpers.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
(* [decode_html_entities s] replaces a small, fixed set of HTML entities in
   [s] with plain-text equivalents and returns the resulting string.

   The substitutions are applied one after another, in list order, each over
   the whole string. Because "&amp;" is handled first, a doubly-encoded input
   such as "&amp;quot;" ends up fully decoded to a double quote. Entities that
   are not listed are left untouched. *)
let decode_html_entities =
  (* The patterns are literal strings; compile them once instead of on every
     call. *)
  let replacements =
    List.map
      (fun (entity, by) -> (Re.compile (Re.str entity), by))
      [
        ("&amp;", "&");
        ("&quot;", "\"");
        ("&apos;", "'");
        ("&#39;", "'");
        ("&copy;", "(c)");
        ("&reg;", "(r)");
        ("&trade;", "(tm)");
        ("&nbsp;", " ");
      ]
  in
  fun s ->
    List.fold_left
      (fun acc (re, by) -> Re.replace_string ~all:true re ~by acc)
      s replacements

(* [escape_ampersand s] returns [s] with every '&' that is not immediately
   preceded by a backslash rewritten as "\&".

   An '&' directly after a backslash is treated as already escaped and copied
   unchanged. The decision is made by looking at the previous character of the
   original string, so each '&' in a run such as "&&" is escaped
   independently. A pattern of the form "<non-backslash>&" cannot do this: it
   consumes the character before the '&', and the second '&' of a run is then
   left without a predecessor to match against. *)
let escape_ampersand s =
  (* Reserve a little slack for the backslashes that get inserted. *)
  let buf = Buffer.create (String.length s + 8) in
  String.iteri
    (fun i c ->
      let already_escaped = i > 0 && Char.equal s.[i - 1] '\\' in
      if Char.equal c '&' && not already_escaped then
        Buffer.add_string buf "\\&"
      else Buffer.add_char buf c)
    s;
  Buffer.contents buf

(* HTML entities may show up almost anywhere in the field contents returned by
   the doi API. [clean_string s] first decodes the handful of common entities
   known to [decode_html_entities], then passes the result through
   [escape_ampersand] so that '&' is escaped.

   Known limitation: entries containing '#' or '%' may still fail (URLs are
   already handled separately); this is deliberately left alone until it is
   seen in practice. *)
let clean_string s = escape_ampersand (decode_html_entities s)

(* [clean_field_value value] applies [clean_string] to the text carried by a
   quoted, braced or unquoted string value, keeping the same constructor.
   Number values are returned as they are. *)
let clean_field_value (value : Bibtex.field_value) : Bibtex.field_value =
  let open Bibtex in
  match value with
  | NumberValue _ -> value
  | QuotedStringValue text -> QuotedStringValue (clean_string text)
  | BracedStringValue text -> BracedStringValue (clean_string text)
  | UnquotedStringValue text -> UnquotedStringValue (clean_string text)

(* [clean_item item] cleans the value of every field of an entry with
   [clean_field_value]. Comments inside an entry, and items that are
   themselves comments, are returned unchanged. *)
let clean_item item =
  let open Bibtex in
  let clean_content content =
    match content with
    | EntryComment _ -> content
    | Field field -> Field { field with value = clean_field_value field.value }
  in
  match item with
  | Comment _ -> item
  | Entry entry ->
      Entry { entry with contents = List.map clean_content entry.contents }
23 changes: 14 additions & 9 deletions doi2bib/lib/parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ let string_of_id = function
| PubMed s -> "PubMed ID '" ^ s ^ "'"

let parse_id id =
let open Astring in
let is_prefix affix s = String.is_prefix ~affix (String.Ascii.lowercase s) in
let is_prefix affix s =
let n = String.length affix in
String.length s >= n && String.sub (String.lowercase_ascii s) 0 n = affix
in
let sub start s =
String.sub ~start s |> String.Sub.to_string |> String.trim
String.trim (String.sub s start (String.length s - start))
in
let contains c s = String.exists (fun c' -> c' = c) s in
match id with
| doi when is_prefix "doi:" doi -> DOI (sub 4 doi)
| arxiv when is_prefix "arxiv:" arxiv -> ArXiv (sub 6 arxiv)
| pubmed when is_prefix "pmc" pubmed -> PubMed pubmed
| doi when contains '/' doi -> DOI (String.trim doi)
| arxiv when contains '.' arxiv -> ArXiv (String.trim arxiv)
| doi when String.contains doi '/' -> DOI (String.trim doi)
| arxiv when String.contains arxiv '.' -> ArXiv (String.trim arxiv)
| _ -> raise (Parse_error id)

let parse_atom id atom =
Expand All @@ -42,13 +43,17 @@ let parse_atom id atom =
get_attr "term" a
in
let bibid =
let open Astring in
(match String.cuts ~empty:false ~sep:" " authors with
(match
String.split_on_char ' ' authors |> List.filter (fun s -> s <> "")
with
| _ :: s :: _ -> s
| s :: _ -> s
| [] -> "")
^ year
^ (String.cut ~sep:" " title |> Option.map fst |> Option.value ~default:"")
^
match String.index_opt title ' ' with
| Some i -> String.sub title 0 i
| None -> ""
in
Printf.sprintf
{|@misc{%s,
Expand Down
15 changes: 15 additions & 0 deletions doi2bib/tests/doi.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,18 @@ DOI entry containing parentheses
YEAR = {1989},
PAGES = {627-649}
}
DOI entry containing HTML entities and ampersands
$ doi2bib 10.2140/gt.2014.18.669
@article{Karshon_2014,
TITLE = {Classification of Hamiltonian torus actions with two-dimensional quotients},
VOLUME = {18},
ISSN = {1465-3060},
URL = {http://dx.doi.org/10.2140/gt.2014.18.669},
DOI = {10.2140/gt.2014.18.669},
NUMBER = {2},
JOURNAL = {Geometry \& Topology},
PUBLISHER = {Mathematical Sciences Publishers},
AUTHOR = {Karshon, Yael and Tolman, Susan},
YEAR = {2014},
PAGES = {669-716}
}
1 change: 0 additions & 1 deletion nix/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ buildDunePackage ({

propagatedBuildInputs = [
bibfmt
astring
cohttp-lwt-unix
cmdliner
clz
Expand Down
Loading