From cd0beec8d8ed59b1abf6e13378a81e5076a94434 Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 2 Apr 2016 21:53:53 +0200 Subject: [PATCH 1/2] Do not escape comments (but still sanitize them). --- _oasis | 2 +- lib/xml_print.ml | 59 +++++++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/_oasis b/_oasis index 76a7cd90e..b9e1123f3 100644 --- a/_oasis +++ b/_oasis @@ -64,7 +64,7 @@ Library tyxml_f Html5_sigs, Html5_types, Html5_f - BuildDepends: uutf + BuildDepends: uutf, re Library tyxml_top FindlibName: top diff --git a/lib/xml_print.ml b/lib/xml_print.ml index 5fcf95e62..5188a0afb 100644 --- a/lib/xml_print.ml +++ b/lib/xml_print.ml @@ -48,16 +48,39 @@ let encode_unsafe_char_and_at s = Buffer.contents b let compose_decl ?(version = "1.0") ?(encoding = "UTF-8") () = - "\n" + Format.sprintf + {|\n|} + version encoding let compose_doctype dt args = - " "\"" ^ a ^ "\"") args)) ^ ">" - + let pp_args fmt = function + | [] -> () + | l -> + Format.fprintf fmt " PUBLIC %a" + (Format.pp_print_list ~pp_sep:Format.pp_print_space + (fun fmt -> Format.fprintf fmt "%S")) + l + in + Format.asprintf + "" + dt + pp_args args + +let re_end_comment = Re.(compile @@ alt [ + seq [ bos ; str ">" ] ; + seq [ bos ; str "->" ] ; + str "-->" ; + str "--!>" ; +]) +let escape_comment s = + let f g = match Re.Group.get g 0 with + | ">" -> ">" + | "->" -> "->" + | "-->" -> "-->" + | "--!>" -> "--!>" + | s -> s + in + Re.replace ~all:true re_end_comment ~f s (* copied form js_of_ocaml: compiler/javascript.ml *) let pp_number fmt v = @@ -177,16 +200,6 @@ module type TagList = sig val emptytags : string list end let pp_noop _fmt _ = () -(* Present only in ocaml >= 4.02 *) -let rec pp_print_list ~pp_sep pp_v ppf = function - | [] -> () - | [v] -> pp_v ppf v - | v :: vs -> - pp_v ppf v; - pp_sep ppf (); - pp_print_list ~pp_sep pp_v ppf vs - - module Make_fmt (Xml : Xml_sigs.Iterable) (I : TagList) = @@ -213,14 +226,14 @@ struct | AStr s -> Format.fprintf fmt "%S" (encode s) | AStrL (sep, slist) -> Format.fprintf fmt "\"%a\"" - (pp_print_list ~pp_sep:(pp_sep sep) (pp_encode encode)) slist + (Format.pp_print_list ~pp_sep:(pp_sep sep) (pp_encode encode)) slist let pp_attrib encode fmt a = Format.fprintf fmt " %s=%a" (aname a) (pp_attrib_value encode) a let pp_attribs encode = - pp_print_list ~pp_sep:pp_noop (pp_attrib encode) + Format.pp_print_list ~pp_sep:pp_noop (pp_attrib encode) let pp_closedtag encode fmt tag attrs = if is_emptytag tag then @@ -240,13 +253,13 @@ struct and pp_elt encode fmt elt = match content elt with | Comment texte -> - Format.fprintf fmt "" (pp_encode encode) texte + Format.fprintf fmt "" (escape_comment texte) | Entity e -> Format.fprintf fmt "&%s;" e | PCDATA texte -> - Format.pp_print_string fmt (encode texte) + pp_encode encode fmt texte | EncodedPCDATA texte -> Format.pp_print_string fmt texte @@ -260,7 +273,7 @@ struct | Empty -> () and pp_elts encode = - pp_print_list ~pp_sep:pp_noop (pp_elt encode) + Format.pp_print_list ~pp_sep:pp_noop (pp_elt encode) let pp ?(encode=encode_unsafe_char) () = pp_elt encode From e82852fc6a5a9dae1aec96684dc273f3f38568f9 Mon Sep 17 00:00:00 2001 From: Drup Date: Sat, 2 Apr 2016 21:54:29 +0200 Subject: [PATCH 2/2] Add escaping tests for comments. --- test/test_html.ml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/test_html.ml b/test/test_html.ml index 5154c2b8c..7d59cfb31 100644 --- a/test/test_html.ml +++ b/test/test_html.ml @@ -53,6 +53,27 @@ let escaping = "html escaping", tyxml_tests Html5.[ /* ]]> */ |} ; + "comment", + tot (Xml.comment + {|[if IE 8]> |} ; + + "dodgy comment 1", + tot (Xml.comment {|>