1
1
use regex:: Regex ;
2
+ use std:: collections:: HashMap ;
2
3
use std:: fs:: File ;
3
4
use std:: io:: { BufReader , Read } ;
4
5
use std:: path:: Path ;
5
- use xml_dom:: level2:: { Attribute , Node , RefNode } ;
6
+ use xml_dom:: level2:: { Attribute , Node , RefNode , Element } ;
6
7
use xml_dom:: parser:: read_reader;
7
8
use unicode_bom:: Bom ;
8
9
use walkdir:: WalkDir ;
@@ -29,21 +30,43 @@ impl XMLUtil {
29
30
}
30
31
31
32
pub fn replace_xml ( dir : & str , src_file : & str , pattern : & str , replace : & str , output_file : & Option < & str > ) {
33
+ let ( _, files) = Self :: get_files_with_content_type ( dir,
34
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" ) ;
35
+
32
36
let out_file = match output_file {
33
37
Some ( of) => of,
34
38
None => src_file
35
39
} ;
36
40
37
- Self :: snr_xml ( Mode :: Value , dir, src_file, Some ( vec ! ( "word/document(\\ d*).xml" ) ) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
41
+ let fref = files. iter ( ) . map ( AsRef :: as_ref) . collect ( ) ;
42
+ Self :: snr_xml ( Mode :: Value , dir, src_file, Some ( fref) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
38
43
}
39
44
40
45
pub fn replace_attr ( dir : & str , src_file : & str , pattern : & str , replace : & str , output_file : & Option < & str > ) {
46
+ let ( defaults, files) = Self :: get_files_with_content_type ( dir,
47
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" ) ;
48
+ let rels_extension = & defaults[ "application/vnd.openxmlformats-package.relationships+xml" ] ;
49
+
50
+ let mut rels_files = vec ! ( ) ;
51
+ for f in files {
52
+ let last_slash = f. rfind ( '/' ) . unwrap ( ) ;
53
+ let mut new_fn = String :: new ( ) ;
54
+ new_fn. push_str ( & f[ ..last_slash] ) ;
55
+ new_fn. push_str ( "/_" ) ;
56
+ new_fn. push_str ( rels_extension) ;
57
+ new_fn. push_str ( & f[ last_slash..] ) ;
58
+ new_fn. push ( '.' ) ;
59
+ new_fn. push_str ( rels_extension) ;
60
+ rels_files. push ( new_fn) ;
61
+ }
62
+
41
63
let out_file = match output_file {
42
64
Some ( of) => of,
43
65
None => src_file
44
66
} ;
45
67
46
- Self :: snr_xml ( Mode :: Attribute , dir, src_file, Some ( vec ! ( "word/_rels/document(\\ d*).xml.rels" ) ) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
68
+ let fref = rels_files. iter ( ) . map ( AsRef :: as_ref) . collect ( ) ;
69
+ Self :: snr_xml ( Mode :: Attribute , dir, src_file, Some ( fref) , Some ( pattern) , Some ( replace) , Some ( out_file) ) ;
47
70
}
48
71
49
72
fn snr_xml ( mode : Mode , dir : & str , src_file : & str , files : Option < Vec < & str > > , pattern : Option < & str > , replace : Option < & str > , output_file : Option < & str > ) {
@@ -64,7 +87,7 @@ impl XMLUtil {
64
87
let sub_path = FileUtil :: get_sub_path ( entry. path ( ) , & base_dir) ;
65
88
66
89
if let Some ( file_list) = & files {
67
- if !Self :: list_matches ( & file_list, & sub_path. as_str ( ) ) {
90
+ if !file_list. contains ( & sub_path. as_str ( ) ) {
68
91
continue ;
69
92
}
70
93
} else {
@@ -120,8 +143,6 @@ impl XMLUtil {
120
143
121
144
for n in node. child_nodes ( ) {
122
145
for ( _, mut attr) in n. attributes ( ) {
123
- // let v = av.value();
124
- // println!("Name: {} = {:?}", an, v);
125
146
if let Some ( v) = attr. value ( ) {
126
147
if v. len ( ) == 0 {
127
148
continue ;
@@ -187,15 +208,71 @@ impl XMLUtil {
187
208
Bom :: from ( & mut file)
188
209
}
189
210
190
- fn list_matches ( file_list : & [ & str ] , name : & str ) -> bool {
191
- for file_pat in file_list {
192
- let regex = Regex :: new ( * file_pat) . unwrap ( ) ;
193
- if regex. is_match ( name) {
194
- return true ;
211
+ fn get_content_types ( dir : & str ) -> ( HashMap < String , String > , HashMap < String , String > ) {
212
+ let mut defaults = HashMap :: new ( ) ;
213
+ let mut mappings = HashMap :: new ( ) ;
214
+
215
+ let path = Path :: new ( dir) . join ( "[Content_Types].xml" ) ;
216
+
217
+ let bom = Self :: get_bom ( & path) ;
218
+ let f = File :: open ( path) . unwrap ( ) ; // TODO
219
+ let mut r = BufReader :: new ( f) ;
220
+
221
+ if bom != Bom :: Null {
222
+ // Remove the BOM bytes from the stream as they will cause the XML parsing to fail
223
+ let len = bom. len ( ) ;
224
+ let mut bom_prefix = vec ! [ 0 ; len] ;
225
+ r. read_exact ( & mut bom_prefix) . unwrap ( ) ;
226
+ }
227
+
228
+ let dom_res = read_reader ( r) . unwrap ( ) ;
229
+ for n in dom_res. child_nodes ( ) {
230
+ if n. local_name ( ) == "Types" {
231
+ for m in n. child_nodes ( ) {
232
+ match m. local_name ( ) . as_str ( ) {
233
+ "Default" => {
234
+ let en = m. get_attribute ( "Extension" ) ;
235
+ let ct = m. get_attribute ( "ContentType" ) ;
236
+
237
+ if en. is_some ( ) && ct. is_some ( ) {
238
+ defaults. insert ( ct. unwrap ( ) , en. unwrap ( ) ) ;
239
+ }
240
+ } ,
241
+ "Override" => {
242
+ let pn = m. get_attribute ( "PartName" ) ;
243
+ let ct = m. get_attribute ( "ContentType" ) ;
244
+
245
+ if pn. is_some ( ) && ct. is_some ( ) {
246
+ let pns = pn. unwrap ( ) ;
247
+ let rel_pn;
248
+ if pns. starts_with ( '/' ) {
249
+ rel_pn = & pns[ 1 ..] ;
250
+ } else {
251
+ rel_pn = & pns;
252
+ }
253
+
254
+ mappings. insert ( rel_pn. to_owned ( ) , ct. unwrap ( ) ) ;
255
+ }
256
+ } ,
257
+ _ => { }
258
+ }
259
+ }
195
260
}
196
261
}
197
262
198
- false
263
+ ( defaults, mappings)
264
+ }
265
+
266
+ fn get_files_with_content_type ( dir : & str , content_type : & str ) -> ( HashMap < String , String > , Vec < String > ) {
267
+ let ( defaults, mappings) = Self :: get_content_types ( dir) ;
268
+
269
+ let mut result = vec ! ( ) ;
270
+ for ( file, ct) in & mappings {
271
+ if ct == content_type {
272
+ result. push ( file. to_owned ( ) ) ;
273
+ }
274
+ }
275
+ ( defaults, result)
199
276
}
200
277
}
201
278
@@ -306,7 +383,8 @@ mod tests {
306
383
#[ test]
307
384
fn test_replace_both ( ) -> io:: Result < ( ) > {
308
385
let orgdir = "./src/test/test_tree3" ;
309
- let testdir = testdir ! ( ) ;
386
+ let testroot = testdir ! ( ) ;
387
+ let testdir = testroot. join ( "subdir" ) ;
310
388
311
389
copy_dir_all ( orgdir, & testdir) ?;
312
390
@@ -319,15 +397,15 @@ mod tests {
319
397
assert ! ( before. contains( ">www.example.com<" ) , "Precondition" ) ;
320
398
assert ! ( !before. contains( "zzz" ) , "Precondition" ) ;
321
399
322
- let before_rels = fs:: read_to_string ( "./src/test/test_tree3/word/_rels/document3 .xml.rels" ) ?;
400
+ let before_rels = fs:: read_to_string ( "./src/test/test_tree3/word/_rels/document2 .xml.rels" ) ?;
323
401
assert ! ( before_rels. contains( "Target=\" http://www.example.com/\" " ) , "Precondition" ) ;
324
402
325
403
XMLUtil :: replace_xml ( & testdir. to_string_lossy ( ) , "my-source.docx" ,
326
404
"[Ss]ome" , "zzz" ,
327
- & Some ( & testdir . join ( "output.docx" ) . to_string_lossy ( ) ) ) ;
405
+ & Some ( & testroot . join ( "output.docx" ) . to_string_lossy ( ) ) ) ;
328
406
XMLUtil :: replace_attr ( & testdir. to_string_lossy ( ) , "my-source.docx" ,
329
407
"www.example.com" , "foobar.org" ,
330
- & Some ( & testdir . join ( "output-2.docx" ) . to_string_lossy ( ) ) ) ;
408
+ & Some ( & testroot . join ( "output-2.docx" ) . to_string_lossy ( ) ) ) ;
331
409
332
410
// Check that the replacement worked as expected
333
411
let after = fs:: read_to_string ( testdir. join ( "word/document2.xml" ) ) ?;
@@ -339,7 +417,7 @@ mod tests {
339
417
assert ! ( !after. contains( "some" ) ) ;
340
418
assert ! ( !after. contains( "Some" ) ) ;
341
419
342
- let after_rels = fs:: read_to_string ( testdir. join ( "word/_rels/document3 .xml.rels" ) ) ?;
420
+ let after_rels = fs:: read_to_string ( testdir. join ( "word/_rels/document2 .xml.rels" ) ) ?;
343
421
assert ! ( after_rels. contains( "Target=\" http://foobar.org/\" " ) ) ;
344
422
345
423
Ok ( ( ) )
0 commit comments