Skip to content

Commit a1fc86c

Browse files
committed
Obtain file names to handle from the [Content_Types].xml file
1 parent c49edd0 commit a1fc86c

File tree

5 files changed

+129
-17
lines changed

5 files changed

+129
-17
lines changed
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2+
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
3+
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
4+
<Default Extension="xml" ContentType="application/xml"/>
5+
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
6+
<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
7+
<Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/>
8+
<Override PartName="/word/webSettings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"/>
9+
<Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"/>
10+
<Override PartName="/word/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/>
11+
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
12+
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
13+
</Types>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Some random content.
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
3+
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml" />
4+
<Default Extension="xml" ContentType="application/xml" />
5+
<Default Extension="png" ContentType="image/png" />
6+
<Override PartName="/word/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml" />
7+
<Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml" />
8+
<Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml" />
9+
<Override PartName="/word/webSettings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml" />
10+
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml" />
11+
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml" />
12+
<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml" />
13+
<Override PartName="/word/document2.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" />
14+
<Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml" />
15+
<Override PartName="/customXml/itemProps2.xml" ContentType="application/vnd.openxmlformats-officedocument.customXmlProperties+xml" />
16+
<Override PartName="/customXml/itemProps1.xml" ContentType="application/vnd.openxmlformats-officedocument.customXmlProperties+xml" />
17+
<Override PartName="/customXml/itemProps3.xml" ContentType="application/vnd.openxmlformats-officedocument.customXmlProperties+xml" />
18+
<Override PartName="/docProps/custom.xml" ContentType="application/vnd.openxmlformats-officedocument.custom-properties+xml" />
19+
<Override PartName="/word/intelligence2.xml" ContentType="application/vnd.ms-office.intelligence2+xml" />
20+
</Types>

src/xml_util.rs

+95-17
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use regex::Regex;
2+
use std::collections::HashMap;
23
use std::fs::File;
34
use std::io::{BufReader, Read};
45
use std::path::Path;
5-
use xml_dom::level2::{Attribute, Node, RefNode};
6+
use xml_dom::level2::{Attribute, Node, RefNode, Element};
67
use xml_dom::parser::read_reader;
78
use unicode_bom::Bom;
89
use walkdir::WalkDir;
@@ -29,21 +30,43 @@ impl XMLUtil {
2930
}
3031

3132
pub fn replace_xml(dir: &str, src_file: &str, pattern: &str, replace: &str, output_file: &Option<&str>) {
33+
let (_, files) = Self::get_files_with_content_type(dir,
34+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml");
35+
3236
let out_file = match output_file {
3337
Some(of) => of,
3438
None => src_file
3539
};
3640

37-
Self::snr_xml(Mode::Value, dir, src_file, Some(vec!("word/document(\\d*).xml")), Some(pattern), Some(replace), Some(out_file));
41+
let fref = files.iter().map(AsRef::as_ref).collect();
42+
Self::snr_xml(Mode::Value, dir, src_file, Some(fref), Some(pattern), Some(replace), Some(out_file));
3843
}
3944

4045
pub fn replace_attr(dir: &str, src_file: &str, pattern: &str, replace: &str, output_file: &Option<&str>) {
46+
let (defaults, files) = Self::get_files_with_content_type(dir,
47+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml");
48+
let rels_extension = &defaults["application/vnd.openxmlformats-package.relationships+xml"];
49+
50+
let mut rels_files = vec!();
51+
for f in files {
52+
let last_slash = f.rfind('/').unwrap();
53+
let mut new_fn = String::new();
54+
new_fn.push_str(&f[..last_slash]);
55+
new_fn.push_str("/_");
56+
new_fn.push_str(rels_extension);
57+
new_fn.push_str(&f[last_slash..]);
58+
new_fn.push('.');
59+
new_fn.push_str(rels_extension);
60+
rels_files.push(new_fn);
61+
}
62+
4163
let out_file = match output_file {
4264
Some(of) => of,
4365
None => src_file
4466
};
4567

46-
Self::snr_xml(Mode::Attribute, dir, src_file, Some(vec!("word/_rels/document(\\d*).xml.rels")), Some(pattern), Some(replace), Some(out_file));
68+
let fref = rels_files.iter().map(AsRef::as_ref).collect();
69+
Self::snr_xml(Mode::Attribute, dir, src_file, Some(fref), Some(pattern), Some(replace), Some(out_file));
4770
}
4871

4972
fn snr_xml(mode: Mode, dir: &str, src_file: &str, files: Option<Vec<&str>>, pattern: Option<&str>, replace: Option<&str>, output_file: Option<&str>) {
@@ -64,7 +87,7 @@ impl XMLUtil {
6487
let sub_path = FileUtil::get_sub_path(entry.path(), &base_dir);
6588

6689
if let Some(file_list) = &files {
67-
if !Self::list_matches(&file_list, &sub_path.as_str()) {
90+
if !file_list.contains(&sub_path.as_str()) {
6891
continue;
6992
}
7093
} else {
@@ -120,8 +143,6 @@ impl XMLUtil {
120143

121144
for n in node.child_nodes() {
122145
for (_, mut attr) in n.attributes() {
123-
// let v = av.value();
124-
// println!("Name: {} = {:?}", an, v);
125146
if let Some(v) = attr.value() {
126147
if v.len() == 0 {
127148
continue;
@@ -187,15 +208,71 @@ impl XMLUtil {
187208
Bom::from(&mut file)
188209
}
189210

190-
fn list_matches(file_list: &[&str], name: &str) -> bool {
191-
for file_pat in file_list {
192-
let regex = Regex::new(*file_pat).unwrap();
193-
if regex.is_match(name) {
194-
return true;
211+
fn get_content_types(dir: &str) -> (HashMap<String, String>, HashMap<String, String>) {
212+
let mut defaults = HashMap::new();
213+
let mut mappings = HashMap::new();
214+
215+
let path = Path::new(dir).join("[Content_Types].xml");
216+
217+
let bom = Self::get_bom(&path);
218+
let f = File::open(path).unwrap(); // TODO
219+
let mut r = BufReader::new(f);
220+
221+
if bom != Bom::Null {
222+
// Remove the BOM bytes from the stream as they will cause the XML parsing to fail
223+
let len = bom.len();
224+
let mut bom_prefix = vec![0; len];
225+
r.read_exact(&mut bom_prefix).unwrap();
226+
}
227+
228+
let dom_res = read_reader(r).unwrap();
229+
for n in dom_res.child_nodes() {
230+
if n.local_name() == "Types" {
231+
for m in n.child_nodes() {
232+
match m.local_name().as_str() {
233+
"Default" => {
234+
let en = m.get_attribute("Extension");
235+
let ct = m.get_attribute("ContentType");
236+
237+
if en.is_some() && ct.is_some() {
238+
defaults.insert(ct.unwrap(), en.unwrap());
239+
}
240+
},
241+
"Override" => {
242+
let pn = m.get_attribute("PartName");
243+
let ct = m.get_attribute("ContentType");
244+
245+
if pn.is_some() && ct.is_some() {
246+
let pns = pn.unwrap();
247+
let rel_pn;
248+
if pns.starts_with('/') {
249+
rel_pn = &pns[1..];
250+
} else {
251+
rel_pn = &pns;
252+
}
253+
254+
mappings.insert(rel_pn.to_owned(), ct.unwrap());
255+
}
256+
},
257+
_ => {}
258+
}
259+
}
195260
}
196261
}
197262

198-
false
263+
(defaults, mappings)
264+
}
265+
266+
fn get_files_with_content_type(dir: &str, content_type: &str) -> (HashMap<String, String>, Vec<String>) {
267+
let (defaults, mappings) = Self::get_content_types(dir);
268+
269+
let mut result = vec!();
270+
for (file, ct) in &mappings {
271+
if ct == content_type {
272+
result.push(file.to_owned());
273+
}
274+
}
275+
(defaults, result)
199276
}
200277
}
201278

@@ -306,7 +383,8 @@ mod tests {
306383
#[test]
307384
fn test_replace_both() -> io::Result<()> {
308385
let orgdir = "./src/test/test_tree3";
309-
let testdir = testdir!();
386+
let testroot = testdir!();
387+
let testdir = testroot.join("subdir");
310388

311389
copy_dir_all(orgdir, &testdir)?;
312390

@@ -319,15 +397,15 @@ mod tests {
319397
assert!(before.contains(">www.example.com<"), "Precondition");
320398
assert!(!before.contains("zzz"), "Precondition");
321399

322-
let before_rels = fs::read_to_string("./src/test/test_tree3/word/_rels/document3.xml.rels")?;
400+
let before_rels = fs::read_to_string("./src/test/test_tree3/word/_rels/document2.xml.rels")?;
323401
assert!(before_rels.contains("Target=\"http://www.example.com/\""), "Precondition");
324402

325403
XMLUtil::replace_xml(&testdir.to_string_lossy(), "my-source.docx",
326404
"[Ss]ome", "zzz",
327-
&Some(&testdir.join("output.docx").to_string_lossy()));
405+
&Some(&testroot.join("output.docx").to_string_lossy()));
328406
XMLUtil::replace_attr(&testdir.to_string_lossy(), "my-source.docx",
329407
"www.example.com", "foobar.org",
330-
&Some(&testdir.join("output-2.docx").to_string_lossy()));
408+
&Some(&testroot.join("output-2.docx").to_string_lossy()));
331409

332410
// Check that the replacement worked as expected
333411
let after = fs::read_to_string(testdir.join("word/document2.xml"))?;
@@ -339,7 +417,7 @@ mod tests {
339417
assert!(!after.contains("some"));
340418
assert!(!after.contains("Some"));
341419

342-
let after_rels = fs::read_to_string(testdir.join("word/_rels/document3.xml.rels"))?;
420+
let after_rels = fs::read_to_string(testdir.join("word/_rels/document2.xml.rels"))?;
343421
assert!(after_rels.contains("Target=\"http://foobar.org/\""));
344422

345423
Ok(())

0 commit comments

Comments
 (0)