Skip to content

Commit 603e780

Browse files
doc: add simple test for ConvertDoc for .doc files (#143)
1 parent 8c168b3 commit 603e780

File tree

3 files changed

+92
-1
lines changed

3 files changed

+92
-1
lines changed

.github/workflows/go.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
       - uses: actions/checkout@v2

       - name: Install dependencies
-        run: sudo apt install unrtf tidy
+        run: sudo apt install wv unrtf tidy

       - name: Set up Go ${{ matrix.go }}
         uses: actions/setup-go@v2

doc_test.go

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package docconv
2+
3+
import (
4+
"os"
5+
"os/exec"
6+
"path"
7+
"strings"
8+
"testing"
9+
"time"
10+
11+
"github.com/google/go-cmp/cmp"
12+
)
13+
14+
func TestConvertDoc(t *testing.T) {
15+
if _, err := exec.LookPath("wvText"); err != nil {
16+
t.Skip("wvText not installed")
17+
return
18+
}
19+
20+
tests := []struct {
21+
file string
22+
wantTrimmedText string
23+
wantMeta map[string]string
24+
wantErr bool
25+
}{
26+
{
27+
file: "001-test.doc",
28+
wantTrimmedText: "test",
29+
wantMeta: map[string]string{
30+
"AppName": "Microsoft Office Word",
31+
"CharCount": "4",
32+
"Character count": "4",
33+
"CodePage": "1252",
34+
"Company": "",
35+
"CreateTime": "2023-09-13 01:54:00 +0000 UTC",
36+
"CreatedDate": "1694570040",
37+
"Dirty links": "false",
38+
"DocSecurity": "0",
39+
"Document parts": "0",
40+
"EditTime": "1970-01-01 00:00:00 +0000 UTC",
41+
"Heading pair": "0",
42+
"Hyperlinks changed": "false",
43+
"LastAuthor": "cloudconvert_7",
44+
"LastSaveTime": "2023-09-13 01:54:00 +0000 UTC",
45+
"Line count": "1",
46+
"ModifiedDate": "1694570040",
47+
"PageCount": "1",
48+
"Paragraph count": "1",
49+
"RevNumber": "1",
50+
"Scale": "false",
51+
"Shared document": "false",
52+
"Template": "Normal",
53+
"Version": "1048576",
54+
"WordCount": "0",
55+
},
56+
},
57+
}
58+
for _, tt := range tests {
59+
t.Run(tt.file, func(t *testing.T) {
60+
f, err := os.Open(path.Join("testdata", tt.file))
61+
if err != nil {
62+
t.Fatal(err)
63+
}
64+
defer f.Close()
65+
66+
gotText, gotMeta, err := ConvertDoc(f)
67+
if (err != nil) != tt.wantErr {
68+
t.Errorf("ConvertDoc() error = %v, wantErr %v", err, tt.wantErr)
69+
return
70+
}
71+
gotText = strings.TrimSpace(gotText)
72+
if gotText != tt.wantTrimmedText {
73+
t.Errorf("ConvertDoc() text = %v, want %v", gotText, tt.wantTrimmedText)
74+
}
75+
if !cmp.Equal(tt.wantMeta, gotMeta, maybeTimeComparer) {
76+
t.Errorf("ConvertDoc() meta mismatch (-want +got):\n%v", cmp.Diff(tt.wantMeta, gotMeta, maybeTimeComparer))
77+
}
78+
})
79+
}
80+
}
81+
82+
// Compares strings as time.Times if they look like times. Required because
83+
// wvText returns different time formats depending on system clock.
84+
var maybeTimeComparer = cmp.Comparer(func(x, y string) bool {
85+
xt, xterr := time.Parse("2006-01-02 15:04:05 -0700 MST", x)
86+
yt, yterr := time.Parse("2006-01-02 15:04:05 -0700 MST", y)
87+
if xterr == nil && yterr == nil {
88+
return xt.Equal(yt)
89+
}
90+
return x == y
91+
})

testdata/001-test.doc

23 KB
Binary file not shown.

0 commit comments

Comments
 (0)