-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbulk_trans.go
61 lines (54 loc) · 1.2 KB
/
bulk_trans.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
// This program translates corpus material (one JSON document per line) into
// the action-metadata + document line pairs used for bulk operations into
// Elasticsearch.
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
)
// doc captures the two routing fields decoded from a single corpus line.
// Any other keys present in the line are ignored by the decoder.
type doc struct {
	// Index is populated from the "elasticsearch_index" key.
	Index string `json:"elasticsearch_index"`
	// Channel is populated from the "channel" key; it stays empty when the
	// key is absent.
	Channel string `json:"channel"`
}
// main translates the corpus file named by the first command-line argument
// into Elasticsearch bulk format. The result is written next to the input as
// "<input without extension>-bulk.json". Any failure is reported on stderr
// and the process exits with a non-zero status.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "missing argument: path to the corpus file")
		os.Exit(2)
	}
	if err := run(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run opens fname, creates (or truncates) the matching "-bulk.json" output
// file and streams the translation from one to the other. It returns the
// first error encountered, including errors from flushing or closing the
// output file.
func run(fname string) (err error) {
	in, err := os.Open(fname)
	if err != nil {
		return err
	}
	defer in.Close()

	ext := filepath.Ext(fname)
	outName := fmt.Sprintf("%s-bulk.json", fname[:len(fname)-len(ext)])
	out, err := os.OpenFile(outName, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an
		// earlier error is already being returned.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	err = translate(in, w)
	// Flush even after a translation error so that the entries converted so
	// far still reach the file.
	if ferr := w.Flush(); err == nil {
		err = ferr
	}
	return err
}

// translate reads newline-delimited JSON documents from r and writes one
// bulk entry per document to w. Blank lines are skipped, and a final line
// without a trailing newline is still processed. Errors are annotated with
// the 1-based line number they occurred on.
func translate(r io.Reader, w io.Writer) error {
	br := bufio.NewReader(r)
	for lineNo := 1; ; lineNo++ {
		line, readErr := br.ReadBytes('\n')
		if readErr != nil && readErr != io.EOF {
			return fmt.Errorf("reading line %d: %w", lineNo, readErr)
		}

		// Strip the line terminator ("\n" or "\r\n").
		if n := len(line); n > 0 && line[n-1] == '\n' {
			line = line[:n-1]
		}
		if n := len(line); n > 0 && line[n-1] == '\r' {
			line = line[:n-1]
		}

		if len(line) > 0 {
			if err := writeEntry(w, line); err != nil {
				return fmt.Errorf("line %d: %w", lineNo, err)
			}
		}

		// io.EOF may arrive together with the last (unterminated) line, so
		// it is only acted on after that line has been handled.
		if readErr == io.EOF {
			return nil
		}
	}
}

// writeEntry writes the bulk entry for one document: an action-metadata line
// naming the target index, followed by the document itself. The target index
// is the document's Index, suffixed with "-<Channel>" when Channel is set.
func writeEntry(w io.Writer, line []byte) error {
	var d doc
	if err := json.Unmarshal(line, &d); err != nil {
		return fmt.Errorf("decoding document: %w", err)
	}

	index := d.Index
	if d.Channel != "" {
		index = fmt.Sprintf("%s-%s", d.Index, d.Channel)
	}

	// Marshal the name so quotes and backslashes in it are escaped and the
	// action line is always valid JSON.
	quoted, err := json.Marshal(index)
	if err != nil {
		return err
	}
	_, err = fmt.Fprintf(w, "{\"index\":{\"_index\":%s}}\n%s\n", quoted, line)
	return err
}