Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: optimize Workbook.get_string #86

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions binary_read.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package xls

import (
"encoding/binary"
"io"
)

// ReadBytes reads exactly size bytes from r.
//
// It uses io.ReadFull rather than a single r.Read call: an io.Reader is
// allowed to return fewer bytes than requested without reporting an error,
// and may also return the final bytes together with io.EOF. A single Read
// would therefore either hand back a partially filled buffer as if it were
// complete, or report a failure for a read that actually succeeded.
//
// The returned slice always has length size, even when an error is returned;
// bytes that could not be read are left as zero. The error is io.EOF when no
// bytes at all could be read and io.ErrUnexpectedEOF when the input ended
// after only part of the requested bytes were read.
//
// size must be non-negative; a negative size makes the allocation panic.
func ReadBytes(r io.Reader, size int) ([]byte, error) {
	buf := make([]byte, size)
	if _, err := io.ReadFull(r, buf); err != nil {
		// Still return buf: callers in this file index into it even after
		// ignoring the error, so it must keep its full length.
		return buf, err
	}
	return buf, nil
}

// MustReadBytes returns the size-byte slice produced by ReadBytes and
// discards any read error.
//
// Despite the "Must" prefix it never panics on a failed read: the caller
// simply receives whatever ReadBytes placed in the buffer.
func MustReadBytes(r io.Reader, size int) []byte {
	data, _ := ReadBytes(r, size) // best effort: the error is intentionally dropped
	return data
}

// ReadByte reads one byte from r via ReadBytes.
// If the read fails it returns 0 together with the error.
func ReadByte(r io.Reader) (byte, error) {
	data, err := ReadBytes(r, 1)
	if err == nil {
		return data[0], nil
	}
	return 0, err
}

// ReadUint16 reads two bytes from r via ReadBytes and decodes them as a
// little-endian uint16. If the read fails it returns 0 together with the error.
func ReadUint16(r io.Reader) (uint16, error) {
	data, err := ReadBytes(r, 2)
	if err == nil {
		return binary.LittleEndian.Uint16(data), nil
	}
	return 0, err
}

// ReadUint32 reads four bytes from r via ReadBytes and decodes them as a
// little-endian uint32. If the read fails it returns 0 together with the error.
func ReadUint32(r io.Reader) (uint32, error) {
	data, err := ReadBytes(r, 4)
	if err == nil {
		return binary.LittleEndian.Uint32(data), nil
	}
	return 0, err
}

// ReadBoundSheet reads the fixed 7-byte prefix of a boundsheet record from r
// and returns it as a newly allocated boundsheet.
//
// Layout, as decoded here:
//
//	bytes 0-3  Filepos (little-endian uint32)
//	byte  4    Visible
//	byte  5    Type
//	byte  6    Name
//
// Any error from ReadBytes is ignored; the fields are filled from whatever
// the returned buffer contains.
func ReadBoundSheet(r io.Reader) *boundsheet {
	data, _ := ReadBytes(r, 7) // read error intentionally discarded
	return &boundsheet{
		Filepos: binary.LittleEndian.Uint32(data[0:4]),
		Visible: data[4],
		Type:    data[5],
		Name:    data[6],
	}
}

// ReadRowInfo reads a 16-byte row record from r and returns it as a newly
// allocated rowInfo. All fields are little-endian.
//
// Layout, as decoded here:
//
//	bytes 0-1    Index    (uint16)
//	bytes 2-3    Fcell    (uint16)
//	bytes 4-5    Lcell    (uint16)
//	bytes 6-7    Height   (uint16)
//	bytes 8-9    Notused  (uint16)
//	bytes 10-11  Notused2 (uint16)
//	bytes 12-15  Flags    (uint32)
//
// Any error from ReadBytes is ignored; the fields are filled from whatever
// the returned buffer contains.
func ReadRowInfo(r io.Reader) *rowInfo {
	data, _ := ReadBytes(r, 16) // read error intentionally discarded
	le := binary.LittleEndian
	return &rowInfo{
		Index:    le.Uint16(data[0:2]),
		Fcell:    le.Uint16(data[2:4]),
		Lcell:    le.Uint16(data[4:6]),
		Height:   le.Uint16(data[6:8]),
		Notused:  le.Uint16(data[8:10]),
		Notused2: le.Uint16(data[10:12]),
		Flags:    le.Uint32(data[12:16]),
	}
}

// ReadLabelsstCol reads a 10-byte LABELSST cell record from r and returns it
// as a newly allocated LabelsstCol. All fields are little-endian.
//
// Layout, as decoded here:
//
//	bytes 0-1  RowB      (uint16)
//	bytes 2-3  FirstColB (uint16)
//	bytes 4-5  Xf        (uint16)
//	bytes 6-9  Sst       (uint32)
//
// Any error from ReadBytes is ignored; the fields are filled from whatever
// the returned buffer contains.
func ReadLabelsstCol(r io.Reader) *LabelsstCol {
	data, _ := ReadBytes(r, 10) // read error intentionally discarded
	le := binary.LittleEndian
	cell := &LabelsstCol{}
	cell.RowB = le.Uint16(data[0:2])
	cell.FirstColB = le.Uint16(data[2:4])
	cell.Xf = le.Uint16(data[4:6])
	cell.Sst = le.Uint32(data[6:10])
	return cell
}

// ReadBof reads a 4-byte record header from r into row: bytes 0-1 become
// row.Id and bytes 2-3 become row.Size, both little-endian uint16.
//
// If the read fails the error is returned and row is left unmodified.
// row must be non-nil.
func ReadBof(r io.Reader, row *bof) error {
	hdr, err := ReadBytes(r, 4)
	if err != nil {
		return err
	}
	le := binary.LittleEndian
	row.Id, row.Size = le.Uint16(hdr[0:2]), le.Uint16(hdr[2:4])
	return nil
}
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module github.com/extrame/xls

go 1.16

require (
github.com/extrame/goyymmdd v0.0.0-20210114090516-7cc815f00d1a
github.com/extrame/ole2 v0.0.0-20160812065207-d69429661ad7
github.com/tealeg/xlsx v1.0.5
golang.org/x/text v0.3.7
)
37 changes: 23 additions & 14 deletions workbook.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ func (w *WorkBook) Parse(buf io.ReadSeeker) {
// buf := bytes.NewReader(bts)
offset := 0
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
//if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset)
} else {
break
Expand All @@ -72,8 +73,9 @@ func (w *WorkBook) addFormat(format *Format) {
func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) {
after = b
after_using = pre
var bts = make([]byte, b.Size)
binary.Read(buf, binary.LittleEndian, bts)
//var bts = make([]byte, b.Size)
//binary.Read(buf, binary.LittleEndian, bts)
var bts = MustReadBytes(buf, int(b.Size))
buf_item := bytes.NewReader(bts)
switch b.Id {
case 0x809:
Expand Down Expand Up @@ -135,7 +137,8 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int
offset = i
case 0x85: // boundsheet
var bs = new(boundsheet)
binary.Read(buf_item, binary.LittleEndian, bs)
//binary.Read(buf_item, binary.LittleEndian, bs)
bs = ReadBoundSheet(buf_item)
// different for BIFF5 and BIFF8
wb.addSheet(bs, buf_item)
case 0x0e0: // XF
Expand Down Expand Up @@ -177,15 +180,18 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var richtext_num = uint16(0)
var phonetic_size = uint32(0)
var flag byte
err = binary.Read(buf, binary.LittleEndian, &flag)
//err = binary.Read(buf, binary.LittleEndian, &flag)
flag, err = ReadByte(buf)
if flag&0x8 != 0 {
err = binary.Read(buf, binary.LittleEndian, &richtext_num)
//err = binary.Read(buf, binary.LittleEndian, &richtext_num)
richtext_num, err = ReadUint16(buf)
} else if w.continue_rich > 0 {
richtext_num = w.continue_rich
w.continue_rich = 0
}
if flag&0x4 != 0 {
err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
//err = binary.Read(buf, binary.LittleEndian, &phonetic_size)
phonetic_size, err = ReadUint32(buf)
} else if w.continue_apsb > 0 {
phonetic_size = w.continue_apsb
w.continue_apsb = 0
Expand All @@ -194,7 +200,8 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
var bts = make([]uint16, size)
var i = uint16(0)
for ; i < size && err == nil; i++ {
err = binary.Read(buf, binary.LittleEndian, &bts[i])
//err = binary.Read(buf, binary.LittleEndian, &bts[i])
bts[i], err = ReadUint16(buf)
}

// when eof found, we dont want to append last element
Expand Down Expand Up @@ -226,25 +233,27 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e
res = string(runes)
}
if richtext_num > 0 {
var bts []byte
//var bts []byte
var seek_size int64
if w.Is5ver {
seek_size = int64(2 * richtext_num)
} else {
seek_size = int64(4 * richtext_num)
}
bts = make([]byte, seek_size)
err = binary.Read(buf, binary.LittleEndian, bts)
//bts = make([]byte, seek_size)
//err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(seek_size))
if err == io.EOF {
w.continue_rich = richtext_num
}

// err = binary.Read(buf, binary.LittleEndian, bts)
}
if phonetic_size > 0 {
var bts []byte
bts = make([]byte, phonetic_size)
err = binary.Read(buf, binary.LittleEndian, bts)
//var bts []byte
//bts = make([]byte, phonetic_size)
//err = binary.Read(buf, binary.LittleEndian, bts)
_, err = ReadBytes(buf, int(phonetic_size))
if err == io.EOF {
w.continue_apsb = phonetic_size
}
Expand Down
13 changes: 8 additions & 5 deletions worksheet.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ func (w *WorkSheet) parse(buf io.ReadSeeker) {
var bof_pre *bof
var col_pre interface{}
for {
if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
//if err := binary.Read(buf, binary.LittleEndian, b); err == nil {
if err := ReadBof(buf, b); err == nil {
bof_pre, col_pre = w.parseBof(buf, b, bof_pre, col_pre)
if b.Id == 0xa {
break
Expand Down Expand Up @@ -81,8 +82,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
w.rightToLeft = (sheetOptions & 0x40) != 0
w.Selected = (sheetOptions & 0x400) != 0
case 0x208: //ROW
r := new(rowInfo)
binary.Read(buf, binary.LittleEndian, r)
//r := new(rowInfo)
//binary.Read(buf, binary.LittleEndian, r)
r := ReadRowInfo(buf)
w.addRow(r)
case 0x0BD: //MULRK
mc := new(MulrkCol)
Expand Down Expand Up @@ -129,8 +131,9 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof, col_pre interf
col = new(RkCol)
binary.Read(buf, binary.LittleEndian, col)
case 0xFD: //LABELSST
col = new(LabelsstCol)
binary.Read(buf, binary.LittleEndian, col)
//col = new(LabelsstCol)
//binary.Read(buf, binary.LittleEndian, col)
col = ReadLabelsstCol(buf)
case 0x204:
c := new(labelCol)
binary.Read(buf, binary.LittleEndian, &c.BlankCol)
Expand Down