Skip to content

Commit 7a455e1

Browse files
authored
Merge pull request #85 from bodgit/sfx
Add support for reading self-extracting archives
2 parents 56e81dc + 5990375 commit 7a455e1

File tree

4 files changed

+85
-16
lines changed

4 files changed

+85
-16
lines changed

README.md

+1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ Current status:
1818
* Handles compressed headers (`7za a -mhc=on test.7z ...`).
1919
* Handles password-protected versions of both of the above (`7za a -mhc=on|off -mhe=on -ppassword test.7z ...`).
2020
* Handles archives split into multiple volumes (`7za a -v100m test.7z ...`).
21+
* Handles self-extracting archives (`7za a -sfx archive.exe ...`).
2122
* Validates CRC values as it parses the file.
2223
* Supports BCJ2, Brotli, Bzip2, Copy, Deflate, Delta, LZ4, LZMA, LZMA2 and Zstandard methods.
2324
* Implements the `fs.FS` interface so you can treat an opened 7-zip archive like a filesystem.

reader.go

+80-16
Original file line number | Diff line number | Diff line change
@@ -257,38 +257,99 @@ func (z *Reader) folderReader(si *streamsInfo, f int) (*folderReadCloser, uint32
257257
return si.FolderReader(io.NewSectionReader(z.r, z.start, z.end-z.start), f, z.p)
258258
}
259259

260-
//nolint:cyclop,funlen,gocognit
260+
const (
	// chunkSize is the number of bytes the signature scan advances by
	// between successive reads.
	chunkSize = 4096
	// searchLimit is the offset at which the signature scan stops issuing
	// further reads.
	searchLimit = 1 << 20 // 1 MiB
)

// findSignature scans r from offset zero for occurrences of search and
// returns the absolute offset of every match, in ascending order and without
// duplicates.
//
// The scan reads r in windows of chunkSize+len(search) bytes, advancing by
// chunkSize each time, and stops once the next window would start at or
// beyond searchLimit or when r reports io.EOF. If the very first match is at
// offset zero the scan stops immediately and returns just that offset, since
// the data is then a regular archive rather than a self-extracting one.
//
// An empty search pattern yields no offsets. Any read error other than io.EOF
// is returned as-is together with a nil slice.
func findSignature(r io.ReaderAt, search []byte) ([]int64, error) {
	// An empty pattern matches everywhere; there is nothing meaningful to
	// report, and the scanning loop below relies on len(search) >= 1.
	if len(search) == 0 {
		return nil, nil
	}

	var offsets []int64

	// Each window extends len(search) bytes past the stride so that a match
	// straddling two windows is always fully contained in the earlier one.
	chunk := make([]byte, chunkSize+len(search))

	for offset := int64(0); offset < searchLimit; offset += chunkSize {
		n, err := r.ReadAt(chunk, offset)

		// Scan whatever was read, even on error: a short final read still
		// carries valid data in chunk[:n].
		for i := 0; ; {
			idx := bytes.Index(chunk[i:n], search)
			if idx == -1 {
				break
			}

			pos := offset + int64(i+idx)
			// Resume one byte past this match so overlapping matches are found.
			i += idx + 1

			// A match starting exactly on the next window's first byte is
			// visible to both windows. Offsets are produced in ascending
			// order, so comparing with the last one is enough to record it
			// only once.
			if len(offsets) > 0 && offsets[len(offsets)-1] == pos {
				continue
			}

			offsets = append(offsets, pos)

			if pos == 0 {
				// The signature is at the very beginning, so this is a
				// regular archive; no need to look any further.
				return offsets, nil
			}
		}

		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}

			return nil, err
		}
	}

	return offsets, nil
}
304+
305+
//nolint:cyclop,funlen,gocognit,gocyclo
261306
func (z *Reader) init(r io.ReaderAt, size int64) error {
262307
h := crc32.NewIEEE()
263308
tra := plumbing.TeeReaderAt(r, h)
264-
sr := io.NewSectionReader(tra, 0, size) // Will only read first 32 bytes
265309

266-
var sh signatureHeader
267-
if err := binary.Read(sr, binary.LittleEndian, &sh); err != nil {
310+
signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c}
311+
312+
offsets, err := findSignature(r, signature)
313+
if err != nil {
268314
return err
269315
}
270316

271-
signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c}
272-
if !bytes.Equal(sh.Signature[:], signature) {
317+
if len(offsets) == 0 {
273318
return errFormat
274319
}
275320

276-
z.r = r
277-
278-
h.Reset()
279-
280321
var (
281-
err error
322+
sr *io.SectionReader
323+
off int64
282324
start startHeader
283325
)
284326

285-
if err = binary.Read(sr, binary.LittleEndian, &start); err != nil {
286-
return err
327+
for _, off = range offsets {
328+
sr = io.NewSectionReader(tra, off, size-off) // Will only read first 32 bytes
329+
330+
var sh signatureHeader
331+
if err = binary.Read(sr, binary.LittleEndian, &sh); err != nil {
332+
return err
333+
}
334+
335+
z.r = r
336+
337+
h.Reset()
338+
339+
if err = binary.Read(sr, binary.LittleEndian, &start); err != nil {
340+
return err
341+
}
342+
343+
// CRC of the start header should match
344+
if util.CRC32Equal(h.Sum(nil), sh.CRC) {
345+
break
346+
}
347+
348+
err = errChecksum
287349
}
288350

289-
// CRC of the start header should match
290-
if !util.CRC32Equal(h.Sum(nil), sh.CRC) {
291-
return errChecksum
351+
if err != nil {
352+
return err
292353
}
293354

294355
// Work out where we are in the file (32, avoiding magic numbers)
@@ -301,6 +362,9 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
301362
return err
302363
}
303364

365+
z.start += off
366+
z.end += off
367+
304368
h.Reset()
305369

306370
// Bound bufio.Reader otherwise it can read trailing garbage which screws up the CRC check

reader_test.go

+4
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,10 @@ func TestOpenReader(t *testing.T) {
116116
name: "zstd",
117117
file: "zstd.7z",
118118
},
119+
{
120+
name: "sfx",
121+
file: "sfx.exe",
122+
},
119123
}
120124

121125
for _, table := range tables {

testdata/sfx.exe

437 KB
Binary file not shown.

0 commit comments

Comments
 (0)