@@ -3,11 +3,10 @@ package llm
3
3
import (
4
4
"bytes"
5
5
"encoding/binary"
6
+ "encoding/json"
6
7
"fmt"
7
8
"io"
8
9
"strings"
9
-
10
- "log/slog"
11
10
)
12
11
13
12
type containerGGUF struct {
@@ -29,6 +28,12 @@ type containerGGUF struct {
29
28
NumTensor uint64
30
29
NumKV uint64
31
30
}
31
+
32
+ maxArraySize int
33
+ }
34
+
35
+ func (c * containerGGUF ) canCollectArray (size int ) bool {
36
+ return c .maxArraySize < 0 || size <= c .maxArraySize
32
37
}
33
38
34
39
func (c * containerGGUF ) Name () string {
@@ -54,7 +59,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
54
59
}
55
60
56
61
model := newGGUF (c )
57
- slog .Debug (fmt .Sprintf ("model = %#v" , model ))
58
62
if err := model .Decode (rs ); err != nil {
59
63
return nil , err
60
64
}
@@ -85,6 +89,8 @@ type gguf struct {
85
89
tensors []* Tensor
86
90
87
91
parameters uint64
92
+
93
+ scratch [16 << 10 ]byte
88
94
}
89
95
90
96
func newGGUF (container * containerGGUF ) * gguf {
@@ -181,34 +187,34 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
181
187
}
182
188
183
189
// decode tensors
184
- for i := 0 ; uint64 ( i ) < llm .numTensor (); i ++ {
190
+ for range llm .numTensor () {
185
191
name , err := readGGUFString (llm , rs )
186
192
if err != nil {
187
- return err
193
+ return fmt . Errorf ( "failed to read tensor name: %w" , err )
188
194
}
189
195
190
196
// dims is the number of dimensions in the tensor
191
197
dims , err := readGGUF [uint32 ](llm , rs )
192
198
if err != nil {
193
- return err
199
+ return fmt . Errorf ( "failed to read tensor dimensions: %w" , err )
194
200
}
195
201
196
202
shape := [4 ]uint64 {1 , 1 , 1 , 1 }
197
203
for i := 0 ; uint32 (i ) < dims ; i ++ {
198
204
shape [i ], err = readGGUF [uint64 ](llm , rs )
199
205
if err != nil {
200
- return err
206
+ return fmt . Errorf ( "failed to read tensor shape: %w" , err )
201
207
}
202
208
}
203
209
204
210
kind , err := readGGUF [uint32 ](llm , rs )
205
211
if err != nil {
206
- return err
212
+ return fmt . Errorf ( "failed to read tensor kind: %w" , err )
207
213
}
208
214
209
215
offset , err := readGGUF [uint64 ](llm , rs )
210
216
if err != nil {
211
- return err
217
+ return fmt . Errorf ( "failed to read tensor offset: %w" , err )
212
218
}
213
219
214
220
tensor := Tensor {
@@ -230,24 +236,19 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
230
236
alignment = 32
231
237
}
232
238
233
- offset , err := rs .Seek (0 , io .SeekCurrent )
234
- if err != nil {
235
- return err
236
- }
237
-
238
- padding := llm .padding (offset , int64 (alignment ))
239
- if _ , err := rs .Seek (padding , io .SeekCurrent ); err != nil {
240
- return err
241
- }
242
-
243
239
for _ , tensor := range llm .tensors {
244
- if _ , err := rs .Seek (int64 (tensor .Size ()), io .SeekCurrent ); err != nil {
245
- return err
240
+ offset , err := rs .Seek (0 , io .SeekCurrent )
241
+ if err != nil {
242
+ return fmt .Errorf ("failed to get current offset: %w" , err )
246
243
}
247
244
248
- padding := llm .padding (int64 ( tensor . Size ()) , int64 (alignment ))
245
+ padding := llm .padding (offset , int64 (alignment ))
249
246
if _ , err := rs .Seek (padding , io .SeekCurrent ); err != nil {
250
- return err
247
+ return fmt .Errorf ("failed to seek to init padding: %w" , err )
248
+ }
249
+
250
+ if _ , err := rs .Seek (int64 (tensor .Size ()), io .SeekCurrent ); err != nil {
251
+ return fmt .Errorf ("failed to seek to tensor: %w" , err )
251
252
}
252
253
}
253
254
@@ -285,22 +286,48 @@ func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
285
286
return b .String (), nil
286
287
}
287
288
289
+ func discardGGUFString (llm * gguf , r io.Reader ) error {
290
+ buf := llm .scratch [:8 ]
291
+ _ , err := io .ReadFull (r , buf )
292
+ if err != nil {
293
+ return err
294
+ }
295
+
296
+ size := int (llm .ByteOrder .Uint64 (buf ))
297
+ for size > 0 {
298
+ n , err := r .Read (llm .scratch [:min (size , cap (llm .scratch ))])
299
+ if err != nil {
300
+ return err
301
+ }
302
+ size -= n
303
+ }
304
+ return nil
305
+ }
306
+
288
307
func readGGUFString (llm * gguf , r io.Reader ) (string , error ) {
289
308
if llm .Version == 1 {
290
309
return readGGUFV1String (llm , r )
291
310
}
292
311
293
- var length uint64
294
- if err := binary .Read (r , llm .ByteOrder , & length ); err != nil {
312
+ buf := llm .scratch [:8 ]
313
+ _ , err := io .ReadFull (r , buf )
314
+ if err != nil {
295
315
return "" , err
296
316
}
297
317
298
- var b bytes.Buffer
299
- if _ , err := io .CopyN (& b , r , int64 (length )); err != nil {
300
- return "" , err
318
+ length := int (llm .ByteOrder .Uint64 (buf ))
319
+ if length > len (llm .scratch ) {
320
+ buf = make ([]byte , length )
321
+ } else {
322
+ buf = llm .scratch [:length ]
301
323
}
324
+ clear (buf )
302
325
303
- return b .String (), nil
326
+ _ , err = io .ReadFull (r , buf )
327
+ if err != nil {
328
+ return "" , err
329
+ }
330
+ return string (buf ), nil
304
331
}
305
332
306
333
func writeGGUFString (llm * gguf , w io.Writer , s string ) error {
@@ -316,7 +343,16 @@ func writeGGUFString(llm *gguf, w io.Writer, s string) error {
316
343
return err
317
344
}
318
345
319
- func readGGUFV1Array (llm * gguf , r io.Reader ) (a []any , err error ) {
346
// array is a decoded array value. size is the element count read from the
// file; values holds the decoded elements and stays nil when the reader did
// not collect them.
type array struct {
	size   int
	values []any
}

// MarshalJSON encodes only the collected values, so the array appears in
// JSON as a plain list (or null when values is nil). size is not emitted.
func (arr *array) MarshalJSON() ([]byte, error) {
	elems := arr.values
	return json.Marshal(elems)
}
354
+
355
+ func readGGUFV1Array (llm * gguf , r io.Reader ) (* array , error ) {
320
356
t , err := readGGUF [uint32 ](llm , r )
321
357
if err != nil {
322
358
return nil , err
@@ -327,7 +363,12 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
327
363
return nil , err
328
364
}
329
365
330
- for i := 0 ; uint32 (i ) < n ; i ++ {
366
+ a := & array {size : int (n )}
367
+ if llm .canCollectArray (int (n )) {
368
+ a .values = make ([]any , 0 , int (n ))
369
+ }
370
+
371
+ for i := range n {
331
372
var e any
332
373
switch t {
333
374
case ggufTypeUint8 :
@@ -361,13 +402,15 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
361
402
return nil , err
362
403
}
363
404
364
- a = append (a , e )
405
+ if a .values != nil {
406
+ a .values [i ] = e
407
+ }
365
408
}
366
409
367
- return
410
+ return a , nil
368
411
}
369
412
370
- func readGGUFArray (llm * gguf , r io.Reader ) (a [] any , err error ) {
413
+ func readGGUFArray (llm * gguf , r io.Reader ) (* array , error ) {
371
414
if llm .Version == 1 {
372
415
return readGGUFV1Array (llm , r )
373
416
}
@@ -382,7 +425,12 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
382
425
return nil , err
383
426
}
384
427
385
- for i := 0 ; uint64 (i ) < n ; i ++ {
428
+ a := & array {size : int (n )}
429
+ if llm .canCollectArray (int (n )) {
430
+ a .values = make ([]any , int (n ))
431
+ }
432
+
433
+ for i := range n {
386
434
var e any
387
435
switch t {
388
436
case ggufTypeUint8 :
@@ -408,18 +456,24 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
408
456
case ggufTypeBool :
409
457
e , err = readGGUF [bool ](llm , r )
410
458
case ggufTypeString :
411
- e , err = readGGUFString (llm , r )
459
+ if a .values != nil {
460
+ e , err = readGGUFString (llm , r )
461
+ } else {
462
+ err = discardGGUFString (llm , r )
463
+ }
412
464
default :
413
465
return nil , fmt .Errorf ("invalid array type: %d" , t )
414
466
}
415
467
if err != nil {
416
468
return nil , err
417
469
}
418
470
419
- a = append (a , e )
471
+ if a .values != nil {
472
+ a .values [i ] = e
473
+ }
420
474
}
421
475
422
- return
476
+ return a , nil
423
477
}
424
478
425
479
func writeGGUFArray [S ~ []E , E any ](llm * gguf , w io.Writer , t uint32 , s S ) error {
0 commit comments