@@ -18,9 +18,12 @@
 package snowflake
 
 import (
+	"bytes"
	"context"
	"encoding/hex"
+	"encoding/json"
	"fmt"
+	"io"
	"math"
	"strconv"
	"strings"
@@ -300,7 +303,7 @@ func integerToDecimal128(ctx context.Context, a arrow.Array, dt *arrow.Decimal12
 	return result, err
 }
 
-func rowTypesToArrowSchema(ctx context.Context, ld gosnowflake.ArrowStreamLoader, useHighPrecision bool) (*arrow.Schema, error) {
+func rowTypesToArrowSchema(_ context.Context, ld gosnowflake.ArrowStreamLoader, useHighPrecision bool) (*arrow.Schema, error) {
 	var loc *time.Location
 
 	metadata := ld.RowTypes()
@@ -360,8 +363,7 @@ func extractTimestamp(src *string) (sec, nsec int64, err error) {
 	return
 }
 
-func jsonDataToArrow(ctx context.Context, bldr *array.RecordBuilder, ld gosnowflake.ArrowStreamLoader) (arrow.Record, error) {
-	rawData := ld.JSONData()
+func jsonDataToArrow(_ context.Context, bldr *array.RecordBuilder, rawData [][]*string) (arrow.Record, error) {
 	fieldBuilders := bldr.Fields()
 	for _, rec := range rawData {
 		for i, col := range rec {
@@ -471,7 +473,12 @@ func newRecordReader(ctx context.Context, alloc memory.Allocator, ld gosnowflake
 		return nil, errToAdbcErr(adbc.StatusInternal, err)
 	}
 
-	if len(batches) == 0 {
+	// if the first chunk was JSON, that means this was a metadata query which
+	// is only returning JSON data rather than Arrow
+	rawData := ld.JSONData()
+	if len(rawData) > 0 {
+		// construct an Arrow schema based on reading the JSON metadata description of the
+		// result type schema
 		schema, err := rowTypesToArrowSchema(ctx, ld, useHighPrecision)
 		if err != nil {
 			return nil, adbc.Error{
@@ -480,20 +487,87 @@ func newRecordReader(ctx context.Context, alloc memory.Allocator, ld gosnowflake
 			}
 		}
 
+		if ld.TotalRows() == 0 {
+			return array.NewRecordReader(schema, []arrow.Record{})
+		}
+
 		bldr := array.NewRecordBuilder(alloc, schema)
 		defer bldr.Release()
 
-		rec, err := jsonDataToArrow(ctx, bldr, ld)
+		rec, err := jsonDataToArrow(ctx, bldr, rawData)
 		if err != nil {
 			return nil, err
 		}
 		defer rec.Release()
 
-		if ld.TotalRows() != 0 {
-			return array.NewRecordReader(schema, []arrow.Record{rec})
-		} else {
-			return array.NewRecordReader(schema, []arrow.Record{})
+		results := []arrow.Record{rec}
+		for _, b := range batches {
+			rdr, err := b.GetStream(ctx)
+			if err != nil {
+				return nil, adbc.Error{
+					Msg:  err.Error(),
+					Code: adbc.StatusInternal,
+				}
+			}
+			defer rdr.Close()
+
+			// the "JSON" data returned isn't valid JSON. Instead it is a list of
+			// comma-delimited JSON lists containing every value as a string, except
+			// for a JSON null to represent nulls. Thus we can't just use the existing
+			// JSON parsing code in Arrow.
+			data, err := io.ReadAll(rdr)
+			if err != nil {
+				return nil, adbc.Error{
+					Msg:  err.Error(),
+					Code: adbc.StatusInternal,
+				}
+			}
+
+			if cap(rawData) >= int(b.NumRows()) {
+				rawData = rawData[:b.NumRows()]
+			} else {
+				rawData = make([][]*string, b.NumRows())
+			}
+			bldr.Reserve(int(b.NumRows()))
+
+			// we grab the entire JSON message and create a bytes reader
+			offset, buf := int64(0), bytes.NewReader(data)
+			for i := 0; i < int(b.NumRows()); i++ {
+				// we construct a decoder from the bytes.Reader to read the next JSON list
+				// of columns (one row) from the input
+				dec := json.NewDecoder(buf)
+				if err = dec.Decode(&rawData[i]); err != nil {
+					return nil, adbc.Error{
+						Msg:  err.Error(),
+						Code: adbc.StatusInternal,
+					}
+				}
+
+				// dec.InputOffset() now represents the index of the ',' so we skip the comma
+				offset += dec.InputOffset() + 1
+				// then seek the buffer to that spot. we have to seek based on the start
+				// because json.Decoder can read from the buffer more than is necessary to
+				// process the JSON data.
+				if _, err = buf.Seek(offset, 0); err != nil {
+					return nil, adbc.Error{
+						Msg:  err.Error(),
+						Code: adbc.StatusInternal,
+					}
+				}
+			}
+
+			// now that we have our [][]*string of JSON data, we can pass it to get converted
+			// to an Arrow record batch and appended to our slice of batches
+			rec, err := jsonDataToArrow(ctx, bldr, rawData)
+			if err != nil {
+				return nil, err
+			}
+			defer rec.Release()
+
+			results = append(results, rec)
 		}
+
+		return array.NewRecordReader(schema, results)
 	}
 
 	ch := make(chan arrow.Record, bufferSize)
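
The decode-and-seek loop added above is the subtle part of this change: the chunk payload is a series of JSON lists separated by bare commas, so the whole body is not valid JSON and must be decoded one row at a time. Below is a standalone sketch of the same technique, assuming a made-up two-row payload (the real data comes from b.GetStream); it uses a fresh json.Decoder per row, dec.InputOffset() to locate the separating comma, and a seek from the start of the reader because the decoder may buffer more bytes than it consumed.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
)

func main() {
	// Hypothetical chunk payload: JSON lists separated by bare commas,
	// which is not a valid JSON document as a whole.
	data := []byte(`["1","a"],["2",null]`)

	var rows [][]*string
	offset, buf := int64(0), bytes.NewReader(data)
	for offset < int64(len(data)) {
		// A fresh decoder picks up at the current seek position and
		// reads exactly one JSON list (one row).
		dec := json.NewDecoder(buf)
		var row []*string
		if err := dec.Decode(&row); err != nil {
			panic(err)
		}
		rows = append(rows, row)

		// InputOffset is relative to where this decoder started reading,
		// i.e. the seek position; +1 steps over the comma between rows.
		// Seek from the start of the reader since the decoder may have
		// buffered past the value it returned.
		offset += dec.InputOffset() + 1
		if _, err := buf.Seek(offset, io.SeekStart); err != nil {
			panic(err)
		}
	}

	fmt.Println(len(rows), rows[1][1] == nil) // 2 true
}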
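For reference, after this change jsonDataToArrow consumes a plain [][]*string in which a nil pointer marks a SQL NULL, so the same builder can be refilled for every chunk. A minimal sketch of appending such data through an array.RecordBuilder follows; the two-column all-string schema and the arrow/go v13 module path are assumptions for illustration, not part of this commit (the real function dispatches on each column's Arrow type rather than using only StringBuilder).

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v13/arrow"
	"github.com/apache/arrow/go/v13/arrow/array"
	"github.com/apache/arrow/go/v13/arrow/memory"
)

func main() {
	// Assumed schema: every column a nullable string, mirroring the shape
	// of the JSON chunk data before type conversion.
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "id", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
	}, nil)

	bldr := array.NewRecordBuilder(memory.DefaultAllocator, schema)
	defer bldr.Release()

	s := func(v string) *string { return &v }
	rawData := [][]*string{
		{s("1"), s("alice")},
		{s("2"), nil}, // a JSON null decoded to a nil *string
	}

	for _, row := range rawData {
		for i, col := range row {
			fb := bldr.Field(i).(*array.StringBuilder)
			if col == nil {
				fb.AppendNull() // nil pointer becomes an Arrow null
				continue
			}
			fb.Append(*col)
		}
	}

	rec := bldr.NewRecord()
	defer rec.Release()
	fmt.Println(rec.NumRows()) // 2
}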