
Commit 32a27f1

zeroshade authored and cocoa-xu committed
fix(go/adbc/driver/snowflake): workaround snowflake metadata-only limitations (apache#1790)
Workaround to fix apache#1454 until snowflake addresses snowflakedb/gosnowflake#1110 with a better solution (hopefully by having the server actually return Arrow...)
1 parent a0fe48f commit 32a27f1

2 files changed: +108, -9 lines changed

go/adbc/driver/snowflake/driver_test.go (+25)

@@ -2006,3 +2006,28 @@ func (suite *SnowflakeTests) TestJwtPrivateKey() {
 	defer os.Remove(binKey)
 	verifyKey(binKey)
 }
+
+func (suite *SnowflakeTests) TestMetadataOnlyQuery() {
+	// force more than one chunk for `SHOW FUNCTIONS`, which will return
+	// JSON data instead of Arrow even though we ask for Arrow
+	suite.Require().NoError(suite.stmt.SetSqlQuery(`ALTER SESSION SET CLIENT_RESULT_CHUNK_SIZE = 50`))
+	_, err := suite.stmt.ExecuteUpdate(suite.ctx)
+	suite.Require().NoError(err)
+
+	// since we lowered CLIENT_RESULT_CHUNK_SIZE, this will return at least
+	// one chunk in addition to the first. Metadata queries currently return
+	// JSON no matter what.
+	suite.Require().NoError(suite.stmt.SetSqlQuery(`SHOW FUNCTIONS`))
+	rdr, n, err := suite.stmt.ExecuteQuery(suite.ctx)
+	suite.Require().NoError(err)
+	defer rdr.Release()
+
+	recv := int64(0)
+	for rdr.Next() {
+		recv += rdr.Record().NumRows()
+	}
+
+	// verify that we got the expected number of rows by summing up
+	// the rows from each record in the stream.
+	suite.Equal(n, recv)
+}
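For illustration, here is a minimal standalone sketch of the scenario this test exercises, going through the driver's public Go API instead of the suite fixtures. The DSN string is a hypothetical placeholder, and the arrow module major version in the import path is an assumption to adjust against your go.mod; the adbc and snowflake driver entry points are the real ones from this repository.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/apache/arrow-adbc/go/adbc"
	"github.com/apache/arrow-adbc/go/adbc/driver/snowflake"
	"github.com/apache/arrow/go/v17/arrow/memory" // adjust major version to match your go.mod
)

func main() {
	ctx := context.Background()

	drv := snowflake.NewDriver(memory.DefaultAllocator)
	// hypothetical placeholder DSN; substitute real credentials
	db, err := drv.NewDatabase(map[string]string{
		adbc.OptionKeyURI: "user:pass@account/db/schema",
	})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	cnxn, err := db.Open(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer cnxn.Close()

	stmt, err := cnxn.NewStatement()
	if err != nil {
		log.Fatal(err)
	}
	defer stmt.Close()

	// shrink the chunk size so SHOW FUNCTIONS spans more than one chunk
	if err := stmt.SetSqlQuery(`ALTER SESSION SET CLIENT_RESULT_CHUNK_SIZE = 50`); err != nil {
		log.Fatal(err)
	}
	if _, err := stmt.ExecuteUpdate(ctx); err != nil {
		log.Fatal(err)
	}

	// metadata-only query: the server responds with chunked JSON, but the
	// driver still surfaces an Arrow record stream
	if err := stmt.SetSqlQuery(`SHOW FUNCTIONS`); err != nil {
		log.Fatal(err)
	}
	rdr, n, err := stmt.ExecuteQuery(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer rdr.Release()

	var recv int64
	for rdr.Next() {
		recv += rdr.Record().NumRows()
	}
	fmt.Printf("reported rows: %d, received rows: %d\n", n, recv)
}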

go/adbc/driver/snowflake/record_reader.go (+83, -9)
@@ -18,9 +18,12 @@
 package snowflake
 
 import (
+	"bytes"
 	"context"
 	"encoding/hex"
+	"encoding/json"
 	"fmt"
+	"io"
 	"math"
 	"strconv"
 	"strings"
@@ -300,7 +303,7 @@ func integerToDecimal128(ctx context.Context, a arrow.Array, dt *arrow.Decimal12
 	return result, err
 }
 
-func rowTypesToArrowSchema(ctx context.Context, ld gosnowflake.ArrowStreamLoader, useHighPrecision bool) (*arrow.Schema, error) {
+func rowTypesToArrowSchema(_ context.Context, ld gosnowflake.ArrowStreamLoader, useHighPrecision bool) (*arrow.Schema, error) {
 	var loc *time.Location
 
 	metadata := ld.RowTypes()
@@ -360,8 +363,7 @@ func extractTimestamp(src *string) (sec, nsec int64, err error) {
 	return
 }
 
-func jsonDataToArrow(ctx context.Context, bldr *array.RecordBuilder, ld gosnowflake.ArrowStreamLoader) (arrow.Record, error) {
-	rawData := ld.JSONData()
+func jsonDataToArrow(_ context.Context, bldr *array.RecordBuilder, rawData [][]*string) (arrow.Record, error) {
 	fieldBuilders := bldr.Fields()
 	for _, rec := range rawData {
 		for i, col := range rec {
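To make the new [][]*string parameter concrete: each row is a slice of optional strings, with nil standing in for SQL NULL. Below is a minimal sketch of the kind of conversion jsonDataToArrow performs, assuming an all-string schema; rowsToRecord is a hypothetical helper named for this sketch, and where the driver type-switches on each column builder to handle every Arrow type, the sketch leans on the generic Builder.AppendValueFromString for brevity.

package main

import (
	"fmt"
	"log"

	"github.com/apache/arrow/go/v17/arrow"
	"github.com/apache/arrow/go/v17/arrow/array"
	"github.com/apache/arrow/go/v17/arrow/memory"
)

// rowsToRecord appends each row of optional strings to the matching column
// builders, then snapshots the builders into a single Arrow record.
func rowsToRecord(alloc memory.Allocator, schema *arrow.Schema, rows [][]*string) (arrow.Record, error) {
	bldr := array.NewRecordBuilder(alloc, schema)
	defer bldr.Release()

	fieldBuilders := bldr.Fields()
	for _, row := range rows {
		for i, col := range row {
			if col == nil {
				fieldBuilders[i].AppendNull() // JSON null -> Arrow null
				continue
			}
			if err := fieldBuilders[i].AppendValueFromString(*col); err != nil {
				return nil, err
			}
		}
	}
	return bldr.NewRecord(), nil
}

func main() {
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "arguments", Type: arrow.BinaryTypes.String, Nullable: true},
	}, nil)

	s := func(v string) *string { return &v }
	// fabricated rows in the shape a metadata query might produce
	rows := [][]*string{
		{s("ABS"), s("ABS(NUMBER) RETURNS NUMBER")},
		{s("PI"), nil},
	}

	rec, err := rowsToRecord(memory.DefaultAllocator, schema, rows)
	if err != nil {
		log.Fatal(err)
	}
	defer rec.Release()
	fmt.Println("rows:", rec.NumRows())
	fmt.Println("name:", rec.Column(0))
	fmt.Println("arguments:", rec.Column(1))
}

Running this prints a two-row record with a null in the second column of the second row.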
@@ -471,7 +473,12 @@ func newRecordReader(ctx context.Context, alloc memory.Allocator, ld gosnowflake
 		return nil, errToAdbcErr(adbc.StatusInternal, err)
 	}
 
-	if len(batches) == 0 {
+	// if the first chunk was JSON, that means this was a metadata query which
+	// is only returning JSON data rather than Arrow
+	rawData := ld.JSONData()
+	if len(rawData) > 0 {
+		// construct an Arrow schema based on reading the JSON metadata description of the
+		// result type schema
 		schema, err := rowTypesToArrowSchema(ctx, ld, useHighPrecision)
 		if err != nil {
 			return nil, adbc.Error{
@@ -480,20 +487,87 @@ func newRecordReader(ctx context.Context, alloc memory.Allocator, ld gosnowflake
 			}
 		}
 
+		if ld.TotalRows() == 0 {
+			return array.NewRecordReader(schema, []arrow.Record{})
+		}
+
 		bldr := array.NewRecordBuilder(alloc, schema)
 		defer bldr.Release()
 
-		rec, err := jsonDataToArrow(ctx, bldr, ld)
+		rec, err := jsonDataToArrow(ctx, bldr, rawData)
 		if err != nil {
 			return nil, err
 		}
 		defer rec.Release()
 
-		if ld.TotalRows() != 0 {
-			return array.NewRecordReader(schema, []arrow.Record{rec})
-		} else {
-			return array.NewRecordReader(schema, []arrow.Record{})
+		results := []arrow.Record{rec}
+		for _, b := range batches {
+			rdr, err := b.GetStream(ctx)
+			if err != nil {
+				return nil, adbc.Error{
+					Msg:  err.Error(),
+					Code: adbc.StatusInternal,
+				}
+			}
+			defer rdr.Close()
+
+			// the "JSON" data returned isn't valid JSON. Instead it is a list of
+			// comma-delimited JSON lists containing every value as a string, except
+			// for a JSON null to represent nulls. Thus we can't just use the existing
+			// JSON parsing code in Arrow.
+			data, err := io.ReadAll(rdr)
+			if err != nil {
+				return nil, adbc.Error{
+					Msg:  err.Error(),
+					Code: adbc.StatusInternal,
+				}
+			}
+
+			if cap(rawData) >= int(b.NumRows()) {
+				rawData = rawData[:b.NumRows()]
+			} else {
+				rawData = make([][]*string, b.NumRows())
+			}
+			bldr.Reserve(int(b.NumRows()))
+
+			// we grab the entire JSON message and create a bytes reader
+			offset, buf := int64(0), bytes.NewReader(data)
+			for i := 0; i < int(b.NumRows()); i++ {
+				// we construct a decoder from the bytes.Reader to read the next JSON list
+				// of columns (one row) from the input
+				dec := json.NewDecoder(buf)
+				if err = dec.Decode(&rawData[i]); err != nil {
+					return nil, adbc.Error{
+						Msg:  err.Error(),
+						Code: adbc.StatusInternal,
+					}
+				}
+
+				// dec.InputOffset() now represents the index of the ',' so we skip the comma
+				offset += dec.InputOffset() + 1
+				// then seek the buffer to that spot. we have to seek based on the start
+				// because json.Decoder can read more from the buffer than is necessary to
+				// process the JSON data.
+				if _, err = buf.Seek(offset, 0); err != nil {
+					return nil, adbc.Error{
+						Msg:  err.Error(),
+						Code: adbc.StatusInternal,
+					}
+				}
+			}
+
+			// now that we have our [][]*string of JSON data, we can pass it to be converted
+			// to an Arrow record batch and appended to our slice of batches
+			rec, err := jsonDataToArrow(ctx, bldr, rawData)
+			if err != nil {
+				return nil, err
+			}
+			defer rec.Release()
+
+			results = append(results, rec)
 		}
+
+		return array.NewRecordReader(schema, results)
 	}
 
 	ch := make(chan arrow.Record, bufferSize)