Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/en/resources/sources/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ sources:
# scopes: # Optional: List of OAuth scopes to request.
# - "https://www.googleapis.com/auth/bigquery"
# - "https://www.googleapis.com/auth/drive.readonly"
# maxQueryResultRows: 50 # Optional: Limits the number of rows returned by queries. Defaults to 50.
```

Initialize a BigQuery source that uses the client's access token:
Expand All @@ -153,6 +154,7 @@ sources:
# scopes: # Optional: List of OAuth scopes to request.
# - "https://www.googleapis.com/auth/bigquery"
# - "https://www.googleapis.com/auth/drive.readonly"
# maxQueryResultRows: 50 # Optional: Limits the number of rows returned by queries. Defaults to 50.
```

## Reference
Expand All @@ -167,3 +169,4 @@ sources:
| useClientOAuth | bool | false | If true, forwards the client's OAuth access token from the "Authorization" header to downstream queries. **Note:** This cannot be used with `writeMode: protected`. |
| scopes | []string | false | A list of OAuth 2.0 scopes to use for the credentials. If not provided, default scopes are used. |
| impersonateServiceAccount | string | false | Service account email to impersonate when making BigQuery and Dataplex API calls. The authenticated principal must have the `roles/iam.serviceAccountTokenCreator` role on the target service account. [Learn More](https://cloud.google.com/iam/docs/service-account-impersonation) |
| maxQueryResultRows | int | false | The maximum number of rows to return from a query. Defaults to 50. |
1 change: 1 addition & 0 deletions internal/prebuiltconfigs/tools/bigquery.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ sources:
location: ${BIGQUERY_LOCATION:}
useClientOAuth: ${BIGQUERY_USE_CLIENT_OAUTH:false}
scopes: ${BIGQUERY_SCOPES:}
maxQueryResultRows: ${BIGQUERY_MAX_QUERY_RESULT_ROWS:50}

tools:
analyze_contribution:
Expand Down
9 changes: 7 additions & 2 deletions internal/sources/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ type Config struct {
UseClientOAuth bool `yaml:"useClientOAuth"`
ImpersonateServiceAccount string `yaml:"impersonateServiceAccount"`
Scopes StringOrStringSlice `yaml:"scopes"`
MaxQueryResultRows int `yaml:"maxQueryResultRows"`
}

// StringOrStringSlice is a custom type that can unmarshal both a single string
Expand Down Expand Up @@ -127,6 +128,10 @@ func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.So
r.WriteMode = WriteModeAllowed
}

if r.MaxQueryResultRows == 0 {
r.MaxQueryResultRows = 50
}

if r.WriteMode == WriteModeProtected && r.UseClientOAuth {
// The protected mode only allows write operations to the session's temporary datasets.
// when using client OAuth, a new session is created every
Expand All @@ -150,7 +155,7 @@ func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.So
Client: client,
RestService: restService,
TokenSource: tokenSource,
MaxQueryResultRows: 50,
MaxQueryResultRows: r.MaxQueryResultRows,
ClientCreator: clientCreator,
}

Expand Down Expand Up @@ -567,7 +572,7 @@ func (s *Source) RunSQL(ctx context.Context, bqClient *bigqueryapi.Client, state
}

var out []any
for {
for s.MaxQueryResultRows <= 0 || len(out) < s.MaxQueryResultRows {
var val []bigqueryapi.Value
err = it.Next(&val)
if err == iterator.Done {
Expand Down
76 changes: 76 additions & 0 deletions internal/sources/bigquery/bigquery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ import (

yaml "github.com/goccy/go-yaml"
"github.com/google/go-cmp/cmp"
"go.opentelemetry.io/otel/trace/noop"

"github.com/googleapis/genai-toolbox/internal/server"
"github.com/googleapis/genai-toolbox/internal/sources/bigquery"
"github.com/googleapis/genai-toolbox/internal/testutils"
"github.com/googleapis/genai-toolbox/internal/util"
)

func TestParseFromYamlBigQuery(t *testing.T) {
Expand Down Expand Up @@ -154,6 +157,26 @@ func TestParseFromYamlBigQuery(t *testing.T) {
},
},
},
{
desc: "with max query result rows example",
in: `
sources:
my-instance:
kind: bigquery
project: my-project
location: us
maxQueryResultRows: 10
`,
want: server.SourceConfigs{
"my-instance": bigquery.Config{
Name: "my-instance",
Kind: bigquery.SourceKind,
Project: "my-project",
Location: "us",
MaxQueryResultRows: 10,
},
},
},
}
for _, tc := range tcs {
t.Run(tc.desc, func(t *testing.T) {
Expand Down Expand Up @@ -220,6 +243,59 @@ func TestFailParseFromYaml(t *testing.T) {
}
}

func TestInitialize_MaxQueryResultRows(t *testing.T) {
ctx, err := testutils.ContextWithNewLogger()
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
ctx = util.WithUserAgent(ctx, "test-agent")
tracer := noop.NewTracerProvider().Tracer("")

tcs := []struct {
desc string
cfg bigquery.Config
want int
}{
{
desc: "default value",
cfg: bigquery.Config{
Name: "test-default",
Kind: bigquery.SourceKind,
Project: "test-project",
UseClientOAuth: true,
},
want: 50,
},
{
desc: "configured value",
cfg: bigquery.Config{
Name: "test-configured",
Kind: bigquery.SourceKind,
Project: "test-project",
UseClientOAuth: true,
MaxQueryResultRows: 100,
},
want: 100,
},
}

for _, tc := range tcs {
t.Run(tc.desc, func(t *testing.T) {
src, err := tc.cfg.Initialize(ctx, tracer)
if err != nil {
t.Fatalf("Initialize failed: %v", err)
}
bqSrc, ok := src.(*bigquery.Source)
if !ok {
t.Fatalf("Expected *bigquery.Source, got %T", src)
}
if bqSrc.MaxQueryResultRows != tc.want {
t.Errorf("MaxQueryResultRows = %d, want %d", bqSrc.MaxQueryResultRows, tc.want)
}
})
}
}

func TestNormalizeValue(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading