Skip to content

Commit

Permalink
feat(gcloud)!: add support to seed data when using RunBigQueryContain…
Browse files Browse the repository at this point in the history
…er (testcontainers#2523)

* Update bigquery container to have an optional seed yaml file

* update opts snippet to handle error; update documentation

* Update based on feedback

* chore: use new API for running big query container

* chore: run make lint

* chore: use testify's require

* chore: remove unused

* fix: process yaml file just once

* chore: rename variable

* chore: run mod tidy

* chore: pass a reader to WithDataYAML option

* chore: do not allow multiple calls to WithDataYAML

* chore: embed test resource

* chore: simplify reader

* fix: update docs

* chore: use the embed file even more

* docs: wording

* chore: simplify tests

* chore: use original assertion

---------

Co-authored-by: Manuel de la Peña <[email protected]>
  • Loading branch information
mtellis2 and mdelapenya authored Dec 12, 2024
1 parent 8b4fa8e commit 35bf0cd
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 10 deletions.
18 changes: 17 additions & 1 deletion docs/modules/gcloud.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ go get github.com/testcontainers/testcontainers-go/modules/gcloud
## Usage example

!!!info
By default, all the emulators use `gcr.io/google.com/cloudsdktool/cloud-sdk:367.0.0-emulators` as the default Docker image, except for the BigQuery emulator, which uses `ghcr.io/goccy/bigquery-emulator:0.4.3`, and Spanner, which uses `gcr.io/cloud-spanner-emulator/emulator:1.4.0`.
By default, all the emulators use `gcr.io/google.com/cloudsdktool/cloud-sdk:367.0.0-emulators` as the default Docker image, except for the BigQuery emulator, which uses `ghcr.io/goccy/bigquery-emulator:0.6.1`, and Spanner, which uses `gcr.io/cloud-spanner-emulator/emulator:1.4.0`.

### BigQuery

Expand All @@ -28,6 +28,22 @@ go get github.com/testcontainers/testcontainers-go/modules/gcloud

It's important to set the `option.WithEndpoint()` option using the container's URI, as shown in the client example above.

#### Data YAML (Seed File)

- Not available until the next release of testcontainers-go <a href="https://github.com/testcontainers/testcontainers-go"><span class="tc-version">:material-tag: main</span></a>

If you would like to do additional initialization in the BigQuery container, add a `data.yaml` file represented by an `io.Reader` to the container request with the `WithDataYAML` function.
The file is copied into the container after the container is created but before it is started. The resulting startup command looks like `--project test --data-from-yaml /testcontainers-data.yaml`.

An example of a `data.yaml` file that seeds the BigQuery instance with datasets and tables is shown below:

<!--codeinclude-->
[Data Yaml content](../../modules/gcloud/testdata/data.yaml)
<!--/codeinclude-->

!!!warning
This feature is only available for the `BigQuery` container, and if you pass multiple `WithDataYAML` options, an error is returned.

### BigTable

<!--codeinclude-->
Expand Down
17 changes: 15 additions & 2 deletions modules/gcloud/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
// RunBigQueryContainer creates an instance of the GCloud container type for BigQuery.
// It delegates to RunBigQuery with a pinned bigquery-emulator image.
//
// Deprecated: use RunBigQuery instead.
func RunBigQueryContainer(ctx context.Context, opts ...testcontainers.ContainerCustomizer) (*GCloudContainer, error) {
return RunBigQuery(ctx, "ghcr.io/goccy/bigquery-emulator:0.4.3", opts...)
return RunBigQuery(ctx, "ghcr.io/goccy/bigquery-emulator:0.6.1", opts...)
}

// RunBigQuery creates an instance of the GCloud container type for BigQuery.
Expand All @@ -31,7 +31,20 @@ func RunBigQuery(ctx context.Context, img string, opts ...testcontainers.Contain
return nil, err
}

req.Cmd = []string{"--project", settings.ProjectID}
req.Cmd = append(req.Cmd, "--project", settings.ProjectID)

// Process data yaml file only for the BigQuery container.
if settings.bigQueryDataYaml != nil {
containerPath := "/testcontainers-data.yaml"

req.Cmd = append(req.Cmd, "--data-from-yaml", containerPath)

req.Files = append(req.Files, testcontainers.ContainerFile{
Reader: settings.bigQueryDataYaml,
ContainerFilePath: containerPath,
FileMode: 0o644,
})
}

return newGCloudContainer(ctx, req, 9050, settings, "http://")
}
87 changes: 85 additions & 2 deletions modules/gcloud/bigquery_test.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
package gcloud_test

import (
"bytes"
"context"
_ "embed"
"errors"
"fmt"
"log"
"testing"

"cloud.google.com/go/bigquery"
"github.com/stretchr/testify/require"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
"google.golang.org/api/option/internaloption"
Expand All @@ -17,13 +21,16 @@ import (
"github.com/testcontainers/testcontainers-go/modules/gcloud"
)

//go:embed testdata/data.yaml
var dataYaml []byte

func ExampleRunBigQueryContainer() {
// runBigQueryContainer {
ctx := context.Background()

bigQueryContainer, err := gcloud.RunBigQuery(
ctx,
"ghcr.io/goccy/bigquery-emulator:0.4.3",
"ghcr.io/goccy/bigquery-emulator:0.6.1",
gcloud.WithProjectID("bigquery-project"),
)
defer func() {
Expand Down Expand Up @@ -82,7 +89,83 @@ func ExampleRunBigQueryContainer() {
}

fmt.Println(val)

// Output:
// [30]
}

// TestBigQueryWithDataYAML exercises the WithDataYAML option of the BigQuery
// container in three scenarios:
//   - "valid": a single seed file is supplied and the seeded row can be queried.
//   - "multi-value-set": WithDataYAML is supplied twice, which must be rejected.
//   - "multi-value-not-set": a dangling --data-from-yaml flag (without a value)
//     is injected into the command, which must make the container run fail.
func TestBigQueryWithDataYAML(t *testing.T) {
	ctx := context.Background()

	t.Run("valid", func(t *testing.T) {
		bigQueryContainer, err := gcloud.RunBigQuery(
			ctx,
			"ghcr.io/goccy/bigquery-emulator:0.6.1",
			gcloud.WithProjectID("test"),
			gcloud.WithDataYAML(bytes.NewReader(dataYaml)),
		)
		// Register the cleanup before asserting on err so that a partially
		// created container is still terminated.
		testcontainers.CleanupContainer(t, bigQueryContainer)
		require.NoError(t, err)

		projectID := bigQueryContainer.Settings.ProjectID

		opts := []option.ClientOption{
			option.WithEndpoint(bigQueryContainer.URI),
			option.WithGRPCDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())),
			option.WithoutAuthentication(),
			internaloption.SkipDialSettingsValidation(),
		}

		client, err := bigquery.NewClient(ctx, projectID, opts...)
		require.NoError(t, err)
		defer client.Close()

		selectQuery := client.Query("SELECT * FROM dataset1.table_a where name = @name")
		selectQuery.QueryConfig.Parameters = []bigquery.QueryParameter{
			{Name: "name", Value: "bob"},
		}
		it, err := selectQuery.Read(ctx)
		require.NoError(t, err)

		// Drain the iterator; val holds the last row that was read.
		var val []bigquery.Value
		for {
			err := it.Next(&val)
			if errors.Is(err, iterator.Done) {
				break
			}
			require.NoError(t, err)
		}

		// Fail with a clear message instead of panicking on val[0] when the
		// seed data was not loaded and the query returned no rows.
		require.NotEmpty(t, val, "query returned no rows; seed data was not loaded")

		// The first column of table_a is id; the seed file defines bob with id 30.
		require.Equal(t, int64(30), val[0])
	})

	t.Run("multi-value-set", func(t *testing.T) {
		bigQueryContainer, err := gcloud.RunBigQuery(
			ctx,
			"ghcr.io/goccy/bigquery-emulator:0.6.1",
			gcloud.WithProjectID("test"),
			gcloud.WithDataYAML(bytes.NewReader(dataYaml)),
			gcloud.WithDataYAML(bytes.NewReader(dataYaml)),
		)
		testcontainers.CleanupContainer(t, bigQueryContainer)
		require.EqualError(t, err, `data yaml already exists`)
	})

	t.Run("multi-value-not-set", func(t *testing.T) {
		// noValueOption appends a --data-from-yaml flag with no value to the
		// container command.
		noValueOption := func() testcontainers.CustomizeRequestOption {
			return func(req *testcontainers.GenericContainerRequest) error {
				req.Cmd = append(req.Cmd, "--data-from-yaml")
				return nil
			}
		}

		bigQueryContainer, err := gcloud.RunBigQuery(
			ctx,
			"ghcr.io/goccy/bigquery-emulator:0.6.1",
			noValueOption(), // --project is appended after the customizers run, so the dangling flag receives `--project` as its value, which results in an error
			gcloud.WithProjectID("test"),
			gcloud.WithDataYAML(bytes.NewReader(dataYaml)),
		)
		testcontainers.CleanupContainer(t, bigQueryContainer)
		require.Error(t, err)
	})
}
31 changes: 27 additions & 4 deletions modules/gcloud/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package gcloud

import (
"context"
"errors"
"fmt"
"io"

"github.com/docker/go-connections/nat"

Expand Down Expand Up @@ -44,7 +46,8 @@ func newGCloudContainer(ctx context.Context, req testcontainers.GenericContainer
}

type options struct {
ProjectID string
ProjectID string
bigQueryDataYaml io.Reader
}

func defaultOptions() options {
Expand All @@ -57,7 +60,7 @@ func defaultOptions() options {
var _ testcontainers.ContainerCustomizer = (*Option)(nil)

// Option is an option for the GCloud container.
type Option func(*options)
type Option func(*options) error

// Customize is a NOOP. It's defined to satisfy the testcontainers.ContainerCustomizer interface.
func (o Option) Customize(*testcontainers.GenericContainerRequest) error {
Expand All @@ -67,8 +70,26 @@ func (o Option) Customize(*testcontainers.GenericContainerRequest) error {

// WithProjectID sets the project ID for the GCloud container.
func WithProjectID(projectID string) Option {
return func(o *options) {
return func(o *options) error {
o.ProjectID = projectID
return nil
}
}

// WithDataYAML registers r as the data yaml (seed) file for the BigQuery
// container. The reader's content is copied into the container as a file and
// handed to the emulator so that the BigQuery project starts pre-populated.
//
// Only the BigQuery container consumes this setting; other GCloud containers
// ignore it. Supplying the option more than once yields an error.
func WithDataYAML(r io.Reader) Option {
	return func(settings *options) error {
		// Accept the reader only when no seed file has been registered yet.
		if settings.bigQueryDataYaml == nil {
			settings.bigQueryDataYaml = r
			return nil
		}

		return errors.New("data yaml already exists")
	}
}

Expand All @@ -77,7 +98,9 @@ func applyOptions(req *testcontainers.GenericContainerRequest, opts []testcontai
settings := defaultOptions()
for _, opt := range opts {
if apply, ok := opt.(Option); ok {
apply(&settings)
if err := apply(&settings); err != nil {
return options{}, err
}
}
if err := opt.Customize(req); err != nil {
return options{}, err
Expand Down
2 changes: 1 addition & 1 deletion modules/gcloud/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
cloud.google.com/go/pubsub v1.36.2
cloud.google.com/go/spanner v1.57.0
github.com/docker/go-connections v0.5.0
github.com/stretchr/testify v1.9.0
github.com/testcontainers/testcontainers-go v0.34.0
google.golang.org/api v0.169.0
google.golang.org/grpc v1.64.1
Expand Down Expand Up @@ -74,7 +75,6 @@ require (
github.com/shirou/gopsutil/v3 v3.23.12 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/stretchr/testify v1.9.0 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/yusufpapurcu/wmi v1.2.3 // indirect
Expand Down
20 changes: 20 additions & 0 deletions modules/gcloud/testdata/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
projects:
- id: test
datasets:
- id: dataset1
tables:
- id: table_a
columns:
- name: id
type: INTEGER
- name: name
type: STRING
- name: createdAt
type: TIMESTAMP
data:
- id: 1
name: alice
createdAt: "2022-10-21T00:00:00"
- id: 30
name: bob
createdAt: "2022-10-21T00:00:00"

0 comments on commit 35bf0cd

Please sign in to comment.