-
Notifications
You must be signed in to change notification settings - Fork 425
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate SQL and shadow backend from Uber internal repo (#294)
This PR adds a new, SQL-based backend option for the tag datastore. The SQL backend offers increased performance over filesystem based backends for very large Docker registries as it takes advantage of optimizations such as indexing. Block-based storage systems are particularly bad at handling many small files, or walking directory structures. With the SQL backend, when listing the entire Docker catalog you are doing a simple SELECT over an indexed table, rather than a costly filesystem walk. This also includes a backend abstraction option called a "shadow" backend. This backend is designed for transitioning Kraken to a new backend by allowing admins to specify an "active" and "shadow" backend. Writes are sent to both backends, but reads only occur from the active. This ensures data consistency between the backends, and allows the old, proven backed to act as a safety net in the case where the new backend fails or has to be taken offline due to some unforeseen problems.
- Loading branch information
1 parent
1e07dd7
commit 645466c
Showing
13 changed files
with
1,521 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
Kraken Shadow Datastore | ||
======================== | ||
This backend is designed for transitioning Kraken to a new backend by allowing admins to specify an "active" and | ||
"shadow" backend. The term shadow is used because the other backend "shadows" the active backend. Writes are sent | ||
to both backends, but reads only occur from the active. This ensures data consistency between the backends, and | ||
allows the old, proven backed to act as a safety net in the case where the new backend fails or has to be taken | ||
offline due to some unforeseen problems. | ||
|
||
Because all reads occur from the active backend, data needs to be manually migrated from the old backend to the new, | ||
before bringing the shadow backend online. This means there will be some downtime incurred during the transition. | ||
|
||
Supported Backends | ||
------------------ | ||
This currently supports SQL, HDFS, S3, and testfs backends. | ||
|
||
Note that this backend is only enabled as a tag datastore for the build-index. However this could be used as the blob | ||
datastore for the origin servers, provided it is not configured to use the SQL backend in active or shadow modes. | ||
|
||
Configuration | ||
------------- | ||
The config has two required items "active_backend" and "shadow_backend", each of which function the same as the | ||
standard "backend" in that the backend type and normal configuration is defined under each, depending on which | ||
role that backend will play. For example, here is a configuration that uses the SQL backend as the active, and | ||
the testfs backend as the shadow: | ||
```yaml | ||
backends: | ||
- namespace: .* | ||
backend: | ||
shadow: | ||
active_backend: | ||
sql: | ||
dialect: mysql | ||
connection_string: "kraken:kraken@tcp(kraken-mysql:3306)/kraken?parseTime=True" | ||
debug_logging: true | ||
shadow_backend: | ||
testfs: | ||
addr: localhost:7357 | ||
root: tags | ||
name_path: docker_tag | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
// Copyright (c) 2016-2020 Uber Technologies, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package shadowbackend | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"io" | ||
|
||
"github.com/uber/kraken/core" | ||
"github.com/uber/kraken/lib/backend" | ||
"github.com/uber/kraken/lib/backend/backenderrors" | ||
"github.com/uber/kraken/lib/backend/hdfsbackend" | ||
"github.com/uber/kraken/lib/backend/s3backend" | ||
"github.com/uber/kraken/lib/backend/sqlbackend" | ||
"github.com/uber/kraken/lib/backend/testfs" | ||
"github.com/uber/kraken/utils/log" | ||
"gopkg.in/yaml.v2" | ||
) | ||
|
||
type factory struct{} | ||
|
||
func (f *factory) Name() string { | ||
return "shadow" | ||
} | ||
|
||
func (f *factory) Create( | ||
confRaw interface{}, authConfRaw interface{}) (backend.Client, error) { | ||
|
||
confBytes, err := yaml.Marshal(confRaw) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal shadow config: %v", err) | ||
} | ||
|
||
var config Config | ||
if err := yaml.Unmarshal(confBytes, &config); err != nil { | ||
return nil, fmt.Errorf("unmarshal shadow config: %v", err) | ||
} | ||
return NewClient(config, authConfRaw) | ||
} | ||
|
||
// Client implements a backend.Client for shadow mode. See the README for full details on what shadow mode means. | ||
type Client struct { | ||
cfg Config | ||
active backend.Client | ||
shadow backend.Client | ||
} | ||
|
||
// Option allows setting optional Client parameters. | ||
type Option func(*Client) | ||
|
||
// NewClient creates a new shadow Client | ||
func NewClient(config Config, authConfRaw interface{}) (*Client, error) { | ||
a, err := getBackendClient(config.ActiveClientConfig, authConfRaw) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
s, err := getBackendClient(config.ShadowClientConfig, authConfRaw) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return &Client{ | ||
cfg: config, | ||
active: a, | ||
shadow: s, | ||
}, nil | ||
} | ||
|
||
func getBackendClient(backendConfig map[string]interface{}, authConfRaw interface{}) (backend.Client, error) { | ||
var name string | ||
var confRaw interface{} | ||
|
||
// TODO Re-implementing all the factory functions is bad form, but because backends.getFactory isn't public there | ||
// is no way to access them currently. Opened https://github.com/uber/kraken/issues/213 to address this. | ||
for name, confRaw = range backendConfig { | ||
switch name { | ||
case "sql": | ||
confBytes, err := yaml.Marshal(confRaw) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal sql config: %s", err) | ||
} | ||
var config sqlbackend.Config | ||
if err := yaml.Unmarshal(confBytes, &config); err != nil { | ||
return nil, fmt.Errorf("unmarshal sql config: %s", err) | ||
} | ||
authConfBytes, err := yaml.Marshal(authConfRaw) | ||
var userAuth sqlbackend.UserAuthConfig | ||
if err := yaml.Unmarshal(authConfBytes, &userAuth); err != nil { | ||
return nil, fmt.Errorf("unmarshal sql auth config: %s", err) | ||
} | ||
return sqlbackend.NewClient(config, userAuth) | ||
case "hdfs": | ||
confBytes, err := yaml.Marshal(confRaw) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal hdfs config: %s", err) | ||
} | ||
|
||
var config hdfsbackend.Config | ||
if err := yaml.Unmarshal(confBytes, &config); err != nil { | ||
return nil, fmt.Errorf("unmarshal hdfs config: %s", err) | ||
} | ||
|
||
return hdfsbackend.NewClient(config) | ||
case "s3": | ||
confBytes, err := yaml.Marshal(confRaw) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal s3 config: %s", err) | ||
} | ||
|
||
var config s3backend.Config | ||
if err := yaml.Unmarshal(confBytes, &config); err != nil { | ||
return nil, fmt.Errorf("unmarshal s3 config: %s", err) | ||
} | ||
authConfBytes, err := yaml.Marshal(authConfRaw) | ||
var userAuth s3backend.UserAuthConfig | ||
if err := yaml.Unmarshal(authConfBytes, &userAuth); err != nil { | ||
return nil, fmt.Errorf("unmarshal s3 auth config: %s", err) | ||
} | ||
|
||
return s3backend.NewClient(config, userAuth) | ||
case "testfs": | ||
confBytes, err := yaml.Marshal(confRaw) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal testfs config: %s", err) | ||
} | ||
|
||
var config testfs.Config | ||
if err := yaml.Unmarshal(confBytes, &config); err != nil { | ||
return nil, fmt.Errorf("unmarshal testfs config: %s", err) | ||
} | ||
|
||
return testfs.NewClient(config) | ||
default: | ||
return nil, fmt.Errorf("unsupported backend type '%s'", name) | ||
} | ||
} | ||
|
||
return nil, nil | ||
} | ||
|
||
func isNotFoundErr(err error) bool { | ||
return err != nil && err == backenderrors.ErrBlobNotFound | ||
} | ||
|
||
// Stat returns a non-nil core.BlobInfo struct if the data exists, an error otherwise. | ||
func (c *Client) Stat(namespace string, name string) (*core.BlobInfo, error) { | ||
// read from both, fail if error from either | ||
res, errA := c.active.Stat(namespace, name) | ||
_, errS := c.shadow.Stat(namespace, name) | ||
|
||
if isNotFoundErr(errA) && isNotFoundErr(errS) { | ||
return nil, backenderrors.ErrBlobNotFound | ||
} | ||
|
||
if errA != nil || errS != nil { | ||
if errA != nil && errS == nil { | ||
log.Errorf("[Stat] error getting %s for namespace '%s' from active backend: %v", name, namespace, errA) | ||
return nil, errA | ||
} | ||
|
||
if errS != nil && errA == nil { | ||
log.Errorf("[Stat] error getting %s for namespace '%s' from shadow backend: %v", name, namespace, errS) | ||
return nil, errS | ||
} | ||
|
||
return nil, fmt.Errorf("[Stat] error in both backends for %s in namespace '%s'. active: '%v', shadow: '%v'", name, namespace, errA, errS) | ||
} | ||
|
||
return res, nil | ||
} | ||
|
||
// Download gets the data from the backend and then writes it to the output writer. | ||
func (c *Client) Download(namespace string, name string, dst io.Writer) error { | ||
err := c.active.Download(namespace, name, dst) | ||
return err | ||
} | ||
|
||
// Upload upserts the data into the backend. | ||
func (c *Client) Upload(namespace string, name string, src io.Reader) error { | ||
rs, ok := src.(io.ReadSeeker) | ||
if !ok { | ||
return errors.New("refusing upload: src does not implement io.Seeker") | ||
} | ||
|
||
// write to both, fail if write fails for any | ||
err := c.active.Upload(namespace, name, rs) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Need to rewind the ReadSeeker here before the second upload | ||
if _, err := rs.Seek(0, io.SeekStart); err != nil { | ||
return err | ||
} | ||
|
||
err = c.shadow.Upload(namespace, name, rs) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// List lists names with start with prefix. | ||
func (c *Client) List(prefix string, opts ...backend.ListOption) (*backend.ListResult, error) { | ||
res, err := c.active.List(prefix, opts...) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return res, nil | ||
} |
Oops, something went wrong.