Skip to content
81 changes: 45 additions & 36 deletions go/cmd/vtbackup/cli/vtbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ var (
initShard string
concurrency = 4
incrementalFromPos string
restoreWithClone bool

// mysqlctld-like flags
mysqlPort = 3306
Expand Down Expand Up @@ -157,7 +158,7 @@ When run periodically for each shard, vtbackup can ensure these configurable pol
* Old backups for the shard are removed.

Whatever system launches vtbackup is responsible for the following:
- Running vtbackup with similar flags that would be used for a vttablet and
- Running vtbackup with similar flags that would be used for a vttablet and
mysqlctld in the target shard to be backed up.

- Provisioning as much disk space for vtbackup as would be given to vttablet.
Expand Down Expand Up @@ -226,6 +227,7 @@ func init() {
utils.SetFlagStringVar(Main.Flags(), &initShard, "init-shard", initShard, "(init parameter) shard to use for this tablet")
Main.Flags().IntVar(&concurrency, "concurrency", concurrency, "(init restore parameter) how many concurrent files to restore at once")
utils.SetFlagStringVar(Main.Flags(), &incrementalFromPos, "incremental-from-pos", incrementalFromPos, "Position, or name of backup from which to create an incremental backup. Default: empty. If given, then this backup becomes an incremental backup from given position or given backup. If value is 'auto', this backup will be taken from the last successful backup position.")
Main.Flags().BoolVar(&restoreWithClone, "restore-with-clone", restoreWithClone, "(init parameter) will perform the restore phase with MySQL CLONE, requires either --clone-from-primary or --clone-from-tablet")

// mysqlctld-like flags
utils.SetFlagIntVar(Main.Flags(), &mysqlPort, "mysql-port", mysqlPort, "MySQL port")
Expand Down Expand Up @@ -457,42 +459,49 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac
return nil
}

phase.Set(phaseNameRestoreLastBackup, int64(1))
defer phase.Set(phaseNameRestoreLastBackup, int64(0))
backupDir := mysqlctl.GetBackupDir(initKeyspace, initShard)
log.Infof("Restoring latest backup from directory %v", backupDir)
restoreAt := time.Now()
params := mysqlctl.RestoreParams{
Cnf: mycnf,
Mysqld: mysqld,
Logger: logutil.NewConsoleLogger(),
Concurrency: concurrency,
HookExtraEnv: extraEnv,
DeleteBeforeRestore: true,
DbName: dbName,
Keyspace: initKeyspace,
Shard: initShard,
Stats: backupstats.RestoreStats(),
MysqlShutdownTimeout: mysqlShutdownTimeout,
}
backupManifest, err := mysqlctl.Restore(ctx, params)
var restorePos replication.Position
switch err {
case nil:
// if err is nil, we expect backupManifest to be non-nil
restorePos = backupManifest.Position
log.Infof("Successfully restored from backup at replication position %v", restorePos)
case mysqlctl.ErrNoBackup:
// There is no backup found, but we may be taking the initial backup of a shard
if !allowFirstBackup {
return errors.New("no backup found; not starting up empty since --initial_backup flag was not enabled")
}
restorePos = replication.Position{}
default:
return fmt.Errorf("can't restore from backup: %v", err)
}
deprecatedDurationByPhase.Set("RestoreLastBackup", int64(time.Since(restoreAt).Seconds()))
phase.Set(phaseNameRestoreLastBackup, int64(0))
if restoreWithClone {
restorePos, err = mysqlctl.CloneFromDonor(ctx, topoServer, mysqld, initKeyspace, initShard)
if err != nil {
return fmt.Errorf("restore with clone failed: %v", err)
}
} else {
phase.Set(phaseNameRestoreLastBackup, int64(1))
defer phase.Set(phaseNameRestoreLastBackup, int64(0))
backupDir := mysqlctl.GetBackupDir(initKeyspace, initShard)
log.Infof("Restoring latest backup from directory %v", backupDir)
restoreAt := time.Now()
params := mysqlctl.RestoreParams{
Cnf: mycnf,
Mysqld: mysqld,
Logger: logutil.NewConsoleLogger(),
Concurrency: concurrency,
HookExtraEnv: extraEnv,
DeleteBeforeRestore: true,
DbName: dbName,
Keyspace: initKeyspace,
Shard: initShard,
Stats: backupstats.RestoreStats(),
MysqlShutdownTimeout: mysqlShutdownTimeout,
}
backupManifest, err := mysqlctl.Restore(ctx, params)
switch err {
case nil:
// if err is nil, we expect backupManifest to be non-nil
restorePos = backupManifest.Position
log.Infof("Successfully restored from backup at replication position %v", restorePos)
case mysqlctl.ErrNoBackup:
// There is no backup found, but we may be taking the initial backup of a shard
if !allowFirstBackup {
return errors.New("no backup found; not starting up empty since --initial_backup flag was not enabled")
}
restorePos = replication.Position{}
default:
return fmt.Errorf("can't restore from backup: %v", err)
}
deprecatedDurationByPhase.Set("RestoreLastBackup", int64(time.Since(restoreAt).Seconds()))
phase.Set(phaseNameRestoreLastBackup, int64(0))
}

// As of MySQL 8.0.21, you can disable redo logging using the ALTER INSTANCE
// DISABLE INNODB REDO_LOG statement. This functionality is intended for
Expand Down
6 changes: 5 additions & 1 deletion go/flags/endtoend/vtbackup.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ When run periodically for each shard, vtbackup can ensure these configurable pol
* Old backups for the shard are removed.

Whatever system launches vtbackup is responsible for the following:
- Running vtbackup with similar flags that would be used for a vttablet and
- Running vtbackup with similar flags that would be used for a vttablet and
mysqlctld in the target shard to be backed up.

- Provisioning as much disk space for vtbackup as would be given to vttablet.
Expand Down Expand Up @@ -68,6 +68,8 @@ Flags:
--builtinbackup-mysqld-timeout duration how long to wait for mysqld to shutdown at the start of the backup. (default 10m0s)
--builtinbackup-progress duration how often to send progress updates when backing up large files. (default 5s)
--ceph-backup-storage-config string Path to JSON config file for ceph backup storage. (default "ceph_backup_config.json")
--clone-from-primary Clone data from the primary tablet in the shard using MySQL CLONE REMOTE instead of restoring from backup. Requires MySQL 8.0.17+. Mutually exclusive with --clone-from-tablet.
--clone-from-tablet string Clone data from this tablet using MySQL CLONE REMOTE instead of restoring from backup (tablet alias, e.g., zone1-123). Requires MySQL 8.0.17+. Mutually exclusive with --clone-from-primary.
--compression-engine-name string compressor engine used for compression. (default "pargzip")
--compression-level int what level to pass to the compressor. (default 1)
--concurrency int (init restore parameter) how many concurrent files to restore at once (default 4)
Expand Down Expand Up @@ -189,6 +191,7 @@ Flags:
--mycnf-slow-log-path string mysql slow query log path
--mycnf-socket-file string mysql socket file
--mycnf-tmp-dir string mysql tmp directory
--mysql-clone-enabled Enable MySQL CLONE plugin and user for backup/replica provisioning (requires MySQL 8.0.17+)
--mysql-port int MySQL port (default 3306)
--mysql-server-version string MySQL server version to advertise. (default "8.4.6-Vitess")
--mysql-shell-backup-location string location where the backup will be stored
Expand All @@ -207,6 +210,7 @@ Flags:
--purge-logs-interval duration how often try to remove old logs (default 1h0m0s)
--remote-operation-timeout duration time to wait for a remote operation (default 15s)
--restart-before-backup Perform a mysqld clean/full restart after applying binlogs, but before taking the backup. Only makes sense to work around xtrabackup bugs.
--restore-with-clone (init parameter) will perform the restore phase with MySQL CLONE, requires either --clone-from-primary or --clone-from-tablet
--s3-backup-aws-endpoint string endpoint of the S3 backend (region must be provided).
--s3-backup-aws-min-partsize int Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size. (default 5242880)
--s3-backup-aws-region string AWS region to use. (default "us-east-1")
Expand Down
164 changes: 164 additions & 0 deletions go/test/endtoend/backup/clone/backup_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
Copyright 2025 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package clone

import (
"os"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/vt/log"
vtutils "vitess.io/vitess/go/vt/utils"
)

// TestCloneBackup runs vtbackup in clone mode against the shard primary and
// then restores a fresh replica from the backup it produced.
//
// Flow:
//  1. start primary and replica1 and elect the primary;
//  2. skip when the MySQL version or the clone plugin is not usable;
//  3. seed two rows, run vtbackup with clone, and expect exactly one backup;
//  4. insert a third row, restore replica2 from the backup, and expect it to
//     end up with all three rows.
func TestCloneBackup(t *testing.T) {
	t.Cleanup(func() { removeBackups(t) })
	t.Cleanup(tearDown)

	// Tablets have to be running before MySQL can be queried.
	for _, tb := range []*cluster.Vttablet{primary, replica1} {
		require.NoError(t, localCluster.InitTablet(tb, keyspaceName, shardName))
		require.NoError(t, tb.VttabletProcess.Setup())
	}

	// Elect the shard primary.
	require.NoError(t, localCluster.VtctldClientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID))

	// Clone support can only be probed once vttablet is up, hence the late skip.
	switch {
	case !mysqlVersionSupportsClone(t, primary):
		t.Skip("Skipping clone test: MySQL version does not support CLONE (requires 8.0.17+)")
	case !clonePluginAvailable(t, primary):
		t.Skip("Skipping clone test: clone plugin not available")
	}

	// Start from a known table state; earlier tests may have left rows behind.
	seedQueries := []string{
		vtInsertTest,
		"TRUNCATE TABLE vt_insert_test",
		"insert into vt_insert_test (msg) values ('clone_test_1')",
		"insert into vt_insert_test (msg) values ('clone_test_2')",
	}
	for _, query := range seedQueries {
		_, queryErr := primary.VttabletProcess.QueryTablet(query, keyspaceName, true)
		require.NoError(t, queryErr)
	}

	// Both seeded rows must be visible on the primary ...
	cluster.VerifyRowsInTablet(t, primary, keyspaceName, 2)

	// ... and, after a short grace period, on the replica.
	time.Sleep(2 * time.Second)
	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2)

	// Run vtbackup, cloning from the primary.
	log.Infof("Starting vtbackup with --clone-from-primary")
	require.NoError(t, vtbackupWithClone(t))

	// Exactly one backup is expected to exist now.
	assert.NotEmpty(t, verifyBackupCount(t, shardKsName, 1))

	// Write a row that is NOT part of the backup.
	_, err := primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('after_backup')", keyspaceName, true)
	require.NoError(t, err)
	cluster.VerifyRowsInTablet(t, primary, keyspaceName, 3)

	// Restore replica2 from the new backup to check that the backup really
	// carries the cloned data.
	log.Infof("Restoring replica2 from backup to verify clone worked")
	require.NoError(t, localCluster.InitTablet(replica2, keyspaceName, shardName))
	restore(t, replica2, "replica", "SERVING")

	// Allow replication to deliver the post-backup row.
	time.Sleep(5 * time.Second)

	// Three rows in total: the two pre-backup rows show the clone-based backup
	// was usable, the third shows replication resumed after the restore.
	cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 3)
	log.Infof("Clone backup verification successful: replica2 has all data")
}

// vtbackupWithClone launches vtbackup for the test shard with the clone flags
// set, so the restore phase clones from the primary instead of reading an
// existing backup. It returns the error reported by StartVtbackup.
//
// A temporary file is created only to obtain a unique path for the MySQL
// socket flag; the path is removed again when the function returns.
func vtbackupWithClone(t *testing.T) error {
	mysqlSocket, err := os.CreateTemp("", "vtbackup_clone_test_mysql.sock")
	require.NoError(t, err)
	defer os.Remove(mysqlSocket.Name())
	// Only the path is needed. Close the handle right away so the file
	// descriptor is not leaked for the rest of the test binary's lifetime.
	require.NoError(t, mysqlSocket.Close())

	extraArgs := []string{
		"--allow_first_backup",
		"--db-credentials-file", dbCredentialFile,
		"--mysql-clone-enabled",
		vtutils.GetFlagVariantForTests("--mysql-socket"), mysqlSocket.Name(),
		// Clone from primary instead of restoring from backup.
		"--restore-with-clone",
		"--clone-from-primary",
		// Clone credentials - use vt_clone user which is created with @'%' host
		// and BACKUP_ADMIN privilege in init_db.sql (no password).
		"--db-clone-user", "vt_clone",
		"--db-clone-password", "",
		"--db-clone-use-ssl=false",
	}

	log.Infof("Starting vtbackup with clone args: %v", extraArgs)
	return localCluster.StartVtbackup(newInitDBFile, false, keyspaceName, shardName, cell, extraArgs...)
}

// verifyBackupCount runs the vtctld "GetBackups" command for shardKsName,
// asserts that the number of non-empty output lines equals expected, and
// returns those lines.
func verifyBackupCount(t *testing.T, shardKsName string, expected int) []string {
	output, err := localCluster.VtctldClientProcess.ExecuteCommandWithOutput("GetBackups", shardKsName)
	require.NoError(t, err)

	// Blank lines in the command output are not backups; drop them.
	var names []string
	for _, entry := range splitLines(output) {
		if entry == "" {
			continue
		}
		names = append(names, entry)
	}

	got := len(names)
	assert.Equalf(t, expected, got, "expected %d backups, got %d", expected, got)
	return names
}

// restore configures the tablet's vttablet process with the given tablet type
// and expected serving status, marks it as supporting backups, and starts it.
// The test fails immediately if Setup returns an error.
func restore(t *testing.T, tablet *cluster.Vttablet, tabletType string, waitForState string) {
	// Start tablet with restore enabled. MySQL is already running from TestMain.
	// Log which tablet is being restored; the previous message formatted only
	// the current time into the "tablet" placeholder.
	log.Infof("restoring tablet %s at %s", tablet.Alias, time.Now())
	tablet.VttabletProcess.ExtraArgs = []string{"--db-credentials-file", dbCredentialFile}
	tablet.VttabletProcess.TabletType = tabletType
	tablet.VttabletProcess.ServingStatus = waitForState
	tablet.VttabletProcess.SupportsBackup = true
	err := tablet.VttabletProcess.Setup()
	require.NoError(t, err)
}

// tearDown walks over primary, replica1 and replica2. For every non-nil tablet
// it tears down the vttablet process (when one is set) and then issues a
// "DeleteTablets --allow-primary" command for the tablet alias. Errors from
// both calls are discarded.
func tearDown() {
	tablets := []*cluster.Vttablet{primary, replica1, replica2}
	for _, tb := range tablets {
		if tb == nil {
			continue
		}
		if tb.VttabletProcess != nil {
			_ = tb.VttabletProcess.TearDown()
		}
		_ = localCluster.VtctldClientProcess.ExecuteCommand("DeleteTablets", "--allow-primary", tb.Alias)
	}
}
3 changes: 1 addition & 2 deletions go/vt/mysqlctl/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ var (
)

func init() {
// TODO: enable these flags for vtbackup.
for _, cmd := range []string{"vttablet" /*, "vtbackup"*/} {
for _, cmd := range []string{"vttablet", "vtbackup"} {
servenv.OnParseFor(cmd, registerCloneFlags)
}
}
Expand Down
6 changes: 6 additions & 0 deletions go/vt/mysqlctl/mysqld.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,19 @@ func init() {
for _, cmd := range []string{"mysqlctl", "mysqlctld", "vtcombo", "vttablet", "vttestserver"} {
servenv.OnParseFor(cmd, registerPoolFlags)
}
for _, cmd := range []string{"mysqlctl", "mysqlctld", "vtcombo", "vttablet", "vttestserver", "vtbackup"} {
servenv.OnParseFor(cmd, registerMySQLDCloneFlags)
}
}

// registerMySQLDFlags binds four package-level settings to flags on fs: the
// pool hostname re-resolution interval, the my.cnf template file, the remote
// mysqlctl socket file, and the replica reconnect retry interval. Each flag
// uses the variable's current value as its default.
func registerMySQLDFlags(fs *pflag.FlagSet) {
	utils.SetFlagDurationVar(
		fs,
		&PoolDynamicHostnameResolution,
		"pool-hostname-resolve-interval",
		PoolDynamicHostnameResolution,
		"if set force an update to all hostnames and reconnect if changed, defaults to 0 (disabled)",
	)
	utils.SetFlagStringVar(
		fs,
		&mycnfTemplateFile,
		"mysqlctl-mycnf-template",
		mycnfTemplateFile,
		"template file to use for generating the my.cnf file during server init",
	)
	utils.SetFlagStringVar(
		fs,
		&socketFile,
		"mysqlctl-socket",
		socketFile,
		"socket file to use for remote mysqlctl actions (empty for local actions)",
	)
	utils.SetFlagDurationVar(
		fs,
		&replicationConnectRetry,
		"replication-connect-retry",
		replicationConnectRetry,
		"how long to wait in between replica reconnect attempts. Only precise to the second.",
	)
}

// registerMySQLDCloneFlags binds mysqlCloneEnabled to the "mysql-clone-enabled"
// boolean flag on fs, using the variable's current value as the default.
func registerMySQLDCloneFlags(fs *pflag.FlagSet) {
	utils.SetFlagBoolVar(
		fs,
		&mysqlCloneEnabled,
		"mysql-clone-enabled",
		mysqlCloneEnabled,
		"Enable MySQL CLONE plugin and user for backup/replica provisioning (requires MySQL 8.0.17+)",
	)
}

Expand Down
Loading