From 63df8e9c8a5cc464b91d3b2fcce2d5d999cc21a9 Mon Sep 17 00:00:00 2001 From: Mohamed Hamza Date: Wed, 24 Dec 2025 19:01:44 -0500 Subject: [PATCH 1/4] Fix ReloadSchema incorrectly using DisableBinlogs value in grpctmclient Fix bug where `ReloadSchema` was incorrectly set to `req.DisableBinlogs` instead of `req.ReloadSchema` in `ExecuteFetchAsDba` and `ExecuteMultiFetchAsDba` Signed-off-by: Mohamed Hamza --- go/vt/vttablet/grpctmclient/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/vttablet/grpctmclient/client.go b/go/vt/vttablet/grpctmclient/client.go index 8fe25bf20f5..143dc4798ff 100644 --- a/go/vt/vttablet/grpctmclient/client.go +++ b/go/vt/vttablet/grpctmclient/client.go @@ -671,7 +671,7 @@ func (client *Client) ExecuteFetchAsDba(ctx context.Context, tablet *topodatapb. DbName: topoproto.TabletDbName(tablet), MaxRows: req.MaxRows, DisableBinlogs: req.DisableBinlogs, - ReloadSchema: req.DisableBinlogs, + ReloadSchema: req.ReloadSchema, DisableForeignKeyChecks: req.DisableForeignKeyChecks, }) if err != nil { @@ -707,7 +707,7 @@ func (client *Client) ExecuteMultiFetchAsDba(ctx context.Context, tablet *topoda DbName: topoproto.TabletDbName(tablet), MaxRows: req.MaxRows, DisableBinlogs: req.DisableBinlogs, - ReloadSchema: req.DisableBinlogs, + ReloadSchema: req.ReloadSchema, DisableForeignKeyChecks: req.DisableForeignKeyChecks, }) if err != nil { From 97e0168714853eae61509feacab5b1d0bf01557e Mon Sep 17 00:00:00 2001 From: Mohamed Hamza Date: Fri, 9 Jan 2026 19:26:11 -0500 Subject: [PATCH 2/4] add reproduction test case Signed-off-by: Mohamed Hamza --- .../backup/vtctlbackup/backup_utils.go | 58 ++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/go/test/endtoend/backup/vtctlbackup/backup_utils.go b/go/test/endtoend/backup/vtctlbackup/backup_utils.go index cdf39408be3..719a6b6d221 100644 --- a/go/test/endtoend/backup/vtctlbackup/backup_utils.go +++ b/go/test/endtoend/backup/vtctlbackup/backup_utils.go @@ -132,7 +132,7 @@ func LaunchCluster(setupType int, streamMode string, stripes int, cDetails *Comp return 1, err } newInitDBFile = path.Join(localCluster.TmpDirectory, "init_db_with_passwords.sql") - err = os.WriteFile(newInitDBFile, []byte(sql), 0666) + err = os.WriteFile(newInitDBFile, []byte(sql), 0o666) if err != nil { return 1, err } @@ -381,6 +381,10 @@ func TestBackup(t *testing.T, setupType int, streamMode string, stripes int, cDe name: "TestPrimaryBackup", method: primaryBackup, }, + { + name: "TestPrimaryRestoreSidecarReplication", + method: primaryRestoreSidecarReplication, + }, { name: "TestPrimaryReplicaSameBackup", method: primaryReplicaSameBackup, @@ -563,6 +567,58 @@ func primaryBackup(t *testing.T) { require.NoError(t, err) } +// primaryRestoreSidecarReplication ensures replication remains healthy after restoring a primary from a backup that +// has a mismatch in _vt.tables relative to existing replicas. +func primaryRestoreSidecarReplication(t *testing.T) { + localCluster.DisableVTOrcRecoveries(t) + defer localCluster.EnableVTOrcRecoveries(t) + + // Step 1: create the table and get initial replication. + verifyInitialReplication(t) + + // Step 2: take a backup before the primary has recorded _vt.tables. + err := localCluster.VtctldClientProcess.ExecuteCommand("Backup", "--allow-primary", primary.Alias) + require.NoError(t, err) + firstBackupTimestamp := time.Now().UTC().Format(mysqlctl.BackupTimestampFormat) + + // // Step 3: make the primary advance so the replica diverges. + // _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) + // require.NoError(t, err) + + // Step 3: bring up a second replica + restoreWaitForBackup(t, "replica", nil, true) + err = replica2.VttabletProcess.WaitForTabletStatusesForTimeout([]string{"SERVING"}, timeout) + require.NoError(t, err) + + // Step 4: Promote the second replica, which initiates a schema reload. A table is now created in _vt.tables. + err = localCluster.VtctldClientProcess.ExecuteCommand("PlannedReparentShard", "--new-primary", replica2.Alias, shardKsName) + require.NoError(t, err) + + // Replica 2 isn't needed anymore + err = localCluster.VtctldClientProcess.ExecuteCommand("DeleteTablets", "--allow-primary", replica2.Alias) + require.NoError(t, err) + err = replica2.VttabletProcess.TearDown() + require.NoError(t, err) + + // Step 5: Restore the primary from the backup we took + err = localCluster.VtctldClientProcess.ExecuteCommand("RestoreFromBackup", "--backup-timestamp", firstBackupTimestamp, primary.Alias) + require.NoError(t, err) + + // Step 6: make the old primary the primary again. This will initiate a schema reload, but since the backup is missing the table + // in _vt.tables, it will create it and replicate it to the replica, which already has the table in its _vt.tables from replica 2. + err = localCluster.VtctldClientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID) + require.NoError(t, err) + + // Validate replication still works + _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test_after_init')", keyspaceName, true) + require.NoError(t, err) + + require.Eventually(t, func() bool { + res, err := replica1.VttabletProcess.QueryTablet("select msg from vt_insert_test where msg = 'test_after_init'", keyspaceName, true) + return err == nil && len(res.Rows) == 1 + }, 1*time.Second, 10*time.Millisecond, "expected replication to be working") +} + // Test a primary and replica from the same backup. // // Check that a replica and primary both restored from the same backup From 77518ecd6798e88d5d9980718aac395c05786bf3 Mon Sep 17 00:00:00 2001 From: Mohamed Hamza Date: Fri, 9 Jan 2026 19:30:01 -0500 Subject: [PATCH 3/4] reload schema before taking backup Signed-off-by: Mohamed Hamza --- go/vt/vttablet/tabletmanager/rpc_backup.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/go/vt/vttablet/tabletmanager/rpc_backup.go b/go/vt/vttablet/tabletmanager/rpc_backup.go index 60444a03545..87636889bb5 100644 --- a/go/vt/vttablet/tabletmanager/rpc_backup.go +++ b/go/vt/vttablet/tabletmanager/rpc_backup.go @@ -102,6 +102,12 @@ func (tm *TabletManager) Backup(ctx context.Context, logger logutil.Logger, req return vterrors.Wrap(err, "failed to execute backup init SQL queries") } + // Reload the schema so that backup takes most up-to-date snapshot of the sidecar database (for example, + // to capture a new table that has been created since the last schema reload). + if err := tm.QueryServiceControl.ReloadSchema(ctx); err != nil { + return vterrors.Wrap(err, "failed to reload schema before backup") + } + // Prevent concurrent backups, and record stats backupMode := backupModeOnline if engine.ShouldDrainForBackup(req) { From 8e237fb8383972180ceebd4d8f52e990e6072f95 Mon Sep 17 00:00:00 2001 From: Mohamed Hamza Date: Sat, 10 Jan 2026 18:43:55 -0500 Subject: [PATCH 4/4] remove fix and properly reset state between tests Signed-off-by: Mohamed Hamza --- .../backup/vtctlbackup/backup_utils.go | 58 +------------------ go/vt/vttablet/tabletmanager/rpc_backup.go | 6 -- 2 files changed, 1 insertion(+), 63 deletions(-) diff --git a/go/test/endtoend/backup/vtctlbackup/backup_utils.go b/go/test/endtoend/backup/vtctlbackup/backup_utils.go index 719a6b6d221..492701345ee 100644 --- a/go/test/endtoend/backup/vtctlbackup/backup_utils.go +++ b/go/test/endtoend/backup/vtctlbackup/backup_utils.go @@ -381,10 +381,6 @@ func TestBackup(t *testing.T, setupType int, streamMode string, stripes int, cDe name: "TestPrimaryBackup", method: primaryBackup, }, - { - name: "TestPrimaryRestoreSidecarReplication", - method: primaryRestoreSidecarReplication, - }, { name: "TestPrimaryReplicaSameBackup", method: primaryReplicaSameBackup, @@ -565,62 +561,10 @@ func primaryBackup(t *testing.T) { _, err = primary.VttabletProcess.QueryTablet("DROP TABLE vt_insert_test", keyspaceName, true) require.NoError(t, err) -} - -// primaryRestoreSidecarReplication ensures replication remains healthy after restoring a primary from a backup that -// has a mismatch in _vt.tables relative to existing replicas. -func primaryRestoreSidecarReplication(t *testing.T) { - localCluster.DisableVTOrcRecoveries(t) - defer localCluster.EnableVTOrcRecoveries(t) - - // Step 1: create the table and get initial replication. - verifyInitialReplication(t) - - // Step 2: take a backup before the primary has recorded _vt.tables. - err := localCluster.VtctldClientProcess.ExecuteCommand("Backup", "--allow-primary", primary.Alias) - require.NoError(t, err) - firstBackupTimestamp := time.Now().UTC().Format(mysqlctl.BackupTimestampFormat) - - // // Step 3: make the primary advance so the replica diverges. - // _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) - // require.NoError(t, err) - - // Step 3: bring up a second replica - restoreWaitForBackup(t, "replica", nil, true) - err = replica2.VttabletProcess.WaitForTabletStatusesForTimeout([]string{"SERVING"}, timeout) - require.NoError(t, err) - - // Step 4: Promote the second replica, which initiates a schema reload. A table is now created in _vt.tables. - err = localCluster.VtctldClientProcess.ExecuteCommand("PlannedReparentShard", "--new-primary", replica2.Alias, shardKsName) - require.NoError(t, err) - // Replica 2 isn't needed anymore - err = localCluster.VtctldClientProcess.ExecuteCommand("DeleteTablets", "--allow-primary", replica2.Alias) - require.NoError(t, err) - err = replica2.VttabletProcess.TearDown() - require.NoError(t, err) - - // Step 5: Restore the primary from the backup we took - err = localCluster.VtctldClientProcess.ExecuteCommand("RestoreFromBackup", "--backup-timestamp", firstBackupTimestamp, primary.Alias) - require.NoError(t, err) - - // Step 6: make the old primary the primary again. This will initiate a schema reload, but since the backup is missing the table - // in _vt.tables, it will create it and replicate it to the replica, which already has the table in its _vt.tables from replica 2. - err = localCluster.VtctldClientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID) - require.NoError(t, err) - - // Validate replication still works - _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test_after_init')", keyspaceName, true) - require.NoError(t, err) - - require.Eventually(t, func() bool { - res, err := replica1.VttabletProcess.QueryTablet("select msg from vt_insert_test where msg = 'test_after_init'", keyspaceName, true) - return err == nil && len(res.Rows) == 1 - }, 1*time.Second, 10*time.Millisecond, "expected replication to be working") + restartPrimaryAndReplica(t) } -// Test a primary and replica from the same backup. -// // Check that a replica and primary both restored from the same backup // can replicate successfully. func primaryReplicaSameBackup(t *testing.T) { diff --git a/go/vt/vttablet/tabletmanager/rpc_backup.go b/go/vt/vttablet/tabletmanager/rpc_backup.go index 87636889bb5..60444a03545 100644 --- a/go/vt/vttablet/tabletmanager/rpc_backup.go +++ b/go/vt/vttablet/tabletmanager/rpc_backup.go @@ -102,12 +102,6 @@ func (tm *TabletManager) Backup(ctx context.Context, logger logutil.Logger, req return vterrors.Wrap(err, "failed to execute backup init SQL queries") } - // Reload the schema so that backup takes most up-to-date snapshot of the sidecar database (for example, - // to capture a new table that has been created since the last schema reload). - if err := tm.QueryServiceControl.ReloadSchema(ctx); err != nil { - return vterrors.Wrap(err, "failed to reload schema before backup") - } - // Prevent concurrent backups, and record stats backupMode := backupModeOnline if engine.ShouldDrainForBackup(req) {