diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6ab38a4ddb1..1a2c5efe5df 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -31,7 +31,7 @@ go.sum @harshit-gangal @mattlord @rohit-nayak-ps @systay @frouioui /go/test/endtoend/transaction @harshit-gangal @systay @frouioui /go/test/endtoend/*throttler* @shlomi-noach @mattlord @timvaillancourt /go/test/endtoend/vtgate @harshit-gangal @systay @frouioui -/go/test/endtoend/vtorc @shlomi-noach @timvaillancourt +/go/test/endtoend/vtorc @mattlord @shlomi-noach @timvaillancourt /go/tools/ @frouioui @systay /go/vt/dbconnpool @harshit-gangal @mattlord /go/vt/discovery @frouioui @@ -63,7 +63,7 @@ go.sum @harshit-gangal @mattlord @rohit-nayak-ps @systay @frouioui /go/vt/vtgate/planbuilder @harshit-gangal @systay @frouioui @arthurschreiber /go/vt/vtgate/*vstream* @rohit-nayak-ps @mattlord @shlomi-noach @beingnoble03 /go/vt/vtgate/evalengine @dbussink @systay -/go/vt/vtorc @shlomi-noach @timvaillancourt +/go/vt/vtorc @mattlord @shlomi-noach @timvaillancourt /go/vt/vttablet/*conn* @harshit-gangal @systay /go/vt/vttablet/endtoend @harshit-gangal @mattlord @rohit-nayak-ps @systay /go/vt/vttablet/grpc* @rohit-nayak-ps @shlomi-noach @harshit-gangal diff --git a/go/test/endtoend/vtorc/general/vtorc_test.go b/go/test/endtoend/vtorc/general/vtorc_test.go index e9e48dfd2fa..4d3088224c7 100644 --- a/go/test/endtoend/vtorc/general/vtorc_test.go +++ b/go/test/endtoend/vtorc/general/vtorc_test.go @@ -502,6 +502,68 @@ func TestSemiSync(t *testing.T) { } } +func TestSemiSync_NoAckers(t *testing.T) { + // stop any vtorc instance running due to a previous test. 
+ utils.StopVTOrcs(t, clusterInfo) + defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 3, 0, nil, cluster.VTOrcConfiguration{ + PreventCrossCellFailover: true, + }, 1, "") + defer func() { + utils.StopVTOrcs(t, clusterInfo) + clusterInfo.ClusterInstance.Teardown() + }() + keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + + // find primary from topo + curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) + assert.NotNil(t, curPrimary, "should have elected a primary") + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, keyspace.Name, shard0.Name, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + + // find the replica tablet + replicas := make([]*cluster.Vttablet, 0) + for _, tablet := range shard0.Vttablets { + if tablet.Alias != curPrimary.Alias { + replicas = append(replicas, tablet) + } + } + assert.NotEmpty(t, replicas, "did not find replica tablets") + + // check that the replication is setup correctly before we failover + utils.CheckReplication(t, clusterInfo, curPrimary, shard0.Vttablets, 10*time.Second) + + // Make the replica vttablet unavailable and delete from topo so the PRIMARY cannot apply semi-sync + for _, replica := range replicas { + replica.VttabletProcess.Kill() + out, err := clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("DeleteTablets", replica.Alias) + require.NoError(t, err, out) + } + + // Enable semi-sync durability policy + out, err := clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspace.Name, "--durability-policy=semi_sync") + require.NoError(t, err, out) + + // Wait for no detected PrimarySemiSyncMustBeSet problem, and no FixPrimary recovery, because we have no ackers + time.Sleep(time.Second * 
10) + utils.WaitForDetectedProblems(t, vtOrcProcess, + string(inst.PrimarySemiSyncMustBeSet), + curPrimary.Alias, + keyspace.Name, + shard0.Name, + 0, + ) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.FixPrimaryRecoveryName, keyspace.Name, shard0.Name, 0) + + // Start up the replica vttablets again, then wait for the FixPrimary recovery + for _, replica := range replicas { + require.NoError(t, replica.VttabletProcess.Setup()) + } + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.FixPrimaryRecoveryName, keyspace.Name, shard0.Name, 1) +} + // TestVTOrcWithPrs tests that VTOrc works fine even when PRS is called from vtctld func TestVTOrcWithPrs(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) diff --git a/go/vt/vtorc/inst/analysis.go b/go/vt/vtorc/inst/analysis.go index c2f5e8ae85a..be6c558a908 100644 --- a/go/vt/vtorc/inst/analysis.go +++ b/go/vt/vtorc/inst/analysis.go @@ -21,6 +21,7 @@ import ( "time" topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" "vitess.io/vitess/go/vt/vtorc/config" ) @@ -122,6 +123,7 @@ type DetectionAnalysis struct { SemiSyncReplicaEnabled bool SemiSyncBlocked bool CountSemiSyncReplicasEnabled uint + CountValidSemiSyncReplicatingReplicas uint CountLoggingReplicas uint CountStatementBasedLoggingReplicas uint CountMixedBasedLoggingReplicas uint @@ -148,6 +150,16 @@ func (detectionAnalysis *DetectionAnalysis) MarshalJSON() ([]byte, error) { return json.Marshal(i) } +// hasMinSemiSyncAckers returns true if there are a minimum number of semi-sync ackers enabled and replicating. +// True is always returned if the durability policy does not require semi-sync ackers (e.g. "none"). This gives +// a useful signal of whether it is safe to enable semi-sync without risk of stalling ongoing PRIMARY writes.
+func hasMinSemiSyncAckers(durabler policy.Durabler, primary *topodatapb.Tablet, analysis *DetectionAnalysis) bool { + if durabler == nil || analysis == nil { + return false + } + return int(analysis.CountValidSemiSyncReplicatingReplicas) >= durabler.SemiSyncAckers(primary) +} + // ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time // between last_attempted_check to last_checked before we consider the instance as invalid. func ValidSecondsFromSeenToLastAttemptedCheck() uint { diff --git a/go/vt/vtorc/inst/analysis_dao.go b/go/vt/vtorc/inst/analysis_dao.go index e0bcd0a2709..7d038ea061d 100644 --- a/go/vt/vtorc/inst/analysis_dao.go +++ b/go/vt/vtorc/inst/analysis_dao.go @@ -191,6 +191,15 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi ), 0 ) AS count_valid_semi_sync_replicas, + IFNULL( + SUM( + replica_instance.last_checked <= replica_instance.last_seen + AND replica_instance.replica_io_running != 0 + AND replica_instance.replica_sql_running != 0 + AND replica_instance.semi_sync_replica_enabled != 0 + ), + 0 + ) AS count_valid_semi_sync_replicating_replicas, IFNULL( SUM( replica_instance.log_bin @@ -345,6 +354,7 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi a.SemiSyncBlocked = m.GetBool("semi_sync_blocked") a.SemiSyncReplicaEnabled = m.GetBool("semi_sync_replica_enabled") a.CountSemiSyncReplicasEnabled = m.GetUint("count_semi_sync_replicas") + a.CountValidSemiSyncReplicatingReplicas = m.GetUint("count_valid_semi_sync_replicating_replicas") // countValidSemiSyncReplicasEnabled := m.GetUint("count_valid_semi_sync_replicas") a.SemiSyncPrimaryWaitForReplicaCount = m.GetUint("semi_sync_primary_wait_for_replica_count") a.SemiSyncPrimaryClients = m.GetUint("semi_sync_primary_clients") @@ -447,7 +457,7 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi a.Analysis = PrimaryIsReadOnly a.Description = "Primary is read-only" // - case 
a.IsClusterPrimary && policy.SemiSyncAckers(ca.durability, tablet) != 0 && !a.SemiSyncPrimaryEnabled: + case a.IsClusterPrimary && policy.SemiSyncAckers(ca.durability, tablet) > 0 && hasMinSemiSyncAckers(ca.durability, tablet, a) && !a.SemiSyncPrimaryEnabled: a.Analysis = PrimarySemiSyncMustBeSet a.Description = "Primary semi-sync must be set" // diff --git a/go/vt/vtorc/inst/analysis_dao_test.go b/go/vt/vtorc/inst/analysis_dao_test.go index 0ccc08fd4c8..a21ba6bc26a 100644 --- a/go/vt/vtorc/inst/analysis_dao_test.go +++ b/go/vt/vtorc/inst/analysis_dao_test.go @@ -63,7 +63,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ClusterHasNoPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -81,7 +81,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimaryTabletDeleted", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -100,7 +100,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "StalledDiskPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -124,7 +124,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimarySemiSyncBlocked", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -153,7 +153,7 @@ func TestGetDetectionAnalysisDecision(t 
*testing.T) { name: "LockedSemiSync", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -182,7 +182,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "DeadPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -205,7 +205,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "DeadPrimaryWithoutReplicas", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -226,7 +226,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "DeadPrimaryAndReplicas", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -247,7 +247,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "DeadPrimaryAndSomeReplicas", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -270,7 +270,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimaryHasPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", 
@@ -292,7 +292,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimaryIsReadOnly", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -315,7 +315,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimaryCurrentTypeMismatch", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -337,7 +337,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "Unknown tablet type shouldn't run the mismatch recovery analysis", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -362,7 +362,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimarySemiSyncMustNotBeSet", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -385,7 +385,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "PrimarySemiSyncMustBeSet", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -393,13 +393,14 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { MysqlHostname: "localhost", MysqlPort: 6709, }, - DurabilityPolicy: policy.DurabilitySemiSync, - LastCheckValid: 1, - CountReplicas: 4, - 
CountValidReplicas: 4, - IsPrimary: 1, - SemiSyncPrimaryEnabled: 0, - CurrentTabletType: int(topodatapb.TabletType_PRIMARY), + DurabilityPolicy: policy.DurabilitySemiSync, + LastCheckValid: 1, + CountReplicas: 4, + CountValidReplicas: 4, + CountValidSemiSyncReplicatingReplicas: 4, + IsPrimary: 1, + SemiSyncPrimaryEnabled: 0, + CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }}, keyspaceWanted: "ks", shardWanted: "0", @@ -408,7 +409,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "NotConnectedToPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -427,7 +428,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -446,7 +447,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ReplicaIsWritable", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -465,7 +466,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -475,7 +476,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { }, DurabilityPolicy: policy.DurabilityNone, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + 
Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 0, @@ -487,7 +488,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ConnectedToWrongPrimary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -506,7 +507,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -516,7 +517,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { }, DurabilityPolicy: policy.DurabilityNone, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 102}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 102}, }, LastCheckValid: 1, ReadOnly: 1, @@ -528,7 +529,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ReplicationStopped", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -547,7 +548,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -557,7 +558,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { }, DurabilityPolicy: policy.DurabilityNone, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: 
&topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 1, @@ -570,7 +571,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "No recoveries on drained tablets", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -589,7 +590,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -599,7 +600,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { }, DurabilityPolicy: policy.DurabilityNone, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 1, @@ -612,7 +613,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ReplicaMisconfigured", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -631,7 +632,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -641,7 +642,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { }, DurabilityPolicy: policy.DurabilityNone, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: 
&topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 1, @@ -656,7 +657,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ReplicaSemiSyncMustBeSet", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -676,7 +677,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -685,7 +686,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { MysqlPort: 6709, }, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, DurabilityPolicy: policy.DurabilitySemiSync, LastCheckValid: 1, @@ -699,7 +700,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ReplicaSemiSyncMustNotBeSet", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -718,7 +719,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -727,7 +728,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { MysqlPort: 6709, }, PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: 
"zone1", Uid: 101}, }, DurabilityPolicy: policy.DurabilityNone, LastCheckValid: 1, @@ -741,7 +742,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "SnapshotKeyspace", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -761,7 +762,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "EmptyDurabilityPolicy", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -781,7 +782,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "Empty database_instance table", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -801,7 +802,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -819,7 +820,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "DeadPrimary when VTOrc is starting up", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -831,7 +832,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { IsInvalid: 1, }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, 
+ Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -843,7 +844,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { ReplicationStopped: 1, }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 103}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 103}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -861,7 +862,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "Invalid Primary", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -879,7 +880,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ErrantGTID", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -898,7 +899,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -909,7 +910,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { DurabilityPolicy: policy.DurabilityNone, ErrantGTID: "some errant GTID", PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 1, @@ -921,7 +922,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { name: "ErrantGTID on a non-replica", info: []*test.InfoForRecoveryAnalysis{{ TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + 
Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -940,7 +941,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { CurrentTabletType: int(topodatapb.TabletType_PRIMARY), }, { TabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 100}, Hostname: "localhost", Keyspace: "ks", Shard: "0", @@ -951,7 +952,7 @@ func TestGetDetectionAnalysisDecision(t *testing.T) { DurabilityPolicy: policy.DurabilityNone, ErrantGTID: "some errant GTID", PrimaryTabletInfo: &topodatapb.Tablet{ - Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101}, + Alias: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101}, }, LastCheckValid: 1, ReadOnly: 1, diff --git a/go/vt/vtorc/inst/analysis_test.go b/go/vt/vtorc/inst/analysis_test.go new file mode 100644 index 00000000000..110998ed14d --- /dev/null +++ b/go/vt/vtorc/inst/analysis_test.go @@ -0,0 +1,70 @@ +/* +Copyright 2025 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package inst + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" +) + +func TestHasMinSemiSyncAckers(t *testing.T) { + durablerNone, _ := policy.GetDurabilityPolicy("none") + durablerCrossCell, _ := policy.GetDurabilityPolicy("cross_cell") + tablet := &topodatapb.Tablet{Keyspace: t.Name(), Shard: "-"} + + testCases := []struct { + name string + durabler policy.Durabler + analysis *DetectionAnalysis + expect bool + }{ + { + name: "durability policy none", + analysis: &DetectionAnalysis{ + CountValidSemiSyncReplicatingReplicas: 0, + }, + durabler: durablerNone, + expect: true, + }, + { + name: "durability policy cross_cell without min ackers", + durabler: durablerCrossCell, + analysis: &DetectionAnalysis{ + CountValidSemiSyncReplicatingReplicas: 0, + }, + expect: false, + }, + { + name: "durability policy cross_cell with min ackers", + durabler: durablerCrossCell, + analysis: &DetectionAnalysis{ + CountValidSemiSyncReplicatingReplicas: uint(durablerCrossCell.SemiSyncAckers(tablet)), + }, + expect: true, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + assert.Equal(t, testCase.expect, hasMinSemiSyncAckers(testCase.durabler, tablet, testCase.analysis)) + }) + } +} diff --git a/go/vt/vtorc/test/recovery_analysis.go b/go/vt/vtorc/test/recovery_analysis.go index 64505659ca5..3e542d31412 100644 --- a/go/vt/vtorc/test/recovery_analysis.go +++ b/go/vt/vtorc/test/recovery_analysis.go @@ -69,6 +69,7 @@ type InfoForRecoveryAnalysis struct { SemiSyncReplicaEnabled int CurrentTabletType int CountSemiSyncReplicasEnabled uint + CountValidSemiSyncReplicatingReplicas uint CountLoggingReplicas uint CountStatementBasedLoggingReplicas uint CountMixedBasedLoggingReplicas uint @@ -100,6 +101,7 @@ func (info *InfoForRecoveryAnalysis) ConvertToRowMap() sqlutils.RowMap { rowMap["count_replicas"] = 
sqlutils.CellData{String: fmt.Sprintf("%v", info.CountReplicas), Valid: true} rowMap["count_row_based_logging_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountRowBasedLoggingReplicas), Valid: true} rowMap["count_semi_sync_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountSemiSyncReplicasEnabled), Valid: true} + rowMap["count_valid_semi_sync_replicating_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidSemiSyncReplicatingReplicas), Valid: true} rowMap["count_statement_based_logging_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountStatementBasedLoggingReplicas), Valid: true} rowMap["count_valid_binlog_server_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidBinlogServerReplicas), Valid: true} rowMap["count_valid_oracle_gtid_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidOracleGTIDReplicas), Valid: true}