Skip to content
12 changes: 12 additions & 0 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"time"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/vtctl/reparentutil/policy"
"vitess.io/vitess/go/vt/vtorc/config"
)

Expand Down Expand Up @@ -122,6 +123,7 @@ type DetectionAnalysis struct {
SemiSyncReplicaEnabled bool
SemiSyncBlocked bool
CountSemiSyncReplicasEnabled uint
CountValidSemiSyncReplicasReplicating uint
CountLoggingReplicas uint
CountStatementBasedLoggingReplicas uint
CountMixedBasedLoggingReplicas uint
Expand All @@ -148,6 +150,16 @@ func (detectionAnalysis *DetectionAnalysis) MarshalJSON() ([]byte, error) {
return json.Marshal(i)
}

// HasMinSemiSyncAckers reports whether the number of valid semi-sync replicas that are
// currently replicating (analysis.CountValidSemiSyncReplicasReplicating) is at least the
// number of semi-sync ackers the durability policy requires for the given primary.
// A policy that requires no semi-sync ackers (eg: "none") therefore always yields true.
// The result indicates whether semi-sync can be enabled on the PRIMARY without the risk
// of stalling ongoing writes while waiting for acks.
//
// False is returned when either durabler or analysis is nil.
func HasMinSemiSyncAckers(durabler policy.Durabler, primary *topodatapb.Tablet, analysis *DetectionAnalysis) bool {
	// Without a policy there is no requirement to evaluate.
	if durabler == nil {
		return false
	}
	// Without an analysis there is no replica count to compare against.
	if analysis == nil {
		return false
	}

	replicatingAckers := int(analysis.CountValidSemiSyncReplicasReplicating)
	requiredAckers := durabler.SemiSyncAckers(primary)
	return requiredAckers <= replicatingAckers
}

// ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
// between last_attempted_check to last_checked before we consider the instance as invalid.
func ValidSecondsFromSeenToLastAttemptedCheck() uint {
Expand Down
12 changes: 11 additions & 1 deletion go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,15 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi
),
0
) AS count_valid_semi_sync_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.replica_io_running != 0
AND replica_instance.replica_sql_running != 0
AND replica_instance.semi_sync_replica_enabled != 0
),
0
) AS count_valid_semi_sync_replicas_replicating,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could approximate this from two values the GetDetectionAnalysis query already returns, but we can't be certain both counts refer to the exact same instances, so this new value is added.

IFNULL(
SUM(
replica_instance.log_bin
Expand Down Expand Up @@ -345,6 +354,7 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi
a.SemiSyncBlocked = m.GetBool("semi_sync_blocked")
a.SemiSyncReplicaEnabled = m.GetBool("semi_sync_replica_enabled")
a.CountSemiSyncReplicasEnabled = m.GetUint("count_semi_sync_replicas")
a.CountValidSemiSyncReplicasReplicating = m.GetUint("count_valid_semi_sync_replicas_replicating")
// countValidSemiSyncReplicasEnabled := m.GetUint("count_valid_semi_sync_replicas")
a.SemiSyncPrimaryWaitForReplicaCount = m.GetUint("semi_sync_primary_wait_for_replica_count")
a.SemiSyncPrimaryClients = m.GetUint("semi_sync_primary_clients")
Expand Down Expand Up @@ -447,7 +457,7 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi
a.Analysis = PrimaryIsReadOnly
a.Description = "Primary is read-only"
//
case a.IsClusterPrimary && policy.SemiSyncAckers(ca.durability, tablet) != 0 && !a.SemiSyncPrimaryEnabled:
case a.IsClusterPrimary && policy.SemiSyncAckers(ca.durability, tablet) > 0 && HasMinSemiSyncAckers(ca.durability, tablet, a) && !a.SemiSyncPrimaryEnabled:
a.Analysis = PrimarySemiSyncMustBeSet
a.Description = "Primary semi-sync must be set"
//
Expand Down
15 changes: 8 additions & 7 deletions go/vt/vtorc/inst/analysis_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,13 +393,14 @@ func TestGetDetectionAnalysisDecision(t *testing.T) {
MysqlHostname: "localhost",
MysqlPort: 6709,
},
DurabilityPolicy: policy.DurabilitySemiSync,
LastCheckValid: 1,
CountReplicas: 4,
CountValidReplicas: 4,
IsPrimary: 1,
SemiSyncPrimaryEnabled: 0,
CurrentTabletType: int(topodatapb.TabletType_PRIMARY),
DurabilityPolicy: policy.DurabilitySemiSync,
LastCheckValid: 1,
CountReplicas: 4,
CountValidReplicas: 4,
CountValidSemiSyncReplicasReplicating: 4,
IsPrimary: 1,
SemiSyncPrimaryEnabled: 0,
CurrentTabletType: int(topodatapb.TabletType_PRIMARY),
}},
keyspaceWanted: "ks",
shardWanted: "0",
Expand Down
70 changes: 70 additions & 0 deletions go/vt/vtorc/inst/analysis_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
Copyright 2025 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package inst

import (
"testing"

"github.com/stretchr/testify/assert"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/vtctl/reparentutil/policy"
)

// TestHasMinSemiSyncAckers checks HasMinSemiSyncAckers against durability policies that
// do and do not require semi-sync ackers, and against nil inputs.
func TestHasMinSemiSyncAckers(t *testing.T) {
	// Fail fast on a policy lookup error: continuing with a nil durabler would either
	// panic below or silently turn the test cases into nil-guard checks.
	durablerNone, err := policy.GetDurabilityPolicy("none")
	if err != nil {
		t.Fatalf("loading durability policy %q: %v", "none", err)
	}
	durablerCrossCell, err := policy.GetDurabilityPolicy("cross_cell")
	if err != nil {
		t.Fatalf("loading durability policy %q: %v", "cross_cell", err)
	}
	tablet := &topodatapb.Tablet{Keyspace: t.Name(), Shard: "-"}

	testCases := []struct {
		name     string
		durabler policy.Durabler
		analysis *DetectionAnalysis
		expect   bool
	}{
		{
			name:     "nil durabler",
			durabler: nil,
			analysis: &DetectionAnalysis{},
			expect:   false,
		},
		{
			name:     "nil analysis",
			durabler: durablerNone,
			analysis: nil,
			expect:   false,
		},
		{
			name: "durability policy none",
			analysis: &DetectionAnalysis{
				CountValidSemiSyncReplicasReplicating: 0,
			},
			durabler: durablerNone,
			expect:   true,
		},
		{
			name:     "durability policy cross_cell without min ackers",
			durabler: durablerCrossCell,
			analysis: &DetectionAnalysis{
				CountValidSemiSyncReplicasReplicating: 0,
			},
			expect: false,
		},
		{
			name:     "durability policy cross_cell with min ackers",
			durabler: durablerCrossCell,
			analysis: &DetectionAnalysis{
				// Exactly the number of ackers the policy asks for is the boundary case.
				CountValidSemiSyncReplicasReplicating: uint(durablerCrossCell.SemiSyncAckers(tablet)),
			},
			expect: true,
		},
	}

	for _, testCase := range testCases {
		t.Run(testCase.name, func(t *testing.T) {
			assert.Equal(t, testCase.expect, HasMinSemiSyncAckers(testCase.durabler, tablet, testCase.analysis))
		})
	}
}
2 changes: 2 additions & 0 deletions go/vt/vtorc/test/recovery_analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type InfoForRecoveryAnalysis struct {
SemiSyncReplicaEnabled int
CurrentTabletType int
CountSemiSyncReplicasEnabled uint
CountValidSemiSyncReplicasReplicating uint
CountLoggingReplicas uint
CountStatementBasedLoggingReplicas uint
CountMixedBasedLoggingReplicas uint
Expand Down Expand Up @@ -100,6 +101,7 @@ func (info *InfoForRecoveryAnalysis) ConvertToRowMap() sqlutils.RowMap {
rowMap["count_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountReplicas), Valid: true}
rowMap["count_row_based_logging_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountRowBasedLoggingReplicas), Valid: true}
rowMap["count_semi_sync_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountSemiSyncReplicasEnabled), Valid: true}
rowMap["count_valid_semi_sync_replicas_replicating"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidSemiSyncReplicasReplicating), Valid: true}
rowMap["count_statement_based_logging_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountStatementBasedLoggingReplicas), Valid: true}
rowMap["count_valid_binlog_server_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidBinlogServerReplicas), Valid: true}
rowMap["count_valid_oracle_gtid_replicas"] = sqlutils.CellData{String: fmt.Sprintf("%v", info.CountValidOracleGTIDReplicas), Valid: true}
Expand Down
Loading