-
Notifications
You must be signed in to change notification settings - Fork 0
shardctrler.migrate() 的非原子性,以及 ChangeConfigTo 在并发配置变更(concurrent configuration changes)下的问题 #2
Copy link
Copy link
Open
Description
在 claimNextOwnership 中:
A 可能因为读取到 Next == new 而继续;B 也可能在 QueryNext 失败后仍成功执行 UpdateConfig(new, "Next") 而继续,导致多个 ctrler 同时获得 ownership。
// claimNextOwnership tries to claim ownership of the "Next" configuration
func (sck *ShardCtrler) claimNextOwnership(old, new *shardcfg.ShardConfig) bool {
nextStr := new.String()
for {
nextValue, nextCfg := sck.QueryNext()
if nextCfg != nil && nextCfg.Num > old.Num {
return new.Num == nextCfg.Num && nextValue == nextStr
}
err := sck.UpdateConfig(new, "Next")
if err == rpc.OK {
return true
}
在 migrate 中:
当前 migrate() 不是原子迁移:FreezeShard 成功后,如果后续步骤失败退出,会留下无法自动恢复的 frozen shard。
for {
if sck.hasConfigApplied(new.Num) {
return false
}
state, err = srcClerk.FreezeShard(move.Shard, new.Num)
if err == rpc.OK {
break
}
if err == rpc.ErrWrongGroup || err == rpc.ErrStaleNum {
return false
}
}
for {
if sck.hasConfigApplied(new.Num) {
return false
}
err = dstClerk.InstallShard(move.Shard, state, new.Num)
if err == rpc.OK {
break
}
if err == rpc.ErrWrongGroup || err == rpc.ErrStaleNum {
return false
}
}
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels