From ccf9ef2e2c0462f68caf0698ffca538e89092cb4 Mon Sep 17 00:00:00 2001 From: Rafael Chacon Date: Thu, 8 Nov 2018 16:47:09 +0000 Subject: [PATCH 1/3] Do not reuse a consul lock * When you lose a consul lock, you can't reuse that lock. Existent code does not does that and should a vtctld master lose a lock, it gets stuck in a state where it can't never use that lock again and you will need to restart the process. Signed-off-by: Rafael Chacon --- go/vt/topo/consultopo/election.go | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/go/vt/topo/consultopo/election.go b/go/vt/topo/consultopo/election.go index bb4cace7e58..c36c9d8d6af 100644 --- a/go/vt/topo/consultopo/election.go +++ b/go/vt/topo/consultopo/election.go @@ -28,19 +28,8 @@ import ( // NewMasterParticipation is part of the topo.Server interface func (s *Server) NewMasterParticipation(name, id string) (topo.MasterParticipation, error) { - // Create the lock here. - electionPath := path.Join(s.root, electionsPath, name) - l, err := s.client.LockOpts(&api.LockOptions{ - Key: electionPath, - Value: []byte(id), - }) - if err != nil { - return nil, err - } - return &consulMasterParticipation{ s: s, - lock: l, name: name, id: id, stop: make(chan struct{}), @@ -56,9 +45,6 @@ type consulMasterParticipation struct { // s is our parent consul topo Server s *Server - // lock is the *api.Lock structure we're going to use. - lock *api.Lock - // name is the name of this MasterParticipation name string @@ -74,6 +60,16 @@ type consulMasterParticipation struct { // WaitForMastership is part of the topo.MasterParticipation interface. func (mp *consulMasterParticipation) WaitForMastership() (context.Context, error) { + + electionPath := path.Join(mp.s.root, electionsPath, mp.name) + l, err := mp.s.client.LockOpts(&api.LockOptions{ + Key: electionPath, + Value: []byte(mp.id), + }) + if err != nil { + return nil, err + } + // If Stop was already called, mp.done is closed, so we are interrupted. select { case <-mp.done: @@ -82,7 +78,7 @@ func (mp *consulMasterParticipation) WaitForMastership() (context.Context, error } // Try to lock until mp.stop is closed. - lost, err := mp.lock.Lock(mp.stop) + lost, err := l.Lock(mp.stop) if err != nil { // We can't lock. See if it was because we got canceled. select { @@ -105,7 +101,7 @@ func (mp *consulMasterParticipation) WaitForMastership() (context.Context, error // so the running process is not thinking it // is the master any more, then we unlock. lockCancel() - if err := mp.lock.Unlock(); err != nil { + if err := l.Unlock(); err != nil { log.Errorf("master election(%v) Unlock failed: %v", mp.name, err) } close(mp.done) From b3a9459de226cbfdd84ca123f1e55de115572b0d Mon Sep 17 00:00:00 2001 From: Rafael Chacon Date: Fri, 7 Dec 2018 14:52:35 -0800 Subject: [PATCH 2/3] Calls Unlock explicitly after potetiantly losing it Signed-off-by: Rafael Chacon --- go/vt/topo/consultopo/election.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/go/vt/topo/consultopo/election.go b/go/vt/topo/consultopo/election.go index c36c9d8d6af..5fa9f5eff22 100644 --- a/go/vt/topo/consultopo/election.go +++ b/go/vt/topo/consultopo/election.go @@ -94,8 +94,11 @@ func (mp *consulMasterParticipation) WaitForMastership() (context.Context, error go func() { select { case <-lost: - // We lost the lock, nothing to do but lockCancel(). lockCancel() + // We could have lost the lock. Per consul API, explicitly call Unlock to make sure that session will not be renewed. + if err := l.Unlock(); err != nil { + log.Errorf("master election(%v) Unlock failed: %v", mp.name, err) + } case <-mp.stop: // Stop was called. We stop the context first, // so the running process is not thinking it From dc2cbe0e6a47486d51535f0d1508c598bd0b1740 Mon Sep 17 00:00:00 2001 From: Rafael Chacon Date: Fri, 7 Dec 2018 15:34:14 -0800 Subject: [PATCH 3/3] Fix typo in comment Signed-off-by: Rafael Chacon --- go/vt/topo/consultopo/election.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/topo/consultopo/election.go b/go/vt/topo/consultopo/election.go index 5fa9f5eff22..bacbbac8d58 100644 --- a/go/vt/topo/consultopo/election.go +++ b/go/vt/topo/consultopo/election.go @@ -89,7 +89,7 @@ func (mp *consulMasterParticipation) WaitForMastership() (context.Context, error return nil, err } - // We have the lock, keep mastership until we loose it. + // We have the lock, keep mastership until we lose it. lockCtx, lockCancel := context.WithCancel(context.Background()) go func() { select {