-
Notifications
You must be signed in to change notification settings - Fork 2
feat: implement suspected #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
4d53597
c29c510
9e5851d
f3dbd00
34a6da2
fe50f95
acc833d
3fcb0b7
94f5abb
7c4f849
e0df1ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,21 +17,28 @@ func (n *Node) handleGossip(msg *gossip.Message, addr string) { | |
|
|
||
| slog.Debug("Received Message", "message", *msg) | ||
|
|
||
| n.mu.Lock() | ||
| defer n.mu.Unlock() | ||
|
|
||
| if msg.Payload != nil { | ||
| n.handlePayload(msg.Payload, msg.SourceId) | ||
| } | ||
|
|
||
| switch msg.Type { | ||
| case gossip.Ping: | ||
| n.handlePing(msg, addr) | ||
| n.handlePing(msg) | ||
| case gossip.PingReq: | ||
| n.handlePingReq(msg) | ||
| case gossip.PingAck: | ||
| n.handlePingAck(msg) | ||
| } | ||
| } | ||
|
|
||
| func (n *Node) handlePing(msg *gossip.Message, addr string) { | ||
| func (n *Node) handlePing(msg *gossip.Message) { | ||
| peerBody, ok := n.peers[msg.SourceId] | ||
| if !ok { | ||
| return | ||
| } | ||
| payload := n.removeGossip() | ||
| message := gossip.NewMessage( | ||
| gossip.PingAck, | ||
|
|
@@ -40,7 +47,7 @@ func (n *Node) handlePing(msg *gossip.Message, addr string) { | |
| msg.OriginId, | ||
| payload, | ||
| ) | ||
| n.sendGossip(message, addr) | ||
| n.sendGossip(message, peerBody.Addr) | ||
| } | ||
|
|
||
| func (n *Node) handlePingReq(msg *gossip.Message) { | ||
|
|
@@ -60,23 +67,26 @@ func (n *Node) handlePingReq(msg *gossip.Message) { | |
| } | ||
|
|
||
| func (n *Node) handlePingAck(msg *gossip.Message) { | ||
| // when a ping ack for suspected node is received | ||
| // we stop the ping req timeout and reset the suspected peer | ||
| if msg.OriginId == n.id && n.suspectPeer == msg.SubjectId { | ||
| if n.timeout != nil { | ||
| n.timeout.Stop() | ||
| n.timeout = nil | ||
| // handle ack at node that requested it | ||
| if msg.OriginId == n.id { | ||
| if n.targetPeer == msg.SubjectId { | ||
| if n.timeout != nil { | ||
| n.timeout.Stop() | ||
| n.timeout = nil | ||
| } | ||
| n.targetPeer = "" | ||
| } | ||
| n.suspectPeer = "" | ||
| return | ||
| } | ||
|
|
||
| // handle forwarding ack when ping req | ||
| peerBody, ok := n.peers[msg.OriginId] | ||
| if !ok { | ||
| return | ||
| } | ||
| payload := n.removeGossip() | ||
| message := gossip.NewMessage( | ||
| gossip.Ping, | ||
| gossip.PingAck, | ||
| msg.SubjectId, | ||
| n.id, | ||
| msg.OriginId, | ||
|
|
@@ -96,6 +106,8 @@ func (n *Node) handlePayload(msg *gossip.MessagePayload, sourceId string) { | |
| switch updatedPeer.Status { | ||
| case peer.Alive: | ||
| n.handleAliveStatus(id, updatedPeer, sourceId) | ||
| case peer.Suspected: | ||
| n.handleSuspectedStatus(id, updatedPeer) | ||
| case peer.Dead: | ||
| n.handleDeadStatus(id, updatedPeer) | ||
| } | ||
|
|
@@ -125,7 +137,7 @@ func (n *Node) handleAliveStatus(id string, updatedPeer peer.Peer, sourceId stri | |
|
|
||
| // determine whether message is stale or not | ||
| // update peer status if not stale and propagate update to other nodes | ||
| shouldUpdate := !ok || (updatedPeer.Incarnation > currentPeer.Incarnation) | ||
| shouldUpdate := !ok || updatedPeer.Supersedes(currentPeer) | ||
| if shouldUpdate { | ||
| n.setPeer(id, updatedPeer) | ||
| peers := map[string]peer.Peer{ | ||
|
|
@@ -136,18 +148,49 @@ func (n *Node) handleAliveStatus(id string, updatedPeer peer.Peer, sourceId stri | |
| } | ||
| } | ||
|
|
||
| func (n *Node) handleDeadStatus(id string, updatedPeer peer.Peer) { | ||
| func (n *Node) handleSuspectedStatus(id string, updatedPeer peer.Peer) { | ||
| // drop payloads about yourself | ||
| if id == n.id { | ||
| // refute updates saying you are suspected | ||
| if updatedPeer.Incarnation == n.incarnation { | ||
| n.incarnation += 1 | ||
| peers := map[string]peer.Peer{ | ||
| n.id: { | ||
| Addr: n.addr, | ||
| Status: peer.Alive, | ||
| Incarnation: n.incarnation, | ||
| }, | ||
| } | ||
| payload := gossip.NewPayload(peers, true) | ||
| n.addGossip(payload) | ||
| } | ||
| return | ||
| } | ||
|
|
||
| // determine whether message is stale or not | ||
| // update peer status if not stale and propagate update to other nodes | ||
| currentPeer, ok := n.peers[id] | ||
| shouldUpdate := !ok || updatedPeer.Supersedes(currentPeer) | ||
| if shouldUpdate { | ||
| n.setPeer(id, updatedPeer) | ||
| peers := map[string]peer.Peer{ | ||
| id: updatedPeer, | ||
| } | ||
| payload := gossip.NewPayload(peers, true) | ||
| n.addGossip(payload) | ||
| } | ||
| } | ||
|
|
||
| func (n *Node) handleDeadStatus(id string, updatedPeer peer.Peer) { | ||
| // drop payloads about yourself | ||
| if id == n.id { | ||
|
||
| return | ||
| } | ||
|
|
||
| // determine whether message is stale or not | ||
| // update peer status if not stale and propagate update to other nodes | ||
| // dead status has precedence over alive messages for equal incarnation | ||
| shouldUpdate := !ok || (updatedPeer.Incarnation > currentPeer.Incarnation || | ||
| updatedPeer.Incarnation == currentPeer.Incarnation && currentPeer.Status == peer.Alive) | ||
| currentPeer, ok := n.peers[id] | ||
| shouldUpdate := !ok || updatedPeer.Supersedes(currentPeer) | ||
| if shouldUpdate { | ||
| n.setPeer(id, updatedPeer) | ||
| peers := map[string]peer.Peer{ | ||
|
|
@@ -181,7 +224,14 @@ func (n *Node) sendGossip(msg *gossip.Message, addr string) { | |
| } | ||
| } | ||
|
|
||
| func (n *Node) attemptConnectToCluster(addr string) { | ||
| func (n *Node) attemptConnectToCluster(addr string) bool { | ||
| n.mu.Lock() | ||
| defer n.mu.Unlock() | ||
|
|
||
| if len(n.peers) > 0 { | ||
| return true | ||
| } | ||
|
|
||
| peers := map[string]peer.Peer{ | ||
| n.id: { | ||
| Addr: n.addr, | ||
|
|
@@ -198,15 +248,16 @@ func (n *Node) attemptConnectToCluster(addr string) { | |
| payload, | ||
| ) | ||
| n.sendGossip(message, addr) | ||
|
|
||
| return false | ||
| } | ||
|
|
||
| func (n *Node) ConnectToCluster(addr string, attemptPeriod time.Duration) { | ||
| ticker := time.NewTicker(attemptPeriod) | ||
| for range ticker.C { | ||
| if len(n.peers) > 0 { | ||
| if n.attemptConnectToCluster(addr) { | ||
| break | ||
| } | ||
| n.attemptConnectToCluster(addr) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -244,9 +295,10 @@ func StartGossipListener(node *Node) { | |
| } | ||
|
|
||
// pingerConfig collects the tunables of the gossip pinger. StartGossipPinger
// builds one with default values and then iterates over the pingerOption
// values it was given.
type pingerConfig struct {
	// period defaults to one second in StartGossipPinger.
	// NOTE(review): presumably the interval between ping rounds; the ticker
	// setup is not visible here, confirm.
	period time.Duration

	// pingTimeout is the delay runGossipPing waits after a direct Ping before
	// its timer sends PingReq messages to other peers.
	pingTimeout time.Duration

	// suspectedTimeout is the delay runGossipPing waits after marking a peer
	// Suspected before its timer marks that peer Dead.
	suspectedTimeout time.Duration

	// k is the number of peers requested from getKRandomPeers for PingReq.
	k int
}
|
|
||
| type pingerOption func(*pingerConfig) | ||
|
|
@@ -257,9 +309,15 @@ func WithPeriod(period time.Duration) pingerOption { | |
| } | ||
| } | ||
|
|
||
| func WithTimeout(timeout time.Duration) pingerOption { | ||
| func WithPingTimeout(timeout time.Duration) pingerOption { | ||
| return func(c *pingerConfig) { | ||
| c.timeout = timeout | ||
| c.pingTimeout = timeout | ||
| } | ||
| } | ||
|
|
||
| func WithSuspectedTimeout(timeout time.Duration) pingerOption { | ||
| return func(c *pingerConfig) { | ||
| c.suspectedTimeout = timeout | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -269,11 +327,92 @@ func WithK(k int) pingerOption { | |
| } | ||
| } | ||
|
|
||
| func runGossipPing(node *Node, cfg *pingerConfig) { | ||
| node.mu.Lock() | ||
| defer node.mu.Unlock() | ||
|
|
||
| // propagate SUSPECTED if ALIVE target not been acked since last ping | ||
| if node.targetPeer != "" { | ||
| peerBody, ok := node.peers[node.targetPeer] | ||
| if ok && peerBody.Status == peer.Alive { | ||
| peerBody.Status = peer.Suspected | ||
| node.setPeer(node.targetPeer, peerBody) | ||
| peers := map[string]peer.Peer{ | ||
| node.targetPeer: peerBody, | ||
| } | ||
| payload := gossip.NewPayload(peers, true) | ||
| node.addGossip(payload) | ||
|
|
||
| // set timeout to declare dead if SUSPECTED for long enough | ||
| targetPeer := node.targetPeer | ||
| time.AfterFunc(cfg.suspectedTimeout, func() { | ||
| node.mu.Lock() | ||
| defer node.mu.Unlock() | ||
|
|
||
| peerBody, ok := node.peers[targetPeer] | ||
| if !ok || peerBody.Status != peer.Suspected { | ||
| return | ||
| } | ||
| peerBody.Status = peer.Dead | ||
| node.setPeer(targetPeer, peerBody) | ||
| peers := map[string]peer.Peer{ | ||
| targetPeer: peerBody, | ||
| } | ||
| payload := gossip.NewPayload(peers, true) | ||
| node.addGossip(payload) | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // send ping to new random target peer | ||
| node.targetPeer = node.getRandomPeer() | ||
| peerBody, ok := node.peers[node.targetPeer] | ||
| if !ok { | ||
| return | ||
| } | ||
| payload := node.removeGossip() | ||
| message := gossip.NewMessage( | ||
| gossip.Ping, | ||
| node.targetPeer, | ||
| node.id, | ||
| node.id, | ||
| payload, | ||
| ) | ||
| node.sendGossip(message, peerBody.Addr) | ||
|
|
||
| // send ping req to k random peers after timeout | ||
| targetPeer := node.targetPeer | ||
| node.timeout = time.AfterFunc(cfg.pingTimeout, func() { | ||
| node.mu.Lock() | ||
| defer node.mu.Unlock() | ||
|
|
||
| for _, id := range node.getKRandomPeers(cfg.k) { | ||
| if id == targetPeer { | ||
| continue | ||
| } | ||
| peerBody, ok := node.peers[id] | ||
| if !ok { | ||
| continue | ||
| } | ||
| payload := node.removeGossip() | ||
| message := gossip.NewMessage( | ||
| gossip.PingReq, | ||
| id, | ||
|
||
| node.id, | ||
| node.id, | ||
| payload, | ||
| ) | ||
| node.sendGossip(message, peerBody.Addr) | ||
| } | ||
| }) | ||
| } | ||
|
|
||
| func StartGossipPinger(node *Node, opts ...pingerOption) { | ||
| cfg := &pingerConfig{ | ||
| period: 1 * time.Second, | ||
| timeout: 500 * time.Millisecond, | ||
| k: 3, | ||
| period: 1 * time.Second, | ||
| pingTimeout: 500 * time.Millisecond, | ||
| suspectedTimeout: 3 * time.Second, | ||
| k: 3, | ||
| } | ||
|
|
||
| for _, opt := range opts { | ||
|
|
@@ -284,55 +423,6 @@ func StartGossipPinger(node *Node, opts ...pingerOption) { | |
| defer ticker.Stop() | ||
|
|
||
| for range ticker.C { | ||
| // declare peer dead if has not been acked since last ping | ||
| if node.suspectPeer != "" { | ||
| peerBody, ok := node.peers[node.suspectPeer] | ||
| if ok { | ||
| peerBody.Status = peer.Dead | ||
| peers := map[string]peer.Peer{ | ||
| node.suspectPeer: peerBody, | ||
| } | ||
| payload := gossip.NewPayload(peers, true) | ||
| node.addGossip(payload) | ||
| node.setPeer(node.suspectPeer, peerBody) | ||
| } | ||
| } | ||
|
|
||
| // send ping to new random suspected peer | ||
| payload := node.removeGossip() | ||
| node.suspectPeer = node.getRandomPeer() | ||
| peerBody, ok := node.peers[node.suspectPeer] | ||
| if !ok { | ||
| continue | ||
| } | ||
| message := gossip.NewMessage( | ||
| gossip.Ping, | ||
| node.suspectPeer, | ||
| node.id, | ||
| node.id, | ||
| payload, | ||
| ) | ||
| node.sendGossip(message, peerBody.Addr) | ||
|
|
||
| // send ping req to k random peers after timeout | ||
| node.timeout = time.AfterFunc(cfg.timeout, func() { | ||
| for _, id := range node.getKRandomPeers(cfg.k) { | ||
| if id == node.suspectPeer { | ||
| continue | ||
| } | ||
| peerBody, ok := node.peers[id] | ||
| if !ok { | ||
| continue | ||
| } | ||
| message := gossip.NewMessage( | ||
| gossip.PingReq, | ||
| id, | ||
| node.id, | ||
| node.id, | ||
| payload, | ||
| ) | ||
| node.sendGossip(message, peerBody.Addr) | ||
| } | ||
| }) | ||
| runGossipPing(node, cfg) | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should just send the refutation immediately? I think refuting immediately would resolve the uncertainty raised in the comment on line 186 of this file.