From 5cf80bedaa6d09ea1e872626bdcc3c693e7ded4a Mon Sep 17 00:00:00 2001 From: Ben Hoyt Date: Thu, 29 Aug 2024 12:55:40 +1200 Subject: [PATCH] refactor(state): avoid need for state lock in LatestWarningTime (#493) This means that not every request/response needs to acquire the state lock, as shown by the removal of the custom response type in the /v1/health endpoint. Note that there's a test, TestHealthStateLockNotHeldSuccess, that specifically tests that the state lock is not held during /v1/health requests. If we accidentally introduce locking again, that will fail. Fixes #366. --- internals/daemon/api_health.go | 47 +++--------------------- internals/daemon/daemon.go | 5 +-- internals/overlord/state/notices.go | 28 +++++++++----- internals/overlord/state/notices_test.go | 10 ++++- internals/overlord/state/state.go | 8 +++- internals/overlord/state/state_test.go | 1 - 6 files changed, 41 insertions(+), 58 deletions(-) diff --git a/internals/daemon/api_health.go b/internals/daemon/api_health.go index 529d23337..28eef5e67 100644 --- a/internals/daemon/api_health.go +++ b/internals/daemon/api_health.go @@ -15,7 +15,6 @@ package daemon import ( - "encoding/json" "net/http" "github.com/canonical/x-go/strutil" @@ -35,7 +34,7 @@ func v1Health(c *Command, r *http.Request, _ *UserState) Response { switch level { case plan.UnsetLevel, plan.AliveLevel, plan.ReadyLevel: default: - return healthError(http.StatusBadRequest, `level must be "alive" or "ready"`) + return BadRequest(`level must be "alive" or "ready"`) } names := strutil.MultiCommaSeparatedList(query["names"]) @@ -43,7 +42,7 @@ func v1Health(c *Command, r *http.Request, _ *UserState) Response { checks, err := getChecks(c.d.overlord) if err != nil { logger.Noticef("Cannot fetch checks: %v", err.Error()) - return healthError(http.StatusInternalServerError, "internal server error") + return InternalError("internal server error") } healthy := true @@ -58,43 +57,9 @@ func v1Health(c *Command, r *http.Request, _ *UserState) Response { } } - return SyncResponse(&healthResp{ - Type: ResponseTypeSync, - Status: status, - StatusText: http.StatusText(status), - Result: healthInfo{Healthy: healthy}, + return SyncResponse(&resp{ + Type: ResponseTypeSync, + Status: status, + Result: healthInfo{Healthy: healthy}, }) } - -// Like the resp struct, but without the warning/maintenance fields, so that -// the health endpoint doesn't have to acquire the state lock (resulting in a -// slow response on heavily-loaded systems). -type healthResp struct { - Type ResponseType `json:"type"` - Status int `json:"status-code"` - StatusText string `json:"status,omitempty"` - Result interface{} `json:"result,omitempty"` -} - -func (r *healthResp) ServeHTTP(w http.ResponseWriter, _ *http.Request) { - status := r.Status - bs, err := json.Marshal(r) - if err != nil { - logger.Noticef("Cannot marshal %#v to JSON: %v", *r, err) - bs = nil - status = http.StatusInternalServerError - } - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - w.Write(bs) -} - -func healthError(status int, message string) *healthResp { - return &healthResp{ - Type: ResponseTypeError, - Status: status, - StatusText: http.StatusText(status), - Result: &errorResult{Message: message}, - } -} diff --git a/internals/daemon/daemon.go b/internals/daemon/daemon.go index 65726522a..b39c17975 100644 --- a/internals/daemon/daemon.go +++ b/internals/daemon/daemon.go @@ -248,10 +248,7 @@ func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { rsp.transmitMaintenance(errorKindDaemonRestart, "daemon is stopping to wait for socket activation") } if rsp.Type != ResponseTypeError { - st := c.d.state - st.Lock() - latest := st.LatestWarningTime() - st.Unlock() + latest := c.d.state.LatestWarningTime() rsp.addWarningsToMeta(latest) } } diff --git a/internals/overlord/state/notices.go b/internals/overlord/state/notices.go index 1437bb434..ffeb729ba 100644 --- a/internals/overlord/state/notices.go +++ b/internals/overlord/state/notices.go @@ -270,6 +270,13 @@ func (s *State) AddNotice(userID *uint32, noticeType NoticeType, key string, opt notice.lastData = options.Data notice.repeatAfter = options.RepeatAfter + // Update the latest warning time cache if needed. There's no need to + // actually update atomically here, because the state lock is held. + if notice.noticeType == WarningNotice && notice.lastRepeated.After(s.LatestWarningTime()) { + latestWarningTime := notice.lastRepeated + s.latestWarningTime.Store(&latestWarningTime) + } + if newOrRepeated { s.noticeCond.Broadcast() } @@ -371,18 +378,14 @@ func (s *State) Notices(filter *NoticeFilter) []*Notice { // LatestWarningTime returns the most recent time a warning notice was // repeated, or the zero value if there are no warnings. +// +// The state lock does not need to be held when calling this method. func (s *State) LatestWarningTime() time.Time { - s.reading() - - // TODO(benhoyt): optimise with an in-memory atomic cache when warnings are added (or pruned), - // to avoid having to acquire the state lock on every request - var latest time.Time - for _, notice := range s.notices { - if notice.noticeType == WarningNotice && notice.lastRepeated.After(latest) { - latest = notice.lastRepeated - } + t := s.latestWarningTime.Load() + if t == nil { + return time.Time{} } - return latest + return *t } // Notice returns a single notice by ID, or nil if not found. @@ -415,6 +418,7 @@ func (s *State) flattenNotices(filter *NoticeFilter) []*Notice { func (s *State) unflattenNotices(flat []*Notice) { now := time.Now() s.notices = make(map[noticeKey]*Notice) + var latestWarningTime time.Time for _, n := range flat { if n.expired(now) { continue @@ -422,7 +426,11 @@ func (s *State) unflattenNotices(flat []*Notice) { userID, hasUserID := n.UserID() uniqueKey := noticeKey{hasUserID, userID, n.noticeType, n.key} s.notices[uniqueKey] = n + if n.noticeType == WarningNotice && n.lastRepeated.After(latestWarningTime) { + latestWarningTime = n.lastRepeated + } } + s.latestWarningTime.Store(&latestWarningTime) } // WaitNotices waits for notices that match the filter to exist or occur, diff --git a/internals/overlord/state/notices_test.go b/internals/overlord/state/notices_test.go index f70b90bfd..a069c7cea 100644 --- a/internals/overlord/state/notices_test.go +++ b/internals/overlord/state/notices_test.go @@ -436,6 +436,9 @@ func (s *noticesSuite) TestDeleteExpired(c *C) { st.Lock() defer st.Unlock() + c.Assert(st.NumNotices(), Equals, 0) + c.Assert(st.LatestWarningTime().IsZero(), Equals, true) + old := time.Now().Add(-8 * 24 * time.Hour) addNotice(c, st, nil, state.CustomNotice, "foo.com/w", &state.AddNoticeOptions{ Time: old, @@ -443,13 +446,18 @@ func (s *noticesSuite) TestDeleteExpired(c *C) { addNotice(c, st, nil, state.CustomNotice, "foo.com/x", &state.AddNoticeOptions{ Time: old, }) + addNotice(c, st, nil, state.WarningNotice, "warning!", &state.AddNoticeOptions{ + Time: old, + }) addNotice(c, st, nil, state.CustomNotice, "foo.com/y", nil) time.Sleep(time.Microsecond) addNotice(c, st, nil, state.CustomNotice, "foo.com/z", nil) - c.Assert(st.NumNotices(), Equals, 4) + c.Assert(st.NumNotices(), Equals, 5) + c.Assert(st.LatestWarningTime().Equal(old), Equals, true) st.Prune(time.Now(), 0, 0, 0) c.Assert(st.NumNotices(), Equals, 2) + c.Assert(st.LatestWarningTime().IsZero(), Equals, true) notices := st.Notices(nil) c.Assert(notices, HasLen, 2) diff --git a/internals/overlord/state/state.go b/internals/overlord/state/state.go index 49dad55d7..5d339aabf 100644 --- a/internals/overlord/state/state.go +++ b/internals/overlord/state/state.go @@ -94,7 +94,8 @@ type State struct { notices map[noticeKey]*Notice identities map[string]*Identity - noticeCond *sync.Cond + noticeCond *sync.Cond + latestWarningTime atomic.Pointer[time.Time] modified bool @@ -504,11 +505,16 @@ func (s *State) Prune(startOfOperation time.Time, pruneWait, abortWait time.Dura readyChangesCount++ } + // Prune expired notices, and update the latest warning time cache. + var latestWarningTime time.Time for k, n := range s.notices { if n.expired(now) { delete(s.notices, k) + } else if n.noticeType == WarningNotice && n.lastRepeated.After(latestWarningTime) { + latestWarningTime = n.lastRepeated } } + s.latestWarningTime.Store(&latestWarningTime) NextChange: for _, chg := range changes { diff --git a/internals/overlord/state/state_test.go b/internals/overlord/state/state_test.go index a651e8760..f1877d89d 100644 --- a/internals/overlord/state/state_test.go +++ b/internals/overlord/state/state_test.go @@ -795,7 +795,6 @@ func (ss *stateSuite) TestMethodEntrance(c *C) { func() { st.MarshalJSON() }, func() { st.Prune(time.Now(), time.Hour, time.Hour, 100) }, func() { st.TaskCount() }, - func() { st.LatestWarningTime() }, } for i, f := range reads {