Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow stopping services in "starting" state #503

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions internals/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ services:
// We have to wait for it be in running state.
for i := 0; ; i++ {
if i >= 25 {
c.Fatalf("timed out waiting or service to start")
c.Fatalf("timed out waiting for service to start")
}
d.state.Lock()
change := d.state.Change(rsp.Change)
Expand Down Expand Up @@ -1283,7 +1283,7 @@ services:
// We have to wait for it be in running state for StopRunning to stop it.
for i := 0; ; i++ {
if i >= 25 {
c.Fatalf("timed out waiting or service to start")
c.Fatalf("timed out waiting for service to start")
}
d.state.Lock()
change := d.state.Change(rsp.Change)
Expand Down Expand Up @@ -1317,6 +1317,67 @@ services:
c.Check(tasks[0].Kind(), Equals, "stop")
}

func (s *daemonSuite) TestStopWithinOkayDelay(c *C) {
// Start the daemon.
writeTestLayer(s.pebbleDir, `
services:
test1:
override: replace
command: sleep 10
`)
d := s.newDaemon(c)
err := d.Init()
c.Assert(err, IsNil)
c.Assert(d.Start(), IsNil)

// Start the test service.
payload := bytes.NewBufferString(`{"action": "start", "services": ["test1"]}`)
req, err := http.NewRequest("POST", "/v1/services", payload)
c.Assert(err, IsNil)
rsp := v1PostServices(apiCmd("/v1/services"), req, nil).(*resp)
rec := httptest.NewRecorder()
rsp.ServeHTTP(rec, req)
c.Check(rec.Result().StatusCode, Equals, 202)

// Wait for the change to be in doing state.
for i := 0; ; i++ {
if i >= 10 {
c.Fatalf("timed out waiting for change")
}
d.state.Lock()
change := d.state.Change(rsp.Change)
changeStatus := change.Status()
d.state.Unlock()
if change != nil && changeStatus == state.DoingStatus {
break
}
time.Sleep(20 * time.Millisecond)
}

// Stop the daemon within the okayDelay,
// which should stop services in running state.
err = d.Stop(nil)
c.Assert(err, IsNil)

// Ensure the "stop" change was created, along with its "stop" tasks.
d.state.Lock()
defer d.state.Unlock()
changes := d.state.Changes()
var change *state.Change
for _, ch := range changes {
if ch.Kind() == "stop" {
change = ch
}
}
if change == nil {
c.Fatalf("stop change not found")
}
c.Check(change.Status(), Equals, state.DoneStatus)
tasks := change.Tasks()
c.Assert(tasks, HasLen, 1)
c.Check(tasks[0].Kind(), Equals, "stop")
}

func (s *daemonSuite) TestWritesRequireAdminAccess(c *C) {
for _, cmd := range API {
if cmd.Path == "/v1/notices" {
Expand Down
4 changes: 4 additions & 0 deletions internals/overlord/servstate/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,10 @@ func (s *serviceData) stop() error {
defer s.manager.servicesLock.Unlock()

switch s.state {
case stateStarting:
s.started <- fmt.Errorf("stopped before the %s okay delay", okayDelay)
fallthrough

case stateRunning:
logger.Debugf("Attempting to stop service %q by sending SIGTERM", s.config.Name)
// First send SIGTERM to try to terminate it gracefully.
Expand Down
2 changes: 1 addition & 1 deletion internals/overlord/servstate/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func servicesToStop(m *ServiceManager) ([][]string, error) {
var notStopped []string
for _, name := range services {
s := m.services[name]
if s != nil && (s.state == stateRunning || s.state == stateBackoff) {
if s != nil && (s.state == stateStarting || s.state == stateRunning || s.state == stateBackoff) {
notStopped = append(notStopped, name)
}
}
Expand Down
46 changes: 46 additions & 0 deletions internals/overlord/servstate/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,52 @@ services:
c.Assert(s.manager.StopTimeout(), Equals, time.Minute*60+time.Millisecond*200)
}

func (s *S) TestStopServiceWithinOkayDelay(c *C) {
// A longer okayDelay is used so that the change for starting the services won't
// quickly transition into the running state.
fakeOkayDelay := 5 * shortOkayDelay
servstate.FakeOkayWait(fakeOkayDelay)

s.newServiceManager(c)
layer := `
services:
%s:
override: replace
command: /bin/sh -c "sleep %g; {{.NotifyDoneCheck}}"
`
serviceName := "test-stop-within-okaywait"
s.planAddLayer(c, fmt.Sprintf(layer, serviceName, fakeOkayDelay))
s.planChanged(c)

// Start the service without waiting for change ready.
s.st.Lock()
ts, err := servstate.Start(s.st, [][]string{{serviceName}})
c.Check(err, IsNil)
chgStart := s.st.NewChange("test", "Start test")
chgStart.AddAll(ts)
s.st.Unlock()
s.runner.Ensure()

// Stop the service immediately within okayDelay
chg := s.stopServices(c, [][]string{{serviceName}})
s.st.Lock()
c.Assert(chg.Err(), IsNil)
s.st.Unlock()

waitChangeReady(c, s.runner, chgStart, "Start test")

s.st.Lock()
c.Check(chgStart.Status(), Equals, state.ErrorStatus)
c.Check(chgStart.Err(), ErrorMatches, `(?s).*cannot start service: exited quickly with code.*`)
s.st.Unlock()

donePath := filepath.Join(s.dir, serviceName)
// DonePath not created means the service is terminated within the okayWait.
if _, err := os.Stat(donePath); err == nil {
c.Fatalf("service %s waiting for service output", serviceName)
}
}

func (s *S) TestKillDelayIsUsed(c *C) {
s.newServiceManager(c)
s.planAddLayer(c, testPlanLayer)
Expand Down
1 change: 1 addition & 0 deletions internals/overlord/servstate/state-diagram.dot
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ digraph service_state_machine {
node [penwidth=1]
initial -> starting [label="start"]
starting -> running [label="okay wait\nelapsed"]
starting -> terminating [label="stop (before\nokay wait elapses)"]
running -> terminating [label="stop"]
running -> terminating [label="check failed\n(action \"restart\")"]
terminating -> killing [label="terminate time\nelapsed"]
Expand Down
Loading
Loading