From 416790b05b186c257b5b8377967403b04a2bc94b Mon Sep 17 00:00:00 2001
From: Madhav Puri
Date: Mon, 28 Mar 2016 23:39:07 -0700
Subject: [PATCH 1/7] Add POST discover/node REST endpoint

this endpoint allows the node to be provisioned for discovery in a cluster

Signed-off-by: Madhav Puri
---
 management/src/clusterm/manager/api.go      | 47 ++++++++++---------
 management/src/clusterm/manager/client.go   |  5 ++
 .../src/clusterm/manager/client_test.go     | 31 ++++++++++++
 management/src/clusterm/manager/consts.go   |  5 ++
 management/src/clusterm/manager/events.go   | 41 ++++++++++++++++
 management/src/clusterm/manager/utils.go    | 13 ++++-
 management/src/inventory/asset.go           |  5 ++
 management/src/inventory/inventory.go       |  4 +-
 8 files changed, 127 insertions(+), 24 deletions(-)

diff --git a/management/src/clusterm/manager/api.go b/management/src/clusterm/manager/api.go
index be1bbbd..9c10d36 100644
--- a/management/src/clusterm/manager/api.go
+++ b/management/src/clusterm/manager/api.go
@@ -19,6 +19,7 @@ func (m *Manager) apiLoop(errCh chan error) {
 	s.HandleFunc(fmt.Sprintf("/%s", postNodeCommission), post(m.nodeCommission))
 	s.HandleFunc(fmt.Sprintf("/%s", postNodeDecommission), post(m.nodeDecommission))
 	s.HandleFunc(fmt.Sprintf("/%s", postNodeMaintenance), post(m.nodeMaintenance))
+	s.HandleFunc(fmt.Sprintf("/%s", postNodeDiscover), post(m.nodeDiscover))
 	s.HandleFunc(fmt.Sprintf("/%s", PostGlobals), post(m.globalsSet))

 	s = r.Methods("Get").Subrouter()
@@ -38,12 +39,22 @@ func (m *Manager) apiLoop(errCh chan error) {
 	}
 }

-func post(postCb func(tag string, extraVars string) error) func(http.ResponseWriter, *http.Request) {
+func post(postCb func(tagOrAddr string, sanitizedExtraVars string) error) func(http.ResponseWriter, *http.Request) {
 	return func(w http.ResponseWriter, r *http.Request) {
 		vars := mux.Vars(r)
-		tag := vars["tag"]
+		tagOrAddr := vars["tag"]
+		if tagOrAddr == "" {
+			tagOrAddr = vars["addr"]
+		}
 		extraVars := r.FormValue(ExtraVarsQuery)
-		if err := postCb(tag, extraVars); err != nil {
+		sanitizedExtraVars, err := validateAndSanitizeEmptyExtraVars(ExtraVarsQuery, extraVars)
+		if err != nil {
+			http.Error(w,
+				err.Error(),
+				http.StatusInternalServerError)
+			return
+		}
+		if err := postCb(tagOrAddr, sanitizedExtraVars); err != nil {
 			http.Error(w,
 				err.Error(),
 				http.StatusInternalServerError)
@@ -68,32 +79,26 @@ func validateAndSanitizeEmptyExtraVars(errorPrefix, extraVars string) (string, e
 	return extraVars, nil
 }

-func (m *Manager) nodeCommission(tag, extraVars string) error {
-	extraVars, err := validateAndSanitizeEmptyExtraVars(ExtraVarsQuery, extraVars)
-	if err != nil {
-		return err
-	}
-	me := newWaitableEvent(newNodeCommissioned(m, tag, extraVars))
+func (m *Manager) nodeCommission(tag, sanitizedExtraVars string) error {
+	me := newWaitableEvent(newNodeCommissioned(m, tag, sanitizedExtraVars))
 	m.reqQ <- me
 	return me.waitForCompletion()
 }

-func (m *Manager) nodeDecommission(tag, extraVars string) error {
-	extraVars, err := validateAndSanitizeEmptyExtraVars(ExtraVarsQuery, extraVars)
-	if err != nil {
-		return err
-	}
-	me := newWaitableEvent(newNodeDecommissioned(m, tag, extraVars))
+func (m *Manager) nodeDecommission(tag, sanitizedExtraVars string) error {
+	me := newWaitableEvent(newNodeDecommissioned(m, tag, sanitizedExtraVars))
 	m.reqQ <- me
 	return me.waitForCompletion()
 }

-func (m *Manager) nodeMaintenance(tag, extraVars string) error {
-	extraVars, err := validateAndSanitizeEmptyExtraVars(ExtraVarsQuery, extraVars)
-	if err != nil {
-		return err
-	}
-	me :=
newWaitableEvent(newNodeInMaintenance(m, tag, extraVars)) +func (m *Manager) nodeMaintenance(tag, sanitizedExtraVars string) error { + me := newWaitableEvent(newNodeInMaintenance(m, tag, sanitizedExtraVars)) + m.reqQ <- me + return me.waitForCompletion() +} + +func (m *Manager) nodeDiscover(addr, sanitizedExtraVars string) error { + me := newWaitableEvent(newNodeDiscover(m, addr, sanitizedExtraVars)) m.reqQ <- me return me.waitForCompletion() } diff --git a/management/src/clusterm/manager/client.go b/management/src/clusterm/manager/client.go index 11c8c4e..8f321d6 100644 --- a/management/src/clusterm/manager/client.go +++ b/management/src/clusterm/manager/client.go @@ -93,6 +93,11 @@ func (c *Client) PostNodeInMaintenance(nodeName, extraVars string) error { return c.doPost(fmt.Sprintf("%s/%s", PostNodeMaintenancePrefix, nodeName), extraVars) } +// PostNodeDiscover posts the request to provision a node for discovery +func (c *Client) PostNodeDiscover(nodeAddr, extraVars string) error { + return c.doPost(fmt.Sprintf("%s/%s", PostNodeDiscoverPrefix, nodeAddr), extraVars) +} + // PostGlobals posts the request to set global extra vars func (c *Client) PostGlobals(extraVars string) error { return c.doPost(fmt.Sprintf("%s", PostGlobals), extraVars) diff --git a/management/src/clusterm/manager/client_test.go b/management/src/clusterm/manager/client_test.go index b641198..f5f9025 100644 --- a/management/src/clusterm/manager/client_test.go +++ b/management/src/clusterm/manager/client_test.go @@ -131,6 +131,37 @@ func (s *managerSuite) TestPostDecommissionWithVarsSuccess(c *C) { c.Assert(err, IsNil) } +func (s *managerSuite) TestPostDiscoverSuccess(c *C) { + expURLStr := fmt.Sprintf("http://%s/%s/%s", baseURL, PostNodeDiscoverPrefix, nodeName) + expURL, err := url.Parse(expURLStr) + c.Assert(err, IsNil) + httpS, httpC := getHTTPTestClientAndServer(c, okReturner(c, expURL)) + defer httpS.Close() + clstrC := Client{ + url: baseURL, + httpC: httpC, + } + + err = clstrC.PostNodeDiscover(nodeName, "") + c.Assert(err, IsNil) +} + +func (s *managerSuite) TestPostDiscoverWithVarsSuccess(c *C) { + expURLStr := fmt.Sprintf("http://%s/%s/%s?%s=%s", + baseURL, PostNodeDiscoverPrefix, nodeName, ExtraVarsQuery, testExtraVars) + expURL, err := url.Parse(expURLStr) + c.Assert(err, IsNil) + httpS, httpC := getHTTPTestClientAndServer(c, okReturner(c, expURL)) + defer httpS.Close() + clstrC := Client{ + url: baseURL, + httpC: httpC, + } + + err = clstrC.PostNodeDiscover(nodeName, testExtraVars) + c.Assert(err, IsNil) +} + func (s *managerSuite) TestPostInMaintenance(c *C) { expURLStr := fmt.Sprintf("http://%s/%s/%s", baseURL, PostNodeMaintenancePrefix, nodeName) expURL, err := url.Parse(expURLStr) diff --git a/management/src/clusterm/manager/consts.go b/management/src/clusterm/manager/consts.go index 1c4c660..5d919df 100644 --- a/management/src/clusterm/manager/consts.go +++ b/management/src/clusterm/manager/consts.go @@ -13,6 +13,10 @@ const ( // to put an asset in maintenance PostNodeMaintenancePrefix = "maintenance/node" postNodeMaintenance = PostNodeMaintenancePrefix + "/{tag}" + // PostNodeDiscoverPrefix is the prefix for the POST REST endpoint + // to provision a specified node for discovery + PostNodeDiscoverPrefix = "discover/node" + postNodeDiscover = PostNodeDiscoverPrefix + "/{addr}" // PostGlobals is the prefix for the POST REST endpoint // to set global configuration values PostGlobals = "globals" @@ -34,6 +38,7 @@ const ( const ( ansibleMasterGroupName = "service-master" ansibleWorkerGroupName = 
"service-worker" + ansibleDiscoverGroupName = "cluster-node" ansibleNodeNameHostVar = "node_name" ansibleNodeAddrHostVar = "node_addr" ansibleEtcdMasterAddrHostVar = "etcd_master_addr" diff --git a/management/src/clusterm/manager/events.go b/management/src/clusterm/manager/events.go index b1181d3..21d0977 100644 --- a/management/src/clusterm/manager/events.go +++ b/management/src/clusterm/manager/events.go @@ -443,6 +443,47 @@ func (e *nodeUpgrade) process() error { return nil } +type nodeDiscover struct { + mgr *Manager + nodeAddr string + extraVars string +} + +func newNodeDiscover(mgr *Manager, nodeAddr, extraVars string) *nodeDiscover { + return &nodeDiscover{ + mgr: mgr, + nodeAddr: nodeAddr, + extraVars: extraVars, + } +} + +func (e *nodeDiscover) String() string { + return fmt.Sprintf("nodeDiscover: %s", e.nodeAddr) +} + +func (e *nodeDiscover) process() error { + node, err := e.mgr.findNodeByMgmtAddr(e.nodeAddr) + if err == nil { + return errored.Errorf("a node %q already exists with the management address %q", + node.Inv.GetTag(), e.nodeAddr) + } + + // create a temporary ansible host config to provision the host in discover host-group + hostCfg := configuration.NewAnsibleHost("node1", e.nodeAddr, + ansibleDiscoverGroupName, map[string]string{ + ansibleNodeNameHostVar: "node1", + ansibleNodeAddrHostVar: e.nodeAddr, + }) + + outReader, _, errCh := e.mgr.configuration.Configure( + configuration.SubsysHosts([]*configuration.AnsibleHost{hostCfg}), e.extraVars) + if err := logOutputAndReturnStatus(outReader, errCh); err != nil { + log.Errorf("discover failed. Error: %s", err) + return err + } + return nil +} + type setGlobals struct { mgr *Manager extraVars string diff --git a/management/src/clusterm/manager/utils.go b/management/src/clusterm/manager/utils.go index 6bb4c99..c78bdd6 100644 --- a/management/src/clusterm/manager/utils.go +++ b/management/src/clusterm/manager/utils.go @@ -6,8 +6,8 @@ import ( "github.com/contiv/errored" ) -func nodeNotExistsError(name string) error { - return errored.Errorf("node with name %q doesn't exists", name) +func nodeNotExistsError(nameOrAddr string) error { + return errored.Errorf("node with name or address %q doesn't exists", nameOrAddr) } func nodeConfigNotExistsError(name string) error { @@ -26,6 +26,15 @@ func (m *Manager) findNode(name string) (*node, error) { return n, nil } +func (m *Manager) findNodeByMgmtAddr(addr string) (*node, error) { + for _, node := range m.nodes { + if node.Mon.GetMgmtAddress() == addr { + return node, nil + } + } + return nil, nodeNotExistsError(addr) +} + func (m *Manager) isMasterNode(name string) (bool, error) { n, err := m.findNode(name) if err != nil { diff --git a/management/src/inventory/asset.go b/management/src/inventory/asset.go index 4ebb7a7..eab124e 100644 --- a/management/src/inventory/asset.go +++ b/management/src/inventory/asset.go @@ -190,6 +190,11 @@ func (a *Asset) GetStatus() (AssetStatus, AssetState) { return a.status, a.state } +//GetTag retuns the inventory tag of the asset +func (a *Asset) GetTag() string { + return a.name +} + // MarshalJSON implements the json marshaller for asset. It is done this way // than making the fields public inorder to safeguard against direct state interpolation. 
func (a *Asset) MarshalJSON() ([]byte, error) { diff --git a/management/src/inventory/inventory.go b/management/src/inventory/inventory.go index 0cbafce..9237764 100644 --- a/management/src/inventory/inventory.go +++ b/management/src/inventory/inventory.go @@ -31,8 +31,10 @@ type Subsys interface { // SubsysAsset denotes a single asset in inventory subsystem type SubsysAsset interface { - //GetStatus return the current status of the asset + //GetStatus returns the current status of the asset GetStatus() (AssetStatus, AssetState) + //GetTag retuns the inventory tag of the asset + GetTag() string // SubsysAsset shall satisfy the json marshaller interface to encode asset's info in json json.Marshaler } From 91918c30a9b897806c3db98afaf781070429d9af Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Tue, 29 Mar 2016 01:25:50 -0700 Subject: [PATCH 2/7] add clusterctl discover command this command exercises the discover/node REST endpoint to provision a node for discovery Signed-off-by: Madhav Puri --- management/src/clusterctl/main.go | 38 +++++++++++++---- management/src/clusterctl/main_test.go | 57 ++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 9 deletions(-) create mode 100644 management/src/clusterctl/main_test.go diff --git a/management/src/clusterctl/main.go b/management/src/clusterctl/main.go index de17053..ec1c83f 100644 --- a/management/src/clusterctl/main.go +++ b/management/src/clusterctl/main.go @@ -3,6 +3,7 @@ package main import ( "bytes" "encoding/json" + "net" "os" log "github.com/Sirupsen/logrus" @@ -13,6 +14,8 @@ import ( var ( errNodeNameMissing = func(c string) error { return errored.Errorf("command %q expects a node name", c) } + errNodeAddrMissing = func(c string) error { return errored.Errorf("command %q expects a node IP address", c) } + errInvalidIPAddr = func(a string) error { return errored.Errorf("failed to parse ip address %q", a) } clustermFlags = []cli.Flag{ cli.StringFlag{ @@ -46,7 +49,7 @@ func main() { Name: "commission", Aliases: []string{"c"}, Usage: "commission a node", - Action: doAction(newPostActioner(nodecommission)), + Action: doAction(newPostActioner(nodeCommission)), Flags: extraVarsFlags, }, { @@ -104,6 +107,13 @@ func main() { }, }, }, + { + Name: "discover", + Aliases: []string{"d"}, + Usage: "provision a node for discovery", + Action: doAction(newPostActioner(nodeDiscover)), + Flags: extraVarsFlags, + }, } app.Run(os.Args) @@ -127,12 +137,12 @@ func doAction(a actioner) func(*cli.Context) { } type postActioner struct { - nodeName string - extraVars string - postCb func(c *manager.Client, nodeName, extraVars string) error + nodeNameOrAddr string + extraVars string + postCb func(c *manager.Client, nodeNameOrAddr, extraVars string) error } -func newPostActioner(postCb func(c *manager.Client, nodeName, extraVars string) error) *postActioner { +func newPostActioner(postCb func(c *manager.Client, nodeNameOrAddr, extraVars string) error) *postActioner { return &postActioner{postCb: postCb} } @@ -141,14 +151,14 @@ func (npa *postActioner) procFlags(c *cli.Context) { } func (npa *postActioner) procArgs(c *cli.Context) { - npa.nodeName = c.Args().First() + npa.nodeNameOrAddr = c.Args().First() } func (npa *postActioner) action(c *manager.Client) error { - return npa.postCb(c, npa.nodeName, npa.extraVars) + return npa.postCb(c, npa.nodeNameOrAddr, npa.extraVars) } -func nodecommission(c *manager.Client, nodeName, extraVars string) error { +func nodeCommission(c *manager.Client, nodeName, extraVars string) error { if nodeName == "" { return 
errNodeNameMissing("commission") } @@ -164,11 +174,21 @@ func nodeDecommission(c *manager.Client, nodeName, extraVars string) error { func nodeMaintenance(c *manager.Client, nodeName, extraVars string) error { if nodeName == "" { - return errNodeNameMissing("decommission") + return errNodeNameMissing("maintenance") } return c.PostNodeInMaintenance(nodeName, extraVars) } +func nodeDiscover(c *manager.Client, nodeAddr, extraVars string) error { + if nodeAddr == "" { + return errNodeAddrMissing("discover") + } + if ip := net.ParseIP(nodeAddr); ip == nil { + return errInvalidIPAddr(nodeAddr) + } + return c.PostNodeDiscover(nodeAddr, extraVars) +} + func globalsSet(c *manager.Client, noop, extraVars string) error { return c.PostGlobals(extraVars) } diff --git a/management/src/clusterctl/main_test.go b/management/src/clusterctl/main_test.go new file mode 100644 index 0000000..2c1b15c --- /dev/null +++ b/management/src/clusterctl/main_test.go @@ -0,0 +1,57 @@ +// +build unittest + +package main + +import ( + "testing" + + "github.com/contiv/cluster/management/src/clusterm/manager" + . "gopkg.in/check.v1" +) + +// Hook up gocheck into the "go test" runner. +func Test(t *testing.T) { TestingT(t) } + +type mainSuite struct { +} + +var _ = Suite(&mainSuite{}) + +func (s *mainSuite) TestCommandArgValidationError(c *C) { + tests := map[string]struct { + f func(*manager.Client, string, string) error + args []string + exptdErr error + }{ + "commission": { + f: nodeCommission, + args: []string{"", ""}, + exptdErr: errNodeNameMissing("commission"), + }, + "decommission": { + f: nodeDecommission, + args: []string{"", ""}, + exptdErr: errNodeNameMissing("decommission"), + }, + "maintenance": { + f: nodeMaintenance, + args: []string{"", ""}, + exptdErr: errNodeNameMissing("maintenance"), + }, + "discover": { + f: nodeDiscover, + args: []string{"", ""}, + exptdErr: errNodeAddrMissing("discover"), + }, + "discover_invalid_ip": { + f: nodeDiscover, + args: []string{"1.2.3.4.5", ""}, + exptdErr: errInvalidIPAddr("1.2.3.4.5"), + }, + } + + for key, test := range tests { + err := test.f(nil, test.args[0], test.args[1]) + c.Assert(err.Error(), Equals, test.exptdErr.Error(), Commentf("test key: %s", key)) + } +} From 0223894dafd2a409bb343a5230ea7d4800c4d27b Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Tue, 29 Mar 2016 05:17:09 -0700 Subject: [PATCH 3/7] update godeps for systemtest-utils and executor Signed-off-by: Madhav Puri --- management/src/Godeps/Godeps.json | 4 ++-- .../vendor/github.com/contiv/executor/executor.go | 12 ++++++------ .../github.com/contiv/systemtests-utils/system.go | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/management/src/Godeps/Godeps.json b/management/src/Godeps/Godeps.json index 2c9e14e..c06f554 100644 --- a/management/src/Godeps/Godeps.json +++ b/management/src/Godeps/Godeps.json @@ -39,11 +39,11 @@ }, { "ImportPath": "github.com/contiv/executor", - "Rev": "1c3e497e2b9da2c15caccfb77b52393fe6c101b5" + "Rev": "c06bb5cd4fcb9634afa5bb732e213d05b55d1c12" }, { "ImportPath": "github.com/contiv/systemtests-utils", - "Rev": "c8687a787fd4e395fa2285abb2c03ccef80402c7" + "Rev": "c824428c954469af7732b68cb6246cf36a7f5cd3" }, { "ImportPath": "github.com/contiv/vagrantssh", diff --git a/management/src/vendor/github.com/contiv/executor/executor.go b/management/src/vendor/github.com/contiv/executor/executor.go index 1f2487c..dd8b461 100644 --- a/management/src/vendor/github.com/contiv/executor/executor.go +++ 
b/management/src/vendor/github.com/contiv/executor/executor.go @@ -46,7 +46,7 @@ import ( type ExecResult struct { Stdout string Stderr string - ExitStatus uint32 + ExitStatus int Runtime time.Duration executor *Executor @@ -215,11 +215,10 @@ func (e *Executor) Wait(ctx context.Context) (*ExecResult, error) { } res := &ExecResult{executor: e} - exitErr := err if err != nil { if exit, ok := err.(*exec.ExitError); ok { - res.ExitStatus = uint32(exit.Sys().(syscall.WaitStatus)) + res.ExitStatus = int(exit.ProcessState.Sys().(syscall.WaitStatus) / 256) } } @@ -230,11 +229,12 @@ func (e *Executor) Wait(ctx context.Context) (*ExecResult, error) { res.Runtime = e.TimeRunning() - return res, exitErr + return res, err } // Run calls Start(), then Wait(), and returns an ExecResult and error (if -// any). If an error is returned, ExecResult will be nil. +// any). The error may be of many types including *exec.ExitError and +// context.Canceled, context.DeadlineExceeded. func (e *Executor) Run(ctx context.Context) (*ExecResult, error) { if err := e.Start(); err != nil { return nil, err @@ -242,7 +242,7 @@ func (e *Executor) Run(ctx context.Context) (*ExecResult, error) { er, err := e.Wait(ctx) if err != nil { - return nil, err + return er, err } return er, nil diff --git a/management/src/vendor/github.com/contiv/systemtests-utils/system.go b/management/src/vendor/github.com/contiv/systemtests-utils/system.go index a4d7705..afce545 100644 --- a/management/src/vendor/github.com/contiv/systemtests-utils/system.go +++ b/management/src/vendor/github.com/contiv/systemtests-utils/system.go @@ -81,9 +81,9 @@ func ServiceStatus(n vagrantssh.TestbedNode, srv string) (string, error) { } // WaitForDone polls for checkDoneFn function to return true up until specified timeout -func WaitForDone(doneFn func() (string, bool), timeoutSec int, timeoutMsg string) (string, error) { - tick := time.Tick(time.Duration(2) * time.Second) - timeout := time.After(time.Duration(timeoutSec) * time.Second) +func WaitForDone(doneFn func() (string, bool), tickDur time.Duration, timeoutDur time.Duration, timeoutMsg string) (string, error) { + tick := time.Tick(tickDur) + timeout := time.After(timeoutDur) doneCount := 0 for { select { @@ -123,7 +123,7 @@ func ServiceActionAndWaitForState(n vagrantssh.TestbedNode, srv string, timeoutS return out, true } return out, false - }, timeoutSec, fmt.Sprintf("it seems that service %q is not %q", srv, state)) + }, 2*time.Second, time.Duration(timeoutSec)*time.Second, fmt.Sprintf("it seems that service %q is not %q", srv, state)) } //ServiceStartAndWaitForUp starts a systemd service unit and waits for it to be up From 52df706ff5cc31e90876c34b49ad5afe97731009 Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Tue, 29 Mar 2016 05:52:42 -0700 Subject: [PATCH 4/7] Add systemtests for node discover command Also enabled the test case for commission of a disappeared node and added a few more utility functions Signed-off-by: Madhav Puri --- management/src/demo/files/site.yml | 12 +- management/src/systemtests/cli_test.go | 163 +++++++++++++++++++------ 2 files changed, 139 insertions(+), 36 deletions(-) diff --git a/management/src/demo/files/site.yml b/management/src/demo/files/site.yml index de3915c..9642050 100644 --- a/management/src/demo/files/site.yml +++ b/management/src/demo/files/site.yml @@ -1,6 +1,16 @@ --- +- hosts: service-master + tasks: + - name: provision + shell: touch /tmp/yay -- hosts: all +- hosts: service-worker tasks: - name: provision shell: touch /tmp/yay + +- hosts: cluster-node + 
sudo: true + tasks: + - name: start serf + service: name=serf state=started diff --git a/management/src/systemtests/cli_test.go b/management/src/systemtests/cli_test.go index be1d2cf..c233972 100644 --- a/management/src/systemtests/cli_test.go +++ b/management/src/systemtests/cli_test.go @@ -8,6 +8,7 @@ import ( "regexp" "strings" "testing" + "time" tutils "github.com/contiv/systemtests-utils" "github.com/contiv/vagrantssh" @@ -26,15 +27,15 @@ type CliTestSuite struct { } var _ = Suite(&CliTestSuite{ - // add tests to skip due to know issues here. Please add the issue# - // being used to track - skipTests: map[string]string{ - "CliTestSuite.TestCommissionDisappearedNode": "https://github.com/contiv/cluster/issues/28", - }, + // add tests to skip due to known issues here. + // The key of the map is test name like CliTestSuite.TestCommissionDisappearedNode + // The value of the map is the github issue# or url tracking reason for skip + skipTests: map[string]string{}, }) var ( validNodeNames = []string{"cluster-node1-0", "cluster-node2-0"} + validNodeAddrs = []string{} invalidNodeName = "invalid-test-node" dummyAnsibleFile = "/tmp/yay" ) @@ -49,6 +50,30 @@ func (s *CliTestSuite) Assert(c *C, obtained interface{}, checker Checker, args } } +func (s *CliTestSuite) startSerf(c *C, nut vagrantssh.TestbedNode) { + out, err := tutils.ServiceStartAndWaitForUp(nut, "serf", 30) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) + c.Logf("serf is running. %s", out) +} + +func (s *CliTestSuite) stopSerf(c *C, nut vagrantssh.TestbedNode) { + out, err := tutils.ServiceStop(nut, "serf") + s.Assert(c, err, IsNil, Commentf("output: %s", out)) + c.Logf("serf is stopped. %s", out) +} + +func (s *CliTestSuite) startClusterm(c *C, nut vagrantssh.TestbedNode, timeout int) { + out, err := tutils.ServiceStartAndWaitForUp(nut, "clusterm", timeout) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) + c.Logf("clusterm is running. %s", out) +} + +func (s *CliTestSuite) restartClusterm(c *C, nut vagrantssh.TestbedNode) { + out, err := tutils.ServiceRestartAndWaitForUp(nut, "clusterm", 30) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) + c.Logf("clusterm is running. %s", out) +} + func (s *CliTestSuite) SetUpSuite(c *C) { pwd, err := os.Getwd() s.Assert(c, err, IsNil) @@ -82,21 +107,18 @@ func (s *CliTestSuite) SetUpSuite(c *C) { s.Assert(c, s.tbn1, NotNil) s.tbn2 = s.tb.GetNodes()[1] s.Assert(c, s.tbn2, NotNil) + validNodeAddrs = nodeIPs // When a new vagrant setup comes up cluster-manager can take a bit to - // come up as it waits on collins container to come up, which depending on - // image download speed can take a while, so we wait for cluster-manager + // come up as it waits on collins container to come up and start serving it's API. + // This can take a while, so we wait for cluster-manager // to start with a long timeout here. This way we have this long wait only once. - // XXX: we can alternatively save the collins container in the image and cut - // this wait altogether. 
- out, err := tutils.ServiceStartAndWaitForUp(s.tbn1, "clusterm", 1200) - s.Assert(c, err, IsNil, Commentf("output: %s", out)) + s.startClusterm(c, s.tbn1, 1200) //provide test ansible playbooks and restart cluster-mgr src := fmt.Sprintf("%s/../demo/files/cli_test/*", pwd) dst := "/etc/default/clusterm/" - out, err = s.tbn1.RunCommandWithOutput(fmt.Sprintf("sudo cp -rf %s %s", src, dst)) - s.Assert(c, err, IsNil, Commentf("output: %s", out)) - out, err = tutils.ServiceRestartAndWaitForUp(s.tbn1, "clusterm", 30) + out, err := s.tbn1.RunCommandWithOutput(fmt.Sprintf("sudo cp -rf %s %s", src, dst)) s.Assert(c, err, IsNil, Commentf("output: %s", out)) + s.restartClusterm(c, s.tbn1) } func (s *CliTestSuite) TearDownSuite(c *C) { @@ -129,14 +151,16 @@ func (s *CliTestSuite) SetUpTest(c *C) { out, err = s.tbn2.RunCommandWithOutput(fmt.Sprintf("rm %s", file)) c.Logf("dummy file cleanup. Error: %v, Output: %s", err, out) + // make sure serf is running + s.startSerf(c, s.tbn1) + s.startSerf(c, s.tbn2) + // XXX: we cleanup up assets from collins instead of restarting it to save test time. for _, name := range validNodeNames { s.nukeNodeInCollins(c, name) } - out, err = tutils.ServiceRestartAndWaitForUp(s.tbn1, "clusterm", 90) - s.Assert(c, err, IsNil, Commentf("output: %s", out)) - c.Logf("clusterm is running. %s", out) + s.restartClusterm(c, s.tbn1) } func (s *CliTestSuite) TearDownTest(c *C) { @@ -159,23 +183,27 @@ func (s *CliTestSuite) TestCommissionNonExistentNode(c *C) { cmdStr := fmt.Sprintf("clusterctl node commission %s", nodeName) out, err := s.tbn1.RunCommandWithOutput(cmdStr) s.Assert(c, err, NotNil, Commentf("output: %s", out)) - exptStr := fmt.Sprintf(".*asset.*%s.*doesn't exists.*", nodeName) + exptStr := fmt.Sprintf(".*node.*%s.*doesn't exists.*", nodeName) s.assertMatch(c, exptStr, out) } func (s *CliTestSuite) TestCommissionDisappearedNode(c *C) { - nodeName := validNodeNames[0] - // stop serf discovery - out, err := tutils.ServiceStop(s.tbn1, "serf") - s.Assert(c, err, IsNil, Commentf("output: %s", out)) - defer func() { - // start serf discovery - out, err := tutils.ServiceStart(s.tbn1, "serf") - s.Assert(c, err, IsNil, Commentf("output: %s", out)) - }() + nodeName := validNodeNames[1] + // make sure test node is visible in inventory + s.getNodeInfoSuccess(c, nodeName) + + // stop serf discovery on test node + s.stopSerf(c, s.tbn2) + + // wait for serf membership to update + s.waitForSerfMembership(c, s.tbn1, nodeName, "failed") + + //try to commission the node cmdStr := fmt.Sprintf("clusterctl node commission %s", nodeName) - out, err = s.tbn1.RunCommandWithOutput(cmdStr) - s.Assert(c, err, ErrorMatches, "node has disappeared", Commentf("output: %s", out)) + out, err := s.tbn1.RunCommandWithOutput(cmdStr) + s.Assert(c, err, NotNil, Commentf("output: %s", out)) + exptStr := fmt.Sprintf(".*node.*%s.*has disappeared.*", nodeName) + s.assertMatch(c, exptStr, out) } func checkProvisionStatus(tbn1 vagrantssh.TestbedNode, nodeName, exptdStatus string) (string, error) { @@ -191,7 +219,7 @@ func checkProvisionStatus(tbn1 vagrantssh.TestbedNode, nodeName, exptdStatus str return out, true } return out, false - }, 30, fmt.Sprintf("node is still not in %q status", exptdStatus)) + }, 1*time.Second, 30*time.Second, fmt.Sprintf("node is still not in %q status", exptdStatus)) } func (s *CliTestSuite) TestCommissionProvisionFailure(c *C) { @@ -287,6 +315,63 @@ func (s *CliTestSuite) TestDecommissionFailureRemainingWorkerNodes(c *C) { s.assertMatch(c, exptdOut, out) } +func (s *CliTestSuite) 
TestDiscoverNodeAlreadyExistError(c *C) { + nodeName := validNodeNames[0] + nodeAddr := validNodeAddrs[0] + cmdStr := fmt.Sprintf("clusterctl discover %s", nodeAddr) + out, err := s.tbn1.RunCommandWithOutput(cmdStr) + s.Assert(c, err, NotNil, Commentf("output: %s", out)) + exptdOut := fmt.Sprintf("a node.*%s.*already exists with the management address.*%s.*", nodeName, nodeAddr) + s.assertMatch(c, exptdOut, out) +} + +func (s *CliTestSuite) waitForSerfMembership(c *C, nut vagrantssh.TestbedNode, nodeName, state string) { + out, err := tutils.WaitForDone(func() (string, bool) { + out, err := nut.RunCommandWithOutput(`serf members`) + if err != nil { + return out, false + } + stateStr := fmt.Sprintf(`%s.*%s.*`, nodeName, state) + if match, err := regexp.MatchString(stateStr, out); err != nil || !match { + return out, false + } + return out, true + }, 1*time.Second, time.Duration(10)*time.Second, + fmt.Sprintf("%s's serf membership is not in %s state", nodeName, state)) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) +} + +func (s *CliTestSuite) TestDiscoverSuccess(c *C) { + nodeName := validNodeNames[1] + nodeAddr := validNodeAddrs[1] + + // nuke the node in collins + s.nukeNodeInCollins(c, nodeName) + + // stop serf on test node + s.stopSerf(c, s.tbn2) + + // wait for serf membership to update + s.waitForSerfMembership(c, s.tbn1, nodeName, "failed") + + // restart clusterm + s.restartClusterm(c, s.tbn1) + + // make sure node is not visible in inventory + s.getNodeInfoFailureNonExistentNode(c, nodeName) + + // run discover command + cmdStr := fmt.Sprintf("clusterctl discover %s", nodeAddr) + out, err := s.tbn1.RunCommandWithOutput(cmdStr) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) + + // wait for serf membership to update + s.waitForSerfMembership(c, s.tbn1, nodeName, "alive") + + // make sure node is now visible in inventory + s.getNodeInfoSuccess(c, nodeName) +} + func (s *CliTestSuite) TestSetGetGlobalExtraVarsSuccess(c *C) { cmdStr := fmt.Sprintf(`clusterctl global set -e '{\\\"foo\\\":\\\"bar\\\"}'`) out, err := s.tbn1.RunCommandWithOutput(cmdStr) @@ -307,16 +392,20 @@ func (s *CliTestSuite) TestSetGetGlobalExtraVarsFailureInvalidJSON(c *C) { s.assertMatch(c, exptdOut, out) } -func (s *CliTestSuite) TestGetNodeInfoFailureNonExistentNode(c *C) { - cmdStr := fmt.Sprintf(`clusterctl node get %s`, invalidNodeName) +func (s *CliTestSuite) getNodeInfoFailureNonExistentNode(c *C, nodeName string) { + cmdStr := fmt.Sprintf(`clusterctl node get %s`, nodeName) out, err := s.tbn1.RunCommandWithOutput(cmdStr) s.Assert(c, err, NotNil, Commentf("output: %s", out)) - exptdOut := fmt.Sprintf(`.*node with name.*%s.*doesn't exists.*`, invalidNodeName) + exptdOut := fmt.Sprintf(`.*node with name.*%s.*doesn't exists.*`, nodeName) s.assertMatch(c, exptdOut, out) } -func (s *CliTestSuite) TestGetNodeInfoSuccess(c *C) { - cmdStr := fmt.Sprintf(`clusterctl node get %s`, validNodeNames[0]) +func (s *CliTestSuite) TestGetNodeInfoFailureNonExistentNode(c *C) { + s.getNodeInfoFailureNonExistentNode(c, invalidNodeName) +} + +func (s *CliTestSuite) getNodeInfoSuccess(c *C, nodeName string) { + cmdStr := fmt.Sprintf(`clusterctl node get %s`, nodeName) out, err := s.tbn1.RunCommandWithOutput(cmdStr) s.Assert(c, err, IsNil, Commentf("output: %s", out)) exptdOut := `.*"monitoring-state":.*` @@ -327,6 +416,10 @@ func (s *CliTestSuite) TestGetNodeInfoSuccess(c *C) { s.assertMultiMatch(c, exptdOut, out, 1) } +func (s *CliTestSuite) TestGetNodeInfoSuccess(c *C) { + s.getNodeInfoSuccess(c, 
validNodeNames[0]) +} + func (s *CliTestSuite) TestGetNodesInfoSuccess(c *C) { cmdStr := `clusterctl nodes get` out, err := s.tbn1.RunCommandWithOutput(cmdStr) From 4beeb8ad6df64ed06c7c11faab4eafa973ef69b0 Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Tue, 29 Mar 2016 05:54:22 -0700 Subject: [PATCH 5/7] fix to prevent a disappeared node from being commissioned Signed-off-by: Madhav Puri --- management/src/clusterm/manager/events.go | 8 ++++++++ management/src/clusterm/manager/utils.go | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/management/src/clusterm/manager/events.go b/management/src/clusterm/manager/events.go index 21d0977..51bb9ea 100644 --- a/management/src/clusterm/manager/events.go +++ b/management/src/clusterm/manager/events.go @@ -132,6 +132,14 @@ func (e *nodeCommissioned) String() string { } func (e *nodeCommissioned) process() error { + isDiscovered, err := e.mgr.isDiscoveredNode(e.nodeName) + if err != nil { + return err + } + if !isDiscovered { + return errored.Errorf("node %q has disappeared from monitoring subsystem, it can't be commissioned. Please check node's network reachability", e.nodeName) + } + if err := e.mgr.inventory.SetAssetProvisioning(e.nodeName); err != nil { // XXX. Log this to collins return err diff --git a/management/src/clusterm/manager/utils.go b/management/src/clusterm/manager/utils.go index c78bdd6..e6b25c6 100644 --- a/management/src/clusterm/manager/utils.go +++ b/management/src/clusterm/manager/utils.go @@ -1,7 +1,6 @@ package manager import ( - log "github.com/Sirupsen/logrus" "github.com/contiv/cluster/management/src/inventory" "github.com/contiv/errored" ) @@ -43,7 +42,6 @@ func (m *Manager) isMasterNode(name string) (bool, error) { if n.Cfg == nil { return false, nodeConfigNotExistsError(name) } - log.Debugf("node: %q, group: %q", name, n.Cfg.GetGroup()) return n.Cfg.GetGroup() == ansibleMasterGroupName, nil } @@ -55,10 +53,21 @@ func (m *Manager) isWorkerNode(name string) (bool, error) { if n.Cfg == nil { return false, nodeConfigNotExistsError(name) } - log.Debugf("node: %q, group: %q", name, n.Cfg.GetGroup()) return n.Cfg.GetGroup() == ansibleWorkerGroupName, nil } +func (m *Manager) isDiscoveredNode(name string) (bool, error) { + n, err := m.findNode(name) + if err != nil { + return false, err + } + if n.Inv == nil { + return false, nodeInventoryNotExistsError(name) + } + _, state := n.Inv.GetStatus() + return state == inventory.Discovered, nil +} + func (m *Manager) isDiscoveredAndAllocatedNode(name string) (bool, error) { n, err := m.findNode(name) if err != nil { @@ -68,6 +77,5 @@ func (m *Manager) isDiscoveredAndAllocatedNode(name string) (bool, error) { return false, nodeInventoryNotExistsError(name) } status, state := n.Inv.GetStatus() - log.Debugf("node: %q, status: %q, state: %q", name, status, state) return state == inventory.Discovered && status == inventory.Allocated, nil } From 9cb4ef5d67c9403d859bdd766934be826b647896 Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Wed, 30 Mar 2016 04:42:17 -0700 Subject: [PATCH 6/7] make file stat check a bit more reliable Signed-off-by: Madhav Puri --- management/src/systemtests/cli_test.go | 32 +++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/management/src/systemtests/cli_test.go b/management/src/systemtests/cli_test.go index c233972..c77db0d 100644 --- a/management/src/systemtests/cli_test.go +++ b/management/src/systemtests/cli_test.go @@ -252,9 +252,7 @@ func (s *CliTestSuite) commissionNode(c *C, nodeName 
string, nut vagrantssh.Test s.Assert(c, err, IsNil, Commentf("output: %s", out)) // verify that site.yml got executed on the node and created the dummy file - file := dummyAnsibleFile - out, err = nut.RunCommandWithOutput(fmt.Sprintf("stat -t %s", file)) - s.Assert(c, err, IsNil, Commentf("output: %s", out)) + s.waitForStatToSucceed(c, nut, dummyAnsibleFile) } func (s *CliTestSuite) TestCommissionSuccess(c *C) { @@ -262,6 +260,30 @@ func (s *CliTestSuite) TestCommissionSuccess(c *C) { s.commissionNode(c, nodeName, s.tbn1) } +func (s *CliTestSuite) waitForStatToSucceed(c *C, nut vagrantssh.TestbedNode, file string) { + out, err := tutils.WaitForDone(func() (string, bool) { + cmdStr := fmt.Sprintf("stat -t %s", file) + out, err := nut.RunCommandWithOutput(cmdStr) + if err != nil { + return out, false + } + return out, true + }, 1*time.Second, 10*time.Second, fmt.Sprintf("file %q still doesn't seems to exist", file)) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) +} + +func (s *CliTestSuite) waitForStatToFail(c *C, nut vagrantssh.TestbedNode, file string) { + out, err := tutils.WaitForDone(func() (string, bool) { + cmdStr := fmt.Sprintf("stat -t %s", file) + out, err := nut.RunCommandWithOutput(cmdStr) + if err == nil { + return out, false + } + return out, true + }, 1*time.Second, 10*time.Second, fmt.Sprintf("file %q still seems to exist", file)) + s.Assert(c, err, IsNil, Commentf("output: %s", out)) +} + func (s *CliTestSuite) decommissionNode(c *C, nodeName string, nut vagrantssh.TestbedNode) { // decommission the node cmdStr := fmt.Sprintf("clusterctl node decommission %s", nodeName) @@ -271,9 +293,7 @@ func (s *CliTestSuite) decommissionNode(c *C, nodeName string, nut vagrantssh.Te s.Assert(c, err, IsNil, Commentf("output: %s", out)) // verify that cleanup.yml got executed on the node and deleted the dummy file - file := dummyAnsibleFile - out, err = nut.RunCommandWithOutput(fmt.Sprintf("stat -t %s", file)) - s.Assert(c, err, NotNil, Commentf("output: %s", out)) + s.waitForStatToFail(c, nut, dummyAnsibleFile) } func (s *CliTestSuite) TestDecommissionSuccess(c *C) { From 822e7a4f3e2c9ed498919da8605e13ef9bd72d3f Mon Sep 17 00:00:00 2001 From: Madhav Puri Date: Wed, 30 Mar 2016 04:53:02 -0700 Subject: [PATCH 7/7] call GetNode() instead of GetNodes() as latter doesn't ensure same order of nodes on every call Signed-off-by: Madhav Puri --- management/src/systemtests/cli_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/management/src/systemtests/cli_test.go b/management/src/systemtests/cli_test.go index c77db0d..d0c2122 100644 --- a/management/src/systemtests/cli_test.go +++ b/management/src/systemtests/cli_test.go @@ -103,9 +103,9 @@ func (s *CliTestSuite) SetUpSuite(c *C) { } s.tb = &vagrantssh.Baremetal{} s.Assert(c, s.tb.Setup(hosts), IsNil) - s.tbn1 = s.tb.GetNodes()[0] + s.tbn1 = s.tb.GetNode("node1") s.Assert(c, s.tbn1, NotNil) - s.tbn2 = s.tb.GetNodes()[1] + s.tbn2 = s.tb.GetNode("node2") s.Assert(c, s.tbn2, NotNil) validNodeAddrs = nodeIPs // When a new vagrant setup comes up cluster-manager can take a bit to