From 763d0a47882e911e112fb56a303d2f79abd5341a Mon Sep 17 00:00:00 2001 From: Chris Selzo Date: Fri, 30 Jun 2023 15:48:48 -0700 Subject: [PATCH 1/5] Add `create-recovery-plan` command This command will allow users to scan a deployment for problems, then write out a YAML-formatted recovery plan, to be used later in the `recover` command. [#185483613] Authored-by: Chris Selzo --- cmd/cmd.go | 3 + cmd/create_recovery_plan.go | 157 ++++++++++++++++++ cmd/create_recovery_plan_test.go | 276 +++++++++++++++++++++++++++++++ cmd/opts/opts.go | 10 ++ cmd/opts/opts_test.go | 32 ++++ director/problems.go | 9 +- director/problems_test.go | 4 + 7 files changed, 487 insertions(+), 4 deletions(-) create mode 100644 cmd/create_recovery_plan.go create mode 100644 cmd/create_recovery_plan_test.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 3e5a7dba3..f2328df01 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -357,6 +357,9 @@ func (c Cmd) Execute() (cmdErr error) { case *CloudCheckOpts: return NewCloudCheckCmd(c.deployment(), deps.UI).Run(*opts) + case *CreateRecoveryPlanOpts: + return NewCreateRecoveryPlanCmd(c.deployment(), deps.UI, deps.FS).Run(*opts) + case *CleanUpOpts: return NewCleanUpCmd(deps.UI, c.director()).Run(*opts) diff --git a/cmd/create_recovery_plan.go b/cmd/create_recovery_plan.go new file mode 100644 index 000000000..3c34fb293 --- /dev/null +++ b/cmd/create_recovery_plan.go @@ -0,0 +1,157 @@ +package cmd + +import ( + "fmt" + "sort" + + "gopkg.in/yaml.v2" + + bosherr "github.com/cloudfoundry/bosh-utils/errors" + boshsys "github.com/cloudfoundry/bosh-utils/system" + + boshdir "github.com/cloudfoundry/bosh-cli/v7/director" + boshui "github.com/cloudfoundry/bosh-cli/v7/ui" + boshtbl "github.com/cloudfoundry/bosh-cli/v7/ui/table" + + . "github.com/cloudfoundry/bosh-cli/v7/cmd/opts" +) + +type InstanceGroupPlan struct { + Name string `yaml:"name"` + MaxInFlight string `yaml:"max_in_flight,omitempty"` + PlannedResolutions map[string]boshdir.ProblemResolution `yaml:"planned_resolutions"` +} + +type RecoveryPlan struct { + InstanceGroupsPlan []InstanceGroupPlan `yaml:"instance_groups_plan"` +} + +type CreateRecoveryPlanCmd struct { + deployment boshdir.Deployment + ui boshui.UI + fs boshsys.FileSystem +} + +func NewCreateRecoveryPlanCmd(deployment boshdir.Deployment, ui boshui.UI, fs boshsys.FileSystem) CreateRecoveryPlanCmd { + return CreateRecoveryPlanCmd{deployment: deployment, ui: ui, fs: fs} +} + +func (c CreateRecoveryPlanCmd) Run(opts CreateRecoveryPlanOpts) error { + problemsByInstanceGroup, err := c.getProblemsByInstanceGroup() + if err != nil { + return err + } + + if len(problemsByInstanceGroup) == 0 { + c.ui.PrintLinef("No problems found\n") + return nil + } + + var plan RecoveryPlan + for _, instanceGroup := range sortedInstanceGroups(problemsByInstanceGroup) { + c.ui.PrintLinef("Instance Group '%s'\n", instanceGroup) + + instanceGroupResolutions, err := c.processProblemsByType(problemsByInstanceGroup[instanceGroup]) + if err != nil { + return err + } + + plan.InstanceGroupsPlan = append(plan.InstanceGroupsPlan, InstanceGroupPlan{ + Name: instanceGroup, + PlannedResolutions: instanceGroupResolutions, + }) + } + + bytes, err := yaml.Marshal(plan) + if err != nil { + return err + } + + return c.fs.WriteFile(opts.Args.RecoveryPlan.ExpandedPath, bytes) +} + +func sortedInstanceGroups(problemsByInstanceGroup map[string][]boshdir.Problem) []string { + var instanceGroups []string + for k := range problemsByInstanceGroup { + instanceGroups = append(instanceGroups, k) + } + sort.Strings(instanceGroups) + + return instanceGroups +} + +func (c CreateRecoveryPlanCmd) processProblemsByType(problems []boshdir.Problem) (map[string]boshdir.ProblemResolution, error) { + problemsByType := mapProblemsByTrait(problems, func(p boshdir.Problem) string { return p.Type }) + + resolutions := make(map[string]boshdir.ProblemResolution) + for problemType, problemsForType := range problemsByType { + c.printProblemTable(problemType, problemsForType) + + var opts []string + for _, res := range problemsForType[0].Resolutions { + opts = append(opts, res.Plan) + } + + chosenIndex, err := c.ui.AskForChoice(problemType, opts) + if err != nil { + return nil, err + } + + resolutions[problemType] = problemsForType[0].Resolutions[chosenIndex] + } + + return resolutions, nil +} + +func (c CreateRecoveryPlanCmd) printProblemTable(problemType string, problemsForType []boshdir.Problem) { + table := boshtbl.Table{ + Title: fmt.Sprintf("Problem type: %s", problemType), + Content: fmt.Sprintf("%s problems", problemType), + Header: []boshtbl.Header{ + boshtbl.NewHeader("#"), + boshtbl.NewHeader("Description"), + }, + SortBy: []boshtbl.ColumnSort{{Column: 0, Asc: true}}, + } + + for _, p := range problemsForType { + table.Rows = append(table.Rows, []boshtbl.Value{ + boshtbl.NewValueInt(p.ID), + boshtbl.NewValueString(p.Description), + }) + } + + c.ui.PrintTable(table) +} + +func (c CreateRecoveryPlanCmd) getProblemsByInstanceGroup() (map[string][]boshdir.Problem, error) { + problems, err := c.deployment.ScanForProblems() + if err != nil { + return nil, err + } + + if anyProblemsHaveNoInstanceGroups(problems) { + return nil, bosherr.Error("Director does not support this command. Try 'bosh cloud-check' instead") + } + + return mapProblemsByTrait(problems, func(p boshdir.Problem) string { return p.InstanceGroup }), nil +} + +func anyProblemsHaveNoInstanceGroups(problems []boshdir.Problem) bool { + for _, p := range problems { + if p.InstanceGroup == "" { + return true + } + } + + return false +} + +func mapProblemsByTrait(problems []boshdir.Problem, traitFunc func(p boshdir.Problem) string) map[string][]boshdir.Problem { + probMap := make(map[string][]boshdir.Problem) + for _, p := range problems { + probMap[traitFunc(p)] = append(probMap[traitFunc(p)], p) + } + + return probMap +} diff --git a/cmd/create_recovery_plan_test.go b/cmd/create_recovery_plan_test.go new file mode 100644 index 000000000..4eae23eae --- /dev/null +++ b/cmd/create_recovery_plan_test.go @@ -0,0 +1,276 @@ +package cmd_test + +import ( + "errors" + + fakesys "github.com/cloudfoundry/bosh-utils/system/fakes" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "gopkg.in/yaml.v2" + + . "github.com/cloudfoundry/bosh-cli/v7/cmd" + . "github.com/cloudfoundry/bosh-cli/v7/cmd/opts" + boshdir "github.com/cloudfoundry/bosh-cli/v7/director" + fakedir "github.com/cloudfoundry/bosh-cli/v7/director/directorfakes" + fakeui "github.com/cloudfoundry/bosh-cli/v7/ui/fakes" + boshtbl "github.com/cloudfoundry/bosh-cli/v7/ui/table" +) + +func createResolution(name, plan string) boshdir.ProblemResolution { + return boshdir.ProblemResolution{ + Name: &name, + Plan: plan, + } +} + +var _ = Describe("CreateRecoveryPlanCmd", func() { + skipResolution := createResolution("ignore", "Skip for now") + recreateResolution := createResolution("recreate_vm", "Recreate VM") + rebootResolution := createResolution("reboot_vm", "Reboot VM") + deleteVmReferenceResolution := createResolution("delete_vm_reference", "Delete VM reference") + deleteDiskReferenceResolution := createResolution("delete_disk_reference", "Delete disk reference (DANGEROUS!)") + reattachDiskResolution := createResolution("reattach_disk", "Reattach disk to instance") + reattachDiskAndRebootResolution := createResolution("reattach_disk_and_reboot", "Reattach disk and reboot instance") + + var ( + deployment *fakedir.FakeDeployment + ui *fakeui.FakeUI + fakeFS *fakesys.FakeFileSystem + command CreateRecoveryPlanCmd + ) + + BeforeEach(func() { + deployment = &fakedir.FakeDeployment{} + ui = &fakeui.FakeUI{} + fakeFS = fakesys.NewFakeFileSystem() + command = NewCreateRecoveryPlanCmd(deployment, ui, fakeFS) + }) + + Describe("Run", func() { + var ( + opts CreateRecoveryPlanOpts + severalProbs []boshdir.Problem + ) + + BeforeEach(func() { + opts = CreateRecoveryPlanOpts{ + Args: CreateRecoveryPlanArgs{ + RecoveryPlan: FileArg{ + ExpandedPath: "/tmp/foo.yml", + FS: fakeFS, + }, + }, + } + + severalProbs = []boshdir.Problem{ + { + ID: 3, + + Type: "unresponsive_agent", + Description: "problem1-desc", + InstanceGroup: "diego_cell", + + Resolutions: []boshdir.ProblemResolution{ + skipResolution, + recreateResolution, + deleteVmReferenceResolution, + }, + }, + { + ID: 4, + + Type: "missing_vm", + Description: "problem2-desc", + InstanceGroup: "router", + + Resolutions: []boshdir.ProblemResolution{ + skipResolution, + recreateResolution, + rebootResolution, + deleteDiskReferenceResolution, + }, + }, + { + ID: 5, + + Type: "mount_info_mismatch", + Description: "problem3-desc", + InstanceGroup: "router", + + Resolutions: []boshdir.ProblemResolution{ + skipResolution, + reattachDiskResolution, + reattachDiskAndRebootResolution, + }, + }, + } + }) + + act := func() error { return command.Run(opts) } + + Context("scanning for problems failed", func() { + BeforeEach(func() { + deployment.ScanForProblemsReturns(nil, errors.New("fake-err")) + }) + + It("returns error", func() { + err := act() + Expect(err).To(MatchError(ContainSubstring("fake-err"))) + }) + }) + + Context("no problems are found", func() { + BeforeEach(func() { + deployment.ScanForProblemsReturns([]boshdir.Problem{}, nil) + }) + + It("tells the user", func() { + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(ui.Said).To(ContainElement("No problems found\n")) + }) + + It("does not ask for confirmation or with choices", func() { + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(ui.AskedChoiceCalled).To(BeFalse()) + Expect(ui.AskedConfirmationCalled).To(BeFalse()) + }) + + It("does not write a file", func() { + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(fakeFS.WriteFileCallCount).To(BeZero()) + }) + }) + + Context("problems are found", func() { + BeforeEach(func() { + deployment.ScanForProblemsReturns(severalProbs, nil) + ui.AskedChoiceChosens = []int{0, 1, 2} + ui.AskedChoiceErrs = []error{nil, nil, nil} + }) + + It("shows problems by instance group and type", func() { + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(ui.Said).To(ContainElements( + "Instance Group 'router'\n", + "Instance Group 'diego_cell'\n", + )) + Expect(ui.Tables).To( + ContainElements( + boshtbl.Table{ + Title: "Problem type: unresponsive_agent", + Content: "unresponsive_agent problems", + + Header: []boshtbl.Header{ + boshtbl.NewHeader("#"), + boshtbl.NewHeader("Description"), + }, + + SortBy: []boshtbl.ColumnSort{{Column: 0, Asc: true}}, + + Rows: [][]boshtbl.Value{ + { + boshtbl.NewValueInt(3), + boshtbl.NewValueString("problem1-desc"), + }, + }, + }, + boshtbl.Table{ + Title: "Problem type: missing_vm", + Content: "missing_vm problems", + + Header: []boshtbl.Header{ + boshtbl.NewHeader("#"), + boshtbl.NewHeader("Description"), + }, + + SortBy: []boshtbl.ColumnSort{{Column: 0, Asc: true}}, + + Rows: [][]boshtbl.Value{ + { + boshtbl.NewValueInt(4), + boshtbl.NewValueString("problem2-desc"), + }, + }, + }, + boshtbl.Table{ + Title: "Problem type: mount_info_mismatch", + Content: "mount_info_mismatch problems", + + Header: []boshtbl.Header{ + boshtbl.NewHeader("#"), + boshtbl.NewHeader("Description"), + }, + + SortBy: []boshtbl.ColumnSort{{Column: 0, Asc: true}}, + + Rows: [][]boshtbl.Value{ + { + boshtbl.NewValueInt(5), + boshtbl.NewValueString("problem3-desc"), + }, + }, + }, + ), + ) + }) + + It("writes a recovery plan based on answers", func() { + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(ui.AskedChoiceCalled).To(BeTrue()) + + Expect(fakeFS.WriteFileCallCount).To(Equal(1)) + Expect(fakeFS.FileExists("/tmp/foo.yml")).To(BeTrue()) + bytes, err := fakeFS.ReadFile("/tmp/foo.yml") + Expect(err).ToNot(HaveOccurred()) + + var actualPlan RecoveryPlan + Expect(yaml.Unmarshal(bytes, &actualPlan)).ToNot(HaveOccurred()) + + Expect(actualPlan.InstanceGroupsPlan).To(HaveLen(2)) + + Expect(actualPlan.InstanceGroupsPlan[0].Name).To(Equal("diego_cell")) + Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveLen(1)) + Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveKeyWithValue("unresponsive_agent", skipResolution)) + + Expect(actualPlan.InstanceGroupsPlan[1].Name).To(Equal("router")) + Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveLen(2)) + Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("missing_vm", recreateResolution)) + Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("mount_info_mismatch", reattachDiskAndRebootResolution)) + }) + + It("returns an error if asking fails", func() { + ui.AskedChoiceErrs = []error{nil, errors.New("fake-err"), nil} + + err := act() + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("fake-err")) + }) + + Context("director does not return instance group", func() { + BeforeEach(func() { + grouplessProbs := severalProbs + for i := range grouplessProbs { + grouplessProbs[i].InstanceGroup = "" + } + + deployment.ScanForProblemsReturns(grouplessProbs, nil) + }) + + It("informs the user and exits", func() { + err := act() + Expect(err).To(MatchError(ContainSubstring("does not support"))) + }) + }) + }) + }) +}) diff --git a/cmd/opts/opts.go b/cmd/opts/opts.go index 4c7c9a406..c5b2aacf7 100644 --- a/cmd/opts/opts.go +++ b/cmd/opts/opts.go @@ -137,6 +137,7 @@ type BoshOpts struct { Ignore IgnoreOpts `command:"ignore" description:"Ignore an instance"` Unignore UnignoreOpts `command:"unignore" description:"Unignore an instance"` CloudCheck CloudCheckOpts `command:"cloud-check" alias:"cck" alias:"cloudcheck" description:"Cloud consistency check and interactive repair"` //nolint:staticcheck + CreateRecoveryPlan CreateRecoveryPlanOpts `command:"create-recovery-plan" description:"Interactively generate a recovery plan for disaster repair"` OrphanedVMs OrphanedVMsOpts `command:"orphaned-vms" description:"List all the orphaned VMs in all deployments"` // Instance management @@ -816,6 +817,15 @@ type CloudCheckOpts struct { cmd } +type CreateRecoveryPlanOpts struct { + Args CreateRecoveryPlanArgs `positional-args:"true" required:"true"` + cmd +} + +type CreateRecoveryPlanArgs struct { + RecoveryPlan FileArg `positional-arg-name:"PATH" description:"Create recovery plan file at path"` +} + type OrphanedVMsOpts struct { cmd } diff --git a/cmd/opts/opts_test.go b/cmd/opts/opts_test.go index 13f202902..5afeb5e9c 100644 --- a/cmd/opts/opts_test.go +++ b/cmd/opts/opts_test.go @@ -2455,6 +2455,38 @@ var _ = Describe("Opts", func() { }) }) + Describe("CreateRecoveryPlanOpts", func() { + var opts *CreateRecoveryPlanOpts + + BeforeEach(func() { + opts = &CreateRecoveryPlanOpts{} + }) + + Describe("Args", func() { + It("contains desired values", func() { + Expect(getStructTagForName("Args", opts)).To(Equal( + `positional-args:"true" required:"true"`, + )) + }) + }) + }) + + Describe("CreateRecoveryPlanArgs", func() { + var opts *CreateRecoveryPlanArgs + + BeforeEach(func() { + opts = &CreateRecoveryPlanArgs{} + }) + + Describe("RecoveryPlan", func() { + It("contains desired values", func() { + Expect(getStructTagForName("RecoveryPlan", opts)).To(Equal( + `positional-arg-name:"PATH" description:"Create recovery plan file at path"`, + )) + }) + }) + }) + Describe("UpdateResurrectionOpts", func() { var opts *UpdateResurrectionOpts diff --git a/director/problems.go b/director/problems.go index f52392079..9d2537634 100644 --- a/director/problems.go +++ b/director/problems.go @@ -12,8 +12,9 @@ import ( type Problem struct { ID int // e.g. 4 - Type string // e.g. "unresponsive_agent" - Description string // e.g. "api/1 (5efd2cb8-d73b-4e45-6df4-58f5dd5ec2ec) is not responding" + Type string // e.g. "unresponsive_agent" + Description string // e.g. "api/1 (5efd2cb8-d73b-4e45-6df4-58f5dd5ec2ec) is not responding" + InstanceGroup string `json:"instance_group"` // e.g. "database" Data interface{} Resolutions []ProblemResolution @@ -33,8 +34,8 @@ type ProblemResolution struct { } type ProblemAnswer struct { - ProblemID int - Resolution ProblemResolution + ProblemID int `yaml:"problem_id"` + Resolution ProblemResolution `yaml:"resolution"` } func (d DeploymentImpl) ScanForProblems() ([]Problem, error) { diff --git a/director/problems_test.go b/director/problems_test.go index 95633a568..6668df010 100644 --- a/director/problems_test.go +++ b/director/problems_test.go @@ -52,6 +52,7 @@ var _ = Describe("Director", func() { "id": 4, "type": "unresponsive_agent", "description": "desc1", + "instance_group": "diego_cell", "resolutions": [ {"name": "Skip for now", "plan": "ignore"}, {"name": "Reboot VM", "plan": "reboot_vm"} @@ -61,6 +62,7 @@ var _ = Describe("Director", func() { "id": 5, "type": "unresponsive_agent", "description": "desc2", + "instance_group": "router", "resolutions": [ {"name": "Skip for now", "plan": "ignore"} ] @@ -78,6 +80,7 @@ var _ = Describe("Director", func() { Expect(problem0.ID).To(Equal(4)) Expect(problem0.Type).To(Equal("unresponsive_agent")) Expect(problem0.Description).To(Equal("desc1")) + Expect(problem0.InstanceGroup).To(Equal("diego_cell")) problem0Resolutions := problem0.Resolutions Expect(len(problem0Resolutions)).To(Equal(2)) Expect(*problem0Resolutions[0].Name).To(Equal("Skip for now")) @@ -89,6 +92,7 @@ var _ = Describe("Director", func() { Expect(problem1.ID).To(Equal(5)) Expect(problem1.Type).To(Equal("unresponsive_agent")) Expect(problem1.Description).To(Equal("desc2")) + Expect(problem1.InstanceGroup).To(Equal("router")) problem1Resolutions := problem1.Resolutions Expect(len(problem1Resolutions)).To(Equal(1)) Expect(*problem1Resolutions[0].Name).To(Equal("Skip for now")) From cecc52cd3408eb71592828ad710c96ebcfa12091 Mon Sep 17 00:00:00 2001 From: Chris Selzo Date: Mon, 3 Jul 2023 11:11:24 -0700 Subject: [PATCH 2/5] Recovery plans only store the "name" of a resolution The "plan" is not necessary for resolving problems via the director [#185483613] Authored-by: Chris Selzo --- cmd/create_recovery_plan.go | 12 ++++++------ cmd/create_recovery_plan_test.go | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/create_recovery_plan.go b/cmd/create_recovery_plan.go index 3c34fb293..5e681a445 100644 --- a/cmd/create_recovery_plan.go +++ b/cmd/create_recovery_plan.go @@ -17,9 +17,9 @@ import ( ) type InstanceGroupPlan struct { - Name string `yaml:"name"` - MaxInFlight string `yaml:"max_in_flight,omitempty"` - PlannedResolutions map[string]boshdir.ProblemResolution `yaml:"planned_resolutions"` + Name string `yaml:"name"` + MaxInFlight string `yaml:"max_in_flight,omitempty"` + PlannedResolutions map[string]string `yaml:"planned_resolutions"` } type RecoveryPlan struct { @@ -80,10 +80,10 @@ func sortedInstanceGroups(problemsByInstanceGroup map[string][]boshdir.Problem) return instanceGroups } -func (c CreateRecoveryPlanCmd) processProblemsByType(problems []boshdir.Problem) (map[string]boshdir.ProblemResolution, error) { +func (c CreateRecoveryPlanCmd) processProblemsByType(problems []boshdir.Problem) (map[string]string, error) { problemsByType := mapProblemsByTrait(problems, func(p boshdir.Problem) string { return p.Type }) - resolutions := make(map[string]boshdir.ProblemResolution) + resolutions := make(map[string]string) for problemType, problemsForType := range problemsByType { c.printProblemTable(problemType, problemsForType) @@ -97,7 +97,7 @@ func (c CreateRecoveryPlanCmd) processProblemsByType(problems []boshdir.Problem) return nil, err } - resolutions[problemType] = problemsForType[0].Resolutions[chosenIndex] + resolutions[problemType] = *problemsForType[0].Resolutions[chosenIndex].Name } return resolutions, nil diff --git a/cmd/create_recovery_plan_test.go b/cmd/create_recovery_plan_test.go index 4eae23eae..f01365c73 100644 --- a/cmd/create_recovery_plan_test.go +++ b/cmd/create_recovery_plan_test.go @@ -240,12 +240,12 @@ var _ = Describe("CreateRecoveryPlanCmd", func() { Expect(actualPlan.InstanceGroupsPlan[0].Name).To(Equal("diego_cell")) Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveLen(1)) - Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveKeyWithValue("unresponsive_agent", skipResolution)) + Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveKeyWithValue("unresponsive_agent", *skipResolution.Name)) Expect(actualPlan.InstanceGroupsPlan[1].Name).To(Equal("router")) Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveLen(2)) - Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("missing_vm", recreateResolution)) - Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("mount_info_mismatch", reattachDiskAndRebootResolution)) + Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("missing_vm", *recreateResolution.Name)) + Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("mount_info_mismatch", *reattachDiskAndRebootResolution.Name)) }) It("returns an error if asking fails", func() { From c13b006a5f3a388730b9fcc390175d88b6f59f6b Mon Sep 17 00:00:00 2001 From: Chris Selzo Date: Mon, 3 Jul 2023 13:55:21 -0700 Subject: [PATCH 3/5] Add ability to override `max_in_flight` for `bosh create-recovery-plan` The current value of `max_in_flight` is fetched from the director and used as a default. If the value does not change from the default, no `max_in_flight_override` is written to the recovery plan. [#185483613] Authored-by: Chris Selzo --- cmd/create_recovery_plan.go | 77 +++++++++++++++++++++++++++++--- cmd/create_recovery_plan_test.go | 30 +++++++++++++ 2 files changed, 102 insertions(+), 5 deletions(-) diff --git a/cmd/create_recovery_plan.go b/cmd/create_recovery_plan.go index 5e681a445..52d5201b4 100644 --- a/cmd/create_recovery_plan.go +++ b/cmd/create_recovery_plan.go @@ -3,6 +3,7 @@ package cmd import ( "fmt" "sort" + "strconv" "gopkg.in/yaml.v2" @@ -17,9 +18,9 @@ import ( ) type InstanceGroupPlan struct { - Name string `yaml:"name"` - MaxInFlight string `yaml:"max_in_flight,omitempty"` - PlannedResolutions map[string]string `yaml:"planned_resolutions"` + Name string `yaml:"name"` + MaxInFlightOverride string `yaml:"max_in_flight_override,omitempty"` + PlannedResolutions map[string]string `yaml:"planned_resolutions"` } type RecoveryPlan struct { @@ -47,6 +48,11 @@ func (c CreateRecoveryPlanCmd) Run(opts CreateRecoveryPlanOpts) error { return nil } + maxInFlightByInstanceGroup, err := c.getMaxInFlightByInstanceGroup() + if err != nil { + return err + } + var plan RecoveryPlan for _, instanceGroup := range sortedInstanceGroups(problemsByInstanceGroup) { c.ui.PrintLinef("Instance Group '%s'\n", instanceGroup) @@ -56,9 +62,24 @@ func (c CreateRecoveryPlanCmd) Run(opts CreateRecoveryPlanOpts) error { return err } + instanceGroupCurrentMaxInFlight := maxInFlightByInstanceGroup[instanceGroup] + var instanceGroupMaxInFlightOverride string + if c.ui.AskForConfirmationWithLabel( + fmt.Sprintf("Override current max_in_flight value of '%s'?", instanceGroupCurrentMaxInFlight), + ) == nil { + instanceGroupMaxInFlightOverride, err = c.ui.AskForTextWithDefaultValue( + fmt.Sprintf("max_in_flight override for '%s'", instanceGroup), + instanceGroupCurrentMaxInFlight, + ) + if err != nil { + return err + } + } + plan.InstanceGroupsPlan = append(plan.InstanceGroupsPlan, InstanceGroupPlan{ - Name: instanceGroup, - PlannedResolutions: instanceGroupResolutions, + Name: instanceGroup, + MaxInFlightOverride: instanceGroupMaxInFlightOverride, + PlannedResolutions: instanceGroupResolutions, }) } @@ -70,6 +91,52 @@ func (c CreateRecoveryPlanCmd) Run(opts CreateRecoveryPlanOpts) error { return c.fs.WriteFile(opts.Args.RecoveryPlan.ExpandedPath, bytes) } +type updateInstanceGroup struct { + Name string `yaml:"name"` + Update map[string]interface{} `yaml:"update"` +} + +type updateManifest struct { + InstanceGroups []updateInstanceGroup `yaml:"instance_groups"` + Update map[string]interface{} `yaml:"update"` +} + +func (c CreateRecoveryPlanCmd) getMaxInFlightByInstanceGroup() (map[string]string, error) { + rawManifest, err := c.deployment.Manifest() + if err != nil { + return nil, err + } + + var updateManifest updateManifest + err = yaml.Unmarshal([]byte(rawManifest), &updateManifest) + if err != nil { + return nil, err + } + + globalMaxInFlight := updateManifest.Update["max_in_flight"] + flightMap := make(map[string]string) + for _, instanceGroup := range updateManifest.InstanceGroups { + groupMaxInFlight := instanceGroup.Update["max_in_flight"] + if groupMaxInFlight == nil { + groupMaxInFlight = globalMaxInFlight + } + flightMap[instanceGroup.Name] = ensureString(groupMaxInFlight) + } + + return flightMap, nil +} + +func ensureString(i interface{}) string { + switch v := i.(type) { + case int: + return strconv.Itoa(v) + case string: + return v + } + + return i.(string) +} + func sortedInstanceGroups(problemsByInstanceGroup map[string][]boshdir.Problem) []string { var instanceGroups []string for k := range problemsByInstanceGroup { diff --git a/cmd/create_recovery_plan_test.go b/cmd/create_recovery_plan_test.go index f01365c73..d0e6b06dd 100644 --- a/cmd/create_recovery_plan_test.go +++ b/cmd/create_recovery_plan_test.go @@ -152,6 +152,11 @@ var _ = Describe("CreateRecoveryPlanCmd", func() { deployment.ScanForProblemsReturns(severalProbs, nil) ui.AskedChoiceChosens = []int{0, 1, 2} ui.AskedChoiceErrs = []error{nil, nil, nil} + ui.AskedConfirmationErr = nil + ui.AskedText = []fakeui.Answer{ + {Text: "10", Error: nil}, + {Text: "50%", Error: nil}, + } }) It("shows problems by instance group and type", func() { @@ -239,10 +244,12 @@ var _ = Describe("CreateRecoveryPlanCmd", func() { Expect(actualPlan.InstanceGroupsPlan).To(HaveLen(2)) Expect(actualPlan.InstanceGroupsPlan[0].Name).To(Equal("diego_cell")) + Expect(actualPlan.InstanceGroupsPlan[0].MaxInFlightOverride).To(Equal("10")) Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveLen(1)) Expect(actualPlan.InstanceGroupsPlan[0].PlannedResolutions).To(HaveKeyWithValue("unresponsive_agent", *skipResolution.Name)) Expect(actualPlan.InstanceGroupsPlan[1].Name).To(Equal("router")) + Expect(actualPlan.InstanceGroupsPlan[1].MaxInFlightOverride).To(Equal("50%")) Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveLen(2)) Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("missing_vm", *recreateResolution.Name)) Expect(actualPlan.InstanceGroupsPlan[1].PlannedResolutions).To(HaveKeyWithValue("mount_info_mismatch", *reattachDiskAndRebootResolution.Name)) @@ -256,6 +263,29 @@ var _ = Describe("CreateRecoveryPlanCmd", func() { Expect(err.Error()).To(ContainSubstring("fake-err")) }) + It("does not override max_in_flight if not confirmed", func() { + ui.AskedConfirmationErr = errors.New("fake-err") + + err := act() + Expect(err).ToNot(HaveOccurred()) + + Expect(fakeFS.WriteFileCallCount).To(Equal(1)) + Expect(fakeFS.FileExists("/tmp/foo.yml")).To(BeTrue()) + bytes, err := fakeFS.ReadFile("/tmp/foo.yml") + Expect(err).ToNot(HaveOccurred()) + + var actualPlan RecoveryPlan + Expect(yaml.Unmarshal(bytes, &actualPlan)).ToNot(HaveOccurred()) + + Expect(actualPlan.InstanceGroupsPlan).To(HaveLen(2)) + + Expect(actualPlan.InstanceGroupsPlan[0].Name).To(Equal("diego_cell")) + Expect(actualPlan.InstanceGroupsPlan[0].MaxInFlightOverride).To(BeEmpty()) + + Expect(actualPlan.InstanceGroupsPlan[1].Name).To(Equal("router")) + Expect(actualPlan.InstanceGroupsPlan[1].MaxInFlightOverride).To(BeEmpty()) + }) + Context("director does not return instance group", func() { BeforeEach(func() { grouplessProbs := severalProbs From 85dafdda6e48e965c5b6640d903627ed31e6627e Mon Sep 17 00:00:00 2001 From: Chris Selzo Date: Wed, 5 Jul 2023 11:01:28 -0700 Subject: [PATCH 4/5] Use `fmt.Sprintf` instead of a type switch to convert to string The `%v` format string will do the right thing when the `max_in_flight` value is either a string or an integer. (of course, this just moves the type switch into the standard library) [#185483613] Authored-by: Chris Selzo --- cmd/create_recovery_plan.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/cmd/create_recovery_plan.go b/cmd/create_recovery_plan.go index 52d5201b4..c2d5f26b6 100644 --- a/cmd/create_recovery_plan.go +++ b/cmd/create_recovery_plan.go @@ -3,7 +3,6 @@ package cmd import ( "fmt" "sort" - "strconv" "gopkg.in/yaml.v2" @@ -120,23 +119,12 @@ func (c CreateRecoveryPlanCmd) getMaxInFlightByInstanceGroup() (map[string]strin if groupMaxInFlight == nil { groupMaxInFlight = globalMaxInFlight } - flightMap[instanceGroup.Name] = ensureString(groupMaxInFlight) + flightMap[instanceGroup.Name] = fmt.Sprintf("%v", groupMaxInFlight) } return flightMap, nil } -func ensureString(i interface{}) string { - switch v := i.(type) { - case int: - return strconv.Itoa(v) - case string: - return v - } - - return i.(string) -} - func sortedInstanceGroups(problemsByInstanceGroup map[string][]boshdir.Problem) []string { var instanceGroups []string for k := range problemsByInstanceGroup { From dd7ff536a675c61d4c86552035b46e27d67e22a0 Mon Sep 17 00:00:00 2001 From: Chris Selzo Date: Wed, 5 Jul 2023 13:06:52 -0700 Subject: [PATCH 5/5] Present problem types in a consistent order. Go ranges over map keys in a random order by design. The `FakeUI` test framework requires you to give answers to ui prompts in a specific order, and does not allow you to react to the prompts with a given input. [#185483613] Authored-by: Chris Selzo --- cmd/create_recovery_plan.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cmd/create_recovery_plan.go b/cmd/create_recovery_plan.go index c2d5f26b6..396e36c3e 100644 --- a/cmd/create_recovery_plan.go +++ b/cmd/create_recovery_plan.go @@ -53,7 +53,7 @@ func (c CreateRecoveryPlanCmd) Run(opts CreateRecoveryPlanOpts) error { } var plan RecoveryPlan - for _, instanceGroup := range sortedInstanceGroups(problemsByInstanceGroup) { + for _, instanceGroup := range sortedMapKeys(problemsByInstanceGroup) { c.ui.PrintLinef("Instance Group '%s'\n", instanceGroup) instanceGroupResolutions, err := c.processProblemsByType(problemsByInstanceGroup[instanceGroup]) @@ -125,21 +125,22 @@ func (c CreateRecoveryPlanCmd) getMaxInFlightByInstanceGroup() (map[string]strin return flightMap, nil } -func sortedInstanceGroups(problemsByInstanceGroup map[string][]boshdir.Problem) []string { - var instanceGroups []string - for k := range problemsByInstanceGroup { - instanceGroups = append(instanceGroups, k) +func sortedMapKeys(problemMap map[string][]boshdir.Problem) []string { + var keys []string + for k := range problemMap { + keys = append(keys, k) } - sort.Strings(instanceGroups) + sort.Strings(keys) - return instanceGroups + return keys } func (c CreateRecoveryPlanCmd) processProblemsByType(problems []boshdir.Problem) (map[string]string, error) { problemsByType := mapProblemsByTrait(problems, func(p boshdir.Problem) string { return p.Type }) resolutions := make(map[string]string) - for problemType, problemsForType := range problemsByType { + for _, problemType := range sortedMapKeys(problemsByType) { + problemsForType := problemsByType[problemType] c.printProblemTable(problemType, problemsForType) var opts []string