Skip to content

Commit

Permalink
Add instance update and reactivation
Browse files Browse the repository at this point in the history
Adds functionality to perform Bottlerocket version upgrades and
reactivates drained instances after successful upgrade.

[Issue: #9]
  • Loading branch information
Will Moore committed Apr 30, 2021
1 parent 7e60386 commit e755b66
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 33 deletions.
80 changes: 52 additions & 28 deletions updater/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand Down Expand Up @@ -155,7 +156,7 @@ func (u *updater) activateInstance(containerInstance *string) error {
if len(resp.Failures) != 0 {
return fmt.Errorf("Container instance %s failed to activate: %#v", aws.StringValue(containerInstance), resp.Failures)
}
log.Printf("Container instance state changed to ACTIVE")
log.Printf("Container instance %s state changed to ACTIVE", aws.StringValue(containerInstance))
return nil
}

Expand Down Expand Up @@ -209,37 +210,60 @@ func (u *updater) sendCommand(instanceIDs []string, ssmCommand string) (string,
return commandID, nil
}

func (u *updater) checkSSMCommandOutput(commandID string, instanceIDs []string) ([]string, error) {
updateCandidates := make([]string, 0)
for _, v := range instanceIDs {
resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
CommandId: aws.String(commandID),
InstanceId: aws.String(v),
})
if err != nil {
return nil, fmt.Errorf("failed to retrieve command invocation output: %#v", err)
}
func (u *updater) getCommandResult(commandID string, instanceID string) ([]byte, error) {
resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
CommandId: aws.String(commandID),
InstanceId: aws.String(instanceID),
})
if err != nil {
return nil, fmt.Errorf("failed to retrieve command invocation output: %#v", err)
}
commandResults := []byte(aws.StringValue(resp.StandardOutputContent))
return commandResults, nil
}

type updateCheckResult struct {
UpdateState string `json:"update_state"`
}
func isUpdateAvailable(commandOutput []byte) (bool, string) {
type updateCheckResult struct {
UpdateState string `json:"update_state"`
}

var result updateCheckResult
err = json.Unmarshal([]byte(*resp.StandardOutputContent), &result)
if err != nil {
log.Printf("failed to unmarshal command invocation output: %#v", err)
}
log.Println("update_state: ", result)
var updateState updateCheckResult
err := json.Unmarshal(commandOutput, &updateState)
if err != nil {
log.Printf("failed to unmarshal update state: %#v", err)
return false, "no data"
}
return updateState.UpdateState == "Available", ""
}

switch result.UpdateState {
case "Available":
updateCandidates = append(updateCandidates, v)
}
// getActiveVersion unmarshals GetCommandInvocation output to determine the active version of a Bottlerocket instance.
// Takes an SSM Command ID and EC2ID as parameters and returns the active version in use.
func getActiveVersion(commandOutput []byte) (string, error) {
type version struct {
Version string `json:"version"`
}

type image struct {
Image version `json:"image"`
}

if len(updateCandidates) == 0 {
log.Printf("No instances to update")
return nil, nil
type partition struct {
ActivePartition image `json:"active_partition"`
}
return updateCandidates, nil

var activeVersion partition
err := json.Unmarshal(commandOutput, &activeVersion)
if err != nil {
log.Printf("failed to unmarshal command invocation output: %#v", err)
return "", err
}
versionInUse := activeVersion.ActivePartition.Image.Version
return versionInUse, nil
}

// waitUntilOk takes an EC2 ID as a parameter and waits until the specified EC2 instance is in an Ok status.
func (u *updater) waitUntilOk(ec2ID string) error {
return u.ec2.WaitUntilInstanceStatusOk(&ec2.DescribeInstanceStatusInput{
InstanceIds: []*string{aws.String(ec2ID)},
})
}
77 changes: 72 additions & 5 deletions updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand All @@ -22,6 +23,7 @@ type updater struct {
cluster string
ecs ECSAPI
ssm *ssm.SSM
ec2 *ec2.EC2
}

func main() {
Expand Down Expand Up @@ -50,6 +52,7 @@ func _main() error {
cluster: *flagCluster,
ecs: ecs.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
ssm: ssm.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
ec2: ec2.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
}

listedInstances, err := u.listContainerInstances()
Expand Down Expand Up @@ -78,16 +81,22 @@ func _main() error {
return err
}

candidates, err := u.checkSSMCommandOutput(commandID, instances)
if err != nil {
return err
candidates := make([]string, 0)
for _, ec2ID := range instances {
commandOutput, err := u.getCommandResult(commandID, ec2ID)
if err != nil {
return err
}
updateState, _ := isUpdateAvailable(commandOutput)
if updateState {
candidates = append(candidates, ec2ID)
}
}

if len(candidates) == 0 {
log.Printf("No instances to update")
return nil
}
fmt.Println("Instances ready for update: ", candidates)
log.Print("Instances ready for update: ", candidates)

for ec2ID, containerInstance := range ec2IDtoECSARN {
err := u.drain(containerInstance)
Expand All @@ -96,6 +105,64 @@ func _main() error {
continue
}
log.Printf("Instance %s drained", ec2ID)

ec2IDs := []string{ec2ID}
_, err = u.sendCommand(ec2IDs, "apiclient update apply --reboot")
if err != nil {
// TODO add nuanced error checking to determine the type of failure, act accordingly.
log.Printf("%#v", err)
err2 := u.activateInstance(aws.String(containerInstance))
if err2 != nil {
log.Printf("failed to reactivate %s after failure to execute update command. Aborting update operations.", ec2ID)
return err2
}
continue
}

err = u.waitUntilOk(ec2ID)
if err != nil {
return fmt.Errorf("instance %s failed to enter an Ok status after reboot. Aborting update operations: %#v", ec2ID, err)
}

err = u.activateInstance(aws.String(containerInstance))
if err != nil {
log.Printf("instance %s failed to return to ACTIVE after reboot. Aborting update operations.", ec2ID)
return err
}

updateStatus, err := u.sendCommand(ec2IDs, "apiclient update check")
if err != nil {
log.Printf("%#v", err)
continue
}

updateResult, err := u.getCommandResult(updateStatus, ec2ID)
if err != nil {
log.Printf("%#v", err)
continue
}

// TODO version before and after comparison.
updateState, empty := isUpdateAvailable(updateResult)
if updateState {
log.Printf("Instance %s did not update. Manual update advised.", ec2ID)
continue
} else if empty != "" {
log.Printf("Unable to determine update result. Manual verification of %s required", ec2ID)
continue
} else {
log.Printf("Instance %s updated successfully", ec2ID)
}

updatedVersion, err := getActiveVersion(updateResult)
if err != nil {
log.Printf("%#v", err)
}
if len(updatedVersion) != 0 {
log.Printf("Instance %s running Bottlerocket: %s", ec2ID, updatedVersion)
} else {
log.Printf("Unable to verify active version. Manual verification of %s required.", ec2ID)
}
}
return nil
}

0 comments on commit e755b66

Please sign in to comment.