Skip to content

Commit

Permalink
Merge pull request #362 from atc0005/i341-extend-metrics-for-check-da…
Browse files Browse the repository at this point in the history
…tastore-plugin

Extend metrics for `check_vmware_datastore` plugin
  • Loading branch information
atc0005 authored Oct 5, 2021
2 parents 41e1f8c + c4a9d9a commit bec1c0e
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 39 deletions.
63 changes: 41 additions & 22 deletions cmd/check_vmware_datastore/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"context"
"errors"
"fmt"
"time"

"github.com/atc0005/go-nagios"
"github.com/vmware/govmomi/units"
Expand All @@ -23,6 +24,10 @@ import (

func main() {

// Start the timer. We'll use this to emit the plugin runtime as a
// performance data metric.
pluginStart := time.Now()

// Set initial "state" as valid, adjust as we go.
var nagiosExitState = nagios.ExitState{
LastError: nil,
Expand Down Expand Up @@ -157,11 +162,30 @@ func main() {
log.Debug().Msg("Successfully retrieved datastore by name")

log.Debug().Msg("Generating datastore usage summary")
dsUsage := vsphere.NewDatastoreUsageSummary(
dsUsage, dsUsageErr := vsphere.NewDatastoreUsageSummary(
ctx,
c.Client,
datastore,
cfg.DatastoreUsageCritical,
cfg.DatastoreUsageWarning,
)
if dsUsageErr != nil {
log.Error().Err(dsUsageErr).Msg(
"error generating datastore usage summary",
)

nagiosExitState.LastError = dsUsageErr
nagiosExitState.ServiceOutput = fmt.Sprintf(
"%s: Error generating summary for datastore %q",
nagios.StateCRITICALLabel,
cfg.DatastoreName,
)
nagiosExitState.ExitStatusCode = nagios.StateCRITICALExitCode

return
}

log.Debug().Msg("Successfully generated datastore usage summary")

log.Debug().
Str("datastore_name", datastore.Name).
Expand All @@ -174,27 +198,13 @@ func main() {
Int("datastore_warning_threshold", dsUsage.WarningThreshold).
Msg("Datastore usage summary")

log.Debug().Msg("Retrieving VMs for datastore")
dsVMs, dsVMsFetchErr := vsphere.GetVMsFromDatastore(ctx, c.Client, datastore, true)
if dsVMsFetchErr != nil {
log.Error().Err(dsFetchErr).Msg(
"error retrieving VirtualMachines from datastore",
)

nagiosExitState.LastError = dsVMsFetchErr
nagiosExitState.ServiceOutput = fmt.Sprintf(
"%s: Error retrieving VirtualMachines from datastore %q",
nagios.StateCRITICALLabel,
cfg.DatastoreName,
)
nagiosExitState.ExitStatusCode = nagios.StateCRITICALExitCode

return
}

log.Debug().Msg("Compiling Performance Data details")

pd := []nagios.PerformanceData{
{
Label: "time",
Value: fmt.Sprintf("%dms", time.Since(pluginStart).Milliseconds()),
},
{
Label: "datastore_usage",
Value: fmt.Sprintf("%.2f", dsUsage.StorageUsedPercent),
Expand All @@ -209,6 +219,18 @@ func main() {
Max: fmt.Sprintf("%d", dsUsage.StorageTotal),
Min: "0",
},
{
Label: "vms",
Value: fmt.Sprintf("%d", len(dsUsage.VMs)),
},
{
Label: "vms_powered_off",
Value: fmt.Sprintf("%d", dsUsage.VMs.NumVMsPoweredOff()),
},
{
Label: "vms_powered_on",
Value: fmt.Sprintf("%d", dsUsage.VMs.NumVMsPoweredOn()),
},
}

log.Debug().Msg("Evaluating datastore usage state")
Expand All @@ -231,7 +253,6 @@ func main() {

nagiosExitState.LongServiceOutput = vsphere.DatastoreUsageReport(
c.Client,
dsVMs,
dsUsage,
)

Expand Down Expand Up @@ -263,7 +284,6 @@ func main() {

nagiosExitState.LongServiceOutput = vsphere.DatastoreUsageReport(
c.Client,
dsVMs,
dsUsage,
)

Expand All @@ -288,7 +308,6 @@ func main() {

nagiosExitState.LongServiceOutput = vsphere.DatastoreUsageReport(
c.Client,
dsVMs,
dsUsage,
)

Expand Down
116 changes: 99 additions & 17 deletions internal/vsphere/datastores.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,33 @@ var ErrDatastoreUsageThresholdCrossed = errors.New("datastore usage exceeds spec
// DatastoreIDToNameIndex maps a Datastore's ID value to its name.
type DatastoreIDToNameIndex map[string]string

// DatastoreUsageSummary tracks usage details for a specific Datastore
// DatastoreVMs is a collection of DatastoreVM summaries, one entry per
// (visible) VirtualMachine residing on a specific Datastore. Methods on this
// type report how many of those VirtualMachines are powered on or off.
type DatastoreVMs []DatastoreVM

// DatastoreVM is a summary of details for a VirtualMachine found on a
// specific datastore. The size and usage fields hold preformatted text
// intended for display rather than raw numeric values.
type DatastoreVM struct {

// Name is the display name of the VirtualMachine.
Name string

// VMSize is the human readable, formatted size of the VirtualMachine
// (the storage it uses on the Datastore rendered as a byte size string).
VMSize string

// DatastoreUsage is the human readable, formatted percentage of the
// Datastore capacity consumed by this VirtualMachine, including the
// trailing percent sign.
DatastoreUsage string

// PowerState tracks the current power state for a VirtualMachine.
PowerState types.VirtualMachinePowerState

// DatastoreMOID is the managed object reference (MoRef) for the Datastore
// where this VirtualMachine resides.
DatastoreMOID types.ManagedObjectReference
}

// DatastoreUsageSummary tracks usage details for a specific Datastore.
type DatastoreUsageSummary struct {
Datastore mo.Datastore
StorageRemainingPercent float64
Expand All @@ -40,21 +66,67 @@ type DatastoreUsageSummary struct {
StorageRemaining int64
CriticalThreshold int
WarningThreshold int
VMs DatastoreVMs
}

// DatastoreVMsSummary evaluates the provided Datastore and collection of
// VirtualMachines and returns a basic human readable / formatted summary of
// each VirtualMachine.
//
// For every VirtualMachine the committed and uncommitted storage recorded
// against the given Datastore is totaled and expressed both as a formatted
// byte size and as a percentage of the Datastore capacity. The returned
// collection has one entry per provided VirtualMachine, in the same order.
//
// A VirtualMachine without storage details is reported with a size of zero,
// and a Datastore reporting zero capacity yields a usage of 0.00% instead of
// a NaN or Inf value.
func DatastoreVMsSummary(ds mo.Datastore, vms []mo.VirtualMachine) DatastoreVMs {

	// Both values are loop-invariant; resolve them once.
	dsRef := ds.Reference()
	dsCapacity := float64(ds.Summary.Capacity)

	datastoreVMs := make(DatastoreVMs, 0, len(vms))

	for _, vm := range vms {

		// Storage is a pointer and is nil when the property was not
		// retrieved (or is unavailable) for this VirtualMachine.
		var vmStorageUsed int64
		if vm.Storage != nil {
			for _, usage := range vm.Storage.PerDatastoreUsage {
				if usage.Datastore == dsRef {
					vmStorageUsed += usage.Committed + usage.Uncommitted
				}
			}
		}

		// Guard the division so that an empty/inaccessible Datastore does
		// not produce "NaN%" or "+Inf%" in the formatted output.
		var vmPercentOfDSUsed float64
		if dsCapacity > 0 {
			vmPercentOfDSUsed = float64(vmStorageUsed) / dsCapacity * 100
		}

		datastoreVMs = append(datastoreVMs, DatastoreVM{
			Name:           vm.Name,
			VMSize:         units.ByteSize(vmStorageUsed).String(),
			DatastoreUsage: fmt.Sprintf("%2.2f%%", vmPercentOfDSUsed),
			PowerState:     vm.Runtime.PowerState,
			DatastoreMOID:  dsRef,
		})
	}

	return datastoreVMs

}

// NewDatastoreUsageSummary receives a Datastore and generates summary
// information used to determine if usage levels have crossed user-specified
// thresholds.
func NewDatastoreUsageSummary(ds mo.Datastore, criticalThreshold int, warningThreshold int) DatastoreUsageSummary {
// func NewDatastoreUsageSummary(ds mo.Datastore, dsVMs []mo.VirtualMachine, criticalThreshold int, warningThreshold int) DatastoreUsageSummary {
func NewDatastoreUsageSummary(
ctx context.Context,
c *vim25.Client,
ds mo.Datastore,
criticalThreshold int,
warningThreshold int,
) (DatastoreUsageSummary, error) {

storageRemainingPercentage := float64(ds.Summary.FreeSpace) / float64(ds.Summary.Capacity) * 100
storageUsedPercentage := 100 - storageRemainingPercentage
storageRemaining := ds.Summary.FreeSpace
storageTotal := ds.Summary.Capacity
storageUsed := storageTotal - storageRemaining

dsVMs, err := GetVMsFromDatastore(ctx, c, ds, true)
if err != nil {
return DatastoreUsageSummary{}, err
}

dsUsage := DatastoreUsageSummary{
Datastore: ds,
VMs: DatastoreVMsSummary(ds, dsVMs),
StorageRemainingPercent: storageRemainingPercentage,
StorageUsedPercent: storageUsedPercentage,
StorageTotal: storageTotal,
Expand All @@ -64,7 +136,7 @@ func NewDatastoreUsageSummary(ds mo.Datastore, criticalThreshold int, warningThr
WarningThreshold: warningThreshold,
}

return dsUsage
return dsUsage, nil

}

Expand All @@ -81,6 +153,26 @@ func (dus DatastoreUsageSummary) IsCriticalState() bool {
return dus.StorageUsedPercent >= float64(dus.CriticalThreshold)
}

// NumVMsPoweredOn reports the number of VirtualMachines in the collection
// whose power state is powered on.
func (dsVMs DatastoreVMs) NumVMsPoweredOn() int {
	poweredOn := 0

	for i := range dsVMs {
		// Anything other than an explicit powered on state is not counted.
		if dsVMs[i].PowerState != types.VirtualMachinePowerStatePoweredOn {
			continue
		}
		poweredOn++
	}

	return poweredOn
}

// NumVMsPoweredOff reports the number of VirtualMachines in the collection
// that are not powered on. This covers VirtualMachines which are powered off
// OR suspended, as it is computed as the total minus the powered on count.
func (dsVMs DatastoreVMs) NumVMsPoweredOff() int {
	total := len(dsVMs)
	poweredOn := dsVMs.NumVMsPoweredOn()

	return total - poweredOn
}

// GetDatastores accepts a context, a connected client and a boolean value
// indicating whether a subset of properties per Datastore are retrieved. A
// collection of Datastores with requested properties is returned. If
Expand Down Expand Up @@ -265,7 +357,6 @@ func DatastoreUsageOneLineCheckSummary(
// the web UI or in the body of many notifications.
func DatastoreUsageReport(
c *vim25.Client,
dsVMs []mo.VirtualMachine,
dsUsageSummary DatastoreUsageSummary,
) string {

Expand Down Expand Up @@ -306,22 +397,13 @@ func DatastoreUsageReport(
nagios.CheckOutputEOL,
)

for _, vm := range dsVMs {

var vmStorageUsed int64
for _, usage := range vm.Storage.PerDatastoreUsage {
if usage.Datastore == dsUsageSummary.Datastore.Reference() {
vmStorageUsed += usage.Committed + usage.Uncommitted
}
}

vmPercentOfDSUsed := float64(vmStorageUsed) / float64(dsUsageSummary.StorageTotal) * 100
for _, vm := range dsUsageSummary.VMs {
fmt.Fprintf(
&report,
"* %s [Size: %v, Datastore Usage: %2.2f%%]%s",
"* %s [Size: %s, Datastore Usage: %s]%s",
vm.Name,
units.ByteSize(vmStorageUsed),
vmPercentOfDSUsed,
vm.VMSize,
vm.DatastoreUsage,
nagios.CheckOutputEOL,
)
}
Expand Down

0 comments on commit bec1c0e

Please sign in to comment.