-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MGMT-17217: Add AlarmDictionary to resourceTypes API
This PR includes the following: * Added alarms/definitions.json file. * Added AlarmDefinitionHandler that returns objects from definitions.json (with mappings according to spec) * Created AlarmDictionary for each ResourceType * Added to ResourceTypeHandler * Filter AlarmDefinitions according to ResourceClass E.g. for Nodes -> filter by 'COMPUTE'
- Loading branch information
1 parent
b64dba1
commit 2540dcc
Showing
12 changed files
with
644 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
[ | ||
{ | ||
"alarmDefinitionId": "Watchdog", | ||
"alarmName": "An alert that should always be firing to certify that Alertmanager is working properly.", | ||
"alarmDescription": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n" | ||
}, | ||
{ | ||
"alarmDefinitionId": "UpdateAvailable", | ||
"alarmName": "Your upstream update recommendation service recommends you update your cluster.", | ||
"alarmDescription": "For more information refer to 'oc adm upgrade'" | ||
}, | ||
{ | ||
"alarmDefinitionId": "ClusterNotUpgradeable", | ||
"alarmName": "One or more cluster operators have been blocking minor version cluster upgrades for at least an hour.", | ||
"alarmDescription": "In most cases, you will still be able to apply patch releases. Reason AdminAckRequired.", | ||
"proposedRepairActions": "For more information refer to 'oc adm upgrade' or https://console-openshift-console.apps.<cluster_domain>/settings/cluster/." | ||
}, | ||
{ | ||
"alarmDefinitionId": "AlertmanagerReceiversNotConfigured", | ||
"alarmName": "Receivers (notification integrations) are not configured on Alertmanager", | ||
"alarmDescription": "Alerts are not configured to be sent to a notification system, meaning that you may not be notified in a timely fashion when important failures occur.", | ||
"proposedRepairActions": "Check the OpenShift documentation to learn how to configure notifications with Alertmanager." | ||
}, | ||
{ | ||
"alarmDefinitionId": "HighOverallControlPlaneMemory", | ||
"alarmName": "Memory utilization across all control plane nodes is high, and could impact responsiveness and stability.", | ||
"alarmDescription": "Given three control plane nodes, the overall memory utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the kube-apiserver and etcd may be slow to respond.", | ||
"proposedRepairActions": "To fix this, increase memory of the control plane nodes." | ||
}, | ||
{ | ||
"alarmDefinitionId": "NodeClockNotSynchronising", | ||
"alarmName": "Clock not synchronising.", | ||
"alarmDescription": "Clock on host is not synchronising. Ensure NTP is configured on this host.", | ||
"alarmAdditionalFields": { | ||
"resourceClass": "COMPUTE" | ||
} | ||
}, | ||
{ | ||
"alarmDefinitionId": "NodeClockSkewDetected", | ||
"alarmName": "Clock skew detected.", | ||
"alarmDescription": "Clock is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.", | ||
"alarmAdditionalFields": { | ||
"resourceClass": "COMPUTE" | ||
} | ||
}, | ||
{ | ||
"alarmDefinitionId": "IngressWithoutClassName", | ||
"alarmName": "Ingress without IngressClassName for 1 day", | ||
"alarmDescription": "This alert fires when there is an Ingress with an unset IngressClassName for longer than one day.", | ||
"alarmAdditionalFields": { | ||
"resourceClass": "COMPUTE" | ||
} | ||
}, | ||
{ | ||
"alarmDefinitionId": "NodeMemoryHighUtilization", | ||
"alarmName": "Host is running out of memory.", | ||
"alarmDescription": "Memory is filling up, has been above memory high utilization threshold for the last 15 minutes", | ||
"alarmAdditionalFields": { | ||
"resourceClass": "COMPUTE" | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package files | ||
|
||
import "embed" | ||
|
||
var ( | ||
//go:embed alarms | ||
Alarms embed.FS | ||
) | ||
|
||
// AlarmDictionaryVersion is the version label of the alarm dictionary.
const AlarmDictionaryVersion = "v1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
/* | ||
Copyright 2023 Red Hat Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in | ||
compliance with the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software distributed under the License is | ||
distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
implied. See the License for the specific language governing permissions and limitations under the | ||
License. | ||
*/ | ||
|
||
package service | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"errors" | ||
"fmt" | ||
"log/slog" | ||
|
||
jsoniter "github.com/json-iterator/go" | ||
|
||
"github.com/openshift-kni/oran-o2ims/internal/data" | ||
"github.com/openshift-kni/oran-o2ims/internal/files" | ||
"github.com/openshift-kni/oran-o2ims/internal/k8s" | ||
) | ||
|
||
// Locations of the alarm data files inside the files.Alarms embedded file system.
const (
	// alarmsDefinitionsPath is the JSON document read by the alarm definition handler.
	alarmsDefinitionsPath = "alarms/definitions.json"

	// alarmsProbableCausesPath is not referenced by the alarm definition handler.
	// NOTE(review): presumably used by a probable causes handler elsewhere in this package; confirm.
	alarmsProbableCausesPath = "alarms/probable_causes.json"
)
|
||
// AlarmDefinitionHandlerBuilder collects the settings required to construct an
// AlarmDefinitionHandler. Obtain one with NewAlarmDefinitionHandler rather than
// instantiating the type directly.
type AlarmDefinitionHandlerBuilder struct {
	// logger is handed over to the handler by Build; it must be set with SetLogger.
	logger *slog.Logger
}
|
||
// AlarmDefinitionHandler knows how to respond to requests to list alarms. Don't create | ||
// instances of this type directly, use the NewAlarmDefinitionHandler function instead. | ||
type AlarmDefinitionHandler struct { | ||
logger *slog.Logger | ||
jsonAPI jsoniter.API | ||
} | ||
|
||
// NewAlarmDefinitionHandler creates a builder that can then be used to configure and create a | ||
// handler for the collection of alarms. | ||
func NewAlarmDefinitionHandler() *AlarmDefinitionHandlerBuilder { | ||
return &AlarmDefinitionHandlerBuilder{} | ||
} | ||
|
||
// SetLogger sets the logger that the handler will use to write to the log. This is mandatory. | ||
func (b *AlarmDefinitionHandlerBuilder) SetLogger( | ||
value *slog.Logger) *AlarmDefinitionHandlerBuilder { | ||
b.logger = value | ||
return b | ||
} | ||
|
||
// Build uses the data stored in the builder to create and configure a new handler. | ||
func (b *AlarmDefinitionHandlerBuilder) Build() ( | ||
result *AlarmDefinitionHandler, err error) { | ||
// Check parameters: | ||
if b.logger == nil { | ||
err = errors.New("logger is mandatory") | ||
return | ||
} | ||
|
||
// Prepare the JSON iterator API: | ||
jsonConfig := jsoniter.Config{ | ||
IndentionStep: 2, | ||
} | ||
jsonAPI := jsonConfig.Froze() | ||
|
||
// Create and populate the object: | ||
result = &AlarmDefinitionHandler{ | ||
logger: b.logger, | ||
jsonAPI: jsonAPI, | ||
} | ||
return | ||
} | ||
|
||
// List is part of the implementation of the collection handler interface. | ||
func (h *AlarmDefinitionHandler) List(ctx context.Context, | ||
request *ListRequest) (response *ListResponse, err error) { | ||
|
||
// Transform the items into what we need: | ||
definitions, err := h.fetchItems() | ||
if err != nil { | ||
return | ||
} | ||
|
||
// Return the result: | ||
response = &ListResponse{ | ||
Items: definitions, | ||
} | ||
return | ||
} | ||
|
||
// Get is part of the implementation of the object handler interface. | ||
func (h *AlarmDefinitionHandler) Get(ctx context.Context, | ||
request *GetRequest) (response *GetResponse, err error) { | ||
|
||
// Fetch the object: | ||
definition, err := h.fetchItem(ctx, request.Variables[0]) | ||
if err != nil { | ||
return | ||
} | ||
|
||
// Return the result: | ||
response = &GetResponse{ | ||
Object: definition, | ||
} | ||
|
||
return | ||
} | ||
|
||
func (h *AlarmDefinitionHandler) fetchItems() (result data.Stream, err error) { | ||
jsonFile, err := files.Alarms.ReadFile(alarmsDefinitionsPath) | ||
if err != nil { | ||
return nil, err | ||
} | ||
reader := bytes.NewReader(jsonFile) | ||
|
||
definitions, err := k8s.NewStream(). | ||
SetLogger(h.logger). | ||
SetReader(reader). | ||
Build() | ||
|
||
// Transform to AlarmDefinitions objects | ||
result = data.Map(definitions, h.mapItem) | ||
|
||
return | ||
} | ||
|
||
func (h *AlarmDefinitionHandler) fetchItem(ctx context.Context, | ||
id string) (probableCause data.Object, err error) { | ||
|
||
probableCauses, err := h.fetchItems() | ||
if err != nil { | ||
return | ||
} | ||
|
||
// Filter by ID | ||
probableCauses = data.Select( | ||
probableCauses, | ||
func(ctx context.Context, item data.Object) (result bool, err error) { | ||
result = item["probableCauseId"] == id | ||
return | ||
}, | ||
) | ||
|
||
// Get first result | ||
probableCause, err = probableCauses.Next(ctx) | ||
|
||
return | ||
} | ||
|
||
// Map Definition to an O2 AlarmDefinitions object. | ||
func (h *AlarmDefinitionHandler) mapItem(ctx context.Context, | ||
from data.Object) (to data.Object, err error) { | ||
|
||
alarmDefinitionId, err := data.GetString(from, "alarmDefinitionId") | ||
if err != nil { | ||
return | ||
} | ||
|
||
alarmName, err := data.GetString(from, "alarmName") | ||
if err != nil { | ||
return | ||
} | ||
|
||
alarmDescription, err := data.GetString(from, "alarmDescription") | ||
if err != nil { | ||
return | ||
} | ||
|
||
proposedRepairActions, err := data.GetString(from, "proposedRepairActions") | ||
if err != nil { | ||
// Property is optional | ||
h.logger.Debug(fmt.Sprintf("'%s' is missing from alarm definition (optional)", "proposedRepairActions")) | ||
} | ||
|
||
alarmAdditionalFields, err := data.GetObj(from, "alarmAdditionalFields") | ||
if err != nil { | ||
// Property is optional | ||
h.logger.Debug(fmt.Sprintf("'%s' is missing from alarm definition (optional)", "alarmAdditionalFields")) | ||
err = nil | ||
} | ||
|
||
to = data.Object{ | ||
"alarmDefinitionId": alarmDefinitionId, | ||
"alarmName": alarmName, | ||
"alarmDescription": alarmDescription, | ||
"proposedRepairActions": proposedRepairActions, | ||
"managementInterfaceId": "O2IMS", | ||
"pkNotificationField": "alarmDefinitionID", | ||
"alarmAdditionalFields": alarmAdditionalFields, | ||
} | ||
|
||
return | ||
} |
Oops, something went wrong.