diff --git a/internal/constants/containerinsights.go b/internal/constants/containerinsights.go new file mode 100644 index 0000000000..bfc694ec9b --- /dev/null +++ b/internal/constants/containerinsights.go @@ -0,0 +1,87 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package constants + +const ( + ClusterNameKey = "ClusterName" + NodeNameKey = "NodeName" // Attribute names + InstanceIDKey = "InstanceId" + InstanceTypeKey = "InstanceType" + AutoScalingGroupNameKey = "AutoScalingGroupName" + VersionKey = "Version" + MetricType = "Type" + SourcesKey = "Sources" + GpuDeviceKey = "GpuDevice" + + ClusterQueueNameKey = "ClusterQueue" + ClusterQueueStatusKey = "Status" + ClusterQueueReasonKey = "Reason" + ClusterQueueResourceKey = "Resource" + Flavor = "Flavor" + + GpuUtilization = "gpu_utilization" + GpuMemUtilization = "gpu_memory_utilization" + GpuMemUsed = "gpu_memory_used" + GpuMemTotal = "gpu_memory_total" + GpuTemperature = "gpu_temperature" + GpuPowerDraw = "gpu_power_draw" + GpuUniqueID = "UUID" + + NeuronCoreUtilization = "neuroncore_utilization" + NeuronCoreMemoryUtilizationTotal = "neuroncore_memory_usage_total" + NeuronCoreMemoryUtilizationConstants = "neuroncore_memory_usage_constants" + NeuronCoreMemoryUtilizationModelCode = "neuroncore_memory_usage_model_code" + NeuronCoreMemoryUtilizationSharedScratchpad = "neuroncore_memory_usage_model_shared_scratchpad" + NeuronCoreMemoryUtilizationRuntimeMemory = "neuroncore_memory_usage_runtime_memory" + NeuronCoreMemoryUtilizationTensors = "neuroncore_memory_usage_tensors" + NeuronDeviceHardwareEccEvents = "neurondevice_hw_ecc_events" + NeuronExecutionStatus = "neuron_execution_status" + NeuronExecutionErrors = "neuron_execution_errors" + NeuronRuntimeMemoryUsage = "neurondevice_runtime_memory_used_bytes" + NeuronInstanceInfo = "instance_info" + NeuronHardware = "neuron_hardware" + NeuronExecutionLatency = "neuron_execution_latency" + + // Converted metrics for NVME metrics + NvmeReadOpsTotal = "diskio_ebs_total_read_ops" + NvmeWriteOpsTotal = "diskio_ebs_total_write_ops" + NvmeReadBytesTotal = "diskio_ebs_total_read_bytes" + NvmeWriteBytesTotal = "diskio_ebs_total_write_bytes" + NvmeReadTime = "diskio_ebs_total_read_time" + NvmeWriteTime = "diskio_ebs_total_write_time" + NvmeExceededIOPSTime = "diskio_ebs_volume_performance_exceeded_iops" + NvmeExceededTPTime = "diskio_ebs_volume_performance_exceeded_tp" + NvmeExceededEC2IOPSTime = "diskio_ebs_ec2_instance_performance_exceeded_iops" + NvmeExceededEC2TPTime = "diskio_ebs_ec2_instance_performance_exceeded_tp" + NvmeVolumeQueueLength = "diskio_ebs_volume_queue_length" + + TypeCluster = "Cluster" + TypeService = "Service" + + // Both TypeInstance and TypeNode mean EC2 Instance, they are used in ECS and EKS separately + TypeInstance = "Instance" + TypeNode = "Node" + TypeGpuContainer = "ContainerGPU" + TypeGpuPod = "PodGPU" + TypeGpuNode = "NodeGPU" + TypeGpuCluster = "ClusterGPU" + TypeNodeEBS = "NodeEBS" + TypePod = "Pod" + TypeContainer = "Container" + + Kubernetes = "kubernetes" + K8sNamespace = "Namespace" + PodIDKey = "PodId" + FullPodNameKey = "FullPodName" + PodNameKey = "PodName" + K8sPodNameKey = "K8sPodName" + ContainerNamekey = "ContainerName" + ContainerIdkey = "ContainerId" + PodOwnersKey = "PodOwners" + HostKey = "host" + K8sKey = "kubernetes" + K8sLabelsKey = "labels" + + Timestamp = "Timestamp" +) diff --git a/internal/containerinsightscommon/const.go b/internal/containerinsightscommon/const.go deleted file mode 100644 index c6840e9331..0000000000 --- a/internal/containerinsightscommon/const.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package containerinsightscommon - -const ( - GoPSUtilProcDirEnv = "HOST_PROC" - - MinTimeDiff = 50 * 1000 // We assume 50 micro-seconds is the minimal gap between two collected data sample to be valid to calculate delta - - ClusterNameKey = "ClusterName" - NodeNameKey = "NodeName" // Attribute names - InstanceIdKey = "InstanceId" - InstanceTypeKey = "InstanceType" - AutoScalingGroupNameKey = "AutoScalingGroupName" - VersionKey = "Version" - MetricType = "Type" - SourcesKey = "Sources" - GpuDeviceKey = "GpuDevice" - - ClusterQueueNameKey = "ClusterQueue" - ClusterQueueStatusKey = "Status" - ClusterQueueReasonKey = "Reason" - ClusterQueueResourceKey = "Resource" - Flavor = "Flavor" - - // metric collected - CpuTotal = "cpu_usage_total" - CpuUser = "cpu_usage_user" - CpuSystem = "cpu_usage_system" - CpuLimit = "cpu_limit" - CpuUtilization = "cpu_utilization" - CpuRequest = "cpu_request" - CpuReservedCapacity = "cpu_reserved_capacity" - CpuUtilizationOverPodLimit = "cpu_utilization_over_pod_limit" - - MemUsage = "memory_usage" - MemCache = "memory_cache" - MemRss = "memory_rss" - MemMaxusage = "memory_max_usage" - MemSwap = "memory_swap" - MemFailcnt = "memory_failcnt" - MemMappedfile = "memory_mapped_file" - MemWorkingset = "memory_working_set" - MemPgfault = "memory_pgfault" - MemPgmajfault = "memory_pgmajfault" - MemHierarchicalPgfault = "memory_hierarchical_pgfault" - MemHierarchicalPgmajfault = "memory_hierarchical_pgmajfault" - MemLimit = "memory_limit" - MemRequest = "memory_request" - MemUtilization = "memory_utilization" - MemReservedCapacity = "memory_reserved_capacity" - MemUtilizationOverPodLimit = "memory_utilization_over_pod_limit" - - NetIfce = "interface" - NetRxBytes = "network_rx_bytes" - NetRxPackets = "network_rx_packets" - NetRxDropped = "network_rx_dropped" - NetRxErrors = "network_rx_errors" - NetTxBytes = "network_tx_bytes" - NetTxPackets = "network_tx_packets" - NetTxDropped = "network_tx_dropped" - NetTxErrors = "network_tx_errors" - NetTotalBytes = "network_total_bytes" - - DiskDev = "device" - EbsVolumeId = "ebs_volume_id" - - FSType = "fstype" - FSUsage = "filesystem_usage" - FSCapacity = "filesystem_capacity" - FSAvailable = "filesystem_available" - FSInodes = "filesystem_inodes" - FSInodesfree = "filesystem_inodes_free" - FSUtilization = "filesystem_utilization" - - DiskIOServiceBytesPrefix = "diskio_io_service_bytes_" - DiskIOServicedPrefix = "diskio_io_serviced_" - DiskIOAsync = "Async" - DiskIORead = "Read" - DiskIOSync = "Sync" - DiskIOWrite = "Write" - DiskIOTotal = "Total" - - GpuUtilization = "gpu_utilization" - GpuMemUtilization = "gpu_memory_utilization" - GpuMemUsed = "gpu_memory_used" - GpuMemTotal = "gpu_memory_total" - GpuTemperature = "gpu_temperature" - GpuPowerDraw = "gpu_power_draw" - GpuRequest = "gpu_request" - GpuLimit = "gpu_limit" - GpuTotal = "gpu_total" - GpuUniqueId = "UUID" - - NeuronCoreUtilization = "neuroncore_utilization" - NeuronCoreMemoryUtilizationTotal = "neuroncore_memory_usage_total" - NeuronCoreMemoryUtilizationConstants = "neuroncore_memory_usage_constants" - NeuronCoreMemoryUtilizationModelCode = "neuroncore_memory_usage_model_code" - NeuronCoreMemoryUtilizationSharedScratchpad = "neuroncore_memory_usage_model_shared_scratchpad" - NeuronCoreMemoryUtilizationRuntimeMemory = "neuroncore_memory_usage_runtime_memory" - NeuronCoreMemoryUtilizationTensors = "neuroncore_memory_usage_tensors" - NeuronDeviceHardwareEccEvents = "neurondevice_hw_ecc_events" - NeuronExecutionStatus = "neuron_execution_status" - NeuronExecutionErrors = "neuron_execution_errors" - NeuronRuntimeMemoryUsage = "neurondevice_runtime_memory_used_bytes" - NeuronInstanceInfo = "instance_info" - NeuronHardware = "neuron_hardware" - NeuronExecutionLatency = "neuron_execution_latency" - - // Converted metrics for NVME metrics - NvmeReadOpsTotal = "diskio_ebs_total_read_ops" - NvmeWriteOpsTotal = "diskio_ebs_total_write_ops" - NvmeReadBytesTotal = "diskio_ebs_total_read_bytes" - NvmeWriteBytesTotal = "diskio_ebs_total_write_bytes" - NvmeReadTime = "diskio_ebs_total_read_time" - NvmeWriteTime = "diskio_ebs_total_write_time" - NvmeExceededIOPSTime = "diskio_ebs_volume_performance_exceeded_iops" - NvmeExceededTPTime = "diskio_ebs_volume_performance_exceeded_tp" - NvmeExceededEC2IOPSTime = "diskio_ebs_ec2_instance_performance_exceeded_iops" - NvmeExceededEC2TPTime = "diskio_ebs_ec2_instance_performance_exceeded_tp" - NvmeVolumeQueueLength = "diskio_ebs_volume_queue_length" - - KueuePendingWorkloads = "kueue_pending_workloads" - KueueEvictedWorkloadsTotal = "kueue_evicted_workloads_total" - KueueAdmittedActiveWorkloads = "kueue_admitted_active_workloads" - KueueClusterQueueResourceUsage = "kueue_cluster_queue_resource_usage" - KueueClusterQueueNominalUsage = "kueue_cluster_queue_nominal_quota" - - TypeCluster = "Cluster" - TypeClusterService = "ClusterService" - TypeClusterNamespace = "ClusterNamespace" - TypeService = "Service" - TypeClusterQueue = "ClusterQueue" - - // Both TypeInstance and TypeNode mean EC2 Instance, they are used in ECS and EKS separately - TypeInstance = "Instance" - TypeNode = "Node" - TypeInstanceFS = "InstanceFS" - TypeNodeFS = "NodeFS" - TypeInstanceNet = "InstanceNet" - TypeNodeNet = "NodeNet" - TypeInstanceDiskIO = "InstanceDiskIO" - TypeNodeDiskIO = "NodeDiskIO" - TypeGpuContainer = "ContainerGPU" - TypeGpuPod = "PodGPU" - TypeGpuNode = "NodeGPU" - TypeGpuCluster = "ClusterGPU" - TypeNodeEBS = "NodeEBS" - - TypePod = "Pod" - TypePodNet = "PodNet" - TypeContainer = "Container" - TypeContainerFS = "ContainerFS" - TypeContainerDiskIO = "ContainerDiskIO" - // Special type for pause container, introduced in https://github.com/aws/amazon-cloudwatch-agent/issues/188 - // because containerd does not set container name pause container name to POD like docker does. - TypeInfraContainer = "InfraContainer" -) diff --git a/internal/containerinsightscommon/ecsconst.go b/internal/containerinsightscommon/ecsconst.go deleted file mode 100644 index ac0f9fdcc4..0000000000 --- a/internal/containerinsightscommon/ecsconst.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package containerinsightscommon - -const ( - ContainerInstanceIdKey = "ContainerInstanceId" - RunningTaskCount = "number_of_running_tasks" - ECS = "ecs" -) diff --git a/internal/containerinsightscommon/k8sconst.go b/internal/containerinsightscommon/k8sconst.go deleted file mode 100644 index 309c0ea9ef..0000000000 --- a/internal/containerinsightscommon/k8sconst.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package containerinsightscommon - -const ( - EKS = "eks" - KubeSecurePort = "10250" - BearerToken = "/var/run/secrets/kubernetes.io/serviceaccount/token" - CAFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - - Kubernetes = "kubernetes" - K8sNamespace = "Namespace" - PodIdKey = "PodId" - FullPodNameKey = "FullPodName" - PodNameKey = "PodName" - K8sPodNameKey = "K8sPodName" - ContainerNamekey = "ContainerName" - ContainerIdkey = "ContainerId" - PodOwnersKey = "PodOwners" - HostKey = "host" - K8sKey = "kubernetes" - K8sLabelsKey = "labels" - - RunningPodCount = "number_of_running_pods" - RunningContainerCount = "number_of_running_containers" - ContainerCount = "number_of_containers" - NodeCount = "node_count" - FailedNodeCount = "failed_node_count" - ContainerRestartCount = "number_of_container_restarts" - - PodStatus = "pod_status" - ContainerStatus = "container_status" - - ContainerStatusReason = "container_status_reason" - ContainerLastTerminationReason = "container_last_termination_reason" - - Timestamp = "Timestamp" - - //Pod Owners - ReplicaSet = "ReplicaSet" - ReplicationController = "ReplicationController" - StatefulSet = "StatefulSet" - DaemonSet = "DaemonSet" - Deployment = "Deployment" - Job = "Job" - CronJob = "CronJob" -) diff --git a/internal/containerinsightscommon/nodeCapacity.go b/internal/containerinsightscommon/nodeCapacity.go deleted file mode 100644 index d58197b71f..0000000000 --- a/internal/containerinsightscommon/nodeCapacity.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package containerinsightscommon - -import ( - "log" - "os" - - "github.com/shirou/gopsutil/v3/cpu" - "github.com/shirou/gopsutil/v3/mem" -) - -type NodeCapacity struct { - MemCapacity int64 - CPUCapacity int64 -} - -func NewNodeCapacity() *NodeCapacity { - if _, err := os.Lstat("/rootfs/proc"); os.IsNotExist(err) { - log.Panic("E! /rootfs/proc does not exist") - } - if err := os.Setenv(GoPSUtilProcDirEnv, "/rootfs/proc"); err != nil { - log.Printf("E! NodeCapacity cannot set goPSUtilProcDirEnv to /rootfs/proc %v", err) - } - nc := &NodeCapacity{} - nc.parseCpu() - nc.parseMemory() - return nc -} - -func (n *NodeCapacity) parseMemory() { - if memStats, err := mem.VirtualMemory(); err == nil { - n.MemCapacity = int64(memStats.Total) - } else { - // If any error happen, then there will be no mem utilization metrics - log.Printf("E! NodeCapacity cannot get memStats from psUtil %v", err) - } -} - -func (n *NodeCapacity) parseCpu() { - if cpuInfos, err := cpu.Info(); err == nil { - numCores := len(cpuInfos) - n.CPUCapacity = int64(numCores) - } else { - // If any error happen, then there will be no cpu utilization metrics - log.Printf("E! NodeCapacity cannot get cpuInfo from psUtil %v", err) - } -} diff --git a/internal/containerinsightscommon/util.go b/internal/containerinsightscommon/util.go deleted file mode 100644 index 7e7d65b159..0000000000 --- a/internal/containerinsightscommon/util.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package containerinsightscommon - -import ( - "log" -) - -func IsNode(mType string) bool { - return mType == TypeNode || mType == TypeNodeNet || mType == TypeNodeFS || mType == TypeNodeDiskIO -} -func IsInstance(mType string) bool { - return mType == TypeInstance || mType == TypeInstanceNet || mType == TypeInstanceFS || mType == TypeInstanceDiskIO -} -func IsContainer(mType string) bool { - return mType == TypeContainer || mType == TypeContainerDiskIO || mType == TypeContainerFS -} -func IsPod(mType string) bool { - return mType == TypePod || mType == TypePodNet -} - -func MetricName(mType string, name string) string { - prefix := "" - instancePrefix := "instance_" - nodePrefix := "node_" - instanceNetPrefix := "instance_interface_" - nodeNetPrefix := "node_interface_" - podPrefix := "pod_" - podNetPrefix := "pod_interface_" - containerPrefix := "container_" - service := "service_" - cluster := "cluster_" - namespace := "namespace_" - - switch mType { - case TypeInstance, TypeInstanceFS, TypeInstanceDiskIO: - prefix = instancePrefix - case TypeInstanceNet: - prefix = instanceNetPrefix - case TypeNode, TypeNodeFS, TypeNodeDiskIO, TypeGpuNode: - prefix = nodePrefix - case TypeNodeNet: - prefix = nodeNetPrefix - case TypePod, TypeGpuPod: - prefix = podPrefix - case TypePodNet: - prefix = podNetPrefix - case TypeContainer, TypeContainerDiskIO, TypeContainerFS, TypeGpuContainer: - prefix = containerPrefix - case TypeService: - prefix = service - case TypeCluster, TypeGpuCluster, TypeClusterQueue: - prefix = cluster - case K8sNamespace: - prefix = namespace - default: - log.Printf("E! Unexpected MetricType: %s", mType) - } - return prefix + name -} diff --git a/internal/k8sCommon/k8sclient/clientset.go b/internal/k8sCommon/k8sclient/clientset.go deleted file mode 100644 index 66bd4bb7b7..0000000000 --- a/internal/k8sCommon/k8sclient/clientset.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "log" - "os" - "path/filepath" - "sync" - "time" - - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/clientcmd" -) - -const ( - cacheTTL = 10 * time.Minute -) - -var client *K8sClient -var Get func() *K8sClient - -func get() *K8sClient { - if !client.inited { - client.init() - } - return client -} - -type K8sClient struct { - sync.Mutex - inited bool - - ClientSet *kubernetes.Clientset - - Ep EpClient - Pod PodClient - Node NodeClient - - ReplicaSet ReplicaSetClient -} - -func (c *K8sClient) init() { - c.Lock() - defer c.Unlock() - if c.inited { - return - } - - config, err := rest.InClusterConfig() - if err != nil { - log.Printf("W! Cannot find in cluster config: %v", err) - config, err = clientcmd.BuildConfigFromFlags("", filepath.Join(os.Getenv("HOME"), ".kube/config")) - if err != nil { - log.Printf("E! Failed to build config: %v", err) - return - } - } - client, err := kubernetes.NewForConfig(config) - if err != nil { - log.Printf("E! Failed to build ClientSet: %v", err) - return - } - c.ClientSet = client - c.Ep = new(epClient) - c.Pod = new(podClient) - c.Node = new(nodeClient) - c.ReplicaSet = new(replicaSetClient) - c.inited = true -} - -func (c *K8sClient) shutdown() { - c.Lock() - defer c.Unlock() - if !c.inited { - return - } - if c.Ep != nil { - c.Ep.Shutdown() - } - if c.Pod != nil { - c.Pod.Shutdown() - } - if c.Node != nil { - c.Node.Shutdown() - } - if c.ReplicaSet != nil { - c.ReplicaSet.Shutdown() - } - c.inited = false -} - -func init() { - client = new(K8sClient) - Get = get -} diff --git a/internal/k8sCommon/k8sclient/endpoint.go b/internal/k8sCommon/k8sclient/endpoint.go deleted file mode 100644 index bafb237c6e..0000000000 --- a/internal/k8sCommon/k8sclient/endpoint.go +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "context" - "errors" - "fmt" - "log" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" - "k8s.io/klog/v2/klogr" - - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/k8sutil" -) - -type Service struct { - ServiceName string - Namespace string -} - -func NewService(name, namespace string) Service { - return Service{ServiceName: name, Namespace: namespace} -} - -type EpClient interface { - PodKeyToServiceNames() map[string][]string - ServiceToPodNum() map[Service]int - - Init() - Shutdown() -} - -type epClient struct { - sync.RWMutex - - stopChan chan struct{} - store *ObjStore - - inited bool - - podKeyToServiceNamesMap map[string][]string - serviceToPodNumMap map[Service]int //only running pods will show behind endpoints -} - -func (c *epClient) PodKeyToServiceNames() map[string][]string { - if !c.inited { - c.Init() - } - if c.store.Refreshed() { - c.refresh() - } - c.RLock() - defer c.RUnlock() - return c.podKeyToServiceNamesMap -} - -func (c *epClient) ServiceToPodNum() map[Service]int { - if !c.inited { - c.Init() - } - if c.store.Refreshed() { - c.refresh() - } - c.RLock() - defer c.RUnlock() - return c.serviceToPodNumMap -} - -func (c *epClient) refresh() { - c.Lock() - defer c.Unlock() - - objsList := c.store.List() - - tmpMap := make(map[string]map[string]struct{}) //pod key to service names - serviceToPodNumMapNew := make(map[Service]int) - - for _, obj := range objsList { - ep := obj.(*endpointInfo) - serviceName := ep.name - namespace := ep.namespace - - // each obj should be a uniq service. - // ignore the service which has 0 pods. - if len(ep.podKeyList) > 0 { - serviceToPodNumMapNew[NewService(serviceName, namespace)] = len(ep.podKeyList) - } - - for _, podKey := range ep.podKeyList { - var serviceNamesMap map[string]struct{} - var ok bool - if _, ok = tmpMap[podKey]; !ok { - tmpMap[podKey] = make(map[string]struct{}) - } - serviceNamesMap = tmpMap[podKey] - serviceNamesMap[serviceName] = struct{}{} - } - } - - podKeyToServiceNamesMapNew := make(map[string][]string) - - for podKey, serviceNamesMap := range tmpMap { - serviceNamesList := make([]string, 0, len(serviceNamesMap)) - for serviceName := range serviceNamesMap { - serviceNamesList = append(serviceNamesList, serviceName) - } - podKeyToServiceNamesMapNew[podKey] = serviceNamesList - } - c.podKeyToServiceNamesMap = podKeyToServiceNamesMapNew - c.serviceToPodNumMap = serviceToPodNumMapNew -} - -func (c *epClient) Init() { - c.Lock() - defer c.Unlock() - if c.inited { - return - } - - c.stopChan = make(chan struct{}) - - c.store = NewObjStore(transformFuncEndpoint) - - lw := createEndpointListWatch(Get().ClientSet, metav1.NamespaceAll) - reflector := cache.NewReflector(lw, &v1.Endpoints{}, c.store, 0) - klog.SetLogger(klogr.New().WithName("k8s_client_runtime").V(3)) - go reflector.Run(c.stopChan) - - if err := wait.Poll(50*time.Millisecond, 2*time.Second, func() (done bool, err error) { - return reflector.LastSyncResourceVersion() != "", nil - }); err != nil { - log.Printf("W! Endpoint initial sync timeout: %v", err) - } - - c.inited = true -} - -func (c *epClient) Shutdown() { - c.Lock() - defer c.Unlock() - if !c.inited { - return - } - - close(c.stopChan) - - c.inited = false -} - -func transformFuncEndpoint(obj interface{}) (interface{}, error) { - endpoint, ok := obj.(*v1.Endpoints) - if !ok { - return nil, errors.New(fmt.Sprintf("input obj %v is not Endpoint type", obj)) - } - info := new(endpointInfo) - info.name = endpoint.Name - info.namespace = endpoint.Namespace - info.podKeyList = []string{} - if subsets := endpoint.Subsets; subsets != nil { - for _, subset := range subsets { - if addresses := subset.Addresses; addresses != nil { - for _, address := range addresses { - if targetRef := address.TargetRef; targetRef != nil && targetRef.Kind == containerinsightscommon.TypePod { - podKey := k8sutil.CreatePodKey(targetRef.Namespace, targetRef.Name) - if podKey == "" { - log.Printf("W! Invalid pod metadata, namespace: %s, podName: %s", targetRef.Namespace, targetRef.Name) - continue - } - info.podKeyList = append(info.podKeyList, podKey) - } - } - } - } - } - return info, nil -} - -func createEndpointListWatch(client kubernetes.Interface, ns string) cache.ListerWatcher { - ctx := context.Background() - return &cache.ListWatch{ - ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) { - return client.CoreV1().Endpoints(ns).List(ctx, opts) - }, - WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) { - return client.CoreV1().Endpoints(ns).Watch(ctx, opts) - }, - } -} diff --git a/internal/k8sCommon/k8sclient/endpoint_info.go b/internal/k8sCommon/k8sclient/endpoint_info.go deleted file mode 100644 index 6eec55475e..0000000000 --- a/internal/k8sCommon/k8sclient/endpoint_info.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -type endpointInfo struct { - name string //service name - namespace string //namespace name - podKeyList []string -} diff --git a/internal/k8sCommon/k8sclient/endpoint_test.go b/internal/k8sCommon/k8sclient/endpoint_test.go deleted file mode 100644 index 4d5e567748..0000000000 --- a/internal/k8sCommon/k8sclient/endpoint_test.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "log" - "testing" - "time" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/awsutil" - "gotest.tools/v3/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var endpointsArray = []interface{}{ - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "guestbook", - GenerateName: "", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/endpoints/guestbook", - UID: "a885b78c-5573-11e9-b47e-066a7a20bac8", - ResourceVersion: "1550348", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "app": "guestbook", - }, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: []v1.EndpointAddress{ - { - IP: "192.168.122.125", - Hostname: "", - NodeName: aws.String("ip-192-168-76-61.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "guestbook-qjqnz", - UID: "9ca74e86-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550311", - FieldPath: "", - }, - }, - { - IP: "192.168.176.235", - Hostname: "", - NodeName: aws.String("ip-192-168-153-1.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "guestbook-92wmq", - UID: "9ca662bb-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550313", - FieldPath: "", - }, - }, - { - IP: "192.168.251.65", - Hostname: "", - NodeName: aws.String("ip-192-168-200-63.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "guestbook-qbdv8", - UID: "9ca76fd6-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550319", - FieldPath: "", - }, - }, - }, - Ports: []v1.EndpointPort{ - { - Name: "", - Port: 3000, - Protocol: "TCP", - }, - }, - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kubernetes", - GenerateName: "", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/endpoints/kubernetes", - UID: "4daf1688-4c0a-11e9-b47e-066a7a20bac8", - ResourceVersion: "5807557", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: []v1.EndpointAddress{ - { - IP: "192.168.174.242", - Hostname: "", - }, - { - IP: "192.168.82.3", - Hostname: "", - }, - }, - Ports: []v1.EndpointPort{ - { - Name: "https", - Port: 443, - Protocol: "TCP", - }, - }, - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "redis-master", - GenerateName: "", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/endpoints/redis-master", - UID: "74ac431b-5573-11e9-b47e-066a7a20bac8", - ResourceVersion: "1550146", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "app": "redis", - "role": "master", - }, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: []v1.EndpointAddress{ - { - IP: "192.168.108.68", - Hostname: "", - NodeName: aws.String("ip-192-168-76-61.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "redis-master-rh2bd", - UID: "5d7825f3-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550097", - FieldPath: "", - }, - }, - }, - Ports: []v1.EndpointPort{ - { - Name: "", - Port: 6379, - Protocol: "TCP", - }, - }, - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "redis-slave", - GenerateName: "", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/endpoints/redis-slave", - UID: "8dee375e-5573-11e9-b47e-066a7a20bac8", - ResourceVersion: "1550242", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "app": "redis", - "role": "slave", - }, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: []v1.EndpointAddress{ - { - IP: "192.168.186.217", - Hostname: "", - NodeName: aws.String("ip-192-168-153-1.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "redis-slave-mdjsj", - UID: "8137c74b-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550223", - FieldPath: "", - }, - }, - { - IP: "192.168.68.108", - Hostname: "", - NodeName: aws.String("ip-192-168-76-61.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "default", - Name: "redis-slave-gtd5x", - UID: "813878c3-5573-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "1550226", - FieldPath: "", - }, - }, - }, - Ports: []v1.EndpointPort{ - { - Name: "", - Port: 6379, - Protocol: "TCP", - }, - }, - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kube-controller-manager", - GenerateName: "", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/endpoints/kube-controller-manager", - UID: "4f77dc4b-4c0a-11e9-b47e-066a7a20bac8", - ResourceVersion: "6461574", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Annotations: map[string]string{ - "control-plane.alpha.kubernetes.io/leader": "{\"holderIdentity\":\"ip-10-0-189-120.eu-west-1.compute.internal_89407f85-57e1-11e9-b6ea-02eb484bead6\",\"leaseDurationSeconds\":15,\"acquireTime\":\"2019-04-05T20:34:54Z\",\"renewTime\":\"2019-05-06T20:04:02Z\",\"leaderTransitions\":1}", - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kube-dns", - GenerateName: "", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/endpoints/kube-dns", - UID: "5049bf97-4c0a-11e9-b47e-066a7a20bac8", - ResourceVersion: "5847", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "eks.amazonaws.com/component": "kube-dns", - "k8s-app": "kube-dns", - "kubernetes.io/cluster-service": "true", - "kubernetes.io/name": "CoreDNS", - }, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: []v1.EndpointAddress{ - { - IP: "192.168.212.227", - Hostname: "", - NodeName: aws.String("ip-192-168-200-63.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "kube-system", - Name: "coredns-7554568866-26jdf", - UID: "503e1eae-4c0a-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "5842", - FieldPath: "", - }, - }, - { - IP: "192.168.222.250", - Hostname: "", - NodeName: aws.String("ip-192-168-200-63.eu-west-1.compute.internal"), - TargetRef: &v1.ObjectReference{ - Kind: "Pod", - Namespace: "kube-system", - Name: "coredns-7554568866-shwn6", - UID: "503f9b07-4c0a-11e9-b47e-066a7a20bac8", - APIVersion: "", - ResourceVersion: "5839", - FieldPath: "", - }, - }, - }, - Ports: []v1.EndpointPort{ - { - Name: "dns", - Port: 53, - Protocol: "UDP", - }, - { - Name: "dns-tcp", - Port: 53, - Protocol: "TCP", - }, - }, - }, - }, - }, - &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kube-scheduler", - GenerateName: "", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/endpoints/kube-scheduler", - UID: "4e8782bc-4c0a-11e9-b47e-066a7a20bac8", - ResourceVersion: "6461575", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Annotations: map[string]string{ - "control-plane.alpha.kubernetes.io/leader": "{\"holderIdentity\":\"ip-10-0-189-120.eu-west-1.compute.internal_949a4400-57e1-11e9-a7bb-02eb484bead6\",\"leaseDurationSeconds\":15,\"acquireTime\":\"2019-04-05T20:34:57Z\",\"renewTime\":\"2019-05-06T20:04:02Z\",\"leaderTransitions\":1}", - }, - }, - }, -} - -func setUpEndpointClient() (*epClient, chan struct{}) { - stopChan := make(chan struct{}) - - client := &epClient{ - stopChan: stopChan, - store: NewObjStore(transformFuncEndpoint), - inited: true, //make it true to avoid further initialization invocation. - } - return client, stopChan -} - -func TestEpClient_PodKeyToServiceNames(t *testing.T) { - client, stopChan := setUpEndpointClient() - defer close(stopChan) - - client.store.Replace(endpointsArray, "") - - expectedMap := map[string][]string{ - "namespace:default,podName:redis-master-rh2bd": {"redis-master"}, - "namespace:default,podName:redis-slave-mdjsj": {"redis-slave"}, - "namespace:default,podName:redis-slave-gtd5x": {"redis-slave"}, - "namespace:kube-system,podName:coredns-7554568866-26jdf": {"kube-dns"}, - "namespace:kube-system,podName:coredns-7554568866-shwn6": {"kube-dns"}, - "namespace:default,podName:guestbook-qjqnz": {"guestbook"}, - "namespace:default,podName:guestbook-92wmq": {"guestbook"}, - "namespace:default,podName:guestbook-qbdv8": {"guestbook"}, - } - resultMap := client.PodKeyToServiceNames() - log.Printf("PodKeyToServiceNames (len=%v): %v", len(resultMap), awsutil.Prettify(resultMap)) - assert.DeepEqual(t, resultMap, expectedMap) -} - -func TestEpClient_ServiceNameToPodNum(t *testing.T) { - client, stopChan := setUpEndpointClient() - defer close(stopChan) - - client.store.Replace(endpointsArray, "") - - expectedMap := map[Service]int{ - NewService("redis-slave", "default"): 2, - NewService("kube-dns", "kube-system"): 2, - NewService("redis-master", "default"): 1, - NewService("guestbook", "default"): 3, - } - resultMap := client.ServiceToPodNum() - log.Printf("ServiceNameToPodNum (len=%v): %v", len(resultMap), awsutil.Prettify(resultMap)) - assert.DeepEqual(t, resultMap, expectedMap) -} diff --git a/internal/k8sCommon/k8sclient/job_info.go b/internal/k8sCommon/k8sclient/job_info.go deleted file mode 100644 index 2211f73fb3..0000000000 --- a/internal/k8sCommon/k8sclient/job_info.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -type jobInfo struct { - name string - owners []*jobOwner -} - -type jobOwner struct { - kind string - name string -} diff --git a/internal/k8sCommon/k8sclient/kubernetes_utils_test.go b/internal/k8sCommon/k8sclient/kubernetes_utils_test.go deleted file mode 100644 index 4becf13fed..0000000000 --- a/internal/k8sCommon/k8sclient/kubernetes_utils_test.go +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "sync" - "testing" - "time" - - "github.com/google/uuid" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// TestAttachNamespace function -func TestAttachNamespace(t *testing.T) { - result := attachNamespace("testResource", "testNamespace") - if result != "testResource@testNamespace" { - t.Errorf("attachNamespace was incorrect, got: %s, want: %s.", result, "testResource@testNamespace") - } -} - -// TestGetServiceAndNamespace function -func TestGetServiceAndNamespace(t *testing.T) { - service := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testService", - Namespace: "testNamespace", - }, - } - result := getServiceAndNamespace(service) - if result != "testService@testNamespace" { - t.Errorf("getServiceAndNamespace was incorrect, got: %s, want: %s.", result, "testService@testNamespace") - } -} - -// TestExtractResourceAndNamespace function -func TestExtractResourceAndNamespace(t *testing.T) { - // Test normal case - name, namespace := ExtractResourceAndNamespace("testService@testNamespace") - if name != "testService" || namespace != "testNamespace" { - t.Errorf("extractResourceAndNamespace was incorrect, got: %s and %s, want: %s and %s.", name, namespace, "testService", "testNamespace") - } - - // Test invalid case - name, namespace = ExtractResourceAndNamespace("invalid") - if name != "" || namespace != "" { - t.Errorf("extractResourceAndNamespace was incorrect, got: %s and %s, want: %s and %s.", name, namespace, "", "") - } -} - -func TestExtractWorkloadNameFromRS(t *testing.T) { - testCases := []struct { - name string - replicaSetName string - want string - shouldErr bool - }{ - { - name: "Valid ReplicaSet Name", - replicaSetName: "my-deployment-5859ffc7ff", - want: "my-deployment", - shouldErr: false, - }, - { - name: "Invalid ReplicaSet Name - No Hyphen", - replicaSetName: "mydeployment5859ffc7ff", - want: "", - shouldErr: true, - }, - { - name: "Invalid ReplicaSet Name - Less Than 10 Suffix Characters", - replicaSetName: "my-deployment-bc2", - want: "", - shouldErr: true, - }, - { - name: "Invalid ReplicaSet Name - More Than 10 Suffix Characters", - replicaSetName: "my-deployment-5859ffc7ffx", - want: "", - shouldErr: true, - }, - { - name: "Invalid ReplicaSet Name - Invalid Characters in Suffix", - replicaSetName: "my-deployment-aeiou12345", - want: "", - shouldErr: true, - }, - { - name: "Invalid ReplicaSet Name - Empty String", - replicaSetName: "", - want: "", - shouldErr: true, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, err := extractWorkloadNameFromRS(tc.replicaSetName) - - if (err != nil) != tc.shouldErr { - t.Errorf("extractWorkloadNameFromRS() error = %v, wantErr %v", err, tc.shouldErr) - return - } - - if got != tc.want { - t.Errorf("extractWorkloadNameFromRS() = %v, want %v", got, tc.want) - } - }) - } -} - -func TestExtractWorkloadNameFromPodName(t *testing.T) { - testCases := []struct { - name string - podName string - want string - shouldErr bool - }{ - { - name: "Valid Pod Name", - podName: "my-replicaset-bc24f", - want: "my-replicaset", - shouldErr: false, - }, - { - name: "Invalid Pod Name - No Hyphen", - podName: "myreplicasetbc24f", - want: "", - shouldErr: true, - }, - { - name: "Invalid Pod Name - Less Than 5 Suffix Characters", - podName: "my-replicaset-bc2", - want: "", - shouldErr: true, - }, - { - name: "Invalid Pod Name - More Than 5 Suffix Characters", - podName: "my-replicaset-bc24f5", - want: "", - shouldErr: true, - }, - { - name: "Invalid Pod Name - Empty String", - podName: "", - want: "", - shouldErr: true, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, err := extractWorkloadNameFromPodName(tc.podName) - - if (err != nil) != tc.shouldErr { - t.Errorf("extractWorkloadNameFromPodName() error = %v, wantErr %v", err, tc.shouldErr) - return - } - - if got != tc.want { - t.Errorf("extractWorkloadNameFromPodName() = %v, want %v", got, tc.want) - } - }) - } -} - -// TestGetWorkloadAndNamespace function -func TestGetWorkloadAndNamespace(t *testing.T) { - // Test ReplicaSet case - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testPod", - Namespace: "testNamespace", - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "ReplicaSet", - Name: "testDeployment-5d68bc5f49", - }, - }, - }, - } - result := GetWorkloadAndNamespace(pod) - if result != "testDeployment@testNamespace" { - t.Errorf("getDeploymentAndNamespace was incorrect, got: %s, want: %s.", result, "testDeployment@testNamespace") - } - - // Test StatefulSet case - pod.ObjectMeta.OwnerReferences[0].Kind = "StatefulSet" - pod.ObjectMeta.OwnerReferences[0].Name = "testStatefulSet" - result = GetWorkloadAndNamespace(pod) - if result != "testStatefulSet@testNamespace" { - t.Errorf("getWorkloadAndNamespace was incorrect, got: %s, want: %s.", result, "testStatefulSet@testNamespace") - } - - // Test Other case - pod.ObjectMeta.OwnerReferences[0].Kind = "Other" - pod.ObjectMeta.OwnerReferences[0].Name = "testOther" - result = GetWorkloadAndNamespace(pod) - if result != "" { - t.Errorf("getWorkloadAndNamespace was incorrect, got: %s, want: %s.", result, "") - } - - // Test no OwnerReferences case - pod.ObjectMeta.OwnerReferences = nil - result = GetWorkloadAndNamespace(pod) - if result != "" { - t.Errorf("getWorkloadAndNamespace was incorrect, got: %s, want: %s.", result, "") - } -} - -func TestExtractIPPort(t *testing.T) { - // Test valid IP:Port - ip, port, ok := ExtractIPPort("192.0.2.0:8080") - assert.Equal(t, "192.0.2.0", ip) - assert.Equal(t, "8080", port) - assert.True(t, ok) - - // Test invalid IP:Port - ip, port, ok = ExtractIPPort("192.0.2:8080") - assert.Equal(t, "", ip) - assert.Equal(t, "", port) - assert.False(t, ok) - - // Test IP only - ip, port, ok = ExtractIPPort("192.0.2.0") - assert.Equal(t, "", ip) - assert.Equal(t, "", port) - assert.False(t, ok) -} - -func TestInferWorkloadName(t *testing.T) { - testCases := []struct { - name string - input string - service string - expected string - }{ - {"StatefulSet single digit", "mysql-0", "service", "mysql"}, - {"StatefulSet multiple digits", "mysql-10", "service", "mysql"}, - {"ReplicaSet bare pod", "nginx-b2dfg", "service", "nginx"}, - {"Deployment-based ReplicaSet pod", "nginx-76977669dc-lwx64", "service", "nginx"}, - {"Non matching", "simplepod", "service", "service"}, - {"ReplicaSet name with number suffix", "nginx-123-d9stt", "service", "nginx-123"}, - {"Some confusing case with a replicaSet/daemonset name matching the pattern", "nginx-245678-d9stt", "nginx-service", "nginx"}, - // when the regex pattern doesn't matter, we just fall back to service name to handle all the edge cases - {"Some confusing case with a replicaSet/daemonset name not matching the pattern", "nginx-123456-d9stt", "nginx-service", "nginx-123456"}, - {"Empty", "", "service", "service"}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got := InferWorkloadName(tc.input, tc.service) - if got != tc.expected { - t.Errorf("InferWorkloadName(%q) = %q; expected %q", tc.input, got, tc.expected) - } - }) - } -} - -// UUIDString wraps a string with a unique identifier. -type UUIDString struct { - value string - uuid string -} - -// NewUUIDString returns a new UUIDString instance with a generated UUID. -func NewUUIDString(s string) UUIDString { - return UUIDString{ - value: s, - uuid: uuid.NewString(), - } -} - -// String returns the string value and implements the fmt.Stringer interface. -func (u UUIDString) String() string { - return u.value -} - -func (u UUIDString) UUID() string { - return u.uuid -} - -// TestDeleteWithDelay_NoUpdate verifies that if the value is not updated, -// the key is deleted after the delay. -func TestTimedDeleterWithIDCheck_DeleteWithDelay_NoUpdate(t *testing.T) { - m := &sync.Map{} - key := "testKey" - initialVal := NewUUIDString("value") - m.Store(key, initialVal) - - // Use a short delay to make the test run quickly. - td := TimedDeleterWithIDCheck{Delay: 10 * time.Millisecond} - td.DeleteWithDelay(m, key) - - // Wait for longer than the deletion delay. - time.Sleep(20 * time.Millisecond) - - if _, ok := m.Load(key); ok { - t.Errorf("Expected key %q to be deleted, but it still exists", key) - } -} - -// TestDeleteWithDelay_WithUpdate verifies that if the value is updated before the deletion delay expires, -// the key is not deleted. -func TestTimedDeleterWithIDCheck_DeleteWithDelay_WithUpdate(t *testing.T) { - m := &sync.Map{} - key := "testKey" - initialVal := NewUUIDString("value") - m.Store(key, initialVal) - - td := TimedDeleterWithIDCheck{Delay: 20 * time.Millisecond} - td.DeleteWithDelay(m, key) - - // Wait a bit before updating (less than td.Delay). - time.Sleep(10 * time.Millisecond) - updatedVal := NewUUIDString("value") // same content, but a new instance (different UUID) - m.Store(key, updatedVal) - - // Wait long enough for the deletion delay to expire. - time.Sleep(20 * time.Millisecond) - - if _, ok := m.Load(key); !ok { - t.Errorf("Expected key %q to remain after update, but it was deleted", key) - } -} - -// TestDeleteWithDelay_InvalidType verifies that if the value stored is not a UUIDString, -// no deletion occurs. -func TestTimedDeleterWithIDCheck_DeleteWithDelay_InvalidType(t *testing.T) { - m := &sync.Map{} - key := "invalidKey" - // Store a plain string instead of UUIDString. - m.Store(key, "a simple string") - - td := TimedDeleterWithIDCheck{Delay: 10 * time.Millisecond} - td.DeleteWithDelay(m, key) - - time.Sleep(20 * time.Millisecond) - if _, ok := m.Load(key); !ok { - t.Errorf("Expected key %q to remain since value is not a UUIDString, but it was deleted", key) - } -} diff --git a/internal/k8sCommon/k8sclient/node.go b/internal/k8sCommon/k8sclient/node.go deleted file mode 100644 index 4de0f0d94e..0000000000 --- a/internal/k8sCommon/k8sclient/node.go +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "context" - "errors" - "fmt" - "log" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" -) - -// This needs to be reviewed for newer versions of k8s. -var failedNodeConditions = map[v1.NodeConditionType]bool{ - //v1.NodeOutOfDisk: true, - v1.NodeMemoryPressure: true, - v1.NodeDiskPressure: true, - v1.NodePIDPressure: true, - v1.NodeNetworkUnavailable: true, -} - -type NodeClient interface { - ClusterFailedNodeCount() int - ClusterNodeCount() int - - Init() - Shutdown() -} - -type nodeClient struct { - sync.RWMutex - - stopChan chan struct{} - store *ObjStore - - inited bool - - clusterFailedNodeCount int - clusterNodeCount int -} - -func (c *nodeClient) ClusterFailedNodeCount() int { - if !c.inited { - c.Init() - } - if c.store.Refreshed() { - c.refresh() - } - c.RLock() - defer c.RUnlock() - return c.clusterFailedNodeCount -} - -func (c *nodeClient) ClusterNodeCount() int { - if !c.inited { - c.Init() - } - if c.store.Refreshed() { - c.refresh() - } - c.RLock() - defer c.RUnlock() - return c.clusterNodeCount -} - -func (c *nodeClient) refresh() { - c.Lock() - defer c.Unlock() - - objsList := c.store.List() - - clusterFailedNodeCountNew := 0 - clusterNodeCountNew := 0 - for _, obj := range objsList { - node := obj.(*nodeInfo) - - clusterNodeCountNew++ - - failed := false - - Loop: - for _, condition := range node.conditions { - if _, ok := failedNodeConditions[condition.Type]; ok { - // match the failedNodeConditions type we care about - if condition.Status != v1.ConditionFalse { - // if this is not false, i.e. true or unknown - failed = true - break Loop - } - } - } - - if failed { - clusterFailedNodeCountNew++ - } - } - - c.clusterFailedNodeCount = clusterFailedNodeCountNew - c.clusterNodeCount = clusterNodeCountNew -} - -func (c *nodeClient) Init() { - c.Lock() - defer c.Unlock() - if c.inited { - return - } - - c.stopChan = make(chan struct{}) - - c.store = NewObjStore(transformFuncNode) - - lw := createNodeListWatch(Get().ClientSet) - reflector := cache.NewReflector(lw, &v1.Node{}, c.store, 0) - go reflector.Run(c.stopChan) - - if err := wait.Poll(50*time.Millisecond, 2*time.Second, func() (done bool, err error) { - return reflector.LastSyncResourceVersion() != "", nil - }); err != nil { - log.Printf("W! Node initial sync timeout: %v", err) - } - - c.inited = true -} - -func (c *nodeClient) Shutdown() { - c.Lock() - defer c.Unlock() - if !c.inited { - return - } - - close(c.stopChan) - - c.inited = false -} - -func transformFuncNode(obj interface{}) (interface{}, error) { - node, ok := obj.(*v1.Node) - if !ok { - return nil, errors.New(fmt.Sprintf("input obj %v is not Node type", obj)) - } - info := new(nodeInfo) - info.conditions = []*nodeCondition{} - for _, condition := range node.Status.Conditions { - info.conditions = append(info.conditions, &nodeCondition{ - Type: condition.Type, - Status: condition.Status, - }) - } - return info, nil -} - -func createNodeListWatch(client kubernetes.Interface) cache.ListerWatcher { - ctx := context.Background() - return &cache.ListWatch{ - ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) { - opts.ResourceVersion = "" - // Passing emput context as this was not required by old List() - return client.CoreV1().Nodes().List(ctx, opts) - }, - WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) { - // Passing empty context as this was not required by old Watch() - return client.CoreV1().Nodes().Watch(ctx, opts) - }, - } -} diff --git a/internal/k8sCommon/k8sclient/node_info.go b/internal/k8sCommon/k8sclient/node_info.go deleted file mode 100644 index 6014c30ecd..0000000000 --- a/internal/k8sCommon/k8sclient/node_info.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - v1 "k8s.io/api/core/v1" -) - -type nodeInfo struct { - conditions []*nodeCondition -} - -type nodeCondition struct { - Type v1.NodeConditionType - Status v1.ConditionStatus -} diff --git a/internal/k8sCommon/k8sclient/node_test.go b/internal/k8sCommon/k8sclient/node_test.go deleted file mode 100644 index e90a4b2d79..0000000000 --- a/internal/k8sCommon/k8sclient/node_test.go +++ /dev/null @@ -1,314 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "log" - "testing" - "time" - - "github.com/stretchr/testify/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var nodeArray = []interface{}{ - &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ip-192-168-200-63.eu-west-1.compute.internal", - GenerateName: "", - Namespace: "", - SelfLink: "/api/v1/nodes/ip-192-168-200-63.eu-west-1.compute.internal", - UID: "9e31e901-4c14-11e9-9bd4-02cf86190d00", - ResourceVersion: "6505830", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "t3.medium", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eu-west-1", - "failure-domain.beta.kubernetes.io/zone": "eu-west-1c", - "kubernetes.io/hostname": "ip-192-168-200-63.eu-west-1.compute.internal", - }, - Annotations: map[string]string{ - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: "MemoryPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientMemory", - Message: "kubelet has sufficient memory available", - }, - { - Type: "DiskPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasNoDiskPressure", - Message: "kubelet has no disk pressure", - }, - { - Type: "PIDPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientPID", - Message: "kubelet has sufficient PID available", - }, - { - Type: "Ready", - Status: "True", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletReady", - Message: "kubelet is posting ready status", - }, - }, - NodeInfo: v1.NodeSystemInfo{ - MachineID: "ec2bb261412a689dd19139d9a526407f", - SystemUUID: "EC2BB261-412A-689D-D191-39D9A526407F", - BootID: "1d5db5f1-03e8-48f3-9c49-21781a9ba1ae", - KernelVersion: "4.14.97-90.72.amzn2.x86_64", - OSImage: "Amazon Linux 2", - ContainerRuntimeVersion: "docker://18.6.1", - KubeletVersion: "v1.11.5", - KubeProxyVersion: "v1.11.5", - OperatingSystem: "linux", - Architecture: "amd64", - }, - }, - }, - &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ip-192-168-76-61.eu-west-1.compute.internal", - GenerateName: "", - Namespace: "", - SelfLink: "/api/v1/nodes/ip-192-168-76-61.eu-west-1.compute.internal", - UID: "9f9e79a7-4c14-11e9-b47e-066a7a20bac8", - ResourceVersion: "6505829", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eu-west-1", - "failure-domain.beta.kubernetes.io/zone": "eu-west-1a", - "kubernetes.io/hostname": "ip-192-168-76-61.eu-west-1.compute.internal", - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "t3.medium", - }, - Annotations: map[string]string{ - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: "MemoryPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientMemory", - Message: "kubelet has sufficient memory available", - }, - { - Type: "DiskPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasNoDiskPressure", - Message: "kubelet has no disk pressure", - }, - { - Type: "PIDPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientPID", - Message: "kubelet has sufficient PID available", - }, - { - Type: "Ready", - Status: "True", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletReady", - Message: "kubelet is posting ready status", - }, - }, - NodeInfo: v1.NodeSystemInfo{ - MachineID: "ec275328a762912e9c1777bc59328231", - SystemUUID: "EC275328-A762-912E-9C17-77BC59328231", - BootID: "02a66fbd-7030-4f7d-85c4-935a32b5d3e9", - KernelVersion: "4.14.97-90.72.amzn2.x86_64", - OSImage: "Amazon Linux 2", - ContainerRuntimeVersion: "docker://18.6.1", - KubeletVersion: "v1.11.5", - KubeProxyVersion: "v1.11.5", - OperatingSystem: "linux", - Architecture: "amd64", - }, - }, - }, - &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ip-192-168-153-1.eu-west-1.compute.internal", - GenerateName: "", - Namespace: "", - SelfLink: "/api/v1/nodes/ip-192-168-153-1.eu-west-1.compute.internal", - UID: "9eb3a09d-4c14-11e9-b47e-066a7a20bac8", - ResourceVersion: "6505831", - Generation: 0, - CreationTimestamp: metav1.Time{ - Time: time.Now(), - }, - Labels: map[string]string{ - "beta.kubernetes.io/arch": "amd64", - "beta.kubernetes.io/instance-type": "t3.medium", - "beta.kubernetes.io/os": "linux", - "failure-domain.beta.kubernetes.io/region": "eu-west-1", - "failure-domain.beta.kubernetes.io/zone": "eu-west-1b", - "kubernetes.io/hostname": "ip-192-168-153-1.eu-west-1.compute.internal", - }, - Annotations: map[string]string{ - "node.alpha.kubernetes.io/ttl": "0", - "volumes.kubernetes.io/controller-managed-attach-detach": "true", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: "MemoryPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientMemory", - Message: "kubelet has sufficient memory available", - }, - { //This entry shows failed node - Type: "DiskPressure", - Status: "True", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasDiskPressure", - Message: "kubelet has disk pressure", - }, - { - Type: "PIDPressure", - Status: "False", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletHasSufficientPID", - Message: "kubelet has sufficient PID available", - }, - { - Type: "Ready", - Status: "True", - LastHeartbeatTime: metav1.Time{ - Time: time.Now(), - }, - LastTransitionTime: metav1.Time{ - Time: time.Now(), - }, - Reason: "KubeletReady", - Message: "kubelet is posting ready status", - }, - }, - NodeInfo: v1.NodeSystemInfo{ - MachineID: "ec2eb21af60b929ba89f44fb5d86508f", - SystemUUID: "EC2EB21A-F60B-929B-A89F-44FB5D86508F", - BootID: "3b67d19f-cfa6-4925-a728-ce3f3e28991b", - KernelVersion: "4.14.97-90.72.amzn2.x86_64", - OSImage: "Amazon Linux 2", - ContainerRuntimeVersion: "docker://18.6.1", - KubeletVersion: "v1.11.5", - KubeProxyVersion: "v1.11.5", - OperatingSystem: "linux", - Architecture: "amd64", - }, - }, - }, -} - -func setUpNodeClient() (*nodeClient, chan struct{}) { - stopChan := make(chan struct{}) - - client := &nodeClient{ - stopChan: stopChan, - store: NewObjStore(transformFuncNode), - inited: true, //make it true to avoid further initialization invocation. - } - return client, stopChan -} - -func TestNodeClient(t *testing.T) { - client, stopChan := setUpNodeClient() - defer close(stopChan) - - client.store.Replace(nodeArray, "") - - expectedClusterNodeCount := 3 - expectedClusterFailedNodeCount := 1 - clusterNodeCount := client.ClusterNodeCount() - clusterFailedNodeCount := client.ClusterFailedNodeCount() - log.Printf("clusterNodeCount: %v, clusterFailedNodeCount: %v", clusterNodeCount, clusterFailedNodeCount) - - assert.Equal(t, clusterNodeCount, expectedClusterNodeCount) - assert.Equal(t, clusterFailedNodeCount, expectedClusterFailedNodeCount) -} diff --git a/internal/k8sCommon/k8sclient/obj_store.go b/internal/k8sCommon/k8sclient/obj_store.go deleted file mode 100644 index a88cd8c0c0..0000000000 --- a/internal/k8sCommon/k8sclient/obj_store.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "log" - "sync" - - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/types" -) - -type ObjStore struct { - sync.RWMutex - - refreshed bool - objs map[types.UID]interface{} - - transformFunc func(interface{}) (interface{}, error) -} - -func NewObjStore(transformFunc func(interface{}) (interface{}, error)) *ObjStore { - return &ObjStore{ - transformFunc: transformFunc, - objs: map[types.UID]interface{}{}, - } -} - -// Track whether the underlying data store is refreshed or not. -// Calling this func itself will reset the state to false. -func (s *ObjStore) Refreshed() bool { - s.Lock() - defer s.Unlock() - - refreshed := s.refreshed - if refreshed { - s.refreshed = false - } - return refreshed -} - -func (s *ObjStore) Add(obj interface{}) error { - o, err := meta.Accessor(obj) - if err != nil { - log.Printf("W! Cannot find the metadata for %v.", obj) - return err - } - - var toCacheObj interface{} - if toCacheObj, err = s.transformFunc(obj); err != nil { - log.Printf("W! Failed to update obj %v in the cached store.", obj) - return err - } - - s.Lock() - defer s.Unlock() - - s.objs[o.GetUID()] = toCacheObj - s.refreshed = true - - return nil -} - -// Update updates the existing entry in the ObjStore. -func (s *ObjStore) Update(obj interface{}) error { - return s.Add(obj) -} - -// Delete deletes an existing entry in the ObjStore. -func (s *ObjStore) Delete(obj interface{}) error { - - o, err := meta.Accessor(obj) - if err != nil { - return err - } - - s.Lock() - defer s.Unlock() - - delete(s.objs, o.GetUID()) - - s.refreshed = true - - return nil -} - -// List implements the List method of the store interface. -func (s *ObjStore) List() []interface{} { - s.RLock() - defer s.RUnlock() - - result := make([]interface{}, 0, len(s.objs)) - for _, v := range s.objs { - result = append(result, v) - } - return result -} - -// ListKeys implements the ListKeys method of the store interface. -func (s *ObjStore) ListKeys() []string { - s.RLock() - defer s.RUnlock() - - result := make([]string, 0, len(s.objs)) - for k := range s.objs { - result = append(result, string(k)) - } - return result -} - -// Get implements the Get method of the store interface. -func (s *ObjStore) Get(obj interface{}) (item interface{}, exists bool, err error) { - return nil, false, nil -} - -// GetByKey implements the GetByKey method of the store interface. -func (s *ObjStore) GetByKey(key string) (item interface{}, exists bool, err error) { - return nil, false, nil -} - -// Replace will delete the contents of the store, using instead the -// given list. -func (s *ObjStore) Replace(list []interface{}, _ string) error { - s.Lock() - s.objs = map[types.UID]interface{}{} - s.Unlock() - - for _, o := range list { - err := s.Add(o) - if err != nil { - return err - } - } - - return nil -} - -// Resync implements the Resync method of the store interface. -func (s *ObjStore) Resync() error { - return nil -} diff --git a/internal/k8sCommon/k8sclient/pod.go b/internal/k8sCommon/k8sclient/pod.go deleted file mode 100644 index 304368722b..0000000000 --- a/internal/k8sCommon/k8sclient/pod.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "context" - "errors" - "fmt" - "log" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" -) - -type PodClient interface { - NamespaceToRunningPodNum() map[string]int - - Init() - Shutdown() -} - -type podClient struct { - sync.RWMutex - - stopChan chan struct{} - store *ObjStore - - inited bool - - namespaceToRunningPodNumMap map[string]int -} - -func (c *podClient) NamespaceToRunningPodNum() map[string]int { - if !c.inited { - c.Init() - } - //if c.store.Refreshed() { - // log.Printf("I! store refresh %v", c.store.refreshed) - // c.refresh() - //} - c.refresh() - c.RLock() - defer c.RUnlock() - return c.namespaceToRunningPodNumMap -} - -func (c *podClient) refresh() { - c.Lock() - defer c.Unlock() - - objsList := c.store.List() - namespaceToRunningPodNumMapNew := make(map[string]int) - for _, obj := range objsList { - pod := obj.(*podInfo) - if pod.phase == v1.PodRunning { - if podNum, ok := namespaceToRunningPodNumMapNew[pod.namespace]; !ok { - namespaceToRunningPodNumMapNew[pod.namespace] = 1 - } else { - namespaceToRunningPodNumMapNew[pod.namespace] = podNum + 1 - } - } - } - c.namespaceToRunningPodNumMap = namespaceToRunningPodNumMapNew -} - -func (c *podClient) Init() { - c.Lock() - defer c.Unlock() - if c.inited { - return - } - - c.stopChan = make(chan struct{}) - - c.store = NewObjStore(transformFuncPod) - - lw := createPodListWatch(Get().ClientSet, metav1.NamespaceAll) - reflector := cache.NewReflector(lw, &v1.Pod{}, c.store, 0) - go reflector.Run(c.stopChan) - - if err := wait.Poll(50*time.Millisecond, 2*time.Second, func() (done bool, err error) { - return reflector.LastSyncResourceVersion() != "", nil - }); err != nil { - log.Printf("W! Pod initial sync timeout: %v", err) - } - - c.inited = true -} - -func (c *podClient) Shutdown() { - c.Lock() - defer c.Unlock() - if !c.inited { - return - } - - close(c.stopChan) - - c.inited = false -} - -func transformFuncPod(obj interface{}) (interface{}, error) { - pod, ok := obj.(*v1.Pod) - if !ok { - return nil, errors.New(fmt.Sprintf("input obj %v is not Pod type", obj)) - } - info := new(podInfo) - info.namespace = pod.Namespace - info.phase = pod.Status.Phase - return info, nil -} - -func createPodListWatch(client kubernetes.Interface, ns string) cache.ListerWatcher { - ctx := context.Background() - return &cache.ListWatch{ - ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) { - opts.ResourceVersion = "" - // Passing empty context as this was not required by old List() - return client.CoreV1().Pods(ns).List(ctx, opts) - }, - WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) { - // Passing empty context as this was not required by old Watch() - return client.CoreV1().Pods(ns).Watch(ctx, opts) - }, - } -} diff --git a/internal/k8sCommon/k8sclient/pod_info.go b/internal/k8sCommon/k8sclient/pod_info.go deleted file mode 100644 index 82ae28fa01..0000000000 --- a/internal/k8sCommon/k8sclient/pod_info.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - v1 "k8s.io/api/core/v1" -) - -type podInfo struct { - namespace string - phase v1.PodPhase -} diff --git a/internal/k8sCommon/k8sclient/pod_test.go b/internal/k8sCommon/k8sclient/pod_test.go deleted file mode 100644 index 45a13b3ddb..0000000000 --- a/internal/k8sCommon/k8sclient/pod_test.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "log" - "testing" - - "github.com/aws/aws-sdk-go/aws/awsutil" - "gotest.tools/v3/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var podArray = []interface{}{ - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "bc5f5839-f62e-44b9-a79e-af250d92dcb1", - Name: "kube-proxy-csm88", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/kube-proxy-csm88", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "75ab40d2-552a-4c05-82c9-0ddcb3008657", - Name: "coredns-7554568866-26jdf", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/coredns-7554568866-26jdf", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "b0280963-d68a-4096-ac56-4ecfbaee37f6", - Name: "aws-node-wf7jj", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/aws-node-wf7jj", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "eb09849f-b3f6-4c2c-ba4a-3e2c6eaf24f4", - Name: "cloudwatch-agent-rnjfp", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/cloudwatch-agent-rnjfp", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "11d078c2-6fed-49c3-83a8-b94915a6451f", - Name: "guestbook-qbdv8", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/guestbook-qbdv8", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "52a76177-8431-400c-95b2-109f9b28b3b1", - Name: "kube-proxy-v5l9h", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/kube-proxy-v5l9h", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "bb003966-4134-4ebf-9be3-dfb1741a1499", - Name: "redis-slave-mdjsj", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/redis-slave-mdjsj", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "440854dd-73e8-4616-b21a-3a63831d27e3", - Name: "guestbook-qjqnz", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/guestbook-qjqnz", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "b79e85a1-1fc8-439c-b3a5-bd854ed29e10", - Name: "kube-proxy-h5tsv", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/kube-proxy-h5tsv", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "f9ecca30-f5de-4809-9b63-79dc8369a0a6", - Name: "cloudwatch-agent-ksd26", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/cloudwatch-agent-ksd26", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "cfc496eb-2e5e-490e-9547-f0fe5399c219", - Name: "aws-node-pqxp2", - Namespace: "kube-system", - SelfLink: "/api/v1/namespaces/kube-system/pods/aws-node-pqxp2", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "20d46c94-5341-429d-af0c-55d97314db7b", - Name: "cloudwatch-agent-2x7p4", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/cloudwatch-agent-2x7p4", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, - &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "885c6c56-da31-4a63-b823-eed50172193d", - Name: "guestbook-92wmq", - Namespace: "default", - SelfLink: "/api/v1/namespaces/default/pods/guestbook-92wmq", - }, - Status: v1.PodStatus{ - Phase: "Running", - }, - }, -} - -func setUpPodClient() (*podClient, chan struct{}) { - stopChan := make(chan struct{}) - client := &podClient{ - stopChan: stopChan, - store: NewObjStore(transformFuncPod), - inited: true, //make it true to avoid further initialization invocation. - } - return client, stopChan -} - -func TestPodClient_NamespaceToRunningPodNum(t *testing.T) { - client, stopChan := setUpPodClient() - defer close(stopChan) - - client.store.Replace(podArray, "") - - expectedMap := map[string]int{ - "kube-system": 6, - "default": 7, - } - resultMap := client.NamespaceToRunningPodNum() - log.Printf("NamespaceToRunningPodNum (len=%v): %v", len(resultMap), awsutil.Prettify(resultMap)) - assert.DeepEqual(t, resultMap, expectedMap) -} diff --git a/internal/k8sCommon/k8sclient/replicaset.go b/internal/k8sCommon/k8sclient/replicaset.go deleted file mode 100644 index b31d5003b6..0000000000 --- a/internal/k8sCommon/k8sclient/replicaset.go +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "context" - "errors" - "fmt" - "log" - "sync" - "time" - - appsv1 "k8s.io/api/apps/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" - - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" -) - -type ReplicaSetClient interface { - ReplicaSetToDeployment() map[string]string - - Init() - Shutdown() -} - -type replicaSetClient struct { - sync.RWMutex - - stopChan chan struct{} - store *ObjStore - - inited bool - - cachedReplicaSetMap map[string]time.Time - replicaSetToDeploymentMap map[string]string -} - -func (c *replicaSetClient) ReplicaSetToDeployment() map[string]string { - if !c.inited { - c.Init() - } - if c.store.Refreshed() { - c.refresh() - } - c.RLock() - defer c.RUnlock() - return c.replicaSetToDeploymentMap -} - -func (c *replicaSetClient) refresh() { - c.Lock() - defer c.Unlock() - - objsList := c.store.List() - - tmpMap := make(map[string]string) - for _, obj := range objsList { - replicaSet := obj.(*replicaSetInfo) - ownerLoop: - for _, owner := range replicaSet.owners { - if owner.kind == containerinsightscommon.Deployment && owner.name != "" { - tmpMap[replicaSet.name] = owner.name - break ownerLoop - } - } - } - - if c.replicaSetToDeploymentMap == nil { - c.replicaSetToDeploymentMap = make(map[string]string) - } - - if c.cachedReplicaSetMap == nil { - c.cachedReplicaSetMap = make(map[string]time.Time) - } - - lastRefreshTime := time.Now() - - for k, v := range c.cachedReplicaSetMap { - if lastRefreshTime.Sub(v) > cacheTTL { - delete(c.replicaSetToDeploymentMap, k) - delete(c.cachedReplicaSetMap, k) - } - } - - for k, v := range tmpMap { - c.replicaSetToDeploymentMap[k] = v - c.cachedReplicaSetMap[k] = lastRefreshTime - } -} - -func (c *replicaSetClient) Init() { - c.Lock() - defer c.Unlock() - if c.inited { - return - } - - c.stopChan = make(chan struct{}) - - c.store = NewObjStore(transformFuncReplicaSet) - - lw := createReplicaSetListWatch(Get().ClientSet, metav1.NamespaceAll) - reflector := cache.NewReflector(lw, &appsv1.ReplicaSet{}, c.store, 0) - go reflector.Run(c.stopChan) - - if err := wait.Poll(50*time.Millisecond, 2*time.Second, func() (done bool, err error) { - return reflector.LastSyncResourceVersion() != "", nil - }); err != nil { - log.Printf("W! ReplicaSet initial sync timeout: %v", err) - } - - c.inited = true -} - -func (c *replicaSetClient) Shutdown() { - c.Lock() - defer c.Unlock() - if !c.inited { - return - } - - close(c.stopChan) - - c.inited = false -} - -func transformFuncReplicaSet(obj interface{}) (interface{}, error) { - replicaSet, ok := obj.(*appsv1.ReplicaSet) - if !ok { - return nil, errors.New(fmt.Sprintf("input obj %v is not ReplicaSet type", obj)) - } - info := new(replicaSetInfo) - info.name = replicaSet.Name - info.owners = []*replicaSetOwner{} - for _, owner := range replicaSet.OwnerReferences { - info.owners = append(info.owners, &replicaSetOwner{kind: owner.Kind, name: owner.Name}) - } - return info, nil -} - -func createReplicaSetListWatch(client kubernetes.Interface, ns string) cache.ListerWatcher { - ctx := context.Background() - return &cache.ListWatch{ - ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) { - opts.ResourceVersion = "" - // Passing empty context as this was not required by old List() - return client.AppsV1().ReplicaSets(ns).List(ctx, opts) - }, - WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) { - // Passing empty context as this was not required by old Watch() - return client.AppsV1().ReplicaSets(ns).Watch(ctx, opts) - }, - } -} diff --git a/internal/k8sCommon/k8sclient/replicaset_info.go b/internal/k8sCommon/k8sclient/replicaset_info.go deleted file mode 100644 index a7d5bd0525..0000000000 --- a/internal/k8sCommon/k8sclient/replicaset_info.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -type replicaSetInfo struct { - name string - owners []*replicaSetOwner -} - -type replicaSetOwner struct { - kind string - name string -} diff --git a/internal/k8sCommon/k8sclient/replicaset_test.go b/internal/k8sCommon/k8sclient/replicaset_test.go deleted file mode 100644 index 80bc7f1edb..0000000000 --- a/internal/k8sCommon/k8sclient/replicaset_test.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sclient - -import ( - "testing" - - "gotest.tools/v3/assert" - appsv1 "k8s.io/api/apps/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var replicaSetArray = []interface{}{ - &appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - UID: "bc5f5839-f62e-44b9-a79e-af250d92dcb1", - Name: "cloudwatch-agent-statsd-7f8459d648", - Namespace: "amazon-cloudwatch", - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "Deployment", - Name: "cloudwatch-agent-statsd", - UID: "219887d3-8d2e-11e9-9cbd-064a0c5a2714", - }, - }, - }, - }, - &appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - UID: "75ab40d2-552a-4c05-82c9-0ddcb3008657", - Name: "cloudwatch-agent-statsd-d6487f8459", - Namespace: "amazon-cloudwatch", - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "Deployment", - Name: "cloudwatch-agent-statsd", - UID: "219887d3-8d2e-11e9-9cbd-064a0c5a2714", - }, - }, - }, - }, -} - -func setUpReplicaSetClient() (*replicaSetClient, chan struct{}) { - stopChan := make(chan struct{}) - client := &replicaSetClient{ - stopChan: stopChan, - store: NewObjStore(transformFuncReplicaSet), - inited: true, //make it true to avoid further initialization invocation. - } - return client, stopChan -} - -func TestReplicaSetClient_ReplicaSetToDeployment(t *testing.T) { - client, stopChan := setUpReplicaSetClient() - defer close(stopChan) - - client.store.Replace(replicaSetArray, "") - - expectedMap := map[string]string{ - "cloudwatch-agent-statsd-7f8459d648": "cloudwatch-agent-statsd", - "cloudwatch-agent-statsd-d6487f8459": "cloudwatch-agent-statsd", - } - resultMap := client.ReplicaSetToDeployment() - assert.DeepEqual(t, resultMap, expectedMap) -} diff --git a/internal/k8sCommon/k8sutil/util.go b/internal/k8sCommon/k8sutil/util.go deleted file mode 100644 index cc550379c1..0000000000 --- a/internal/k8sCommon/k8sutil/util.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sutil - -import ( - "fmt" -) - -func CreatePodKey(namespace, podName string) string { - if namespace == "" || podName == "" { - return "" - } - return fmt.Sprintf("namespace:%s,podName:%s", namespace, podName) -} - -func CreateContainerKey(namespace, podName, containerName string) string { - if namespace == "" || podName == "" || containerName == "" { - return "" - } - return fmt.Sprintf("namespace:%s,podName:%s,containerName:%s", namespace, podName, containerName) -} diff --git a/internal/k8sCommon/k8sutil/util_test.go b/internal/k8sCommon/k8sutil/util_test.go deleted file mode 100644 index ecc6566653..0000000000 --- a/internal/k8sCommon/k8sutil/util_test.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sutil - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestCreatePodKey(t *testing.T) { - assert.Equal(t, "namespace:default,podName:testPod", CreatePodKey("default", "testPod")) -} diff --git a/internal/k8sCommon/kubeletutil/kubeletclient.go b/internal/k8sCommon/kubeletutil/kubeletclient.go deleted file mode 100644 index ec5428b919..0000000000 --- a/internal/k8sCommon/kubeletutil/kubeletclient.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package kubeletutil - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "log" - "net/http" - "os" - "strings" - "time" - - corev1 "k8s.io/api/core/v1" - - "github.com/aws/amazon-cloudwatch-agent/internal/tls" -) - -type KubeClient struct { - Port string - BearerToken string - KubeIP string - responseTimeout time.Duration - roundTripper http.RoundTripper - tls.ClientConfig -} - -var ErrKubeClientAccessFailure = errors.New("KubeClient Access Failure") - -func (k *KubeClient) ListPods() ([]corev1.Pod, error) { - var result []corev1.Pod - url := fmt.Sprintf("https://%s:%s/pods", k.KubeIP, k.Port) - - var req, _ = http.NewRequest("GET", url, nil) - var resp *http.Response - - k.InsecureSkipVerify = true - tlsCfg, err := k.ClientConfig.TLSConfig() - if err != nil { - return result, err - } - - if k.roundTripper == nil { - // Set default values - if k.responseTimeout < time.Second { - k.responseTimeout = time.Second * 5 - } - k.roundTripper = &http.Transport{ - TLSHandshakeTimeout: 5 * time.Second, - TLSClientConfig: tlsCfg, - ResponseHeaderTimeout: k.responseTimeout, - } - } - - if k.BearerToken != "" { - token, err := os.ReadFile(k.BearerToken) - if err != nil { - return result, err - } - req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token))) - } - req.Header.Add("Accept", "application/json") - - resp, err = k.roundTripper.RoundTrip(req) - if err != nil { - log.Printf("E! error making HTTP request to %s: %s", url, err) - return result, ErrKubeClientAccessFailure - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - log.Printf("E! %s returned HTTP status %s", url, resp.Status) - return result, ErrKubeClientAccessFailure - } - - b, err := io.ReadAll(resp.Body) - if err != nil { - log.Printf("E! Fail to read request %s body: %s", url, err) - return result, err - } - - pods := corev1.PodList{} - err = json.Unmarshal(b, &pods) - if err != nil { - log.Printf("E! parsing response: %s", err) - return result, err - } - - return pods.Items, nil -} diff --git a/internal/k8sCommon/kubeletutil/kubeletclient_test.go b/internal/k8sCommon/kubeletutil/kubeletclient_test.go deleted file mode 100644 index f080cb424d..0000000000 --- a/internal/k8sCommon/kubeletutil/kubeletclient_test.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package kubeletutil - -import ( - "io" - "net/http" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" -) - -// Test Data -const ( - podJson = ` -{ - "kind": "PodList", - "apiVersion": "v1", - "metadata": { - - }, - "items": [ - { - "metadata": { - "name": "cpu-limit", - "namespace": "default", - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "blockOwnerDeletion": true, - "controller": true, - "kind": "DaemonSet", - "name": "DaemonSetTest", - "uid": "36779a62-4aca-11e9-977b-0672b6c6fc94" - } - ], - "selfLink": "/api/v1/namespaces/default/pods/cpu-limit", - "uid": "764d01e1-2a2f-11e9-95ea-0a695d7ce286", - "resourceVersion": "5671573", - "creationTimestamp": "2019-02-06T16:51:34Z", - "labels": { - "app": "hello_test" - }, - "annotations": { - "kubernetes.io/config.seen": "2019-02-19T00:06:56.109155665Z", - "kubernetes.io/config.source": "api" - } - }, - "spec": { - "volumes": [ - { - "name": "default-token-tlgw7", - "secret": { - "secretName": "default-token-tlgw7", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "ubuntu", - "image": "ubuntu", - "command": [ - "/bin/bash" - ], - "args": [ - "-c", - "sleep 300000000" - ], - "resources": { - "limits": { - "cpu": "10m", - "memory": "50Mi" - }, - "requests": { - "cpu": "10m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-tlgw7", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "ip-192-168-67-127.us-west-2.compute.internal", - "securityContext": { - - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:34Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:43Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:34Z" - } - ], - "hostIP": "192.168.67.127", - "podIP": "192.168.76.93", - "startTime": "2019-02-06T16:51:34Z", - "containerStatuses": [ - { - "name": "ubuntu", - "state": { - "running": { - "startedAt": "2019-02-06T16:51:42Z" - } - }, - "lastState": { - - }, - "ready": true, - "restartCount": 0, - "image": "ubuntu:latest", - "imageID": "docker-pullable://ubuntu@sha256:7a47ccc3bbe8a451b500d2b53104868b46d60ee8f5b35a24b41a86077c650210", - "containerID": "docker://637631e2634ea92c0c1aa5d24734cfe794f09c57933026592c12acafbaf6972c" - } - ], - "qosClass": "Guaranteed" - } - } - ] -}` -) - -type MockHttpRoundTripper struct { - http.RoundTripper - mock.Mock -} - -func (m *MockHttpRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { - args := m.Called() - return args.Get(0).(*http.Response), nil -} - -// Test -func TestKubeClient_ListPods(t *testing.T) { - mockRoundTripper := new(MockHttpRoundTripper) - mockRoundTripper.On("RoundTrip", mock.Anything).Return(&http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(strings.NewReader(podJson))}) - client := KubeClient{roundTripper: mockRoundTripper} - pods, err := client.ListPods() - assert.Equal(t, nil, err) - assert.Equal(t, 1, len(pods)) -} diff --git a/plugins/inputs/prometheus/metrics_handler.go b/plugins/inputs/prometheus/metrics_handler.go index d515e015bf..1eac499054 100644 --- a/plugins/inputs/prometheus/metrics_handler.go +++ b/plugins/inputs/prometheus/metrics_handler.go @@ -10,7 +10,7 @@ import ( "github.com/influxdata/telegraf" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" ) // Use metricMaterial instead of mbMetric to avoid unnecessary tags&fields copy @@ -69,8 +69,8 @@ func (mh *metricsHandler) setEmfMetadata(mms []*metricMaterial) { if mh.clusterName != "" { // Customer can specified the cluster name in the scraping job's relabel_config // CWAgent won't overwrite in this case to support cross-cluster monitoring - if _, ok := mm.tags[containerinsightscommon.ClusterNameKey]; !ok { - mm.tags[containerinsightscommon.ClusterNameKey] = mh.clusterName + if _, ok := mm.tags[constants.ClusterNameKey]; !ok { + mm.tags[constants.ClusterNameKey] = mh.clusterName } } diff --git a/plugins/plugins.go b/plugins/plugins.go index 4bf1c8a918..4dd989ff6b 100644 --- a/plugins/plugins.go +++ b/plugins/plugins.go @@ -5,8 +5,6 @@ package plugins import ( //Enable cloudwatch-agent process plugins - _ "github.com/aws/amazon-cloudwatch-agent/plugins/processors/ecsdecorator" - _ "github.com/aws/amazon-cloudwatch-agent/plugins/processors/k8sdecorator" // Enabled cloudwatch-agent input plugins _ "github.com/aws/amazon-cloudwatch-agent/plugins/inputs/logfile" diff --git a/plugins/processors/ecsdecorator/cgroup.go b/plugins/processors/ecsdecorator/cgroup.go deleted file mode 100644 index 90ca090c0c..0000000000 --- a/plugins/processors/ecsdecorator/cgroup.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "bufio" - "fmt" - "log" - "math" - "os" - "path" - "path/filepath" - "strconv" - "strings" -) - -const ( - kernelMagicCodeNotSet = int64(9223372036854771712) // infinity magic number for cgroup: https://unix.stackexchange.com/questions/420906/what-is-the-value-for-the-cgroups-limit-in-bytes-if-the-memory-is-not-restricte - ecsInstanceMountConfigPath = "/proc/self/mountinfo" -) - -type cgroupScanner struct { - mountPoint string -} - -func newCGroupScanner(mountConfigPath string) (c *cgroupScanner) { - mp, err := getCGroupMountPoint(mountConfigPath) - if err != nil { - log.Printf("D! failed to get the cgroup mount point, error: %v, fallback to /cgroup", err) - mp = "/cgroup" - } - - c = &cgroupScanner{ - mountPoint: mp, - } - return c -} -func newCGroupScannerForContainer() *cgroupScanner { - return newCGroupScanner(ecsInstanceMountConfigPath) -} - -func (c *cgroupScanner) getCPUReserved(taskID string, clusterName string) int64 { - cpuPath, err := getCGroupPathForTask(c.mountPoint, "cpu", taskID, clusterName) - if err != nil { - log.Printf("E! failed to get cpu cgroup path for task: %v", err) - return int64(0) - } - - // check if hard limit is configured - if cfsQuota, err := readInt64(cpuPath, "cpu.cfs_quota_us"); err == nil && cfsQuota != -1 { - if cfsPeriod, err := readInt64(cpuPath, "cpu.cfs_period_us"); err == nil && cfsPeriod > 0 { - return int64(math.Ceil(float64(1024*cfsQuota) / float64(cfsPeriod))) - } - } - - if shares, err := readInt64(cpuPath, "cpu.shares"); err == nil { - return shares - } - - return int64(0) -} - -func (c *cgroupScanner) getMEMReserved(taskID string, clusterName string, containers []ECSContainer) int64 { - memPath, err := getCGroupPathForTask(c.mountPoint, "memory", taskID, clusterName) - if err != nil { - log.Printf("E! failed to get memory cgroup path for task: %v", err) - return int64(0) - } - - if memReserved, err := readInt64(memPath, "memory.limit_in_bytes"); err == nil && memReserved != kernelMagicCodeNotSet { - return memReserved - } - - // sum the containers' memory if the task's memory limit is not configured - sum := int64(0) - for _, container := range containers { - containerPath := path.Join(memPath, container.DockerId) - - //soft limit first - - if softLimit, err := readInt64(containerPath, "memory.soft_limit_in_bytes"); err == nil && softLimit != kernelMagicCodeNotSet { - sum += softLimit - continue - } - - // try hard limit when soft limit is not configured - if hardLimit, err := readInt64(containerPath, "memory.limit_in_bytes"); err == nil && hardLimit != kernelMagicCodeNotSet { - sum += hardLimit - } - } - return sum -} - -func readString(dirpath string, file string) (string, error) { - cgroupFile := path.Join(dirpath, file) - - // Read - out, err := os.ReadFile(cgroupFile) - if err != nil { - // Ignore non-existent files - log.Printf("W! readString: Failed to read %q: %s", cgroupFile, err) - return "", err - } - return strings.TrimSpace(string(out)), nil -} - -func readInt64(dirpath string, file string) (int64, error) { - out, err := readString(dirpath, file) - if err != nil { - return 0, err - } - - if out == "" || out == "max" { - return 0, err - } - - val, err := strconv.ParseInt(out, 10, 64) - if err != nil { - log.Printf("W! readInt64: Failed to parse int %q from file %q: %s", out, path.Join(dirpath, file), err) - return 0, err - } - - return val, nil -} -func getCGroupMountPoint(mountConfigPath string) (string, error) { - f, err := os.Open(mountConfigPath) - if err != nil { - return "", err - } - defer f.Close() - scanner := bufio.NewScanner(f) - for scanner.Scan() { - if err := scanner.Err(); err != nil { - return "", err - } - var ( - text = scanner.Text() - fields = strings.Split(text, " ") - // safe as mountinfo encodes mountpoints with spaces as \040. - // an example: 26 22 0:23 / /cgroup/cpu rw,relatime - cgroup cgroup rw,cpu - index = strings.Index(text, " - ") - postSeparatorFields = strings.Fields(text[index+3:]) - numPostFields = len(postSeparatorFields) - ) - // this is an error as we can't detect if the mount is for "cgroup" - if numPostFields == 0 { - return "", fmt.Errorf("Found no fields post '-' in %q", text) - } - if postSeparatorFields[0] == "cgroup" { - // check that the mount is properly formated. - if numPostFields < 3 { - return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) - } - return filepath.Dir(fields[4]), nil - } - } - return "", fmt.Errorf("mount point not existed") -} - -func getCGroupPathForTask(cgroupMount, controller, taskID, clusterName string) (string, error) { - taskPath := path.Join(cgroupMount, controller, "ecs", taskID) - if _, err := os.Stat(taskPath); os.IsNotExist(err) { - // Task cgroup path does not exist, fallback to try legacy Task cgroup path, - // legacy cgroup path of task with new format ARN used to contain cluster name, - // before ECS Agent PR https://github.com/aws/amazon-ecs-agent/pull/2497/ - taskPath = path.Join(cgroupMount, controller, "ecs", clusterName, taskID) - if _, err := os.Stat(taskPath); os.IsNotExist(err) { - return "", fmt.Errorf("CGroup Path %q does not exist", taskPath) - } - } - return taskPath, nil -} diff --git a/plugins/processors/ecsdecorator/cgroup_test.go b/plugins/processors/ecsdecorator/cgroup_test.go deleted file mode 100644 index 9cf5818a69..0000000000 --- a/plugins/processors/ecsdecorator/cgroup_test.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "path" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestGetCGroupMountPoint(t *testing.T) { - result, _ := getCGroupMountPoint("test/mountinfo") - assert.Equal(t, "test", result, "Expected to be equal") -} - -func TestGetCPUReservedFromShares(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - - assert.Equal(t, int64(128), cgroup.getCPUReserved("test1", "")) - assert.Equal(t, int64(128), cgroup.getCPUReserved("test4", "myCluster")) -} - -func TestGetCPUReservedFromQuota(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - assert.Equal(t, int64(256), cgroup.getCPUReserved("test2", "")) -} - -func TestGetCPUReservedFromBoth(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - assert.Equal(t, int64(256), cgroup.getCPUReserved("test3", "")) -} - -func TestGetCPUReservedFromFalseTaskID(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - assert.Equal(t, int64(0), cgroup.getCPUReserved("fake", "")) -} - -func TestGetMEMReservedFromTask(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - containers := []ECSContainer{} - assert.Equal(t, int64(256), cgroup.getMEMReserved("test1", "", containers)) - assert.Equal(t, int64(256), cgroup.getMEMReserved("test3", "myCluster", containers)) -} - -func TestGetMEMReservedFromContainers(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - containers := []ECSContainer{ECSContainer{DockerId: "container1"}, ECSContainer{DockerId: "container2"}} - assert.Equal(t, int64(384), cgroup.getMEMReserved("test2", "", containers)) -} - -func TestGetMEMReservedFromFalseTaskID(t *testing.T) { - cgroup := newCGroupScanner("test/mountinfo") - containers := []ECSContainer{ECSContainer{DockerId: "container1"}, ECSContainer{DockerId: "container2"}} - assert.Equal(t, int64(0), cgroup.getMEMReserved("fake", "", containers)) -} - -func TestGetCGroupPathForTask(t *testing.T) { - cgroupMount := "test" - controller := "cpu" - taskID := "test1" - clusterName := "myCluster" - result, _ := getCGroupPathForTask(cgroupMount, controller, taskID, clusterName) - assert.Equal(t, path.Join(cgroupMount, controller, "ecs", taskID), result) - - taskID = "test4" - result, _ = getCGroupPathForTask(cgroupMount, controller, taskID, clusterName) - assert.Equal(t, path.Join(cgroupMount, controller, "ecs", clusterName, taskID), result) -} diff --git a/plugins/processors/ecsdecorator/ecsdecorator.go b/plugins/processors/ecsdecorator/ecsdecorator.go deleted file mode 100644 index 324f76ebbe..0000000000 --- a/plugins/processors/ecsdecorator/ecsdecorator.go +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "fmt" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/processors" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/logscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -const ( - cloudwatchNamespace = "ECS/ContainerInsights" -) - -type ECSDecorator struct { - HostIP string `toml:"host_ip"` - ecsInfo *ecsInfo - *NodeCapacity -} - -func (e *ECSDecorator) Description() string { - return "Decorate metrics collected by cadivisor with ecs metadata" -} - -var sampleConfig = ` - ## ecs ec2 node private ip - host_ip = "10.13.14.15" -` - -func (e *ECSDecorator) SampleConfig() string { - return sampleConfig -} - -func (e *ECSDecorator) Init() error { - e.ecsInfo = newECSInfo(e.HostIP) - if e.ecsInfo.clusterName == "" { - return fmt.Errorf("ECSDecorator failed to get cluster name of ecs") - } - return nil -} - -func (e *ECSDecorator) Stop() { - e.ecsInfo.shutdown() -} - -func (e *ECSDecorator) Apply(in ...telegraf.Metric) []telegraf.Metric { - var out []telegraf.Metric - - for _, metric := range in { - metric.AddTag(ClusterNameKey, e.ecsInfo.clusterName) - tags := metric.Tags() - fields := metric.Fields() - - e.tagLogGroup(metric, tags) - e.tagLogStream(metric) - e.tagContainerInstanceId(metric) - e.tagMetricSource(metric, tags) - e.tagVersion(metric) - - e.decorateCPU(metric, fields) - e.decorateMem(metric, fields) - e.decorateTaskCount(metric, tags) - e.tagMetricRule(metric) - out = append(out, metric) - } - - return out -} - -func (e *ECSDecorator) tagLogGroup(metric telegraf.Metric, tags map[string]string) { - logGroup := fmt.Sprintf("/aws/ecs/containerinsights/%s/performance", tags[ClusterNameKey]) - metric.AddTag(logscommon.LogGroupNameTag, logGroup) -} - -func (e *ECSDecorator) tagLogStream(metric telegraf.Metric) { - logStream := fmt.Sprintf("NodeTelemetry-%s", e.ecsInfo.containerInstanceId) - metric.AddTag(logscommon.LogStreamNameTag, logStream) -} - -func (e *ECSDecorator) tagContainerInstanceId(metric telegraf.Metric) { - metric.AddTag(ContainerInstanceIdKey, e.ecsInfo.containerInstanceId) -} - -func (e *ECSDecorator) decorateCPU(metric telegraf.Metric, fields map[string]interface{}) { - if cpuTotal, ok := fields[MetricName(TypeInstance, CpuTotal)]; ok && e.CPUCapacity > 0 { - metric.AddField(MetricName(TypeInstance, CpuLimit), e.getCPUCapacityInCadvisorStandard()) - metric.AddField(MetricName(TypeInstance, CpuUtilization), cpuTotal.(float64)/float64(e.getCPUCapacityInCadvisorStandard())*100) - metric.AddField(MetricName(TypeInstance, CpuReservedCapacity), float64(e.ecsInfo.getCpuReserved())/float64(e.getCPUCapacityInCgroupStandard())*100) - } -} - -func (e *ECSDecorator) decorateMem(metric telegraf.Metric, fields map[string]interface{}) { - if memWorkingset, ok := fields[MetricName(TypeInstance, MemWorkingset)]; ok && e.getMemCapacity() > 0 { - metric.AddField(MetricName(TypeInstance, MemLimit), e.getMemCapacity()) - metric.AddField(MetricName(TypeInstance, MemUtilization), float64(memWorkingset.(uint64))/float64(e.getMemCapacity())*100) - metric.AddField(MetricName(TypeInstance, MemReservedCapacity), float64(e.ecsInfo.getMemReserved())/float64(e.getMemCapacity())*100) - } -} - -func (e *ECSDecorator) decorateTaskCount(metric telegraf.Metric, tags map[string]string) { - if metricType := tags[MetricType]; metricType == TypeInstance { - metric.AddField(MetricName(TypeInstance, RunningTaskCount), e.ecsInfo.getRunningTaskCount()) - } -} - -func (e *ECSDecorator) tagMetricRule(metric telegraf.Metric) { - rules, ok := staticMetricRule[metric.Tags()[MetricType]] - if !ok { - return - } - structuredlogscommon.AttachMetricRule(metric, rules) -} - -func (e *ECSDecorator) tagMetricSource(metric telegraf.Metric, tags map[string]string) { - metricType, ok := tags[MetricType] - if !ok { - return - } - - var sources []string - switch metricType { - case TypeInstance: - sources = append(sources, []string{"cadvisor", "/proc", "ecsagent", "calculated"}...) - case TypeInstanceFS: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeInstanceNet: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeInstanceDiskIO: - sources = append(sources, []string{"cadvisor"}...) - } - - if len(sources) > 0 { - structuredlogscommon.AppendAttributesInFields(SourcesKey, sources, metric) - } -} - -func (e *ECSDecorator) tagVersion(metric telegraf.Metric) { - structuredlogscommon.AddVersion(metric) -} - -func (e *ECSDecorator) getCPUCapacityInCadvisorStandard() int64 { - // cadvisor treat 1 core as 1000 millicores - return e.CPUCapacity * 1000 -} - -func (e *ECSDecorator) getCPUCapacityInCgroupStandard() int64 { - // cgroup treat one core as 1024 cpu unit - return e.CPUCapacity * 1024 -} - -func (e *ECSDecorator) getMemCapacity() int64 { - return e.MemCapacity -} - -func init() { - processors.Add("ecsdecorator", func() telegraf.Processor { - return &ECSDecorator{NodeCapacity: NewNodeCapacity()} - }) -} diff --git a/plugins/processors/ecsdecorator/ecsdecorator_test.go b/plugins/processors/ecsdecorator/ecsdecorator_test.go deleted file mode 100644 index 203c49933a..0000000000 --- a/plugins/processors/ecsdecorator/ecsdecorator_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/logscommon" -) - -func TestTagMetricSourceForTypeInstance(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricSource(m, tags) - - assert.Equal(t, "Sources", m.Tags()["attributesInFields"], "Expected to be equal") - assert.Equal(t, []string{"cadvisor", "/proc", "ecsagent", "calculated"}, m.Fields()[SourcesKey], "Expected to be equal") -} - -func TestTagMetricSourceForTypeInstanceFS(t *testing.T) { - tags := map[string]string{MetricType: TypeInstanceFS, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricSource(m, tags) - - assert.Equal(t, "Sources", m.Tags()["attributesInFields"], "Expected to be equal") - assert.Equal(t, []string{"cadvisor", "calculated"}, m.Fields()[SourcesKey], "Expected to be equal") -} - -func TestTagMetricSourceForTypeInstanceNet(t *testing.T) { - tags := map[string]string{MetricType: TypeInstanceNet, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricSource(m, tags) - - assert.Equal(t, "Sources", m.Tags()["attributesInFields"], "Expected to be equal") - assert.Equal(t, []string{"cadvisor", "calculated"}, m.Fields()[SourcesKey], "Expected to be equal") -} - -func TestTagMetricSourceForTypeInstanceDiskIO(t *testing.T) { - tags := map[string]string{MetricType: TypeInstanceDiskIO, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricSource(m, tags) - - assert.Equal(t, "Sources", m.Tags()["attributesInFields"], "Expected to be equal") - assert.Equal(t, []string{"cadvisor"}, m.Fields()[SourcesKey], "Expected to be equal") -} - -func TestTagLogGroup(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagLogGroup(m, tags) - - assert.Equal(t, "/aws/ecs/containerinsights/TestClusterName/performance", m.Tags()[logscommon.LogGroupNameTag], "Expected to be equal") - -} - -func TestDecorateCpu(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 1.0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - (&ECSDecorator{ecsInfo: &ecsInfo{cpuReserved: 1024}, NodeCapacity: &NodeCapacity{CPUCapacity: 8}}).decorateCPU(m, fields) - - assert.Equal(t, 0.0125, m.Fields()[MetricName(TypeInstance, CpuUtilization)], "Expected to be equal") - assert.Equal(t, 12.5, m.Fields()[MetricName(TypeInstance, CpuReservedCapacity)], "Expected to be equal") -} - -func TestDecorateMem(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 1.0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): uint64(1), MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - (&ECSDecorator{ecsInfo: &ecsInfo{memReserved: 1}, NodeCapacity: &NodeCapacity{MemCapacity: 8}}).decorateMem(m, fields) - - assert.Equal(t, 12.5, m.Fields()[MetricName(TypeInstance, MemUtilization)], "Expected to be equal") - assert.Equal(t, 12.5, m.Fields()[MetricName(TypeInstance, MemReservedCapacity)], "Expected to be equal") -} - -func TestDecorateTaskCount(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 1.0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): uint64(1), MetricName(TypeInstance, MemLimit): 0} - - m := metric.New("test", tags, fields, time.Now()) - (&ECSDecorator{ecsInfo: &ecsInfo{runningTaskCount: 5}}).decorateTaskCount(m, tags) - - assert.Equal(t, int64(5), m.Fields()[MetricName(TypeInstance, RunningTaskCount)], "Expected to be equal") - -} diff --git a/plugins/processors/ecsdecorator/ecsinfo.go b/plugins/processors/ecsdecorator/ecsinfo.go deleted file mode 100644 index bee0a15d54..0000000000 --- a/plugins/processors/ecsdecorator/ecsinfo.go +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "encoding/json" - "fmt" - "log" - "strings" - "sync" - "time" - - "github.com/aws/amazon-cloudwatch-agent/internal/httpclient" -) - -type ecsInfo struct { - hostIP string - clusterName string - containerInstanceId string - cgroup *cgroupScanner - runningTaskCount int64 - cpuReserved int64 - memReserved int64 - refreshInterval time.Duration - shutdownC chan bool - httpClient *httpclient.HttpClient - sync.RWMutex -} - -const ( - ecsAgentEndpoint = "http://%s:51678/v1/metadata" - ecsAgentTaskInfoEndpoint = "http://%s:51678/v1/tasks" - taskStatusRunning = "RUNNING" -) - -type ContainerInstance struct { - Cluster string - ContainerInstanceArn string -} - -type ECSContainer struct { - DockerId string -} -type ECSTask struct { - KnownStatus string - ARN string - Containers []ECSContainer -} - -type ECSTasksInfo struct { - Tasks []ECSTask -} - -func (e *ecsInfo) updateRunningTaskCount() { - ecsTasksInfo := e.getTasksInfo() - runningTaskCount := int64(0) - cpuReserved := int64(0) - memReserved := int64(0) - for _, task := range ecsTasksInfo.Tasks { - if task.KnownStatus != taskStatusRunning { - continue - } - taskId, err := getTaskCgroupPathFromARN(task.ARN) - if err != nil { - log.Printf("W! failed to get ecs taskid from arn: %v", err) - continue - } - - // ignore the one only consume 2 shares which is the default value in cgroup - if cr := e.cgroup.getCPUReserved(taskId, e.clusterName); cr > 2 { - cpuReserved += cr - } - memReserved += e.cgroup.getMEMReserved(taskId, e.clusterName, task.Containers) - - runningTaskCount += 1 - } - - e.Lock() - defer e.Unlock() - e.runningTaskCount = runningTaskCount - e.cpuReserved = cpuReserved - e.memReserved = memReserved -} - -func (e *ecsInfo) getRunningTaskCount() int64 { - e.RLock() - defer e.RUnlock() - return e.runningTaskCount -} - -func (e *ecsInfo) getCpuReserved() int64 { - e.RLock() - defer e.RUnlock() - return e.cpuReserved -} - -func (e *ecsInfo) getMemReserved() int64 { - e.RLock() - defer e.RUnlock() - return e.memReserved -} - -func newECSInfo(hostIP string) (e *ecsInfo) { - e = &ecsInfo{hostIP: hostIP, refreshInterval: 1 * time.Minute, shutdownC: make(chan bool), httpClient: httpclient.New()} - containerInstance := e.getContainerInstanceInfo() - //Sample Cluster Name: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-introspection.html - e.clusterName = containerInstance.Cluster - e.containerInstanceId = e.getContainerInstanceIdFromArn(containerInstance.ContainerInstanceArn) - e.cgroup = newCGroupScannerForContainer() - e.updateRunningTaskCount() - go func() { - refreshTicker := time.NewTicker(e.refreshInterval) - defer refreshTicker.Stop() - for { - select { - case <-refreshTicker.C: - e.updateRunningTaskCount() - case <-e.shutdownC: - refreshTicker.Stop() - return - } - } - }() - return -} - -func (e *ecsInfo) shutdown() { - close(e.shutdownC) -} - -func (e *ecsInfo) getECSAgentEndpoint() string { - return fmt.Sprintf(ecsAgentEndpoint, e.hostIP) -} - -func (e *ecsInfo) getECSAgentTaskInfoEndpoint() string { - return fmt.Sprintf(ecsAgentTaskInfoEndpoint, e.hostIP) - -} - -// There are two formats of ContainerInstance ARN (https://docs.aws.amazon.com/AmazonECS/latest/userguide/ecs-account-settings.html#ecs-resource-ids) -// arn:aws:ecs:region:aws_account_id:container-instance/container-instance-id -// arn:aws:ecs:region:aws_account_id:container-instance/cluster-name/container-instance-id -// This function will return "container-instance-id" for both ARN format -func (e *ecsInfo) getContainerInstanceIdFromArn(arn string) (containerInstanceId string) { - // When splitting the ARN with ":", the 6th segments could be either: - // container-instance/47c0ab6e-2c2c-475e-9c30-b878fa7a8c3d or - // container-instance/cluster-name/47c0ab6e-2c2c-475e-9c30-b878fa7a8c3d - if splitedList := strings.Split(arn, ":"); len(splitedList) >= 6 { - // Further splitting tmpResult with "/", it could be splitted into either 2 or 3 - // Characters of "cluster-name" is only allowed to be letters, numbers and hyphens - tmpResult := strings.Split(splitedList[5], "/") - if len(tmpResult) == 2 { - containerInstanceId = tmpResult[1] - return - } else if len(tmpResult) == 3 { - containerInstanceId = tmpResult[2] - return - } - } - - log.Printf("E! can't get ecs container instance id from ContainerInstance arn: %v", arn) - return - -} - -func (e *ecsInfo) getContainerInstanceInfo() (containerInstance *ContainerInstance) { - containerInstance = &ContainerInstance{} - resp, err := e.httpClient.Request(e.getECSAgentEndpoint()) - if err != nil { - log.Printf("W! failing to call ecsagent endpoint, error: %v", err) - return containerInstance - } - - err = json.Unmarshal(resp, containerInstance) - if err != nil { - log.Printf("W! unable to parse resp from ecsagent endpoint, error: %v", err) - log.Printf("D! resp content is %s", string(resp)) - } - return -} - -func (e *ecsInfo) getTasksInfo() (ecsTasksInfo *ECSTasksInfo) { - ecsTasksInfo = &ECSTasksInfo{} - resp, err := e.httpClient.Request(e.getECSAgentTaskInfoEndpoint()) - if err != nil { - log.Printf("W! failing to call ecsagent taskinfo endpoint, error: %v", err) - return ecsTasksInfo - } - - err = json.Unmarshal(resp, ecsTasksInfo) - if err != nil { - log.Printf("W! unable to parse resp from ecsagent taskinfo endpoint, error: %v", err) - log.Printf("D! resp content is %s", string(resp)) - } - return -} - -// There are two formats of Task ARN (https://docs.aws.amazon.com/AmazonECS/latest/userguide/ecs-account-settings.html#ecs-resource-ids) -// arn:aws:ecs:region:aws_account_id:task/task-id -// arn:aws:ecs:region:aws_account_id:task/cluster-name/task-id -// we should get "task-id" as result no matter what format the ARN is. -func getTaskCgroupPathFromARN(arn string) (string, error) { - result := strings.Split(arn, ":") - if len(result) < 6 { - return "", fmt.Errorf("invalid ecs task arn: %v", result) - } - - result = strings.Split(result[5], "/") - if len(result) == 2 { - return result[1], nil - } else if len(result) == 3 { - return result[2], nil - } else { - return "", fmt.Errorf("invalid ecs task arn: %v", result) - } -} diff --git a/plugins/processors/ecsdecorator/ecsinfo_test.go b/plugins/processors/ecsdecorator/ecsinfo_test.go deleted file mode 100644 index d47da49921..0000000000 --- a/plugins/processors/ecsdecorator/ecsinfo_test.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestGetTaskCgroupPathFromARN(t *testing.T) { - oldFormatARN := "arn:aws:ecs:region:aws_account_id:task/task-id" - newFormatARN := "arn:aws:ecs:region:aws_account_id:task/cluster-name/task-id" - result, _ := getTaskCgroupPathFromARN(oldFormatARN) - assert.Equal(t, "task-id", result, "Expected to be equal") - result, _ = getTaskCgroupPathFromARN(newFormatARN) - assert.Equal(t, "task-id", result, "Expected to be equal") -} diff --git a/plugins/processors/ecsdecorator/metricRule.go b/plugins/processors/ecsdecorator/metricRule.go deleted file mode 100644 index 1cc2a08958..0000000000 --- a/plugins/processors/ecsdecorator/metricRule.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -const ( - Bytes = "Bytes" - BytesPerSec = "Bytes/Second" - Count = "Count" - Percent = "Percent" -) - -var nodeMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeInstance, CpuUtilization)}, - {Unit: Percent, Name: MetricName(TypeInstance, CpuReservedCapacity)}, - {Unit: Percent, Name: MetricName(TypeInstance, MemUtilization)}, - {Unit: Percent, Name: MetricName(TypeInstance, MemReservedCapacity)}, - {Unit: BytesPerSec, Name: MetricName(TypeInstance, NetTotalBytes)}, - {Unit: Count, Name: MetricName(TypeInstance, RunningTaskCount)}}, - DimensionSets: [][]string{{ContainerInstanceIdKey, InstanceIdKey, ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeInstance, CpuUtilization)}, - {Unit: Percent, Name: MetricName(TypeInstance, MemUtilization)}, - {Unit: BytesPerSec, Name: MetricName(TypeInstance, NetTotalBytes)}, - {Unit: Percent, Name: MetricName(TypeInstance, CpuReservedCapacity)}, - {Unit: Percent, Name: MetricName(TypeInstance, MemReservedCapacity)}, - {Unit: Count, Name: MetricName(TypeInstance, RunningTaskCount)}, - {Name: MetricName(TypeInstance, CpuTotal)}, - {Name: MetricName(TypeInstance, CpuLimit)}, - {Unit: Bytes, Name: MetricName(TypeInstance, MemWorkingset)}, - {Unit: Bytes, Name: MetricName(TypeInstance, MemLimit)}}, - DimensionSets: [][]string{{ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var nodeFSMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeInstanceFS, FSUtilization)}}, - DimensionSets: [][]string{{ContainerInstanceIdKey, InstanceIdKey, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var staticMetricRule = map[string][]structuredlogscommon.MetricRule{ - TypeInstance: nodeMetricRules, - TypeInstanceFS: nodeFSMetricRules, -} diff --git a/plugins/processors/ecsdecorator/metricRule_test.go b/plugins/processors/ecsdecorator/metricRule_test.go deleted file mode 100644 index 2cd3bf6942..0000000000 --- a/plugins/processors/ecsdecorator/metricRule_test.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package ecsdecorator - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -func TestNodeFull(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - expected := []structuredlogscommon.MetricRule{} - deepCopy(&expected, nodeMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeLackOfCpuUtilization(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeMetricRules)) - deepCopy(&expected, nodeMetricRules) - deleteMetricFromMetricRules(MetricName(TypeInstance, CpuUtilization), expected) - - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeLackOfInstanceId(t *testing.T) { - tags := map[string]string{MetricType: TypeInstance, ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstance, CpuUtilization): 0, MetricName(TypeInstance, MemUtilization): 0, - MetricName(TypeInstance, NetTotalBytes): 0, MetricName(TypeInstance, CpuReservedCapacity): 0, MetricName(TypeInstance, MemReservedCapacity): 0, - MetricName(TypeInstance, RunningTaskCount): 0, MetricName(TypeInstance, CpuTotal): 0, - MetricName(TypeInstance, CpuLimit): 0, MetricName(TypeInstance, MemWorkingset): 0, MetricName(TypeInstance, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeMetricRules)) - deepCopy(&expected, nodeMetricRules) - expected = append(expected[:0], expected[1:]...) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeFSFull(t *testing.T) { - tags := map[string]string{MetricType: TypeInstanceFS, InstanceIdKey: "TestEC2InstanceId", ContainerInstanceIdKey: "TestContainerInstanceId", ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeInstanceFS, FSUtilization): 0} - m := metric.New("test", tags, fields, time.Now()) - new(ECSDecorator).tagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeFSMetricRules)) - deepCopy(&expected, nodeFSMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func deleteMetricFromMetricRules(metric string, rules []structuredlogscommon.MetricRule) { - for i := 0; i < len(rules); i++ { - rule := rules[i] - metricAttrs := rule.Metrics - idx := -1 - for i := 0; i < len(metricAttrs); i++ { - if metricAttrs[i].Name == metric { - idx = i - break - } - } - if idx != -1 { - metricAttrs = append(metricAttrs[:idx], metricAttrs[idx+1:]...) - rules[i].Metrics = metricAttrs - } - } -} - -func deleteDimensionFromMetricRules(dimension string, rules []structuredlogscommon.MetricRule) { - for i := 0; i < len(rules); i++ { - rule := rules[i] - var dimsSet [][]string - loop: - for _, dims := range rule.DimensionSets { - for _, dim := range dims { - if dim == dimension { - continue loop - } - } - dimsSet = append(dimsSet, dims) - } - rules[i].DimensionSets = dimsSet - } -} - -func deepCopy(dst interface{}, src interface{}) error { - if dst == nil { - return fmt.Errorf("dst cannot be nil") - } - if src == nil { - return fmt.Errorf("src cannot be nil") - } - bytes, err := json.Marshal(src) - if err != nil { - return fmt.Errorf("Unable to marshal src: %s", err) - } - err = json.Unmarshal(bytes, dst) - if err != nil { - return fmt.Errorf("Unable to unmarshal into dst: %s", err) - } - return nil -} diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/myCluster/test4/cpu.shares b/plugins/processors/ecsdecorator/test/cpu/ecs/myCluster/test4/cpu.shares deleted file mode 100644 index a949a93dfc..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/myCluster/test4/cpu.shares +++ /dev/null @@ -1 +0,0 @@ -128 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test1/cpu.shares b/plugins/processors/ecsdecorator/test/cpu/ecs/test1/cpu.shares deleted file mode 100644 index a949a93dfc..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test1/cpu.shares +++ /dev/null @@ -1 +0,0 @@ -128 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_period_us b/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_period_us deleted file mode 100644 index 83b33d238d..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_period_us +++ /dev/null @@ -1 +0,0 @@ -1000 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_quota_us b/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_quota_us deleted file mode 100644 index cb1a40df0f..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test2/cpu.cfs_quota_us +++ /dev/null @@ -1 +0,0 @@ -250 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_period_us b/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_period_us deleted file mode 100644 index 83b33d238d..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_period_us +++ /dev/null @@ -1 +0,0 @@ -1000 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_quota_us b/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_quota_us deleted file mode 100644 index cb1a40df0f..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.cfs_quota_us +++ /dev/null @@ -1 +0,0 @@ -250 diff --git a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.shares b/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.shares deleted file mode 100644 index a949a93dfc..0000000000 --- a/plugins/processors/ecsdecorator/test/cpu/ecs/test3/cpu.shares +++ /dev/null @@ -1 +0,0 @@ -128 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/myCluster/test3/memory.limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/myCluster/test3/memory.limit_in_bytes deleted file mode 100644 index 9183bf03fc..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/myCluster/test3/memory.limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -256 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/test1/memory.limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/test1/memory.limit_in_bytes deleted file mode 100644 index 9183bf03fc..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/test1/memory.limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -256 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.limit_in_bytes deleted file mode 100644 index 9183bf03fc..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -256 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.soft_limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.soft_limit_in_bytes deleted file mode 100644 index a949a93dfc..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container1/memory.soft_limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -128 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container2/memory.limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/test2/container2/memory.limit_in_bytes deleted file mode 100644 index 9183bf03fc..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/test2/container2/memory.limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -256 diff --git a/plugins/processors/ecsdecorator/test/memory/ecs/test2/memory.limit_in_bytes b/plugins/processors/ecsdecorator/test/memory/ecs/test2/memory.limit_in_bytes deleted file mode 100644 index 564113cfaf..0000000000 --- a/plugins/processors/ecsdecorator/test/memory/ecs/test2/memory.limit_in_bytes +++ /dev/null @@ -1 +0,0 @@ -9223372036854771712 diff --git a/plugins/processors/ecsdecorator/test/mountinfo b/plugins/processors/ecsdecorator/test/mountinfo deleted file mode 100644 index 247295c8fe..0000000000 --- a/plugins/processors/ecsdecorator/test/mountinfo +++ /dev/null @@ -1,16 +0,0 @@ -17 22 0:4 / /proc rw,relatime - proc proc rw -18 22 0:17 / /sys rw,relatime - sysfs sysfs rw -19 22 0:6 / /dev rw,relatime - devtmpfs devtmpfs rw,size=82501516k,nr_inodes=20625379,mode=755 -20 19 0:18 / /dev/pts rw,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000 -21 19 0:19 / /dev/shm rw,relatime - tmpfs tmpfs rw -22 0 202:1 / / rw,noatime - ext4 /dev/xvda1 rw,data=ordered -23 20 0:20 / /dev/pts rw,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000 -24 17 0:21 / /proc/sys/fs/binfmt_misc rw,relatime - binfmt_misc none rw -25 22 0:22 / test/cpuset rw,relatime - cgroup cgroup rw,cpuset -26 22 0:23 / /cgroup/cpu rw,relatime - cgroup cgroup rw,cpu -27 22 0:24 / /cgroup/cpuacct rw,relatime - cgroup cgroup rw,cpuacct -28 22 0:25 / /cgroup/memory rw,relatime - cgroup cgroup rw,memory -29 22 0:26 / /cgroup/devices rw,relatime - cgroup cgroup rw,devices -30 22 0:27 / /cgroup/freezer rw,relatime - cgroup cgroup rw,freezer -31 22 0:28 / /cgroup/net_cls rw,relatime - cgroup cgroup rw,net_cls -32 22 0:29 / /cgroup/blkio rw,relatime - cgroup cgroup rw,blkio diff --git a/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator.go b/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator.go index eb321ddf61..e013ba8db4 100644 --- a/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator.go +++ b/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator.go @@ -7,15 +7,15 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" ) var memoryMetricsNames = map[string]struct{}{ - containerinsightscommon.NeuronCoreMemoryUtilizationConstants: {}, - containerinsightscommon.NeuronCoreMemoryUtilizationModelCode: {}, - containerinsightscommon.NeuronCoreMemoryUtilizationSharedScratchpad: {}, - containerinsightscommon.NeuronCoreMemoryUtilizationRuntimeMemory: {}, - containerinsightscommon.NeuronCoreMemoryUtilizationTensors: {}, + constants.NeuronCoreMemoryUtilizationConstants: {}, + constants.NeuronCoreMemoryUtilizationModelCode: {}, + constants.NeuronCoreMemoryUtilizationSharedScratchpad: {}, + constants.NeuronCoreMemoryUtilizationRuntimeMemory: {}, + constants.NeuronCoreMemoryUtilizationTensors: {}, } type NeuronCoreInfo struct { @@ -67,7 +67,7 @@ func (d *AwsNeuronMemoryMetricsAggregator) AggregateMemoryMetric(originalMetric func (d *AwsNeuronMemoryMetricsAggregator) FlushAggregatedMemoryMetric() pmetric.Metric { aggregatedMemoryMetric := pmetric.NewMetric() - aggregatedMemoryMetric.SetName(containerinsightscommon.NeuronCoreMemoryUtilizationTotal) + aggregatedMemoryMetric.SetName(constants.NeuronCoreMemoryUtilizationTotal) datapoints := aggregatedMemoryMetric.SetEmptySum().DataPoints() for neuronCoreInfo, totalMemoryUsed := range d.memoryMetricValuesAggregator { diff --git a/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator_test.go b/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator_test.go index a2612d6708..2aa3b7fa3c 100644 --- a/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator_test.go +++ b/plugins/processors/gpuattributes/internal/awsneuron_memory_metric_aggregator_test.go @@ -10,7 +10,7 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" ) const ( @@ -28,7 +28,7 @@ func TestMemoryMetricAggregator_AggregateMemoryMetric(t *testing.T) { aggregator := NewMemoryMemoryAggregator() // Create a sample original metric with gauge data points - tensorsMemoryUsage := createSampleMetric(containerinsightscommon.NeuronCoreMemoryUtilizationTensors) + tensorsMemoryUsage := createSampleMetric(constants.NeuronCoreMemoryUtilizationTensors) nonNeuronMetric := createSampleMetric(dummy) // Call the method being tested @@ -65,8 +65,8 @@ func TestMemoryMetricAggregator_FlushAggregatedMemoryMetric(t *testing.T) { // Add some data to the aggregator // Create a sample original metric with gauge data points - tensorsMemoryUsage := createSampleMetric(containerinsightscommon.NeuronCoreMemoryUtilizationTensors) - constantsMemoryUsage := createSampleMetric(containerinsightscommon.NeuronCoreMemoryUtilizationConstants) + tensorsMemoryUsage := createSampleMetric(constants.NeuronCoreMemoryUtilizationTensors) + constantsMemoryUsage := createSampleMetric(constants.NeuronCoreMemoryUtilizationConstants) nonNeuronMetric := createSampleMetric(dummy) // Call the method being tested @@ -79,7 +79,7 @@ func TestMemoryMetricAggregator_FlushAggregatedMemoryMetric(t *testing.T) { aggregatedMetricDatapoints := aggregatedMetric.Sum().DataPoints() // Assert the result assert.NotNil(t, aggregatedMetric) - assert.Equal(t, containerinsightscommon.NeuronCoreMemoryUtilizationTotal, aggregatedMetric.Name()) + assert.Equal(t, constants.NeuronCoreMemoryUtilizationTotal, aggregatedMetric.Name()) assert.Equal(t, 2, aggregatedMetricDatapoints.Len()) for i := 0; i < aggregatedMetricDatapoints.Len(); i++ { diff --git a/plugins/processors/gpuattributes/internal/awsneuron_metric_modifier.go b/plugins/processors/gpuattributes/internal/awsneuron_metric_modifier.go index 878cf96448..b2fdddf4a4 100644 --- a/plugins/processors/gpuattributes/internal/awsneuron_metric_modifier.go +++ b/plugins/processors/gpuattributes/internal/awsneuron_metric_modifier.go @@ -10,7 +10,7 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/zap" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" ) const ( @@ -47,8 +47,8 @@ const ( Region = "region" SubnetId = "subnet_id" RuntimeTagOverride = "DEFAULT" - NeuronExecutionErrorsAggregatedMetric = containerinsightscommon.NeuronExecutionErrors + "_total" - NeuronDeviceHardwareEccEventsAggregatedMetric = containerinsightscommon.NeuronDeviceHardwareEccEvents + "_total" + NeuronExecutionErrorsAggregatedMetric = constants.NeuronExecutionErrors + "_total" + NeuronDeviceHardwareEccEventsAggregatedMetric = constants.NeuronDeviceHardwareEccEvents + "_total" NeuronCoreLabel = "neuroncore" NeuronCorePerDevice = 2 ) @@ -77,32 +77,32 @@ type NeuronCoreUtilizationDatapointAggregationKey struct { var ( metricModificationsMap = map[string]MetricModifications{ - containerinsightscommon.NeuronExecutionErrors: {DuplicationTypes: []string{containerinsightscommon.TypeNode}, UniqueAttribute: ErrorType, LogTypeSuffix: "", Unit: Count}, - containerinsightscommon.NeuronExecutionStatus: {DuplicationTypes: []string{containerinsightscommon.TypeNode}, UniqueAttribute: StatusType, LogTypeSuffix: "", Unit: Count}, - containerinsightscommon.NeuronRuntimeMemoryUsage: {DuplicationTypes: []string{containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationTotal: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationConstants: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationModelCode: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationSharedScratchpad: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationRuntimeMemory: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreMemoryUtilizationTensors: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, - containerinsightscommon.NeuronCoreUtilization: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Percent}, - containerinsightscommon.NeuronInstanceInfo: {DuplicationTypes: []string{}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Count}, - containerinsightscommon.NeuronHardware: {DuplicationTypes: []string{}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Count}, - containerinsightscommon.NeuronExecutionLatency: {DuplicationTypes: []string{containerinsightscommon.TypeNode}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Seconds}, - containerinsightscommon.NeuronDeviceHardwareEccEvents: {DuplicationTypes: []string{containerinsightscommon.TypeContainer, containerinsightscommon.TypePod, containerinsightscommon.TypeNode}, UniqueAttribute: EventType, LogTypeSuffix: Device, Unit: Count}, + constants.NeuronExecutionErrors: {DuplicationTypes: []string{constants.TypeNode}, UniqueAttribute: ErrorType, LogTypeSuffix: "", Unit: Count}, + constants.NeuronExecutionStatus: {DuplicationTypes: []string{constants.TypeNode}, UniqueAttribute: StatusType, LogTypeSuffix: "", Unit: Count}, + constants.NeuronRuntimeMemoryUsage: {DuplicationTypes: []string{constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationTotal: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationConstants: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationModelCode: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationSharedScratchpad: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationRuntimeMemory: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreMemoryUtilizationTensors: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Bytes}, + constants.NeuronCoreUtilization: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: Core, Unit: Percent}, + constants.NeuronInstanceInfo: {DuplicationTypes: []string{}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Count}, + constants.NeuronHardware: {DuplicationTypes: []string{}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Count}, + constants.NeuronExecutionLatency: {DuplicationTypes: []string{constants.TypeNode}, UniqueAttribute: "", LogTypeSuffix: "", Unit: Seconds}, + constants.NeuronDeviceHardwareEccEvents: {DuplicationTypes: []string{constants.TypeContainer, constants.TypePod, constants.TypeNode}, UniqueAttribute: EventType, LogTypeSuffix: Device, Unit: Count}, } attributeValuePrefixingMap = map[string]string{NeuronCoreAttributeKey: "core", NeuronDeviceAttributeKey: "device"} uniquesDatapointsToAggregatedMetricMappings = map[string]map[string]string{ - containerinsightscommon.NeuronExecutionErrors: {"generic": NeuronExecutionErrorsAggregatedMetric, + constants.NeuronExecutionErrors: {"generic": NeuronExecutionErrorsAggregatedMetric, "numerical": NeuronExecutionErrorsAggregatedMetric, "transient": NeuronExecutionErrorsAggregatedMetric, "model": NeuronExecutionErrorsAggregatedMetric, "runtime": NeuronExecutionErrorsAggregatedMetric, "hardware": NeuronExecutionErrorsAggregatedMetric}, // execution_status metric will be added here incrementally - containerinsightscommon.NeuronDeviceHardwareEccEvents: {"mem_ecc_corrected": NeuronDeviceHardwareEccEventsAggregatedMetric, + constants.NeuronDeviceHardwareEccEvents: {"mem_ecc_corrected": NeuronDeviceHardwareEccEventsAggregatedMetric, "mem_ecc_uncorrected": NeuronDeviceHardwareEccEventsAggregatedMetric, "sram_ecc_corrected": NeuronDeviceHardwareEccEventsAggregatedMetric, "sram_ecc_uncorrected": NeuronDeviceHardwareEccEventsAggregatedMetric}, @@ -137,9 +137,9 @@ func (md *AwsNeuronMetricModifier) ModifyMetric(originalMetric pmetric.Metric, m // The neuron metrics sent by the neuron monitor are not homogeneous // and some metrics require special processing. // We perform those special processing before duplicating metric for pod, node and container. - if originalMetricName == containerinsightscommon.NeuronExecutionLatency { + if originalMetricName == constants.NeuronExecutionLatency { keepSpecificDatapointBasedOnAttribute(originalMetric, Percentile, "p50") - } else if originalMetricName == containerinsightscommon.NeuronRuntimeMemoryUsage { + } else if originalMetricName == constants.NeuronRuntimeMemoryUsage { keepSpecificDatapointBasedOnAttribute(originalMetric, MemoryLocation, "neuron_device") } @@ -148,7 +148,7 @@ func (md *AwsNeuronMetricModifier) ModifyMetric(originalMetric pmetric.Metric, m // For NeuronCoreUtilization metrics, perform additional aggregation to calculate the maximum utilization // value per core across all datapoints. This ensures we capture peak utilization rather than average values, // which is more useful for monitoring core performance and potential bottlenecks. - if originalMetric.Name() == containerinsightscommon.NeuronCoreUtilization { + if originalMetric.Name() == constants.NeuronCoreUtilization { modifiedMetricSlice = md.aggregateCoreUtilizationMetrics(originalMetric) } else { modifiedMetricSlice = md.extractDatapointsAsMetricsAndAggregate(originalMetric) @@ -296,7 +296,7 @@ func (md *AwsNeuronMetricModifier) duplicateMetrics(metricsSlice pmetric.MetricS for i := 0; i < metricsSlice.Len(); i++ { metric := metricsSlice.At(i) if duplicateForNodeOnly { - duplicateMetricForType(metric, containerinsightscommon.TypeNode, originalMetricName, metrics) + duplicateMetricForType(metric, constants.TypeNode, originalMetricName, metrics) } else { for _, prefix := range metricModifications.DuplicationTypes { duplicateMetricForType(metric, prefix, originalMetricName, metrics) @@ -347,7 +347,7 @@ func duplicateMetricForType(metric pmetric.Metric, duplicateType string, origina datapoints := metricCopy.Sum().DataPoints() for i := 0; i < datapoints.Len(); i++ { - datapoints.At(i).Attributes().PutStr(containerinsightscommon.MetricType, duplicateType+logTypeSuffix+metricModificationsMap[originalMetricName].LogTypeSuffix) + datapoints.At(i).Attributes().PutStr(constants.MetricType, duplicateType+logTypeSuffix+metricModificationsMap[originalMetricName].LogTypeSuffix) } } diff --git a/plugins/processors/gpuattributes/internal/metricFilters/gpumetricfilters.go b/plugins/processors/gpuattributes/internal/metricFilters/gpumetricfilters.go index dedc259432..230064d28b 100644 --- a/plugins/processors/gpuattributes/internal/metricFilters/gpumetricfilters.go +++ b/plugins/processors/gpuattributes/internal/metricFilters/gpumetricfilters.go @@ -4,7 +4,7 @@ package metricFilters import ( - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/gpuattributes/internal" ) @@ -21,136 +21,136 @@ const ( ) var ContainerGpuLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.GpuDeviceKey: nil, - containerinsightscommon.MetricType: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.K8sNamespace: nil, - containerinsightscommon.FullPodNameKey: nil, - containerinsightscommon.PodNameKey: nil, - containerinsightscommon.TypeService: nil, - containerinsightscommon.GpuUniqueId: nil, - containerinsightscommon.ContainerNamekey: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.VersionKey: nil, - containerinsightscommon.SourcesKey: nil, - containerinsightscommon.Timestamp: nil, - containerinsightscommon.K8sKey: { - containerinsightscommon.HostKey: nil, - containerinsightscommon.K8sLabelsKey: nil, - pod_id: nil, - pod_name: nil, - pod_owners: nil, - namespace: nil, - container_name: nil, - containerd: nil, + constants.ClusterNameKey: nil, + constants.InstanceIDKey: nil, + constants.GpuDeviceKey: nil, + constants.MetricType: nil, + constants.NodeNameKey: nil, + constants.K8sNamespace: nil, + constants.FullPodNameKey: nil, + constants.PodNameKey: nil, + constants.TypeService: nil, + constants.GpuUniqueID: nil, + constants.ContainerNamekey: nil, + constants.InstanceTypeKey: nil, + constants.VersionKey: nil, + constants.SourcesKey: nil, + constants.Timestamp: nil, + constants.K8sKey: { + constants.HostKey: nil, + constants.K8sLabelsKey: nil, + pod_id: nil, + pod_name: nil, + pod_owners: nil, + namespace: nil, + container_name: nil, + containerd: nil, }, } var PodGpuLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.GpuDeviceKey: nil, - containerinsightscommon.MetricType: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.K8sNamespace: nil, - containerinsightscommon.FullPodNameKey: nil, - containerinsightscommon.PodNameKey: nil, - containerinsightscommon.TypeService: nil, - containerinsightscommon.GpuUniqueId: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.VersionKey: nil, - containerinsightscommon.SourcesKey: nil, - containerinsightscommon.Timestamp: nil, - containerinsightscommon.K8sKey: { - containerinsightscommon.HostKey: nil, - containerinsightscommon.K8sLabelsKey: nil, - pod_id: nil, - pod_name: nil, - pod_owners: nil, - namespace: nil, + constants.ClusterNameKey: nil, + constants.InstanceIDKey: nil, + constants.GpuDeviceKey: nil, + constants.MetricType: nil, + constants.NodeNameKey: nil, + constants.K8sNamespace: nil, + constants.FullPodNameKey: nil, + constants.PodNameKey: nil, + constants.TypeService: nil, + constants.GpuUniqueID: nil, + constants.InstanceTypeKey: nil, + constants.VersionKey: nil, + constants.SourcesKey: nil, + constants.Timestamp: nil, + constants.K8sKey: { + constants.HostKey: nil, + constants.K8sLabelsKey: nil, + pod_id: nil, + pod_name: nil, + pod_owners: nil, + namespace: nil, }, } var NodeGpuLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.GpuDeviceKey: nil, - containerinsightscommon.MetricType: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.VersionKey: nil, - containerinsightscommon.SourcesKey: nil, - containerinsightscommon.Timestamp: nil, - containerinsightscommon.K8sKey: { - containerinsightscommon.HostKey: nil, + constants.ClusterNameKey: nil, + constants.InstanceIDKey: nil, + constants.GpuDeviceKey: nil, + constants.MetricType: nil, + constants.NodeNameKey: nil, + constants.InstanceTypeKey: nil, + constants.VersionKey: nil, + constants.SourcesKey: nil, + constants.Timestamp: nil, + constants.K8sKey: { + constants.HostKey: nil, }, } var PodNeuronLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.FullPodNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.K8sPodNameKey: nil, - containerinsightscommon.K8sNamespace: nil, - internal.NeuronDevice: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.PodNameKey: nil, - containerinsightscommon.TypeService: nil, - internal.AvailabilityZone: nil, - containerinsightscommon.K8sKey: { - containerinsightscommon.HostKey: nil, - pod_id: nil, - pod_owners: nil, - containerinsightscommon.K8sLabelsKey: nil, + constants.ClusterNameKey: nil, + constants.FullPodNameKey: nil, + constants.InstanceIDKey: nil, + constants.InstanceTypeKey: nil, + constants.K8sPodNameKey: nil, + constants.K8sNamespace: nil, + internal.NeuronDevice: nil, + constants.NodeNameKey: nil, + constants.PodNameKey: nil, + constants.TypeService: nil, + internal.AvailabilityZone: nil, + constants.K8sKey: { + constants.HostKey: nil, + pod_id: nil, + pod_owners: nil, + constants.K8sLabelsKey: nil, }, - internal.Region: nil, - internal.SubnetId: nil, - internal.NeuronCore: nil, - containerinsightscommon.MetricType: nil, + internal.Region: nil, + internal.SubnetId: nil, + internal.NeuronCore: nil, + constants.MetricType: nil, } var ContainerNeuronLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.ContainerNamekey: nil, - containerinsightscommon.FullPodNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.K8sPodNameKey: nil, - containerinsightscommon.K8sNamespace: nil, - internal.NeuronDevice: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.PodNameKey: nil, - containerinsightscommon.TypeService: nil, - internal.AvailabilityZone: nil, - containerinsightscommon.Kubernetes: { - containerinsightscommon.HostKey: nil, - "containerd": nil, - pod_id: nil, - pod_owners: nil, - containerinsightscommon.K8sLabelsKey: nil, + constants.ClusterNameKey: nil, + constants.ContainerNamekey: nil, + constants.FullPodNameKey: nil, + constants.InstanceIDKey: nil, + constants.InstanceTypeKey: nil, + constants.K8sPodNameKey: nil, + constants.K8sNamespace: nil, + internal.NeuronDevice: nil, + constants.NodeNameKey: nil, + constants.PodNameKey: nil, + constants.TypeService: nil, + internal.AvailabilityZone: nil, + constants.Kubernetes: { + constants.HostKey: nil, + "containerd": nil, + pod_id: nil, + pod_owners: nil, + constants.K8sLabelsKey: nil, }, - internal.Region: nil, - internal.SubnetId: nil, - internal.NeuronCore: nil, - containerinsightscommon.MetricType: nil, + internal.Region: nil, + internal.SubnetId: nil, + internal.NeuronCore: nil, + constants.MetricType: nil, } var NodeNeuronLabelFilter = map[string]map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.InstanceIdKey: nil, - containerinsightscommon.InstanceTypeKey: nil, - containerinsightscommon.K8sNamespace: nil, - internal.NeuronDevice: nil, - containerinsightscommon.NodeNameKey: nil, - containerinsightscommon.TypeService: nil, - internal.AvailabilityZone: nil, - containerinsightscommon.Kubernetes: { - containerinsightscommon.HostKey: nil, - containerinsightscommon.K8sLabelsKey: nil, + constants.ClusterNameKey: nil, + constants.InstanceIDKey: nil, + constants.InstanceTypeKey: nil, + constants.K8sNamespace: nil, + internal.NeuronDevice: nil, + constants.NodeNameKey: nil, + constants.TypeService: nil, + internal.AvailabilityZone: nil, + constants.Kubernetes: { + constants.HostKey: nil, + constants.K8sLabelsKey: nil, }, - internal.Region: nil, - internal.SubnetId: nil, - internal.NeuronCore: nil, - containerinsightscommon.MetricType: nil, + internal.Region: nil, + internal.SubnetId: nil, + internal.NeuronCore: nil, + constants.MetricType: nil, } diff --git a/plugins/processors/gpuattributes/processor.go b/plugins/processors/gpuattributes/processor.go index d37ff0298a..4e6f4b5ab1 100644 --- a/plugins/processors/gpuattributes/processor.go +++ b/plugins/processors/gpuattributes/processor.go @@ -13,7 +13,7 @@ import ( "go.uber.org/zap" "golang.org/x/exp/maps" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/gpuattributes/internal" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/gpuattributes/internal/metricFilters" ) @@ -42,8 +42,8 @@ const ( // // - Node // - ClusterName -// - ClusterName, InstanceIdKey, NodeName -// - ClusterName, InstanceIdKey, NodeName, GpuDevice +// - ClusterName, InstanceIDKey, NodeName +// - ClusterName, InstanceIDKey, NodeName, GpuDevice type gpuAttributesProcessor struct { *Config logger *zap.Logger @@ -142,7 +142,7 @@ func (d *gpuAttributesProcessor) processMetricAttributes(m pmetric.Metric) { case pmetric.MetricTypeSum: dps = m.Sum().DataPoints() default: - d.logger.Debug("Ignore unknown metric type", zap.String(containerinsightscommon.MetricType, m.Type().String())) + d.logger.Debug("Ignore unknown metric type", zap.String(constants.MetricType, m.Type().String())) } for i := 0; i < dps.Len(); i++ { @@ -210,7 +210,7 @@ func (d *gpuAttributesProcessor) filterGpuMetricsWithoutPodName(metrics pmetric. case pmetric.MetricTypeSum: dps = m.Sum().DataPoints() default: - d.logger.Debug("Ignore unknown metric type", zap.String(containerinsightscommon.MetricType, m.Type().String())) + d.logger.Debug("Ignore unknown metric type", zap.String(constants.MetricType, m.Type().String())) } dps.RemoveIf(func(dp pmetric.NumberDataPoint) bool { diff --git a/plugins/processors/k8sdecorator/k8sdecorator.go b/plugins/processors/k8sdecorator/k8sdecorator.go deleted file mode 100644 index 3b4a0723c0..0000000000 --- a/plugins/processors/k8sdecorator/k8sdecorator.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sdecorator - -import ( - "time" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/processors" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/logscommon" - "github.com/aws/amazon-cloudwatch-agent/plugins/processors/k8sdecorator/stores" - "github.com/aws/amazon-cloudwatch-agent/plugins/processors/k8sdecorator/structuredlogsadapter" -) - -type K8sDecorator struct { - started bool - stores []stores.K8sStore - shutdownC chan bool - DisableMetricExtraction bool `toml:"disable_metric_extraction"` - TagService bool `toml:"tag_service"` - ClusterName string `toml:"cluster_name"` - HostIP string `toml:"host_ip"` - NodeName string `toml:"node_name"` - PrefFullPodName bool `toml:"prefer_full_pod_name"` -} - -func (k *K8sDecorator) Description() string { - return "" -} - -func (k *K8sDecorator) SampleConfig() string { - return "" -} - -func (k *K8sDecorator) Apply(in ...telegraf.Metric) []telegraf.Metric { - if !k.started { - k.start() - } - - var out []telegraf.Metric - -OUTER: - for _, metric := range in { - metric.AddTag(ClusterNameKey, k.ClusterName) - k.handleHostname(metric) - kubernetesBlob := make(map[string]interface{}) - for _, store := range k.stores { - if !store.Decorate(metric, kubernetesBlob) { - // drop the unexpected metric - continue OUTER - } - } - structuredlogsadapter.AddKubernetesInfo(metric, kubernetesBlob) - structuredlogsadapter.TagMetricSource(metric) - if !k.DisableMetricExtraction { - structuredlogsadapter.TagMetricRule(metric) - } - structuredlogsadapter.TagLogGroup(metric) - metric.AddTag(logscommon.LogStreamNameTag, k.NodeName) - out = append(out, metric) - } - - return out -} - -// Shutdown currently does not get called, as telegraf does not have a cleanup hook for Filter plugins -func (k *K8sDecorator) Shutdown() { - close(k.shutdownC) -} - -func (k *K8sDecorator) start() { - k.shutdownC = make(chan bool) - - k.stores = append(k.stores, stores.NewPodStore(k.HostIP, k.PrefFullPodName)) - if k.TagService { - k.stores = append(k.stores, stores.NewServiceStore()) - } - - for _, store := range k.stores { - store.RefreshTick() - } - - go func() { - refreshTicker := time.NewTicker(time.Second) - defer refreshTicker.Stop() - for { - select { - case <-refreshTicker.C: - for _, store := range k.stores { - store.RefreshTick() - } - case <-k.shutdownC: - refreshTicker.Stop() - return - } - } - }() - k.started = true -} - -func (k *K8sDecorator) handleHostname(metric telegraf.Metric) { - metricType := metric.Tags()[MetricType] - // Add NodeName for node, pod and container - if IsNode(metricType) || IsInstance(metricType) || IsPod(metricType) || IsContainer(metricType) { - metric.AddTag(NodeNameKey, k.NodeName) - } - // remove the tag "host" - metric.RemoveTag("host") -} - -// init adds this plugin to the framework's "processors" registry -func init() { - processors.Add("k8sdecorator", func() telegraf.Processor { - return &K8sDecorator{TagService: true} - }) -} diff --git a/plugins/processors/k8sdecorator/k8sdecorator_test.go b/plugins/processors/k8sdecorator/k8sdecorator_test.go deleted file mode 100644 index 97504d5a0e..0000000000 --- a/plugins/processors/k8sdecorator/k8sdecorator_test.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package k8sdecorator - -import ( - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -func TestDisableMetricExtraction(t *testing.T) { - - tags := map[string]string{MetricType: TypeCluster} - fields := map[string]interface{}{MetricName(TypeCluster, NodeCount): 10, MetricName(TypeCluster, FailedNodeCount): 1} - - testCases := map[string]struct { - k8sDecorator *K8sDecorator - expectedAttributesInFields string - expectedCloudWatchMetrics interface{} - }{ - "WithDisableMetricExtractionDefault": { - k8sDecorator: &K8sDecorator{ - started: true, - ClusterName: "TestK8sCluster", - }, - expectedAttributesInFields: "Sources,CloudWatchMetrics", - expectedCloudWatchMetrics: []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - { - Unit: "Count", - Name: "cluster_node_count", - }, - { - Unit: "Count", - Name: "cluster_failed_node_count", - }, - }, - DimensionSets: [][]string{{"ClusterName"}}, - Namespace: "ContainerInsights", - }, - }, - }, - "WithDisableMetricExtractionFalse": { - k8sDecorator: &K8sDecorator{ - started: true, - DisableMetricExtraction: false, - ClusterName: "TestK8sCluster", - }, - expectedAttributesInFields: "Sources,CloudWatchMetrics", - expectedCloudWatchMetrics: []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - { - Unit: "Count", - Name: "cluster_node_count", - }, - { - Unit: "Count", - Name: "cluster_failed_node_count", - }, - }, - DimensionSets: [][]string{{"ClusterName"}}, - Namespace: "ContainerInsights", - }, - }, - }, - "WithDisableMetricExtractionTrue": { - k8sDecorator: &K8sDecorator{ - started: true, - DisableMetricExtraction: true, - ClusterName: "TestK8sCluster", - }, - expectedAttributesInFields: "Sources", - expectedCloudWatchMetrics: nil, - }, - } - - for name, testCase := range testCases { - t.Run(name, func(t *testing.T) { - // Given a metric & k8s decorator configuration, - testMetric := metric.New("testClusterMetric", tags, fields, time.Now()) - // When the processor is applied, - testCase.k8sDecorator.Apply(testMetric) - // Then the metric is expected to be converted to EMF with the following fields & tags - assert.Equal(t, testCase.expectedAttributesInFields, testMetric.Tags()["attributesInFields"]) - assert.Equal(t, testCase.expectedCloudWatchMetrics, testMetric.Fields()["CloudWatchMetrics"]) - }) - } -} diff --git a/plugins/processors/k8sdecorator/stores/nodeinfo.go b/plugins/processors/k8sdecorator/stores/nodeinfo.go deleted file mode 100644 index 7875acce8e..0000000000 --- a/plugins/processors/k8sdecorator/stores/nodeinfo.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "bufio" - "fmt" - "log" - "os" - "regexp" - "strings" - "sync" - "time" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/mapWithExpiry" -) - -type nodeStats struct { - podCnt int - containerCnt int - cpuReq int64 - memReq int64 -} - -type nodeInfo struct { - nodeStats nodeStats - // ebsIds for persistent volume of pod - ebsIds *mapWithExpiry.MapWithExpiry - // mutex for ebsIds - sync.RWMutex - *NodeCapacity -} - -func (n *nodeInfo) refreshEbsId() { - // rootfs is mounted with the root dir on host - file, err := os.Open("/rootfs/proc/mounts") - if err != nil { - log.Printf("D! cannot open /rootfs/proc/mounts %v", err) - return - } - defer file.Close() - - reader := bufio.NewReader(file) - - n.Lock() - defer n.Unlock() - n.extractEbsId(reader) -} - -func (n *nodeInfo) extractEbsId(reader *bufio.Reader) { - ebsMountPointRegex := regexp.MustCompile(`kubernetes.io/aws-ebs/mounts/aws/(.+)/(vol-\w+)`) - - for { - line, isPrefix, err := reader.ReadLine() - - // err could be EOF in normal case - if err != nil { - break - } - - // isPrefix is set when a line exceeding 4KB which we treat it as error when reading mount file - if isPrefix { - break - } - - lineStr := string(line) - if strings.TrimSpace(lineStr) == "" { - continue - } - - //example line: /dev/nvme1n1 /var/lib/kubelet/plugins/kubernetes.io/aws-ebs/mounts/aws/us-west-2b/vol-0d9f0816149eb2050 ext4 rw,relatime,data=ordered 0 0 - keys := strings.Split(lineStr, " ") - if len(keys) < 2 { - continue - } - matches := ebsMountPointRegex.FindStringSubmatch(keys[1]) - if len(matches) > 0 { - // Set {"/dev/nvme1n1": "aws://us-west-2b/vol-0d9f0816149eb2050"} - n.ebsIds.Set(keys[0], fmt.Sprintf("aws://%s/%s", matches[1], matches[2])) - } - } -} - -func (n *nodeInfo) getEbsVolumeId(devName string) string { - n.RLock() - defer n.RUnlock() - if volId, ok := n.ebsIds.Get(devName); ok { - return volId.(string) - } - return "" -} - -func (n *nodeInfo) cleanUp(now time.Time) { - n.ebsIds.CleanUp(now) -} - -func newNodeInfo() *nodeInfo { - nc := &nodeInfo{ebsIds: mapWithExpiry.NewMapWithExpiry(2 * refreshInterval), NodeCapacity: NewNodeCapacity()} - return nc -} - -func (n *nodeInfo) getCPUCapacity() int64 { - return n.CPUCapacity * 1000 -} - -func (n *nodeInfo) getMemCapacity() int64 { - return n.MemCapacity -} diff --git a/plugins/processors/k8sdecorator/stores/nodeinfo_test.go b/plugins/processors/k8sdecorator/stores/nodeinfo_test.go deleted file mode 100644 index aadcc94475..0000000000 --- a/plugins/processors/k8sdecorator/stores/nodeinfo_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "bufio" - "os" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/aws/amazon-cloudwatch-agent/internal/mapWithExpiry" -) - -func TestNodeInfo_extractEbsId(t *testing.T) { - nodeInfo := &nodeInfo{ebsIds: mapWithExpiry.NewMapWithExpiry(60 * time.Second)} - file, _ := os.Open("./sampleMountFile/mounts") - defer file.Close() - reader := bufio.NewReader(file) - - nodeInfo.extractEbsId(reader) - assert.Equal(t, 1, nodeInfo.ebsIds.Size()) - volId, _ := nodeInfo.ebsIds.Get("/dev/nvme1n1") - assert.Equal(t, "aws://us-west-2b/vol-0d9f0816149eb2050", volId.(string)) -} diff --git a/plugins/processors/k8sdecorator/stores/podstore.go b/plugins/processors/k8sdecorator/stores/podstore.go deleted file mode 100644 index cf4227cdf0..0000000000 --- a/plugins/processors/k8sdecorator/stores/podstore.go +++ /dev/null @@ -1,601 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "log" - "regexp" - "strings" - "sync" - "time" - - "github.com/influxdata/telegraf" - corev1 "k8s.io/api/core/v1" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/k8sclient" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/kubeletutil" - "github.com/aws/amazon-cloudwatch-agent/internal/mapWithExpiry" - "github.com/aws/amazon-cloudwatch-agent/profiler" -) - -const ( - refreshInterval = 30 * time.Second - MeasurementsExpiry = 10 * time.Minute - PodsExpiry = 2 * time.Minute - memoryKey = "memory" - cpuKey = "cpu" - splitRegexStr = "\\.|-" - kubeProxy = "kube-proxy" - ignoreAnnotation = "aws.amazon.com/cloudwatch-agent-ignore" -) - -var ( - re = regexp.MustCompile(splitRegexStr) -) - -type cachedEntry struct { - pod corev1.Pod - creation time.Time -} - -type Owner struct { - OwnerKind string `json:"owner_kind"` - OwnerName string `json:"owner_name"` -} - -type prevPodMeasurement struct { - containersRestarts int -} - -type prevContainerMeasurement struct { - restarts int -} - -type PodStore struct { - cache *mapWithExpiry.MapWithExpiry - prevMeasurements map[string]*mapWithExpiry.MapWithExpiry //preMeasurements per each Type (Pod, Container, etc) - kubeClient *kubeletutil.KubeClient - lastRefreshed time.Time - nodeInfo *nodeInfo - prefFullPodName bool - sync.Mutex -} - -func NewPodStore(hostIP string, prefFullPodName bool) *PodStore { - podStore := &PodStore{ - cache: mapWithExpiry.NewMapWithExpiry(PodsExpiry), - prevMeasurements: make(map[string]*mapWithExpiry.MapWithExpiry), - kubeClient: &kubeletutil.KubeClient{Port: KubeSecurePort, BearerToken: BearerToken, KubeIP: hostIP}, - nodeInfo: newNodeInfo(), - prefFullPodName: prefFullPodName, - } - - // Try to detect kubelet permission issue here - if _, err := podStore.kubeClient.ListPods(); err != nil { - log.Panicf("Cannot get pod from kubelet, err: %v", err) - } - - return podStore -} - -func (p *PodStore) getPrevMeasurement(metricType, metricKey string) (interface{}, bool) { - prevMeasurement, ok := p.prevMeasurements[metricType] - if !ok { - return nil, false - } - - content, ok := prevMeasurement.Get(metricKey) - - if !ok { - return nil, false - } - - return content, true -} - -func (p *PodStore) setPrevMeasurement(metricType, metricKey string, content interface{}) { - prevMeasurement, ok := p.prevMeasurements[metricType] - if !ok { - prevMeasurement = mapWithExpiry.NewMapWithExpiry(MeasurementsExpiry) - p.prevMeasurements[metricType] = prevMeasurement - } - prevMeasurement.Set(metricKey, content) -} - -func (p *PodStore) RefreshTick() { - now := time.Now() - if now.Sub(p.lastRefreshed) >= refreshInterval { - p.refresh(now) - // call cleanup every refresh cycle - p.cleanup(now) - p.lastRefreshed = now - } -} - -func (p *PodStore) Decorate(metric telegraf.Metric, kubernetesBlob map[string]interface{}) bool { - tags := metric.Tags() - p.decorateDiskDevice(metric, tags) - - if tags[MetricType] == TypeNode { - p.decorateNode(metric) - } else if _, ok := tags[K8sPodNameKey]; ok { - podKey := createPodKeyFromMetric(tags) - if podKey == "" { - log.Printf("E! podKey is unavailable when decorating pod.") - return false - } - - entry := p.getCachedEntry(podKey) - if entry == nil { - log.Printf("I! no pod is found for %s, refresh the cache now...", podKey) - p.refresh(time.Now()) - entry = p.getCachedEntry(podKey) - } - - // If pod is still not found, insert a placeholder to avoid too many refresh - if entry == nil { - log.Printf("W! no pod is found after reading through kubelet, add a placeholder for %s", podKey) - p.setCachedEntry(podKey, &cachedEntry{creation: time.Now()}) - return false - } - - // Ignore if we're told to ignore - if strings.EqualFold(entry.pod.ObjectMeta.Annotations[ignoreAnnotation], "true") { - return false - } - - // If the entry is not a placeholder, decorate the pod - if entry.pod.Name != "" { - p.decorateCpu(metric, tags, &entry.pod) - p.decorateMem(metric, tags, &entry.pod) - p.addStatus(metric, tags, &entry.pod) - addContainerCount(metric, tags, &entry.pod) - addContainerId(&entry.pod, tags, metric, kubernetesBlob) - p.addPodOwnersAndPodName(metric, &entry.pod, kubernetesBlob) - addLabels(&entry.pod, kubernetesBlob) - } else { - log.Printf("W! no pod information is found in podstore for pod %s", podKey) - return false - } - } - return true -} - -func (p *PodStore) getCachedEntry(podKey string) *cachedEntry { - p.Lock() - defer p.Unlock() - if content, ok := p.cache.Get(podKey); ok { - return content.(*cachedEntry) - } - return nil -} - -func (p *PodStore) setCachedEntry(podKey string, entry *cachedEntry) { - p.Lock() - defer p.Unlock() - p.cache.Set(podKey, entry) -} - -func (p *PodStore) setNodeStats(stats nodeStats) { - p.Lock() - defer p.Unlock() - p.nodeInfo.nodeStats = stats -} - -func (p *PodStore) getNodeStats() nodeStats { - p.Lock() - defer p.Unlock() - return p.nodeInfo.nodeStats -} - -func (p *PodStore) refresh(now time.Time) { - podList, _ := p.kubeClient.ListPods() - p.refreshInternal(now, podList) - p.nodeInfo.refreshEbsId() -} - -func (p *PodStore) cleanup(now time.Time) { - for _, prevMeasurement := range p.prevMeasurements { - prevMeasurement.CleanUp(now) - } - p.nodeInfo.cleanUp(now) - - p.Lock() - defer p.Unlock() - p.cache.CleanUp(now) -} - -func (p *PodStore) refreshInternal(now time.Time, podList []corev1.Pod) { - var podCount int - var containerCount int - var cpuRequest int64 - var memRequest int64 - - for _, pod := range podList { - podKey := createPodKeyFromMetaData(&pod) - if podKey == "" { - log.Printf("W! podKey is unavailable refresh pod store for pod %s", pod.Name) - continue - } - tmpCpuReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer) - cpuRequest += tmpCpuReq - tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer) - memRequest += tmpMemReq - if pod.Status.Phase == corev1.PodRunning { - podCount += 1 - } - - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.State.Running != nil { - containerCount += 1 - } - } - - p.setCachedEntry(podKey, &cachedEntry{ - pod: pod, - creation: now}) - } - - p.setNodeStats(nodeStats{podCnt: podCount, containerCnt: containerCount, memReq: memRequest, cpuReq: cpuRequest}) -} - -func (p *PodStore) decorateDiskDevice(metric telegraf.Metric, tags map[string]string) { - if tags[MetricType] == TypeContainerFS || tags[MetricType] == TypeNodeFS || tags[MetricType] == TypeNodeDiskIO || tags[MetricType] == TypeContainerDiskIO { - if deviceName, ok := tags[DiskDev]; ok { - if volId := p.nodeInfo.getEbsVolumeId(deviceName); volId != "" { - metric.AddTag(EbsVolumeId, volId) - } - } - } -} - -func (p *PodStore) decorateNode(metric telegraf.Metric) { - nodeStats := p.getNodeStats() - - if metric.HasField(MetricName(TypeNode, CpuTotal)) { - metric.AddField(MetricName(TypeNode, CpuLimit), p.nodeInfo.getCPUCapacity()) - metric.AddField(MetricName(TypeNode, CpuRequest), nodeStats.cpuReq) - if p.nodeInfo.getCPUCapacity() != 0 { - metric.AddField(MetricName(TypeNode, CpuUtilization), metric.Fields()[MetricName(TypeNode, CpuTotal)].(float64)/float64(p.nodeInfo.getCPUCapacity())*100) - metric.AddField(MetricName(TypeNode, CpuReservedCapacity), float64(nodeStats.cpuReq)/float64(p.nodeInfo.getCPUCapacity())*100) - } - } - - if metric.HasField(MetricName(TypeNode, MemWorkingset)) { - metric.AddField(MetricName(TypeNode, MemLimit), p.nodeInfo.getMemCapacity()) - metric.AddField(MetricName(TypeNode, MemRequest), nodeStats.memReq) - if p.nodeInfo.getMemCapacity() != 0 { - metric.AddField(MetricName(TypeNode, MemUtilization), float64(metric.Fields()[MetricName(TypeNode, MemWorkingset)].(uint64))/float64(p.nodeInfo.getMemCapacity())*100) - metric.AddField(MetricName(TypeNode, MemReservedCapacity), float64(nodeStats.memReq)/float64(p.nodeInfo.getMemCapacity())*100) - } - } - - metric.AddField(MetricName(TypeNode, RunningPodCount), nodeStats.podCnt) - metric.AddField(MetricName(TypeNode, RunningContainerCount), nodeStats.containerCnt) -} - -func (p *PodStore) decorateCpu(metric telegraf.Metric, tags map[string]string, pod *corev1.Pod) { - if tags[MetricType] == TypePod { - // add cpu limit and request for pod cpu - if metric.HasField(MetricName(TypePod, CpuTotal)) { - podCpuReq, _ := getResourceSettingForPod(pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer) - // set podReq to the sum of containerReq which has req - if podCpuReq != 0 { - metric.AddField(MetricName(TypePod, CpuRequest), podCpuReq) - } - - if p.nodeInfo.getCPUCapacity() != 0 { - metric.AddField(MetricName(TypePod, CpuUtilization), metric.Fields()[MetricName(TypePod, CpuTotal)].(float64)/float64(p.nodeInfo.getCPUCapacity())*100) - if podCpuReq != 0 { - metric.AddField(MetricName(TypePod, CpuReservedCapacity), float64(podCpuReq)/float64(p.nodeInfo.getCPUCapacity())*100) - } - } - - podCpuLimit, ok := getResourceSettingForPod(pod, p.nodeInfo.getCPUCapacity(), cpuKey, getLimitForContainer) - // only set podLimit when all the containers has limit - if ok && podCpuLimit != 0 { - metric.AddField(MetricName(TypePod, CpuLimit), podCpuLimit) - metric.AddField(MetricName(TypePod, CpuUtilizationOverPodLimit), metric.Fields()[MetricName(TypePod, CpuTotal)].(float64)/float64(podCpuLimit)*100) - } - } - } else if tags[MetricType] == TypeContainer { - // add cpu limit and request for container - if metric.HasField(MetricName(TypeContainer, CpuTotal)) { - if p.nodeInfo.getCPUCapacity() != 0 { - metric.AddField(MetricName(TypeContainer, CpuUtilization), metric.Fields()[MetricName(TypeContainer, CpuTotal)].(float64)/float64(p.nodeInfo.getCPUCapacity())*100) - } - if containerName, ok := tags[ContainerNamekey]; ok { - for _, containerSpec := range pod.Spec.Containers { - if containerSpec.Name == containerName { - if cpuLimit, ok := getLimitForContainer(cpuKey, containerSpec); ok { - metric.AddField(MetricName(TypeContainer, CpuLimit), cpuLimit) - } - if cpuReq, ok := getRequestForContainer(cpuKey, containerSpec); ok { - metric.AddField(MetricName(TypeContainer, CpuRequest), cpuReq) - } - } - } - } - } - } -} - -func (p *PodStore) decorateMem(metric telegraf.Metric, tags map[string]string, pod *corev1.Pod) { - if tags[MetricType] == TypePod { - if metric.HasField(MetricName(TypePod, MemWorkingset)) { - // add mem limit and request for pod mem - podMemReq, _ := getResourceSettingForPod(pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer) - // set podReq to the sum of containerReq which has req - if podMemReq != 0 { - metric.AddField(MetricName(TypePod, MemRequest), podMemReq) - } - - if p.nodeInfo.getMemCapacity() != 0 { - metric.AddField(MetricName(TypePod, MemUtilization), getFloat64(metric.Fields()[MetricName(TypePod, MemWorkingset)])/float64(p.nodeInfo.getMemCapacity())*100) - if podMemReq != 0 { - metric.AddField(MetricName(TypePod, MemReservedCapacity), float64(podMemReq)/float64(p.nodeInfo.getMemCapacity())*100) - } - } - - podMemLimit, ok := getResourceSettingForPod(pod, p.nodeInfo.getMemCapacity(), memoryKey, getLimitForContainer) - // only set podLimit when all the containers has limit - if ok && podMemLimit != 0 { - metric.AddField(MetricName(TypePod, MemLimit), podMemLimit) - metric.AddField(MetricName(TypePod, MemUtilizationOverPodLimit), getFloat64(metric.Fields()[MetricName(TypePod, MemWorkingset)])/float64(podMemLimit)*100) - } - } - } else if tags[MetricType] == TypeContainer { - // add mem limit and request for container - if metric.HasField(MetricName(TypeContainer, MemWorkingset)) { - if p.nodeInfo.getMemCapacity() != 0 { - metric.AddField(MetricName(TypeContainer, MemUtilization), getFloat64(metric.Fields()[MetricName(TypeContainer, MemWorkingset)])/float64(p.nodeInfo.getMemCapacity())*100) - } - if containerName, ok := tags[ContainerNamekey]; ok { - for _, containerSpec := range pod.Spec.Containers { - if containerSpec.Name == containerName { - if memLimit, ok := getLimitForContainer(memoryKey, containerSpec); ok { - metric.AddField(MetricName(TypeContainer, MemLimit), memLimit) - } - if memReq, ok := getRequestForContainer(memoryKey, containerSpec); ok { - metric.AddField(MetricName(TypeContainer, MemRequest), memReq) - } - } - } - } - } - } -} - -func getFloat64(v interface{}) float64 { - var value float64 - - switch t := v.(type) { - case int: - value = float64(t) - case int32: - value = float64(t) - case int64: - value = float64(t) - case uint: - value = float64(t) - case uint32: - value = float64(t) - case uint64: - value = float64(t) - case float64: - value = t - default: - log.Printf("value type does not support: %v, %T", v, v) - } - return value -} - -func (p *PodStore) addStatus(metric telegraf.Metric, tags map[string]string, pod *corev1.Pod) { - if tags[MetricType] == TypePod { - metric.AddField(PodStatus, string(pod.Status.Phase)) - var curContainerRestarts int - for _, containerStatus := range pod.Status.ContainerStatuses { - curContainerRestarts += int(containerStatus.RestartCount) - } - podKey := createPodKeyFromMetric(tags) - if podKey != "" { - content, ok := p.getPrevMeasurement(TypePod, podKey) - if ok { - prevMeasurement := content.(prevPodMeasurement) - result := 0 - if curContainerRestarts > prevMeasurement.containersRestarts { - result = curContainerRestarts - prevMeasurement.containersRestarts - } - metric.AddField(MetricName(TypePod, ContainerRestartCount), result) - } - p.setPrevMeasurement(TypePod, podKey, prevPodMeasurement{containersRestarts: curContainerRestarts}) - } - } else if tags[MetricType] == TypeContainer { - if containerName, ok := tags[ContainerNamekey]; ok { - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.Name == containerName { - if containerStatus.State.Running != nil { - metric.AddField(ContainerStatus, "Running") - } else if containerStatus.State.Waiting != nil { - metric.AddField(ContainerStatus, "Waiting") - if containerStatus.State.Waiting.Reason != "" { - metric.AddField(ContainerStatusReason, containerStatus.State.Waiting.Reason) - } - } else if containerStatus.State.Terminated != nil { - metric.AddField(ContainerStatus, "Terminated") - if containerStatus.State.Terminated.Reason != "" { - metric.AddField(ContainerStatusReason, containerStatus.State.Terminated.Reason) - } - } - if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.Reason != "" { - metric.AddField(ContainerLastTerminationReason, containerStatus.LastTerminationState.Terminated.Reason) - } - containerKey := createContainerKeyFromMetric(tags) - if containerKey != "" { - content, ok := p.getPrevMeasurement(TypeContainer, containerKey) - if ok { - prevMeasurement := content.(prevContainerMeasurement) - result := 0 - if int(containerStatus.RestartCount) > prevMeasurement.restarts { - result = int(containerStatus.RestartCount) - prevMeasurement.restarts - } - metric.AddField(ContainerRestartCount, result) - } - p.setPrevMeasurement(TypeContainer, containerKey, prevContainerMeasurement{restarts: int(containerStatus.RestartCount)}) - } - } - } - } - } -} - -// It could be used to get limit/request(depend on the passed-in fn) per pod -// return the sum of ResourceSetting and a bool which indicate whether all container set Resource -func getResourceSettingForPod(pod *corev1.Pod, bound int64, resource corev1.ResourceName, fn func(resource corev1.ResourceName, spec corev1.Container) (int64, bool)) (int64, bool) { - var result int64 - allSet := true - for _, containerSpec := range pod.Spec.Containers { - val, ok := fn(resource, containerSpec) - if ok { - result += val - } else { - allSet = false - } - } - if bound != 0 && result > bound { - result = bound - } - return result, allSet -} - -func getLimitForContainer(resource corev1.ResourceName, spec corev1.Container) (int64, bool) { - if v, ok := spec.Resources.Limits[resource]; ok { - var limit int64 - if resource == cpuKey { - limit = v.MilliValue() - } else { - limit = v.Value() - } - return limit, true - } - return 0, false -} - -func getRequestForContainer(resource corev1.ResourceName, spec corev1.Container) (int64, bool) { - if v, ok := spec.Resources.Requests[resource]; ok { - var req int64 - if resource == cpuKey { - req = v.MilliValue() - } else { - req = v.Value() - } - return req, true - } - return 0, false -} - -func addContainerId(pod *corev1.Pod, tags map[string]string, metric telegraf.Metric, kubernetesBlob map[string]interface{}) { - if _, ok := tags[ContainerNamekey]; ok { - rawId := "" - for _, container := range pod.Status.ContainerStatuses { - if tags[ContainerNamekey] == container.Name { - rawId = container.ContainerID - if rawId != "" { - ids := strings.Split(rawId, "://") - if len(ids) == 2 { - kubernetesBlob[ids[0]] = map[string]string{"container_id": ids[1]} - } else { - log.Printf("W! Cannot parse container id from %s for container %s", rawId, container.Name) - kubernetesBlob["container_id"] = rawId - } - } - break - } - } - if rawId == "" { - kubernetesBlob["container_id"] = tags[ContainerIdkey] - } - metric.RemoveTag(ContainerIdkey) - } -} - -func addLabels(pod *corev1.Pod, kubernetesBlob map[string]interface{}) { - labels := make(map[string]string) - for k, v := range pod.Labels { - labels[k] = v - } - if len(labels) > 0 { - kubernetesBlob["labels"] = labels - } -} - -func getJobNamePrefix(podName string) string { - return re.Split(podName, 2)[0] -} - -func (p *PodStore) addPodOwnersAndPodName(metric telegraf.Metric, pod *corev1.Pod, kubernetesBlob map[string]interface{}) { - var owners []Owner - podName := "" - for _, owner := range pod.OwnerReferences { - if owner.Kind != "" && owner.Name != "" { - kind := owner.Kind - name := owner.Name - if owner.Kind == ReplicaSet { - rsToDeployment := k8sclient.Get().ReplicaSet.ReplicaSetToDeployment() - if parent := rsToDeployment[owner.Name]; parent != "" { - kind = Deployment - name = parent - } else if parent := parseDeploymentFromReplicaSet(owner.Name); parent != "" { - profiler.Profiler.AddStats([]string{"k8sdecorator", "podstore", "rsToDeploymentMiss"}, 1) - kind = Deployment - name = parent - } - } else if owner.Kind == Job { - if parent := parseCronJobFromJob(owner.Name); parent != "" { - kind = CronJob - name = parent - } else if !p.prefFullPodName { - name = getJobNamePrefix(name) - } - } - owners = append(owners, Owner{OwnerKind: kind, OwnerName: name}) - - if podName == "" { - if owner.Kind == StatefulSet { - podName = pod.Name - } else if owner.Kind == DaemonSet || owner.Kind == Job || owner.Kind == ReplicaSet || owner.Kind == ReplicationController { - podName = name - } - } - } - } - if len(owners) > 0 { - kubernetesBlob["pod_owners"] = owners - } - - // if podName is not set according to a well-known controllers, then set it to its own name - if podName == "" { - if strings.HasPrefix(pod.Name, kubeProxy) && !p.prefFullPodName { - podName = kubeProxy - } else { - podName = pod.Name - } - } - - metric.AddTag(PodNameKey, podName) -} - -func addContainerCount(metric telegraf.Metric, tags map[string]string, pod *corev1.Pod) { - runningContainerCount := 0 - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.State.Running != nil { - runningContainerCount += 1 - } - } - if tags[MetricType] == TypePod { - metric.AddField(MetricName(TypePod, RunningContainerCount), runningContainerCount) - metric.AddField(MetricName(TypePod, ContainerCount), len(pod.Status.ContainerStatuses)) - } -} diff --git a/plugins/processors/k8sdecorator/stores/podstore_test.go b/plugins/processors/k8sdecorator/stores/podstore_test.go deleted file mode 100644 index 20c33c3db5..0000000000 --- a/plugins/processors/k8sdecorator/stores/podstore_test.go +++ /dev/null @@ -1,635 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - corev1 "k8s.io/api/core/v1" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/k8sclient" - "github.com/aws/amazon-cloudwatch-agent/internal/mapWithExpiry" -) - -func getBaseTestPodInfo() *corev1.Pod { - podJson := ` -{ - "kind": "PodList", - "apiVersion": "v1", - "metadata": { - - }, - "items": [ - { - "metadata": { - "name": "cpu-limit", - "namespace": "default", - "ownerReferences": [ - { - "apiVersion": "apps/v1", - "blockOwnerDeletion": true, - "controller": true, - "kind": "DaemonSet", - "name": "DaemonSetTest", - "uid": "36779a62-4aca-11e9-977b-0672b6c6fc94" - } - ], - "selfLink": "/api/v1/namespaces/default/pods/cpu-limit", - "uid": "764d01e1-2a2f-11e9-95ea-0a695d7ce286", - "resourceVersion": "5671573", - "creationTimestamp": "2019-02-06T16:51:34Z", - "labels": { - "app": "hello_test" - }, - "annotations": { - "kubernetes.io/config.seen": "2019-02-19T00:06:56.109155665Z", - "kubernetes.io/config.source": "api" - } - }, - "spec": { - "volumes": [ - { - "name": "default-token-tlgw7", - "secret": { - "secretName": "default-token-tlgw7", - "defaultMode": 420 - } - } - ], - "containers": [ - { - "name": "ubuntu", - "image": "ubuntu", - "command": [ - "/bin/bash" - ], - "args": [ - "-c", - "sleep 300000000" - ], - "resources": { - "limits": { - "cpu": "10m", - "memory": "50Mi" - }, - "requests": { - "cpu": "10m", - "memory": "50Mi" - } - }, - "volumeMounts": [ - { - "name": "default-token-tlgw7", - "readOnly": true, - "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" - } - ], - "terminationMessagePath": "/dev/termination-log", - "terminationMessagePolicy": "File", - "imagePullPolicy": "Always" - } - ], - "restartPolicy": "Always", - "terminationGracePeriodSeconds": 30, - "dnsPolicy": "ClusterFirst", - "serviceAccountName": "default", - "serviceAccount": "default", - "nodeName": "ip-192-168-67-127.us-west-2.compute.internal", - "securityContext": { - - }, - "schedulerName": "default-scheduler", - "tolerations": [ - { - "key": "node.kubernetes.io/not-ready", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - }, - { - "key": "node.kubernetes.io/unreachable", - "operator": "Exists", - "effect": "NoExecute", - "tolerationSeconds": 300 - } - ], - "priority": 0 - }, - "status": { - "phase": "Running", - "conditions": [ - { - "type": "Initialized", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:34Z" - }, - { - "type": "Ready", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:43Z" - }, - { - "type": "ContainersReady", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": null - }, - { - "type": "PodScheduled", - "status": "True", - "lastProbeTime": null, - "lastTransitionTime": "2019-02-06T16:51:34Z" - } - ], - "hostIP": "192.168.67.127", - "podIP": "192.168.76.93", - "startTime": "2019-02-06T16:51:34Z", - "containerStatuses": [ - { - "name": "ubuntu", - "state": { - "running": { - "startedAt": "2019-02-06T16:51:42Z" - } - }, - "lastState": { - - }, - "ready": true, - "restartCount": 0, - "image": "ubuntu:latest", - "imageID": "docker-pullable://ubuntu@sha256:7a47ccc3bbe8a451b500d2b53104868b46d60ee8f5b35a24b41a86077c650210", - "containerID": "docker://637631e2634ea92c0c1aa5d24734cfe794f09c57933026592c12acafbaf6972c" - } - ], - "qosClass": "Guaranteed" - } - } - ] -}` - pods := corev1.PodList{} - err := json.Unmarshal([]byte(podJson), &pods) - if err != nil { - panic(fmt.Sprintf("unmarshal pod err %v", err)) - } - - return &pods.Items[0] -} - -func TestPodStore_decorateCpu(t *testing.T) { - podStore := &PodStore{nodeInfo: &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}}} - pod := getBaseTestPodInfo() - - tags := map[string]string{MetricType: TypePod} - fields := map[string]interface{}{MetricName(TypePod, CpuTotal): float64(1)} - - m := metric.New("test", tags, fields, time.Now()) - - podStore.decorateCpu(m, tags, pod) - - resultFields := m.Fields() - assert.Equal(t, int64(10), resultFields["pod_cpu_request"]) - assert.Equal(t, int64(10), resultFields["pod_cpu_limit"]) - assert.Equal(t, float64(0.25), resultFields["pod_cpu_reserved_capacity"]) - assert.Equal(t, float64(10), resultFields["pod_cpu_utilization_over_pod_limit"]) - assert.Equal(t, float64(1), resultFields["pod_cpu_usage_total"]) - assert.Equal(t, float64(0.025), resultFields["pod_cpu_utilization"]) -} - -func TestPodStore_decorateMem(t *testing.T) { - podStore := &PodStore{nodeInfo: &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}}} - pod := getBaseTestPodInfo() - - tags := map[string]string{MetricType: TypePod} - fields := map[string]interface{}{MetricName(TypePod, MemWorkingset): int64(10 * 1024 * 1024)} - - m := metric.New("test", tags, fields, time.Now()) - - podStore.decorateMem(m, tags, pod) - - resultFields := m.Fields() - assert.Equal(t, int64(52428800), resultFields["pod_memory_request"]) - assert.Equal(t, int64(52428800), resultFields["pod_memory_limit"]) - assert.Equal(t, float64(12.5), resultFields["pod_memory_reserved_capacity"]) - assert.Equal(t, float64(20), resultFields["pod_memory_utilization_over_pod_limit"]) - assert.Equal(t, int64(10*1024*1024), resultFields["pod_memory_working_set"]) - assert.Equal(t, float64(2.5), resultFields["pod_memory_utilization"]) -} - -func TestPodStore_Decorate(t *testing.T) { - podStore := &PodStore{nodeInfo: &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}}, cache: mapWithExpiry.NewMapWithExpiry(PodsExpiry)} - pod := getBaseTestPodInfo() - pod.ObjectMeta.Annotations[ignoreAnnotation] = "true" - podStore.setCachedEntry("namespace:test,podName:test", &cachedEntry{ - pod: *pod, - creation: time.Now(), - }) - - tags := map[string]string{MetricType: TypePod, K8sPodNameKey: "test", K8sNamespace: "test"} - fields := map[string]interface{}{MetricName(TypePod, MemWorkingset): int64(10 * 1024 * 1024)} - - m := metric.New("test", tags, fields, time.Now()) - kubernetesBlob := map[string]interface{}{} - - assert.False(t, podStore.Decorate(m, kubernetesBlob)) -} - -func TestPodStore_addContainerCount(t *testing.T) { - pod := getBaseTestPodInfo() - tags := map[string]string{MetricType: TypePod} - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - addContainerCount(m, tags, pod) - assert.Equal(t, int64(1), m.Fields()[MetricName(TypePod, RunningContainerCount)]) - assert.Equal(t, int64(1), m.Fields()[MetricName(TypePod, ContainerCount)]) - - pod.Status.ContainerStatuses[0].State.Running = nil - addContainerCount(m, tags, pod) - assert.Equal(t, int64(0), m.Fields()[MetricName(TypePod, RunningContainerCount)]) - assert.Equal(t, int64(1), m.Fields()[MetricName(TypePod, ContainerCount)]) -} - -func TestPodStore_addStatus(t *testing.T) { - pod := getBaseTestPodInfo() - tags := map[string]string{MetricType: TypePod, K8sNamespace: "default", K8sPodNameKey: "cpu-limit"} - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore := &PodStore{prevMeasurements: make(map[string]*mapWithExpiry.MapWithExpiry)} - - podStore.addStatus(m, tags, pod) - assert.Equal(t, "Running", m.Fields()[PodStatus].(string)) - _, ok := m.Fields()[MetricName(TypePod, ContainerRestartCount)] - assert.False(t, ok) - - tags = map[string]string{MetricType: TypeContainer, K8sNamespace: "default", K8sPodNameKey: "cpu-limit", ContainerNamekey: "ubuntu"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore.addStatus(m, tags, pod) - assert.Equal(t, "Running", m.Fields()[ContainerStatus].(string)) - _, ok = m.Fields()[ContainerRestartCount] - assert.False(t, ok) - - pod.Status.ContainerStatuses[0].State.Running = nil - pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{} - pod.Status.ContainerStatuses[0].LastTerminationState.Terminated = &corev1.ContainerStateTerminated{Reason: "OOMKilled"} - pod.Status.ContainerStatuses[0].RestartCount = 1 - pod.Status.Phase = "Succeeded" - - tags = map[string]string{MetricType: TypePod, K8sNamespace: "default", K8sPodNameKey: "cpu-limit"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore.addStatus(m, tags, pod) - assert.Equal(t, "Succeeded", m.Fields()[PodStatus].(string)) - assert.Equal(t, int64(1), m.Fields()[MetricName(TypePod, ContainerRestartCount)].(int64)) - - tags = map[string]string{MetricType: TypeContainer, K8sNamespace: "default", K8sPodNameKey: "cpu-limit", ContainerNamekey: "ubuntu"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore.addStatus(m, tags, pod) - assert.Equal(t, "Terminated", m.Fields()[ContainerStatus].(string)) - assert.Equal(t, "OOMKilled", m.Fields()[ContainerLastTerminationReason].(string)) - assert.Equal(t, int64(1), m.Fields()[ContainerRestartCount].(int64)) - - // test delta of restartCount - pod.Status.ContainerStatuses[0].RestartCount = 3 - tags = map[string]string{MetricType: TypePod, K8sNamespace: "default", K8sPodNameKey: "cpu-limit"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore.addStatus(m, tags, pod) - assert.Equal(t, int64(2), m.Fields()[MetricName(TypePod, ContainerRestartCount)].(int64)) - - tags = map[string]string{MetricType: TypeContainer, K8sNamespace: "default", K8sPodNameKey: "cpu-limit", ContainerNamekey: "ubuntu"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - podStore.addStatus(m, tags, pod) - assert.Equal(t, int64(2), m.Fields()[ContainerRestartCount].(int64)) -} - -func TestPodStore_addContainerId(t *testing.T) { - pod := getBaseTestPodInfo() - tags := map[string]string{ContainerNamekey: "ubuntu", ContainerIdkey: "123"} - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - kubernetesBlob := map[string]interface{}{} - addContainerId(pod, tags, m, kubernetesBlob) - - expected := map[string]interface{}{} - expected["docker"] = map[string]string{"container_id": "637631e2634ea92c0c1aa5d24734cfe794f09c57933026592c12acafbaf6972c"} - assert.Equal(t, expected, kubernetesBlob) - assert.Equal(t, map[string]string{ContainerNamekey: "ubuntu"}, m.Tags()) - - tags = map[string]string{ContainerNamekey: "notUbuntu", ContainerIdkey: "123"} - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - kubernetesBlob = map[string]interface{}{} - addContainerId(pod, tags, m, kubernetesBlob) - - expected = map[string]interface{}{} - expected["container_id"] = "123" - assert.Equal(t, expected, kubernetesBlob) - assert.Equal(t, map[string]string{ContainerNamekey: "notUbuntu"}, m.Tags()) -} - -func TestPodStore_addLabel(t *testing.T) { - pod := getBaseTestPodInfo() - kubernetesBlob := map[string]interface{}{} - addLabels(pod, kubernetesBlob) - expected := map[string]interface{}{} - expected["labels"] = map[string]string{"app": "hello_test"} - assert.Equal(t, expected, kubernetesBlob) -} - -// Mock client start -var mockClient = new(MockClient) - -var mockK8sClient = &k8sclient.K8sClient{ - ReplicaSet: mockClient, -} - -func mockGet() *k8sclient.K8sClient { - return mockK8sClient -} - -type MockClient struct { - k8sclient.ReplicaSetClient - - mock.Mock -} - -// k8sclient.ReplicaSetClient -func (client *MockClient) ReplicaSetToDeployment() map[string]string { - args := client.Called() - return args.Get(0).(map[string]string) -} - -func (client *MockClient) Init() { -} - -func (client *MockClient) Shutdown() { -} - -// -// Mock client end -// - -// Mock client 2 start -var mockClient2 = new(MockClient2) - -var mockK8sClient2 = &k8sclient.K8sClient{ - ReplicaSet: mockClient2, -} - -func mockGet2() *k8sclient.K8sClient { - return mockK8sClient2 -} - -type MockClient2 struct { - k8sclient.ReplicaSetClient - - mock.Mock -} - -// k8sclient.ReplicaSetClient -func (client *MockClient2) ReplicaSetToDeployment() map[string]string { - args := client.Called() - return args.Get(0).(map[string]string) -} - -func (client *MockClient2) Init() { -} - -func (client *MockClient2) Shutdown() { -} - -// -// Mock client 2 end -// - -func TestGetJobNamePrefix(t *testing.T) { - assert.Equal(t, "abcd", getJobNamePrefix("abcd-efg")) - assert.Equal(t, "abcd", getJobNamePrefix("abcd.efg")) - assert.Equal(t, "abcd", getJobNamePrefix("abcd-e.fg")) - assert.Equal(t, "abc", getJobNamePrefix("abc.d-efg")) - assert.Equal(t, "abcd", getJobNamePrefix("abcd-.efg")) - assert.Equal(t, "abcd", getJobNamePrefix("abcd.-efg")) - assert.Equal(t, "abcdefg", getJobNamePrefix("abcdefg")) - assert.Equal(t, "abcdefg", getJobNamePrefix("abcdefg-")) - assert.Equal(t, "", getJobNamePrefix(".abcd-efg")) - assert.Equal(t, "", getJobNamePrefix("")) -} - -func TestPodStore_addPodOwnersAndPodNameFallback(t *testing.T) { - k8sclient.Get = mockGet2 - mockClient2.On("ReplicaSetToDeployment").Return(map[string]string{}) - - podStore := &PodStore{} - pod := getBaseTestPodInfo() - tags := map[string]string{MetricType: TypePod, ContainerNamekey: "ubuntu"} - - // Test ReplicaSet - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - rsName := "ReplicaSetTest" - suffix := "-42kcz" - pod.OwnerReferences[0].Kind = ReplicaSet - pod.OwnerReferences[0].Name = rsName + suffix - kubernetesBlob := map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner := map[string]interface{}{} - expectedOwner["pod_owners"] = []Owner{{OwnerKind: Deployment, OwnerName: rsName}} - expectedOwnerName := rsName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test Job - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - jobName := "Job" - suffix = "-0123456789" - pod.OwnerReferences[0].Kind = Job - pod.OwnerReferences[0].Name = jobName + suffix - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: CronJob, OwnerName: jobName}} - expectedOwnerName = jobName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) -} - -func TestPodStore_addPodOwnersAndPodName(t *testing.T) { - k8sclient.Get = mockGet - mockClient.On("ReplicaSetToDeployment").Return(map[string]string{"DeploymentTest-sftrz2785": "DeploymentTest"}) - - podStore := &PodStore{} - - pod := getBaseTestPodInfo() - tags := map[string]string{MetricType: TypePod, ContainerNamekey: "ubuntu"} - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - - // Test DaemonSet - kubernetesBlob := map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - - expectedOwner := map[string]interface{}{} - expectedOwner["pod_owners"] = []Owner{{OwnerKind: DaemonSet, OwnerName: "DaemonSetTest"}} - expectedOwnerName := "DaemonSetTest" - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test ReplicaSet - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - rsName := "ReplicaSetTest" - pod.OwnerReferences[0].Kind = ReplicaSet - pod.OwnerReferences[0].Name = rsName - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: ReplicaSet, OwnerName: rsName}} - expectedOwnerName = rsName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test StatefulSet - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - ssName := "StatefulSetTest" - pod.OwnerReferences[0].Kind = StatefulSet - pod.OwnerReferences[0].Name = ssName - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: StatefulSet, OwnerName: ssName}} - expectedOwnerName = "cpu-limit" - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test ReplicationController - rcName := "ReplicationControllerTest" - pod.OwnerReferences[0].Kind = ReplicationController - pod.OwnerReferences[0].Name = rcName - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: ReplicationController, OwnerName: rcName}} - expectedOwnerName = rcName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test Job - podStore.prefFullPodName = true - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - jobName := "JobTest" - pod.OwnerReferences[0].Kind = Job - surfixHash := ".088123x12" - pod.OwnerReferences[0].Name = jobName + surfixHash - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: Job, OwnerName: jobName + surfixHash}} - expectedOwnerName = jobName + surfixHash - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - podStore.prefFullPodName = false - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: Job, OwnerName: jobName}} - expectedOwnerName = jobName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test Deployment - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - dpName := "DeploymentTest" - pod.OwnerReferences[0].Kind = ReplicaSet - pod.OwnerReferences[0].Name = dpName + "-sftrz2785" - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: Deployment, OwnerName: dpName}} - expectedOwnerName = dpName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test CronJob - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - cjName := "CronJobTest" - pod.OwnerReferences[0].Kind = Job - pod.OwnerReferences[0].Name = cjName + "-1556582405" - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - expectedOwner["pod_owners"] = []Owner{{OwnerKind: CronJob, OwnerName: cjName}} - expectedOwnerName = cjName - assert.Equal(t, expectedOwnerName, m.Tags()[PodNameKey]) - assert.Equal(t, expectedOwner, kubernetesBlob) - - // Test kube-proxy created in kops - podStore.prefFullPodName = true - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - kpName := kubeProxy + "-xyz1" - pod.OwnerReferences = nil - pod.Name = kpName - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - assert.Equal(t, kpName, m.Tags()[PodNameKey]) - assert.True(t, len(kubernetesBlob) == 0) - - podStore.prefFullPodName = false - m = metric.New("test", tags, map[string]interface{}{}, time.Now()) - pod.OwnerReferences = nil - pod.Name = kpName - kubernetesBlob = map[string]interface{}{} - podStore.addPodOwnersAndPodName(m, pod, kubernetesBlob) - assert.Equal(t, kubeProxy, m.Tags()[PodNameKey]) - assert.True(t, len(kubernetesBlob) == 0) -} - -func TestPodStore_refreshInternal(t *testing.T) { - pod := getBaseTestPodInfo() - podList := []corev1.Pod{*pod} - - podStore := &PodStore{cache: mapWithExpiry.NewMapWithExpiry(time.Minute), nodeInfo: &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}}} - podStore.refreshInternal(time.Now(), podList) - - assert.Equal(t, int64(10), podStore.nodeInfo.nodeStats.cpuReq) - assert.Equal(t, int64(50*1024*1024), podStore.nodeInfo.nodeStats.memReq) - assert.Equal(t, 1, podStore.nodeInfo.nodeStats.podCnt) - assert.Equal(t, 1, podStore.nodeInfo.nodeStats.containerCnt) - assert.Equal(t, 1, podStore.cache.Size()) -} - -func TestPodStore_decorateNode(t *testing.T) { - pod := getBaseTestPodInfo() - podList := []corev1.Pod{*pod} - - podStore := &PodStore{cache: mapWithExpiry.NewMapWithExpiry(time.Minute), nodeInfo: &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}}} - podStore.refreshInternal(time.Now(), podList) - - tags := map[string]string{MetricType: TypeNode} - fields := map[string]interface{}{MetricName(TypeNode, CpuTotal): float64(100), MetricName(TypeNode, MemWorkingset): uint64(100 * 1024 * 1024)} - - m := metric.New("test", tags, fields, time.Now()) - podStore.decorateNode(m) - - resultFields := m.Fields() - assert.Equal(t, int64(10), resultFields["node_cpu_request"]) - assert.Equal(t, int64(4000), resultFields["node_cpu_limit"]) - assert.Equal(t, float64(0.25), resultFields["node_cpu_reserved_capacity"]) - assert.Equal(t, float64(100), resultFields["node_cpu_usage_total"]) - assert.Equal(t, float64(2.5), resultFields["node_cpu_utilization"]) - - assert.Equal(t, int64(50*1024*1024), resultFields["node_memory_request"]) - assert.Equal(t, int64(400*1024*1024), resultFields["node_memory_limit"]) - assert.Equal(t, float64(12.5), resultFields["node_memory_reserved_capacity"]) - assert.Equal(t, uint64(100*1024*1024), resultFields["node_memory_working_set"]) - assert.Equal(t, float64(25), resultFields["node_memory_utilization"]) - - assert.Equal(t, int64(1), resultFields["node_number_of_running_containers"]) - assert.Equal(t, int64(1), resultFields["node_number_of_running_pods"]) -} - -func TestPodStore_decorateDiskDevice(t *testing.T) { - nodeInfo := &nodeInfo{NodeCapacity: &NodeCapacity{MemCapacity: 400 * 1024 * 1024, CPUCapacity: 4}, ebsIds: mapWithExpiry.NewMapWithExpiry(2 * refreshInterval)} - podStore := &PodStore{nodeInfo: nodeInfo} - podStore.nodeInfo.ebsIds.Set("/dev/xvda", "aws://us-west-2b/vol-0d9f0816149eb2050") - - tags := map[string]string{MetricType: TypeNodeFS, DiskDev: "/dev/xvda"} - - m := metric.New("test", tags, nil, time.Now()) - podStore.decorateDiskDevice(m, tags) - - assert.Equal(t, "aws://us-west-2b/vol-0d9f0816149eb2050", m.Tags()[EbsVolumeId]) -} diff --git a/plugins/processors/k8sdecorator/stores/sampleMountFile/mounts b/plugins/processors/k8sdecorator/stores/sampleMountFile/mounts deleted file mode 100644 index a1d246852f..0000000000 --- a/plugins/processors/k8sdecorator/stores/sampleMountFile/mounts +++ /dev/null @@ -1,2 +0,0 @@ -/dev/nvme1n1 /var/lib/kubelet/plugins/kubernetes.io/aws-ebs/mounts/aws/us-west-2b/vol-0d9f0816149eb2050 ext4 rw,relatime,data=ordered 0 0 -/dev/nvme1n1 /var/lib/kubelet/pods/df570351-2e4c-11e9-95ea-0a695d7ce286/volumes/kubernetes.io~aws-ebs/pvc-df563cf6-2e4c-11e9-95ea-0a695d7ce286 ext4 rw,relatime,data=ordered 0 0 \ No newline at end of file diff --git a/plugins/processors/k8sdecorator/stores/servicestore.go b/plugins/processors/k8sdecorator/stores/servicestore.go deleted file mode 100644 index 210e2a4bae..0000000000 --- a/plugins/processors/k8sdecorator/stores/servicestore.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "log" - "sync" - "time" - - "github.com/influxdata/telegraf" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/k8sclient" -) - -const ( - refreshIntervalService = 10 //10s -) - -type ServiceStore struct { - podKeyToServiceNamesMap map[string][]string - sync.Mutex - lastRefreshed time.Time -} - -func NewServiceStore() *ServiceStore { - serviceStore := &ServiceStore{ - podKeyToServiceNamesMap: make(map[string][]string), - } - return serviceStore -} - -func (s *ServiceStore) RefreshTick() { - now := time.Now() - if now.Sub(s.lastRefreshed).Seconds() >= refreshIntervalService { - s.refresh() - s.lastRefreshed = now - } -} - -// service info is not mandatory -func (s *ServiceStore) Decorate(metric telegraf.Metric, kubernetesBlob map[string]interface{}) bool { - tags := metric.Tags() - if _, ok := tags[K8sPodNameKey]; ok { - podKey := createPodKeyFromMetric(tags) - if podKey == "" { - log.Printf("E! podKey is unavailable when decorating service.") - return false - } - if serviceList, ok := s.podKeyToServiceNamesMap[podKey]; ok { - if len(serviceList) > 0 { - addServiceNameTag(metric, serviceList) - } - } - } - return true -} - -func (s *ServiceStore) refresh() { - s.podKeyToServiceNamesMap = k8sclient.Get().Ep.PodKeyToServiceNames() -} - -func addServiceNameTag(metric telegraf.Metric, serviceNames []string) { - // TODO handle serviceNames len is larger than 1. We need to duplicate the metric object - metric.AddTag(TypeService, serviceNames[0]) -} diff --git a/plugins/processors/k8sdecorator/stores/store.go b/plugins/processors/k8sdecorator/stores/store.go deleted file mode 100644 index fe0caba460..0000000000 --- a/plugins/processors/k8sdecorator/stores/store.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "github.com/influxdata/telegraf" -) - -type K8sStore interface { - Decorate(metric telegraf.Metric, kubernetesBlob map[string]interface{}) bool - RefreshTick() -} diff --git a/plugins/processors/k8sdecorator/stores/utils.go b/plugins/processors/k8sdecorator/stores/utils.go deleted file mode 100644 index 66be58e237..0000000000 --- a/plugins/processors/k8sdecorator/stores/utils.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "regexp" - "strconv" - "strings" - - corev1 "k8s.io/api/core/v1" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/k8sCommon/k8sutil" -) - -func createPodKeyFromMetaData(pod *corev1.Pod) string { - namespace := pod.Namespace - podName := pod.Name - return k8sutil.CreatePodKey(namespace, podName) -} - -func createPodKeyFromMetric(tags map[string]string) string { - namespace := tags[K8sNamespace] - podName := tags[K8sPodNameKey] - return k8sutil.CreatePodKey(namespace, podName) -} - -func createContainerKeyFromMetric(tags map[string]string) string { - namespace := tags[K8sNamespace] - podName := tags[K8sPodNameKey] - containerName := tags[ContainerNamekey] - return k8sutil.CreateContainerKey(namespace, podName, containerName) -} - -var ( - // deploymentAllowedRegExp holds the characters allowed in replicaset names from as parent deployment - // https://github.com/kubernetes/apimachinery/blob/master/pkg/util/rand/rand.go#L83 - deploymentAllowedRegExp = regexp.MustCompile(`^[b-hj-np-tv-xz24-9]+$`) - // cronJobAllowedRegexp ensures the characters in cron job name are only numbers. - cronJobAllowedRegexp = regexp.MustCompile(`^\d+$`) -) - -// get the deployment name by stripping the last dash following some rules -// return empty if it is not following the rule -func parseDeploymentFromReplicaSet(name string) string { - lastDash := strings.LastIndexAny(name, "-") - if lastDash == -1 { - // No dash - return "" - } - suffix := name[lastDash+1:] - if len(suffix) >= 3 && deploymentAllowedRegExp.MatchString(suffix) { - // Invalid suffix if it is less than 3 - return name[:lastDash] - } - - return "" -} - -// Get the cronJob name by stripping the last dash following by the naming convention: JobName-UnixTime -// based on https://github.com/kubernetes/kubernetes/blob/c4d752765b3bbac2237bf87cf0b1c2e307844666/pkg/controller/cronjob/cronjob_controllerv2.go#L594-L596. -// Before v1.21 CronJob in Kubernetes has used Unix Time in second; after v1.21 is a Unix Time in Minutes. - -func parseCronJobFromJob(name string) string { - lastDash := strings.LastIndexAny(name, "-") - - //Return empty since the naming convention is: JobName-UnixTime, if it does not have the "-", meanings the job name is empty - if lastDash == -1 { - return "" - } - - suffix := name[lastDash+1:] - suffixInt, err := strconv.ParseInt(suffix, 10, 64) - - if err != nil { - return "" - } - - //Convert Unix Time In Minutes to Unix Time - suffixStringMultiply := strconv.FormatInt(suffixInt*60, 10) - //Checking if the suffix is a unix time by checking: the length and contains character - //Checking for the length: CronJobControllerV2 is Unix Time in Minutes (7-9 characters) while CronJob is Unix Time (10 characters). - //However, multiply by 60 to convert the Unix Time In Minutes back to Unix Time in order to have the same condition as Unix Time - if len(suffix) == 10 && cronJobAllowedRegexp.MatchString(suffix) { //Condition for CronJob before k8s v1.21 - return name[:lastDash] - } - - if len(suffixStringMultiply) == 10 && cronJobAllowedRegexp.MatchString(suffixStringMultiply) { //Condition for CronJobControllerV2 after k8s v1.21 - return name[:lastDash] - } - - return "" -} diff --git a/plugins/processors/k8sdecorator/stores/utils_test.go b/plugins/processors/k8sdecorator/stores/utils_test.go deleted file mode 100644 index 3c9beb41cb..0000000000 --- a/plugins/processors/k8sdecorator/stores/utils_test.go +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package stores - -import ( - "strconv" - "testing" - "time" - - "github.com/stretchr/testify/assert" -) - -func TestUtils_parseDeploymentFromReplicaSet(t *testing.T) { - testcases := []struct { - name string - inputString string - expected string - }{ - { - name: "Get ReplicaSet Name with unallowed characters", - inputString: "cloudwatch-ag", - expected: "", - }, - { - name: "Get ReplicaSet Name with allowed characters smaller than 3 characters", - inputString: "cloudwatch-agent-bj", - expected: "", - }, - { - name: "Get ReplicaSet Name with allowed characters", - inputString: "cloudwatch-agent-42kcz", - expected: "cloudwatch-agent", - }, - { - name: "Get ReplicaSet Name with string smaller than 3 characters", - inputString: "cloudwatch-agent-sd", - expected: "", - }, - } - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, parseDeploymentFromReplicaSet(tc.inputString), tc.expected) - }) - } -} - -func TestUtils_parseCronJobFromJob(t *testing.T) { - unixTime := time.Now().Unix() - unixTimeString := strconv.FormatInt(unixTime, 10) - unixTimeMinutesString := strconv.FormatInt(unixTime/60, 10) - - testcases := []struct { - name string - inputString string - expected string - }{ - { - name: "Get CronJobControllerV2 or CronJob's Name with alphabet characters", - inputString: "hello-name", - expected: "", - }, - { - name: "Get CronJobControllerV2 or CronJob's Name with special characters and exact 10 characters", - inputString: "hello-1678995&64", - expected: "", - }, - { - name: "Get CronJobControllerV2 or CronJob's Name with Unix Time not equal to 10 letters", - inputString: "hello-238", - expected: "", - }, - { - name: "Get CronJobControllerV2's Name after k8s v1.21 with correct Unix Time", - inputString: "hello-" + unixTimeMinutesString, - expected: "hello", - }, - { - name: "Get CronJobControllerV2's Name after k8s v1.21 with alphabet Unix Time", - inputString: "hello-" + unixTimeMinutesString + "a28bc", - expected: "", - }, - - { - name: "Get CronJobControllerV2's Name after k8s v1.21 with Unix Time not equal to 10 letters", - inputString: "hello" + unixTimeMinutesString + "523", - expected: "", - }, - { - name: "Get CronJob's Name before k8s v1.21 with correct Unix Time", - inputString: "hello-" + unixTimeString, - expected: "hello", - }, - { - name: "Get CronJob's Name before k8s v1.21 with special characters", - inputString: "hello-" + unixTimeString + "@", - expected: "", - }, - } - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, parseCronJobFromJob(tc.inputString), tc.expected) - }) - } -} diff --git a/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger.go b/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger.go deleted file mode 100644 index d44a428a5e..0000000000 --- a/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package structuredlogsadapter - -import ( - "github.com/influxdata/telegraf" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -const ( - cloudwatchNamespace = "ContainerInsights" - Bytes = "Bytes" - BytesPerSec = "Bytes/Second" - Count = "Count" - Percent = "Percent" -) - -var nodeMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeNode, CpuUtilization)}, - {Unit: Percent, Name: MetricName(TypeNode, MemUtilization)}, - {Unit: BytesPerSec, Name: MetricName(TypeNode, NetTotalBytes)}, - {Unit: Percent, Name: MetricName(TypeNode, CpuReservedCapacity)}, - {Unit: Percent, Name: MetricName(TypeNode, MemReservedCapacity)}, - {Unit: Count, Name: MetricName(TypeNode, RunningPodCount)}, - {Unit: Count, Name: MetricName(TypeNode, RunningContainerCount)}}, - DimensionSets: [][]string{{NodeNameKey, InstanceIdKey, ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeNode, CpuUtilization)}, - {Unit: Percent, Name: MetricName(TypeNode, MemUtilization)}, - {Unit: BytesPerSec, Name: MetricName(TypeNode, NetTotalBytes)}, - {Unit: Percent, Name: MetricName(TypeNode, CpuReservedCapacity)}, - {Unit: Percent, Name: MetricName(TypeNode, MemReservedCapacity)}, - {Unit: Count, Name: MetricName(TypeNode, RunningPodCount)}, - {Unit: Count, Name: MetricName(TypeNode, RunningContainerCount)}, - {Name: MetricName(TypeNode, CpuTotal)}, - {Name: MetricName(TypeNode, CpuLimit)}, - {Unit: Bytes, Name: MetricName(TypeNode, MemWorkingset)}, - {Unit: Bytes, Name: MetricName(TypeNode, MemLimit)}}, - DimensionSets: [][]string{{ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var podMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypePod, CpuUtilization)}, - {Unit: Percent, Name: MetricName(TypePod, MemUtilization)}, - {Unit: BytesPerSec, Name: MetricName(TypePod, NetRxBytes)}, - {Unit: BytesPerSec, Name: MetricName(TypePod, NetTxBytes)}, - {Unit: Percent, Name: MetricName(TypePod, CpuUtilizationOverPodLimit)}, - {Unit: Percent, Name: MetricName(TypePod, MemUtilizationOverPodLimit)}}, - DimensionSets: [][]string{{PodNameKey, K8sNamespace, ClusterNameKey}, {TypeService, K8sNamespace, ClusterNameKey}, {K8sNamespace, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypePod, CpuReservedCapacity)}, - {Unit: Percent, Name: MetricName(TypePod, MemReservedCapacity)}}, - DimensionSets: [][]string{{PodNameKey, K8sNamespace, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Count, Name: MetricName(TypePod, ContainerRestartCount)}}, - DimensionSets: [][]string{{PodNameKey, K8sNamespace, ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var nodeFSMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Percent, Name: MetricName(TypeNodeFS, FSUtilization)}}, - DimensionSets: [][]string{{NodeNameKey, InstanceIdKey, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var clusterMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Count, Name: MetricName(TypeCluster, NodeCount)}, - {Unit: Count, Name: MetricName(TypeCluster, FailedNodeCount)}}, - DimensionSets: [][]string{{ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var serviceMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Count, Name: MetricName(TypeService, RunningPodCount)}}, - DimensionSets: [][]string{{TypeService, K8sNamespace, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var namespaceMetricRules = []structuredlogscommon.MetricRule{ - { - Metrics: []structuredlogscommon.MetricAttr{ - {Unit: Count, Name: MetricName(K8sNamespace, RunningPodCount)}}, - DimensionSets: [][]string{{K8sNamespace, ClusterNameKey}, {ClusterNameKey}}, - Namespace: cloudwatchNamespace, - }, -} - -var staticMetricRule = map[string][]structuredlogscommon.MetricRule{ - TypeCluster: clusterMetricRules, - TypeClusterService: serviceMetricRules, - TypeClusterNamespace: namespaceMetricRules, - TypeNode: nodeMetricRules, - TypePod: podMetricRules, - TypeNodeFS: nodeFSMetricRules, -} - -func TagMetricRule(metric telegraf.Metric) { - rules, ok := staticMetricRule[metric.Tags()[MetricType]] - if !ok { - return - } - structuredlogscommon.AttachMetricRule(metric, rules) -} diff --git a/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger_test.go b/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger_test.go deleted file mode 100644 index 778eaf4325..0000000000 --- a/plugins/processors/k8sdecorator/structuredlogsadapter/metricruletagger_test.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package structuredlogsadapter - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -func TestNodeFull(t *testing.T) { - tags := map[string]string{MetricType: TypeNode, NodeNameKey: "TestNodeName", ClusterNameKey: "TestClusterName", InstanceIdKey: "i-123"} - fields := map[string]interface{}{MetricName(TypeNode, CpuUtilization): 0, MetricName(TypeNode, MemUtilization): 0, - MetricName(TypeNode, NetTotalBytes): 0, MetricName(TypeNode, CpuReservedCapacity): 0, MetricName(TypeNode, MemReservedCapacity): 0, - MetricName(TypeNode, RunningPodCount): 0, MetricName(TypeNode, RunningContainerCount): 0, MetricName(TypeNode, CpuTotal): 0, - MetricName(TypeNode, CpuLimit): 0, MetricName(TypeNode, MemWorkingset): 0, MetricName(TypeNode, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - expected := []structuredlogscommon.MetricRule{} - deepCopy(&expected, nodeMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeLackOfCpuUtilization(t *testing.T) { - tags := map[string]string{MetricType: TypeNode, NodeNameKey: "TestNodeName", ClusterNameKey: "TestClusterName", InstanceIdKey: "i-123"} - fields := map[string]interface{}{MetricName(TypeNode, MemUtilization): 0, - MetricName(TypeNode, NetTotalBytes): 0, MetricName(TypeNode, CpuReservedCapacity): 0, MetricName(TypeNode, MemReservedCapacity): 0, - MetricName(TypeNode, RunningPodCount): 0, MetricName(TypeNode, RunningContainerCount): 0, MetricName(TypeNode, CpuTotal): 0, - MetricName(TypeNode, CpuLimit): 0, MetricName(TypeNode, MemWorkingset): 0, MetricName(TypeNode, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeMetricRules)) - deepCopy(&expected, nodeMetricRules) - deleteMetricFromMetricRules(MetricName(TypeNode, CpuUtilization), expected) - - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeLackOfNodeNameKey(t *testing.T) { - tags := map[string]string{MetricType: TypeNode, ClusterNameKey: "TestClusterName", InstanceIdKey: "i-123"} - fields := map[string]interface{}{MetricName(TypeNode, CpuUtilization): 0, MetricName(TypeNode, MemUtilization): 0, - MetricName(TypeNode, NetTotalBytes): 0, MetricName(TypeNode, CpuReservedCapacity): 0, MetricName(TypeNode, MemReservedCapacity): 0, - MetricName(TypeNode, RunningPodCount): 0, MetricName(TypeNode, RunningContainerCount): 0, MetricName(TypeNode, CpuTotal): 0, - MetricName(TypeNode, CpuLimit): 0, MetricName(TypeNode, MemWorkingset): 0, MetricName(TypeNode, MemLimit): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeMetricRules)) - deepCopy(&expected, nodeMetricRules) - expected = append(expected[:0], expected[1:]...) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestPodFull(t *testing.T) { - tags := map[string]string{MetricType: TypePod, PodNameKey: "TestPodName", ClusterNameKey: "TestClusterName", TypeService: "TestServiceName", K8sNamespace: "TestNamespace"} - fields := map[string]interface{}{MetricName(TypePod, CpuUtilization): 0, MetricName(TypePod, MemUtilization): 0, - MetricName(TypePod, NetRxBytes): 0, MetricName(TypePod, NetTxBytes): 0, MetricName(TypePod, CpuUtilizationOverPodLimit): 0, - MetricName(TypePod, MemUtilizationOverPodLimit): 0, MetricName(TypePod, CpuReservedCapacity): 0, MetricName(TypePod, MemReservedCapacity): 0, MetricName(TypePod, ContainerRestartCount): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - expected := []structuredlogscommon.MetricRule{} - deepCopy(&expected, podMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestPodFullLackOfService(t *testing.T) { - tags := map[string]string{MetricType: TypePod, PodNameKey: "TestPodName", ClusterNameKey: "TestClusterName", K8sNamespace: "TestNamespace"} - fields := map[string]interface{}{MetricName(TypePod, CpuUtilization): 0, MetricName(TypePod, MemUtilization): 0, - MetricName(TypePod, NetRxBytes): 0, MetricName(TypePod, NetTxBytes): 0, MetricName(TypePod, CpuUtilizationOverPodLimit): 0, - MetricName(TypePod, MemUtilizationOverPodLimit): 0, MetricName(TypePod, CpuReservedCapacity): 0, MetricName(TypePod, MemReservedCapacity): 0, MetricName(TypePod, ContainerRestartCount): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - expected := []structuredlogscommon.MetricRule{} - deepCopy(&expected, podMetricRules) - deleteDimensionFromMetricRules(TypeService, expected) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestNodeFSFull(t *testing.T) { - tags := map[string]string{MetricType: TypeNodeFS, NodeNameKey: "TestNodeName", ClusterNameKey: "TestClusterName", InstanceIdKey: "i-123"} - fields := map[string]interface{}{MetricName(TypeNodeFS, FSUtilization): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(nodeFSMetricRules)) - deepCopy(&expected, nodeFSMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestClusterFull(t *testing.T) { - tags := map[string]string{MetricType: TypeCluster, ClusterNameKey: "TestClusterName"} - fields := map[string]interface{}{MetricName(TypeCluster, NodeCount): 0, MetricName(TypeCluster, FailedNodeCount): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(clusterMetricRules)) - deepCopy(&expected, clusterMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestClusterServiceFull(t *testing.T) { - tags := map[string]string{MetricType: TypeClusterService, ClusterNameKey: "TestClusterName", TypeService: "TestServiceName", K8sNamespace: "default"} - fields := map[string]interface{}{MetricName(TypeService, RunningPodCount): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(serviceMetricRules)) - deepCopy(&expected, serviceMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func TestClusterNamespaceFull(t *testing.T) { - tags := map[string]string{MetricType: TypeClusterNamespace, ClusterNameKey: "TestClusterName", K8sNamespace: "TestNamespace"} - fields := map[string]interface{}{MetricName(K8sNamespace, RunningPodCount): 0} - m := metric.New("test", tags, fields, time.Now()) - TagMetricRule(m) - actual := m.Fields()[structuredlogscommon.MetricRuleKey].([]structuredlogscommon.MetricRule) - - expected := make([]structuredlogscommon.MetricRule, len(namespaceMetricRules)) - deepCopy(&expected, namespaceMetricRules) - assert.Equal(t, expected, actual, "Expected to be equal") -} - -func deleteMetricFromMetricRules(metric string, rules []structuredlogscommon.MetricRule) { - for i := 0; i < len(rules); i++ { - rule := rules[i] - metricAttrs := rule.Metrics - idx := -1 - for i := 0; i < len(metricAttrs); i++ { - if metricAttrs[i].Name == metric { - idx = i - break - } - } - if idx != -1 { - metricAttrs = append(metricAttrs[:idx], metricAttrs[idx+1:]...) - rules[i].Metrics = metricAttrs - } - } -} - -func deleteDimensionFromMetricRules(dimension string, rules []structuredlogscommon.MetricRule) { - for i := 0; i < len(rules); i++ { - rule := rules[i] - var dimsSet [][]string - loop: - for _, dims := range rule.DimensionSets { - for _, dim := range dims { - if dim == dimension { - continue loop - } - } - dimsSet = append(dimsSet, dims) - } - rules[i].DimensionSets = dimsSet - } -} - -func deepCopy(dst interface{}, src interface{}) error { - if dst == nil { - return fmt.Errorf("dst cannot be nil") - } - if src == nil { - return fmt.Errorf("src cannot be nil") - } - bytes, err := json.Marshal(src) - if err != nil { - return fmt.Errorf("Unable to marshal src: %s", err) - } - err = json.Unmarshal(bytes, dst) - if err != nil { - return fmt.Errorf("Unable to unmarshal into dst: %s", err) - } - return nil -} diff --git a/plugins/processors/k8sdecorator/structuredlogsadapter/utils.go b/plugins/processors/k8sdecorator/structuredlogsadapter/utils.go deleted file mode 100644 index 9a317bbe73..0000000000 --- a/plugins/processors/k8sdecorator/structuredlogsadapter/utils.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package structuredlogsadapter - -import ( - "fmt" - - "github.com/influxdata/telegraf" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/logscommon" - "github.com/aws/amazon-cloudwatch-agent/internal/structuredlogscommon" -) - -func TagMetricSource(metric telegraf.Metric) { - metricType, ok := metric.Tags()[MetricType] - if !ok { - return - } - - var sources []string - switch metricType { - case TypeNode: - sources = append(sources, []string{"cadvisor", "/proc", "pod", "calculated"}...) - case TypeNodeFS: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeNodeNet: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeNodeDiskIO: - sources = append(sources, []string{"cadvisor"}...) - case TypePod: - sources = append(sources, []string{"cadvisor", "pod", "calculated"}...) - case TypePodNet: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeContainer: - sources = append(sources, []string{"cadvisor", "pod", "calculated"}...) - case TypeContainerFS: - sources = append(sources, []string{"cadvisor", "calculated"}...) - case TypeContainerDiskIO: - sources = append(sources, []string{"cadvisor"}...) - case TypeCluster, TypeClusterService, TypeClusterNamespace: - sources = append(sources, []string{"apiserver"}...) - } - - if len(sources) > 0 { - structuredlogscommon.AppendAttributesInFields(SourcesKey, sources, metric) - } -} - -func TagLogGroup(metric telegraf.Metric) { - logGroup := fmt.Sprintf("/aws/containerinsights/%s/performance", metric.Tags()[ClusterNameKey]) - metric.AddTag(logscommon.LogGroupNameTag, logGroup) -} - -func AddKubernetesInfo(metric telegraf.Metric, kubernetesBlob map[string]interface{}) { - tags := metric.Tags() - needMoveToKubernetes := map[string]string{ContainerNamekey: "container_name", K8sPodNameKey: "pod_name", - PodIdKey: "pod_id"} - needCopyToKubernetes := map[string]string{K8sNamespace: "namespace_name", TypeService: "service_name", NodeNameKey: "host"} - - for k, v := range needMoveToKubernetes { - if metric.HasTag(k) { - kubernetesBlob[v] = tags[k] - metric.RemoveTag(k) - } - } - for k, v := range needCopyToKubernetes { - if metric.HasTag(k) { - kubernetesBlob[v] = tags[k] - } - } - - if len(kubernetesBlob) > 0 { - structuredlogscommon.AppendAttributesInFields(Kubernetes, kubernetesBlob, metric) - } - structuredlogscommon.AddVersion(metric) -} diff --git a/plugins/processors/k8sdecorator/structuredlogsadapter/utils_test.go b/plugins/processors/k8sdecorator/structuredlogsadapter/utils_test.go deleted file mode 100644 index 090ac645af..0000000000 --- a/plugins/processors/k8sdecorator/structuredlogsadapter/utils_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - -package structuredlogsadapter - -import ( - "testing" - "time" - - "github.com/influxdata/telegraf/metric" - "github.com/stretchr/testify/assert" - - . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" -) - -func TestUtils_addKubernetesInfo(t *testing.T) { - tags := map[string]string{ContainerNamekey: "testContainer", K8sPodNameKey: "testPod", PodIdKey: "123", K8sNamespace: "testNamespace", TypeService: "testService", NodeNameKey: "testNode"} - m := metric.New("test", tags, map[string]interface{}{}, time.Now()) - kubernetesBlob := map[string]interface{}{} - AddKubernetesInfo(m, kubernetesBlob) - assert.Equal(t, "", m.Tags()[ContainerNamekey]) - assert.Equal(t, "", m.Tags()[K8sPodNameKey]) - assert.Equal(t, "", m.Tags()[PodIdKey]) - assert.Equal(t, "testNamespace", m.Tags()[K8sNamespace]) - assert.Equal(t, "testService", m.Tags()[TypeService]) - assert.Equal(t, "testNode", m.Tags()[NodeNameKey]) - assert.Equal(t, "0", m.Tags()["Version"]) - - expectedKubeBlob := map[string]interface{}{"container_name": "testContainer", "host": "testNode", "namespace_name": "testNamespace", "pod_id": "123", "pod_name": "testPod", "service_name": "testService"} - assert.Equal(t, expectedKubeBlob, kubernetesBlob) -} diff --git a/plugins/processors/kueueattributes/processor.go b/plugins/processors/kueueattributes/processor.go index 8769f797ff..c555e7bb26 100644 --- a/plugins/processors/kueueattributes/processor.go +++ b/plugins/processors/kueueattributes/processor.go @@ -11,7 +11,7 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/zap" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" ) const ( @@ -19,13 +19,13 @@ const ( ) var kueueLabelFilter = map[string]interface{}{ - containerinsightscommon.ClusterNameKey: nil, - containerinsightscommon.ClusterQueueNameKey: nil, - containerinsightscommon.ClusterQueueStatusKey: nil, - containerinsightscommon.ClusterQueueReasonKey: nil, - containerinsightscommon.ClusterQueueResourceKey: nil, - containerinsightscommon.Flavor: nil, - containerinsightscommon.NodeNameKey: nil, + constants.ClusterNameKey: nil, + constants.ClusterQueueNameKey: nil, + constants.ClusterQueueStatusKey: nil, + constants.ClusterQueueReasonKey: nil, + constants.ClusterQueueResourceKey: nil, + constants.Flavor: nil, + constants.NodeNameKey: nil, } type kueueAttributesProcessor struct { @@ -73,7 +73,7 @@ func (d *kueueAttributesProcessor) processMetricAttributes(m pmetric.Metric) { case pmetric.MetricTypeSum: dps = m.Sum().DataPoints() default: - d.logger.Debug("Ignore unknown metric type", zap.String(containerinsightscommon.MetricType, m.Type().String())) + d.logger.Debug("Ignore unknown metric type", zap.String(constants.MetricType, m.Type().String())) } for i := 0; i < dps.Len(); i++ { diff --git a/translator/tocwconfig/totomlconfig/tomlConfigTemplate/tomlConfig.go b/translator/tocwconfig/totomlconfig/tomlConfigTemplate/tomlConfig.go index 49f071999b..5b1363edcf 100644 --- a/translator/tocwconfig/totomlconfig/tomlConfigTemplate/tomlConfig.go +++ b/translator/tocwconfig/totomlconfig/tomlConfigTemplate/tomlConfig.go @@ -36,7 +36,7 @@ type ( Disk []diskConfig DiskIo []diskioConfig Ethtool []ethtoolConfig - K8sapiserver []k8sApiServerConfig + K8sapiserver []k8sAPIServerConfig Logfile []logFileConfig Mem []memConfig Net []netConfig @@ -58,9 +58,7 @@ type ( processorsConfig struct { Delta []processorDelta - EcsDecorator []ecsDecoratorConfig EmfProcessor []emfProcessorConfig - K8sDecorator []k8sDecoratorConfig } // Input Plugins @@ -138,7 +136,7 @@ type ( Filters []fileConfigFilter } - k8sApiServerConfig struct { + k8sAPIServerConfig struct { Interval string NodeName string `toml:"node_name"` Tags map[string]string @@ -291,12 +289,6 @@ type ( processorDelta struct { } - ecsDecoratorConfig struct { - HostIp string `toml:"host_ip"` - Order int - TagPass map[string][]string - } - emfProcessorConfig struct { MetricDeclarationDedup bool `toml:"metric_declaration_dedup"` MetricNamespace string `toml:"metric_namespace"` @@ -313,15 +305,4 @@ type ( MetricSelector []string `toml:"metric_selectors"` SourceLabels []string `toml:"source_labels"` } - - k8sDecoratorConfig struct { - ClusterName string `toml:"cluster_name"` - DisableMetricExtraction bool `toml:"disable_metric_extraction"` - HostIp string `toml:"host_ip"` - NodeName string `toml:"host_name_from_env"` - Order int - PreferFullPodName bool `toml:"prefer_full_pod_name"` - TagService bool `toml:"tag_service"` - TagPass map[string][]string - } ) diff --git a/translator/translate/otel/processor/metricstransformprocessor/translator.go b/translator/translate/otel/processor/metricstransformprocessor/translator.go index 0bcab4632f..2b44d78a84 100644 --- a/translator/translate/otel/processor/metricstransformprocessor/translator.go +++ b/translator/translate/otel/processor/metricstransformprocessor/translator.go @@ -12,7 +12,7 @@ import ( "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/processor" - "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" + "github.com/aws/amazon-cloudwatch-agent/internal/constants" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsight" ) @@ -24,48 +24,48 @@ var metricTransformJmxConfig string var appSignalsRuntimeConfig string var metricDuplicateTypes = []string{ - containerinsightscommon.TypeGpuContainer, - containerinsightscommon.TypeGpuPod, - containerinsightscommon.TypeGpuNode, + constants.TypeGpuContainer, + constants.TypeGpuPod, + constants.TypeGpuNode, } var renameMapForDcgm = map[string]string{ - "DCGM_FI_DEV_GPU_UTIL": containerinsightscommon.GpuUtilization, - "DCGM_FI_DEV_FB_USED_PERCENT": containerinsightscommon.GpuMemUtilization, - "DCGM_FI_DEV_FB_USED": containerinsightscommon.GpuMemUsed, - "DCGM_FI_DEV_FB_TOTAL": containerinsightscommon.GpuMemTotal, - "DCGM_FI_DEV_GPU_TEMP": containerinsightscommon.GpuTemperature, - "DCGM_FI_DEV_POWER_USAGE": containerinsightscommon.GpuPowerDraw, + "DCGM_FI_DEV_GPU_UTIL": constants.GpuUtilization, + "DCGM_FI_DEV_FB_USED_PERCENT": constants.GpuMemUtilization, + "DCGM_FI_DEV_FB_USED": constants.GpuMemUsed, + "DCGM_FI_DEV_FB_TOTAL": constants.GpuMemTotal, + "DCGM_FI_DEV_GPU_TEMP": constants.GpuTemperature, + "DCGM_FI_DEV_POWER_USAGE": constants.GpuPowerDraw, } var renameMapForNeuronMonitor = map[string]string{ - "execution_errors_total": containerinsightscommon.NeuronExecutionErrors, - "execution_status_total": containerinsightscommon.NeuronExecutionStatus, - "neuron_runtime_memory_used_bytes": containerinsightscommon.NeuronRuntimeMemoryUsage, - "neuroncore_memory_usage_constants": containerinsightscommon.NeuronCoreMemoryUtilizationConstants, - "neuroncore_memory_usage_model_code": containerinsightscommon.NeuronCoreMemoryUtilizationModelCode, - "neuroncore_memory_usage_model_shared_scratchpad": containerinsightscommon.NeuronCoreMemoryUtilizationSharedScratchpad, - "neuroncore_memory_usage_runtime_memory": containerinsightscommon.NeuronCoreMemoryUtilizationRuntimeMemory, - "neuroncore_memory_usage_tensors": containerinsightscommon.NeuronCoreMemoryUtilizationTensors, - "neuroncore_utilization_ratio": containerinsightscommon.NeuronCoreUtilization, - "instance_info": containerinsightscommon.NeuronInstanceInfo, - "neuron_hardware": containerinsightscommon.NeuronHardware, - "hardware_ecc_events_total": containerinsightscommon.NeuronDeviceHardwareEccEvents, - "execution_latency_seconds": containerinsightscommon.NeuronExecutionLatency, + "execution_errors_total": constants.NeuronExecutionErrors, + "execution_status_total": constants.NeuronExecutionStatus, + "neuron_runtime_memory_used_bytes": constants.NeuronRuntimeMemoryUsage, + "neuroncore_memory_usage_constants": constants.NeuronCoreMemoryUtilizationConstants, + "neuroncore_memory_usage_model_code": constants.NeuronCoreMemoryUtilizationModelCode, + "neuroncore_memory_usage_model_shared_scratchpad": constants.NeuronCoreMemoryUtilizationSharedScratchpad, + "neuroncore_memory_usage_runtime_memory": constants.NeuronCoreMemoryUtilizationRuntimeMemory, + "neuroncore_memory_usage_tensors": constants.NeuronCoreMemoryUtilizationTensors, + "neuroncore_utilization_ratio": constants.NeuronCoreUtilization, + "instance_info": constants.NeuronInstanceInfo, + "neuron_hardware": constants.NeuronHardware, + "hardware_ecc_events_total": constants.NeuronDeviceHardwareEccEvents, + "execution_latency_seconds": constants.NeuronExecutionLatency, } var renameMapForNvme = map[string]string{ - "aws_ebs_csi_read_ops_total": containerinsightscommon.NvmeReadOpsTotal, - "aws_ebs_csi_write_ops_total": containerinsightscommon.NvmeWriteOpsTotal, - "aws_ebs_csi_read_bytes_total": containerinsightscommon.NvmeReadBytesTotal, - "aws_ebs_csi_write_bytes_total": containerinsightscommon.NvmeWriteBytesTotal, - "aws_ebs_csi_read_seconds_total": containerinsightscommon.NvmeReadTime, - "aws_ebs_csi_write_seconds_total": containerinsightscommon.NvmeWriteTime, - "aws_ebs_csi_exceeded_iops_seconds_total": containerinsightscommon.NvmeExceededIOPSTime, - "aws_ebs_csi_exceeded_tp_seconds_total": containerinsightscommon.NvmeExceededTPTime, - "aws_ebs_csi_ec2_exceeded_iops_seconds_total": containerinsightscommon.NvmeExceededEC2IOPSTime, - "aws_ebs_csi_ec2_exceeded_tp_seconds_total": containerinsightscommon.NvmeExceededEC2TPTime, - "aws_ebs_csi_volume_queue_length": containerinsightscommon.NvmeVolumeQueueLength, + "aws_ebs_csi_read_ops_total": constants.NvmeReadOpsTotal, + "aws_ebs_csi_write_ops_total": constants.NvmeWriteOpsTotal, + "aws_ebs_csi_read_bytes_total": constants.NvmeReadBytesTotal, + "aws_ebs_csi_write_bytes_total": constants.NvmeWriteBytesTotal, + "aws_ebs_csi_read_seconds_total": constants.NvmeReadTime, + "aws_ebs_csi_write_seconds_total": constants.NvmeWriteTime, + "aws_ebs_csi_exceeded_iops_seconds_total": constants.NvmeExceededIOPSTime, + "aws_ebs_csi_exceeded_tp_seconds_total": constants.NvmeExceededTPTime, + "aws_ebs_csi_ec2_exceeded_iops_seconds_total": constants.NvmeExceededEC2IOPSTime, + "aws_ebs_csi_ec2_exceeded_tp_seconds_total": constants.NvmeExceededEC2TPTime, + "aws_ebs_csi_volume_queue_length": constants.NvmeVolumeQueueLength, } type translator struct { @@ -108,11 +108,11 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { transformRules = append(transformRules, map[string]interface{}{ "include": oldNvmeMetric, "action": "update", - "new_name": containerinsightscommon.MetricName(containerinsightscommon.TypeNode, newNvmeMetric), + "new_name": metricName(constants.TypeNode, newNvmeMetric), "operations": []map[string]interface{}{{ "action": "add_label", - "new_label": containerinsightscommon.MetricType, - "new_value": containerinsightscommon.TypeNodeEBS, + "new_label": constants.MetricType, + "new_value": constants.TypeNodeEBS, }}, }) } @@ -137,12 +137,12 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { for old, new := range renameMapForDcgm { var operations []map[string]interface{} // convert decimals to percent - if new == containerinsightscommon.GpuMemUtilization { + if new == constants.GpuMemUtilization { operations = append(operations, map[string]interface{}{ "action": "experimental_scale_value", "experimental_scale": 100, }) - } else if new == containerinsightscommon.GpuMemTotal || new == containerinsightscommon.GpuMemUsed { + } else if new == constants.GpuMemTotal || new == constants.GpuMemUsed { operations = append(operations, map[string]interface{}{ "action": "experimental_scale_value", "experimental_scale": 1024 * 1024, @@ -152,11 +152,11 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { transformRules = append(transformRules, map[string]interface{}{ "include": old, "action": "insert", - "new_name": containerinsightscommon.MetricName(t, new), + "new_name": metricName(t, new), "operations": append([]map[string]interface{}{ { "action": "add_label", - "new_label": containerinsightscommon.MetricType, + "new_label": constants.MetricType, "new_value": t, }, }, operations...), @@ -166,7 +166,7 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { for oldName, newName := range renameMapForNeuronMonitor { var operations []map[string]interface{} - if newName == containerinsightscommon.NeuronCoreUtilization { + if newName == constants.NeuronCoreUtilization { operations = append(operations, map[string]interface{}{ "action": "experimental_scale_value", "experimental_scale": 100, @@ -228,3 +228,24 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { return cfg, nil } + +func metricName(mType string, name string) string { + prefix := "" + nodePrefix := "node_" + podPrefix := "pod_" + containerPrefix := "container_" + cluster := "cluster_" + + switch mType { + case constants.TypeContainer, constants.TypeGpuContainer: + prefix = containerPrefix + case constants.TypePod, constants.TypeGpuPod: + prefix = podPrefix + case constants.TypeNode, constants.TypeGpuNode: + prefix = nodePrefix + case constants.TypeCluster, constants.TypeGpuCluster: + prefix = cluster + default: + } + return prefix + name +}