Skip to content

Commit

Permalink
Add metrics for inbound HTTP requests (#97)
Browse files Browse the repository at this point in the history
This patch adds to all the servers a metrics server that provides the
default metrics generated by the Prometheus Go instrumentation library
as well as metrics for the number and duration of inbound HTTP requests.

The metrics handler will listen by default on port 8001, but that can be
changed with the following options:

- `--metrics-listener-address` - Metrics listen address. (default "localhost:8001")
- `--metrics-listener-tls-crt` - Metrics TLS certificate in PEM format.
- `--metrics-listener-tls-key` - Metrics TLS key in PEM format.

The metrics generated are the following:

- `inbound_request_count` - Number of API requests received.
- `inbound_request_duration_sum` - Total time taken to serve API requests, in seconds.
- `inbound_request_duration_count` - Total number of API requests measured.
- `inbound_request_duration_bucket` - Number of API requests organized in buckets.

With the following labels:

- `method` - Name of the HTTP method, for example GET or POST.
- `path` - Request path, for example /api/my/v1/resources.
- `code` - HTTP response code, for example 200 or 500.

In order to reduce the cardinality of the metrics the path label is
modified to remove the identifiers of the objects. For example, if the
original path is `.../deploymentManagers/123` then it will be replaced
by `.../deploymentManagers/-`, and the values will be accumulated. The
line returned by the metrics server will be like this:

```
inbound_request_count{code="200",method="GET",path=".../deploymentManagers/-"} 56
```

For example, these are some metrics generated by the deployment manager
server:

```
inbound_request_count{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers"} 4
inbound_request_duration_bucket{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers",le="0.1"} 0
inbound_request_duration_bucket{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers",le="1"} 0
inbound_request_duration_bucket{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers",le="10"} 0
inbound_request_duration_bucket{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers",le="30"} 0
inbound_request_duration_bucket{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers",le="+Inf"} 4
inbound_request_duration_sum{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers"} 602.541828752
inbound_request_duration_count{code="500",method="GET",path="/o2ims-infrastructureInventory/-/deploymentManagers"} 4
```

Signed-off-by: Juan Hernandez <[email protected]>
  • Loading branch information
jhernand authored May 2, 2024
1 parent 99b6106 commit b1a7a90
Show file tree
Hide file tree
Showing 17 changed files with 2,575 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ vet: ## Run go vet against code.
lint:
@echo "Run lint"
golangci-lint --version
golangci-lint run --verbose --print-resources-usage --modules-download-mode=vendor --timeout=5m0s
golangci-lint run --verbose --print-resources-usage --timeout=5m0s

.PHONY: deps-update
deps-update:
Expand Down
85 changes: 79 additions & 6 deletions internal/cmd/server/start_alarm_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,21 @@ package server

import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"net/url"
"strings"

"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/cobra"

"github.com/openshift-kni/oran-o2ims/internal"
"github.com/openshift-kni/oran-o2ims/internal/exit"
"github.com/openshift-kni/oran-o2ims/internal/logging"
"github.com/openshift-kni/oran-o2ims/internal/metrics"
"github.com/openshift-kni/oran-o2ims/internal/network"
"github.com/openshift-kni/oran-o2ims/internal/service"
)
Expand All @@ -49,6 +52,7 @@ func AlarmServer() *cobra.Command {
}
flags := result.Flags()
network.AddListenerFlags(flags, network.APIListener, network.APIAddress)
network.AddListenerFlags(flags, network.MetricsListener, network.MetricsAddress)
_ = flags.String(
cloudIDFlagName,
"",
Expand Down Expand Up @@ -105,6 +109,19 @@ func (c *AlarmServerCommand) run(cmd *cobra.Command, argv []string) error {
// Get the flags:
flags := cmd.Flags()

// Create the exit handler:
exitHandler, err := exit.NewHandler().
SetLogger(c.logger).
Build()
if err != nil {
c.logger.ErrorContext(
ctx,
"Failed to create exit handler",
slog.String("error", err.Error()),
)
return exit.Error(1)
}

// Get the cloud identifier:
cloudID, err := flags.GetString(cloudIDFlagName)
if err != nil {
Expand Down Expand Up @@ -228,6 +245,23 @@ func (c *AlarmServerCommand) run(cmd *cobra.Command, argv []string) error {
)
}

// Create the metrics wrapper:
metricsWrapper, err := metrics.NewHandlerWrapper().
AddPaths(
"/o2ims-infrastructureMonitoring/-/alarms/-",
"/o2ims-infrastructureMonitoring/-/alarmProbableCauses/-",
).
SetSubsystem("inbound").
Build()
if err != nil {
c.logger.ErrorContext(
ctx,
"Failed to create metrics wrapper",
slog.String("error", err.Error()),
)
return exit.Error(1)
}

// Create the router:
router := mux.NewRouter()
router.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -236,6 +270,7 @@ func (c *AlarmServerCommand) run(cmd *cobra.Command, argv []string) error {
router.MethodNotAllowedHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
service.SendError(w, http.StatusMethodNotAllowed, "Method not allowed")
})
router.Use(metricsWrapper)

// Generate the search API URL according the backend URL
backendURL, err = c.generateAlarmmanagerApiUrl(backendURL)
Expand Down Expand Up @@ -275,24 +310,62 @@ func (c *AlarmServerCommand) run(cmd *cobra.Command, argv []string) error {
}
c.logger.InfoContext(
ctx,
"API listening",
"API server listening",
slog.String("address", apiListener.Addr().String()),
)
apiServer := http.Server{
apiServer := &http.Server{
Addr: apiListener.Addr().String(),
Handler: router,
}
err = apiServer.Serve(apiListener)
exitHandler.AddServer(apiServer)
go func() {
err = apiServer.Serve(apiListener)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
c.logger.ErrorContext(
ctx,
"API server finished with error",
slog.String("error", err.Error()),
)
}
}()

// Start the metrics server:
metricsListener, err := network.NewListener().
SetLogger(c.logger).
SetFlags(flags, network.MetricsListener).
Build()
if err != nil {
c.logger.ErrorContext(
ctx,
"API server finished with error",
"Failed to create metrics listener",
slog.String("error", err.Error()),
)
return exit.Error(1)
}

return nil
c.logger.InfoContext(
ctx,
"Metrics server listening",
slog.String("address", metricsListener.Addr().String()),
)
metricsHandler := promhttp.Handler()
metricsServer := &http.Server{
Addr: metricsListener.Addr().String(),
Handler: metricsHandler,
}
exitHandler.AddServer(metricsServer)
go func() {
err = metricsServer.Serve(metricsListener)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
c.logger.ErrorContext(
ctx,
"Metrics server finished with error",
slog.String("error", err.Error()),
)
}
}()

// Wait for exit signals:
return exitHandler.Wait(ctx)
}

func (c *AlarmServerCommand) createAlarmHandler(
Expand Down
84 changes: 78 additions & 6 deletions internal/cmd/server/start_alarm_subscription_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,20 @@ License.
package server

import (
"errors"
"log/slog"
"net/http"

"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/cobra"

"github.com/openshift-kni/oran-o2ims/internal"
"github.com/openshift-kni/oran-o2ims/internal/authentication"
"github.com/openshift-kni/oran-o2ims/internal/authorization"
"github.com/openshift-kni/oran-o2ims/internal/exit"
"github.com/openshift-kni/oran-o2ims/internal/logging"
"github.com/openshift-kni/oran-o2ims/internal/metrics"
"github.com/openshift-kni/oran-o2ims/internal/network"
"github.com/openshift-kni/oran-o2ims/internal/service"
)
Expand All @@ -45,6 +48,7 @@ func AlarmSubscriptionServer() *cobra.Command {
authorization.AddFlags(flags)

network.AddListenerFlags(flags, network.APIListener, network.APIAddress)
network.AddListenerFlags(flags, network.MetricsListener, network.MetricsAddress)
_ = flags.String(
cloudIDFlagName,
"",
Expand Down Expand Up @@ -80,6 +84,19 @@ func (c *AlarmSubscriptionServerCommand) run(cmd *cobra.Command, argv []string)
// Get the flags:
flags := cmd.Flags()

// Create the exit handler:
exitHandler, err := exit.NewHandler().
SetLogger(logger).
Build()
if err != nil {
logger.ErrorContext(
ctx,
"Failed to create exit handler",
slog.String("error", err.Error()),
)
return exit.Error(1)
}

// Get the cloud identifier:
cloudID, err := flags.GetString(cloudIDFlagName)
if err != nil {
Expand Down Expand Up @@ -161,6 +178,23 @@ func (c *AlarmSubscriptionServerCommand) run(cmd *cobra.Command, argv []string)
)
return exit.Error(1)
}

// Create the metrics wrapper:
metricsWrapper, err := metrics.NewHandlerWrapper().
AddPaths(
"/o2ims-infrastructureMonitoring/-/alarmSubscriptions/-",
).
SetSubsystem("inbound").
Build()
if err != nil {
logger.ErrorContext(
ctx,
"Failed to create metrics wrapper",
slog.String("error", err.Error()),
)
return exit.Error(1)
}

// Create the router:
router := mux.NewRouter()
router.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -169,7 +203,7 @@ func (c *AlarmSubscriptionServerCommand) run(cmd *cobra.Command, argv []string)
router.MethodNotAllowedHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
service.SendError(w, http.StatusMethodNotAllowed, "Method not allowed")
})
router.Use(authenticationWrapper, authorizationWrapper)
router.Use(metricsWrapper, authenticationWrapper, authorizationWrapper)

// Create the handler:
handler, err := service.NewAlarmSubscriptionHandler().
Expand Down Expand Up @@ -226,22 +260,60 @@ func (c *AlarmSubscriptionServerCommand) run(cmd *cobra.Command, argv []string)
}
logger.InfoContext(
ctx,
"API listening",
"API server listening",
slog.String("address", apiListener.Addr().String()),
)
apiServer := http.Server{
apiServer := &http.Server{
Addr: apiListener.Addr().String(),
Handler: router,
}
err = apiServer.Serve(apiListener)
exitHandler.AddServer(apiServer)
go func() {
err = apiServer.Serve(apiListener)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
logger.ErrorContext(
ctx,
"API server finished with error",
slog.String("error", err.Error()),
)
}
}()

// Start the metrics server:
metricsListener, err := network.NewListener().
SetLogger(logger).
SetFlags(flags, network.MetricsListener).
Build()
if err != nil {
logger.ErrorContext(
ctx,
"API server finished with error",
"Failed to create metrics listener",
slog.String("error", err.Error()),
)
return exit.Error(1)
}
logger.InfoContext(
ctx,
"Metrics server listening",
slog.String("address", metricsListener.Addr().String()),
)
metricsHandler := promhttp.Handler()
metricsServer := &http.Server{
Addr: metricsListener.Addr().String(),
Handler: metricsHandler,
}
exitHandler.AddServer(metricsServer)
go func() {
err = metricsServer.Serve(metricsListener)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
logger.ErrorContext(
ctx,
"Metrics server finished with error",
slog.String("error", err.Error()),
)
}
}()

return nil
// Wait for exit signals:
return exitHandler.Wait(ctx)
}
Loading

0 comments on commit b1a7a90

Please sign in to comment.