Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move introspection server to ecs-agent #4470

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 15 additions & 97 deletions agent/handlers/introspection_server_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,118 +16,36 @@ package handlers

import (
"context"
"encoding/json"
"net/http"
"net/http/pprof"
"strconv"
"time"

"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/engine"
handlersutils "github.com/aws/amazon-ecs-agent/agent/handlers/utils"
v1 "github.com/aws/amazon-ecs-agent/agent/handlers/v1"
logginghandler "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/logging"
"github.com/aws/amazon-ecs-agent/ecs-agent/introspection"
"github.com/aws/amazon-ecs-agent/ecs-agent/metrics"
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"
"github.com/cihub/seelog"
)

// rootResponse is the JSON payload served by the introspection server's
// default ("/") handler.
type rootResponse struct {
// AvailableCommands lists the URL paths advertised by the server: the v1
// paths, plus the pprof paths when runtime stats are enabled.
AvailableCommands []string
}

const (
// With pprof we need to increase the timeout so that there is enough time to do the profiling. Since the profiling
// time window for CPU is configurable in the request, this timeout effectively means the CPU profiling will be
// capped to 5 min.
writeTimeoutForPprof = time.Minute * 5
// pprofBasePath is the URL prefix under which the pprof index handler is registered.
pprofBasePath = "/debug/pprof/"
// The paths below are built from pprofBasePath; each one is registered with the
// matching net/http/pprof handler when runtime stats are enabled.
pprofCMDLinePath = pprofBasePath + "cmdline"
pprofProfilePath = pprofBasePath + "profile"
pprofSymbolPath = pprofBasePath + "symbol"
pprofTracePath = pprofBasePath + "trace"
)

var (
// Injection points for testing.
// Each variable defaults to the corresponding net/http/pprof handler and is
// what pprofHandlerSetup registers on the server mux; keeping them as
// package-level variables lets tests substitute their own handlers.
pprofIndexHandler = pprof.Index
pprofCmdlineHandler = pprof.Cmdline
pprofProfileHandler = pprof.Profile
pprofSymbolHandler = pprof.Symbol
pprofTraceHandler = pprof.Trace
)

// introspectionServerSetup builds the HTTP server for the agent introspection
// endpoint without starting it.
//
// The root path ("/") responds with a JSON list of the available paths. The v1
// handlers are always registered; the pprof handlers are registered, and the
// write timeout is raised to writeTimeoutForPprof, only when runtime stats are
// enabled in cfg. Every request goes through the logging handler before it
// reaches the routing mux.
func introspectionServerSetup(containerInstanceArn *string, taskEngine handlersutils.DockerStateResolver, cfg *config.Config) *http.Server {
	// Paths advertised by the root handler.
	advertised := []string{v1.AgentMetadataPath, v1.TaskContainerMetadataPath, v1.LicensePath}
	if cfg.EnableRuntimeStats.Enabled() {
		advertised = append(advertised, pprofBasePath, pprofCMDLinePath, pprofProfilePath, pprofSymbolPath, pprofTracePath)
	}

	// The listing is marshaled once up front and reused for every request.
	// A marshaling failure is only logged; the root handler then writes an
	// empty body.
	rootBody, err := json.Marshal(&rootResponse{AvailableCommands: advertised})
	if err != nil {
		seelog.Errorf("Error marshaling JSON in introspection server setup: %s", err)
	}

	routes := http.NewServeMux()
	routes.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
		w.Write(rootBody)
	})
	v1HandlersSetup(routes, containerInstanceArn, taskEngine, cfg)
	pprofHandlerSetup(routes, cfg)

	// Outer mux: log all requests, then pass them through to the routing mux.
	logged := http.NewServeMux()
	logged.Handle("/", logginghandler.NewLoggingHandler(routes))

	server := &http.Server{
		Addr:         ":" + strconv.Itoa(config.AgentIntrospectionPort),
		Handler:      logged,
		ReadTimeout:  readTimeout,
		WriteTimeout: writeTimeout,
	}
	// Profiling requests can run for a long time, so give them a larger write window.
	if cfg.EnableRuntimeStats.Enabled() {
		server.WriteTimeout = writeTimeoutForPprof
	}
	return server
}

// v1HandlersSetup registers the v1 introspection handlers (agent metadata,
// task/container metadata and license) on serverMux. The v1 CredentialsHandler
// is intentionally not registered here.
func v1HandlersSetup(
	serverMux *http.ServeMux,
	containerInstanceArn *string,
	taskEngine handlersutils.DockerStateResolver,
	cfg *config.Config,
) {
	agentMetadata := v1.AgentMetadataHandler(containerInstanceArn, cfg)
	serverMux.HandleFunc(v1.AgentMetadataPath, agentMetadata)

	taskContainerMetadata := v1.TaskContainerMetadataHandler(taskEngine)
	serverMux.HandleFunc(v1.TaskContainerMetadataPath, taskContainerMetadata)

	serverMux.HandleFunc(v1.LicensePath, v1.LicenseHandler)
}

// pprofHandlerSetup registers the pprof handlers on serverMux when runtime
// stats are enabled in cfg; otherwise it leaves serverMux untouched.
func pprofHandlerSetup(serverMux *http.ServeMux, cfg *config.Config) {
	if !cfg.EnableRuntimeStats.Enabled() {
		return
	}

	// Path-to-handler table; handlers come from the package-level injection points.
	routes := []struct {
		path    string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{pprofBasePath, pprofIndexHandler},
		{pprofCMDLinePath, pprofCmdlineHandler},
		{pprofProfilePath, pprofProfileHandler},
		{pprofSymbolPath, pprofSymbolHandler},
		{pprofTracePath, pprofTraceHandler},
	}
	for _, route := range routes {
		serverMux.HandleFunc(route.path, route.handler)
	}
}

// ServeIntrospectionHTTPEndpoint serves information about this agent/containerInstance and tasks
// running on it. "V1" here indicates the hostname version of this server instead
// of the handler versions, i.e. "V1" server can include "V1" and "V2" handlers.
// ServeIntrospectionHTTPEndpoint serves information about this agent/containerInstance and tasks running on it.
func ServeIntrospectionHTTPEndpoint(ctx context.Context, containerInstanceArn *string, taskEngine engine.TaskEngine, cfg *config.Config) {
// Is this the right level to type assert, assuming we'd abstract multiple taskengines here?
// Revisit if we ever add another type..
dockerTaskEngine := taskEngine.(*engine.DockerTaskEngine)
agentState := &v1.AgentStateImpl{
ContainerInstanceArn: containerInstanceArn,
ClusterName: cfg.Cluster,
TaskEngine: dockerTaskEngine,
}

server := introspectionServerSetup(containerInstanceArn, dockerTaskEngine, cfg)
server, _ := introspection.NewServer(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to deal with the error returned here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, this one is important. Updated to match metadata server logic.

agentState,
metrics.NewNopEntryFactory(),
introspection.WithReadTimeout(readTimeout),
introspection.WithWriteTimeout(writeTimeout),
introspection.WithRuntimeStats(cfg.EnableRuntimeStats.Enabled()),
)

go func() {
<-ctx.Done()
Expand Down
Loading
Loading