Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dumping logs for GCE scale tests #16266

Merged
merged 1 commit into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions cmd/kops/toolbox_dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
return fmt.Errorf("adding key to SSH agent: %w", err)
}

dumper := dump.NewLogDumper(cluster.ObjectMeta.Name, sshConfig, keyRing, options.Dir)
// look for a bastion instance and use it if one exists
bastionAddress := ""
for _, instance := range d.Instances {
if strings.Contains(instance.Name, "bastion") {
bastionAddress = instance.PublicAddresses[0]
}
}
dumper := dump.NewLogDumper(bastionAddress, sshConfig, keyRing, options.Dir)

var additionalIPs []string
var additionalPrivateIPs []string
Expand All @@ -224,7 +231,7 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
}
}

if err := dumper.DumpAllNodes(ctx, nodes, additionalIPs, additionalPrivateIPs); err != nil {
if err := dumper.DumpAllNodes(ctx, nodes, options.MaxNodes, additionalIPs, additionalPrivateIPs); err != nil {
return fmt.Errorf("error dumping nodes: %v", err)
}

Expand Down
27 changes: 13 additions & 14 deletions pkg/dump/dumper.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ import (
"k8s.io/klog/v2"
)

const (
// MaxNodesToDump is the maximum number of nodes to dump
MaxNodesToDump = 500
)

// logDumper gets all the nodes from a kubernetes cluster and dumps a well-known set of logs
type logDumper struct {
sshClientFactory sshClientFactory
Expand All @@ -51,12 +46,15 @@ type logDumper struct {
}

// NewLogDumper is the constructor for a logDumper
func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
func NewLogDumper(bastionAddress string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
sshClientFactory := &sshClientFactoryImplementation{
bastion: "bastion." + clusterName,
keyRing: keyRing,
sshConfig: sshConfig,
}
if bastionAddress != "" {
log.Printf("detected a bastion instance, with the address: %s", bastionAddress)
sshClientFactory.bastion = bastionAddress
}

d := &logDumper{
sshClientFactory: sshClientFactory,
Expand Down Expand Up @@ -106,9 +104,10 @@ func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent
// if the IPs are not found from kubectl get nodes, then these will be dumped also.
// This allows for dumping logs on nodes even if they don't register as a Kubernetes
// node, or if a node fails to register, or if the whole cluster fails to start.
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, additionalIPs, additionalPrivateIPs []string) error {
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, maxNodesToDump int, additionalIPs, additionalPrivateIPs []string) error {
var special, regular, dumped []*corev1.Node

log.Printf("starting to dump %d nodes fetched through the Kubernetes APIs", len(nodes.Items))
for i := range nodes.Items {
node := &nodes.Items[i]

Expand Down Expand Up @@ -139,8 +138,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
}

for i := range regular {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
node := regular[i]
Expand All @@ -154,8 +153,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add

notDumped := findInstancesNotDumped(additionalIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, false)
Expand All @@ -166,8 +165,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add

notDumped = findInstancesNotDumped(additionalPrivateIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, true)
Expand Down
6 changes: 6 additions & 0 deletions pkg/resources/gce/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ func DumpManagedInstance(op *resources.DumpOperation, r *resources.Resource) err
klog.Warningf("instance %q not found", instance.Instance)
} else {
for _, ni := range instanceDetails.NetworkInterfaces {
if ni.NetworkIP != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.NetworkIP)
}
if ni.Ipv6Address != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.Ipv6Address)
}
for _, ac := range ni.AccessConfigs {
if ac.NatIP != "" {
i.PublicAddresses = append(i.PublicAddresses, ac.NatIP)
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/kubetest2-kops/deployer/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ func (d *deployer) initialize() error {
d.SSHPublicKeyPath = publicKey
}
d.createBucket = true
} else if d.SSHPrivateKeyPath == "" && os.Getenv("KUBE_SSH_KEY_PATH") != "" {
d.SSHPrivateKeyPath = os.Getenv("KUBE_SSH_KEY_PATH")
}
}

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/kubetest2-kops/deployer/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type deployer struct {
ValidationWait time.Duration `flag:"validation-wait" desc:"time to wait for newly created cluster to pass validation"`
ValidationCount int `flag:"validation-count" desc:"how many times should a validation pass"`
ValidationInterval time.Duration `flag:"validation-interval" desc:"time in duration to wait between validation attempts"`
MaxNodesToDump string `flag:"max-nodes-to-dump" desc:"max number of nodes to dump logs from, helpful to set when running scale tests"`

TemplatePath string `flag:"template-path" desc:"The path to the manifest template used for cluster creation"`

Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/kubetest2-kops/deployer/dumplogs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ func (d *deployer) DumpClusterLogs() error {
"--private-key", d.SSHPrivateKeyPath,
"--ssh-user", d.SSHUser,
}

if d.MaxNodesToDump != "" {
args = append(args, "--max-nodes", d.MaxNodesToDump)
}
klog.Info(strings.Join(args, " "))
cmd := exec.Command(args[0], args[1:]...)
cmd.SetEnv(append(d.env(), "KOPS_TOOLBOX_DUMP_K8S_RESOURCES=1")...)
Expand Down