Skip to content

Commit

Permalink
Merge pull request #16266 from borg-land/dump-patch
Browse files (browse the repository at this point in the history)
Fix dumping logs for GCE scale tests
  • Loading branch information
k8s-ci-robot authored Jan 26, 2024
2 parents 4db109f + 4717450 commit e5c4fe8
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 16 deletions.
11 changes: 9 additions & 2 deletions cmd/kops/toolbox_dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
return fmt.Errorf("adding key to SSH agent: %w", err)
}

dumper := dump.NewLogDumper(cluster.ObjectMeta.Name, sshConfig, keyRing, options.Dir)
// look for a bastion instance and use it if one exists
bastionAddress := ""
for _, instance := range d.Instances {
if strings.Contains(instance.Name, "bastion") {
bastionAddress = instance.PublicAddresses[0]
}
}
dumper := dump.NewLogDumper(bastionAddress, sshConfig, keyRing, options.Dir)

var additionalIPs []string
var additionalPrivateIPs []string
Expand All @@ -224,7 +231,7 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
}
}

if err := dumper.DumpAllNodes(ctx, nodes, additionalIPs, additionalPrivateIPs); err != nil {
if err := dumper.DumpAllNodes(ctx, nodes, options.MaxNodes, additionalIPs, additionalPrivateIPs); err != nil {
return fmt.Errorf("error dumping nodes: %v", err)
}

Expand Down
27 changes: 13 additions & 14 deletions pkg/dump/dumper.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ import (
"k8s.io/klog/v2"
)

// MaxNodesToDump is the upper bound on how many nodes will have their logs dumped.
const MaxNodesToDump = 500

// logDumper gets all the nodes from a kubernetes cluster and dumps a well-known set of logs
type logDumper struct {
sshClientFactory sshClientFactory
Expand All @@ -51,12 +46,15 @@ type logDumper struct {
}

// NewLogDumper is the constructor for a logDumper
func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
func NewLogDumper(bastionAddress string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
sshClientFactory := &sshClientFactoryImplementation{
bastion: "bastion." + clusterName,
keyRing: keyRing,
sshConfig: sshConfig,
}
if bastionAddress != "" {
log.Printf("detected a bastion instance, with the address: %s", bastionAddress)
sshClientFactory.bastion = bastionAddress
}

d := &logDumper{
sshClientFactory: sshClientFactory,
Expand Down Expand Up @@ -106,9 +104,10 @@ func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent
// if the IPs are not found from kubectl get nodes, then these will be dumped also.
// This allows for dumping logs from nodes even if they don't register as a kubernetes
// node, or if a node fails to register, or if the whole cluster fails to start.
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, additionalIPs, additionalPrivateIPs []string) error {
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, maxNodesToDump int, additionalIPs, additionalPrivateIPs []string) error {
var special, regular, dumped []*corev1.Node

log.Printf("starting to dump %d nodes fetched through the Kubernetes APIs", len(nodes.Items))
for i := range nodes.Items {
node := &nodes.Items[i]

Expand Down Expand Up @@ -139,8 +138,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
}

for i := range regular {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
node := regular[i]
Expand All @@ -154,8 +153,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add

notDumped := findInstancesNotDumped(additionalIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, false)
Expand All @@ -166,8 +165,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add

notDumped = findInstancesNotDumped(additionalPrivateIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, true)
Expand Down
6 changes: 6 additions & 0 deletions pkg/resources/gce/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ func DumpManagedInstance(op *resources.DumpOperation, r *resources.Resource) err
klog.Warningf("instance %q not found", instance.Instance)
} else {
for _, ni := range instanceDetails.NetworkInterfaces {
if ni.NetworkIP != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.NetworkIP)
}
if ni.Ipv6Address != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.Ipv6Address)
}
for _, ac := range ni.AccessConfigs {
if ac.NatIP != "" {
i.PublicAddresses = append(i.PublicAddresses, ac.NatIP)
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/kubetest2-kops/deployer/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ func (d *deployer) initialize() error {
d.SSHPublicKeyPath = publicKey
}
d.createBucket = true
} else if d.SSHPrivateKeyPath == "" && os.Getenv("KUBE_SSH_KEY_PATH") != "" {
d.SSHPrivateKeyPath = os.Getenv("KUBE_SSH_KEY_PATH")
}
}

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/kubetest2-kops/deployer/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type deployer struct {
ValidationWait time.Duration `flag:"validation-wait" desc:"time to wait for newly created cluster to pass validation"`
ValidationCount int `flag:"validation-count" desc:"how many times should a validation pass"`
ValidationInterval time.Duration `flag:"validation-interval" desc:"time in duration to wait between validation attempts"`
MaxNodesToDump string `flag:"max-nodes-to-dump" desc:"max number of nodes to dump logs from, helpful to set when running scale tests"`

TemplatePath string `flag:"template-path" desc:"The path to the manifest template used for cluster creation"`

Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/kubetest2-kops/deployer/dumplogs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ func (d *deployer) DumpClusterLogs() error {
"--private-key", d.SSHPrivateKeyPath,
"--ssh-user", d.SSHUser,
}

if d.MaxNodesToDump != "" {
args = append(args, "--max-nodes", d.MaxNodesToDump)
}
klog.Info(strings.Join(args, " "))
cmd := exec.Command(args[0], args[1:]...)
cmd.SetEnv(append(d.env(), "KOPS_TOOLBOX_DUMP_K8S_RESOURCES=1")...)
Expand Down

0 comments on commit e5c4fe8

Please sign in to comment.