diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimits.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimits.java index 749db4601..57e9ba96b 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimits.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimits.java @@ -165,6 +165,48 @@ int getPodTemplateCount(String podTemplate) { return podTemplateCounts.getOrDefault(podTemplate, 0); } + /** + * Unregister executors using persistent field values (cloudName and templateId). + * This method works correctly even after Jenkins restart when transient template references are null. + * + * @param cloudName the kubernetes cloud name + * @param podTemplateId the pod template ID + * @param numExecutors the number of executors (pretty much always 1) + */ + public void unregisterByIds(@NonNull String cloudName, @NonNull String podTemplateId, int numExecutors) { + if (initInstance()) { + synchronized (this) { + // Decrement each counter independently if it was registered + int currentGlobalCount = getGlobalCount(cloudName); + if (currentGlobalCount > 0) { + int newGlobalCount = currentGlobalCount - numExecutors; + if (newGlobalCount < 0) { + LOGGER.log( + Level.WARNING, + "Global count for " + cloudName + + " went below zero. There is likely a bug in kubernetes-plugin"); + } + cloudCounts.put(cloudName, Math.max(0, newGlobalCount)); + LOGGER.log(Level.FINEST, () -> cloudName + " global limit: " + Math.max(0, newGlobalCount)); + } + + int currentPodTemplateCount = getPodTemplateCount(podTemplateId); + if (currentPodTemplateCount > 0) { + int newPodTemplateCount = currentPodTemplateCount - numExecutors; + if (newPodTemplateCount < 0) { + LOGGER.log( + Level.WARNING, + "Pod template count for " + podTemplateId + + " went below zero. There is likely a bug in kubernetes-plugin"); + } + podTemplateCounts.put(podTemplateId, Math.max(0, newPodTemplateCount)); + LOGGER.log( + Level.FINEST, () -> podTemplateId + " template limit: " + Math.max(0, newPodTemplateCount)); + } + } + } + } + @Extension public static class NodeListenerImpl extends NodeListener { @Override @@ -172,10 +214,10 @@ protected void onDeleted(@NonNull Node node) { if (node instanceof KubernetesSlave) { KubernetesProvisioningLimits instance = KubernetesProvisioningLimits.get(); KubernetesSlave kubernetesNode = (KubernetesSlave) node; - PodTemplate template = kubernetesNode.getTemplateOrNull(); - if (template != null) { - instance.unregister(kubernetesNode.getKubernetesCloud(), template, node.getNumExecutors()); - } + // Use persistent fields (cloudName, templateId) instead of transient template object + // This works correctly even after Jenkins restart when template reference is null + instance.unregisterByIds( + kubernetesNode.getCloudName(), kubernetesNode.getTemplateId(), node.getNumExecutors()); } } } diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java index ae43d991b..527d4e696 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java @@ -940,6 +940,12 @@ protected Object readResolve() { if (containers == null) { // upgrading from 0.8 containers = new ArrayList<>(); + } + + // Migrate deprecated fields from PodTemplate to first container + // This handles both legacy configs (plugin v0.8) and modern configs where + // deprecated setters were used before containers were added + if (containers.isEmpty() && image != null) { ContainerTemplate containerTemplate = new ContainerTemplate(KubernetesCloud.JNLP_NAME, this.image); containerTemplate.setCommand(command); containerTemplate.setArgs(PodTemplateUtils.isNullOrEmpty(args) ? FALLBACK_ARGUMENTS : args); @@ -956,6 +962,32 @@ protected Object readResolve() { containerTemplate.setResourceRequestEphemeralStorage(resourceRequestEphemeralStorage); containerTemplate.setWorkingDir(remoteFs); containers.add(containerTemplate); + } else if (!containers.isEmpty() + && (resourceLimitMemory != null + || resourceLimitCpu != null + || resourceRequestMemory != null + || resourceRequestCpu != null)) { + // Apply deprecated resource fields to first container if they're set + // but container doesn't have them (happens when deprecated setters were used) + ContainerTemplate first = containers.get(0); + if (resourceLimitMemory != null && first.getResourceLimitMemory() == null) { + first.setResourceLimitMemory(resourceLimitMemory); + } + if (resourceLimitCpu != null && first.getResourceLimitCpu() == null) { + first.setResourceLimitCpu(resourceLimitCpu); + } + if (resourceRequestMemory != null && first.getResourceRequestMemory() == null) { + first.setResourceRequestMemory(resourceRequestMemory); + } + if (resourceRequestCpu != null && first.getResourceRequestCpu() == null) { + first.setResourceRequestCpu(resourceRequestCpu); + } + if (resourceLimitEphemeralStorage != null && first.getResourceLimitEphemeralStorage() == null) { + first.setResourceLimitEphemeralStorage(resourceLimitEphemeralStorage); + } + if (resourceRequestEphemeralStorage != null && first.getResourceRequestEphemeralStorage() == null) { + first.setResourceRequestEphemeralStorage(resourceRequestEphemeralStorage); + } } if (annotations == null) { diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimitsTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimitsTest.java index 6e42cb797..0107ff70e 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimitsTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimitsTest.java @@ -91,4 +91,51 @@ public void lotsOfCloudsAndTemplates() throws InterruptedException { } } } + + @Test + public void testCounterDecrementAfterRestartWithEphemeralTemplate() throws Exception { + // Create a cloud with an ephemeral template + KubernetesCloud cloud = new KubernetesCloud("test-cloud"); + cloud.setContainerCap(10); + j.jenkins.clouds.add(cloud); + + // Create an ephemeral pod template (not saved to cloud config) + PodTemplate ephemeralTemplate = new PodTemplate(); + ephemeralTemplate.setName("ephemeral-template"); + ephemeralTemplate.setInstanceCap(5); + + // Register the template (simulates agent creation) + KubernetesProvisioningLimits limits = KubernetesProvisioningLimits.get(); + assertTrue("Should successfully register template", limits.register(cloud, ephemeralTemplate, 1)); + + // Get the template ID that was auto-generated + String templateId = ephemeralTemplate.getId(); + + // Verify counters were incremented after registration + assertEquals("Global count should be 1 after registration", 1, limits.getGlobalCount("test-cloud")); + assertEquals("Template count should be 1 after registration", 1, limits.getPodTemplateCount(templateId)); + + // Create a KubernetesSlave using Builder pattern + KubernetesSlave slave = new KubernetesSlave.Builder() + .podTemplate(ephemeralTemplate) + .cloud(cloud) + .nodeDescription("Test agent for counter leak fix") + .build(); + + // Add the slave to Jenkins + j.jenkins.addNode(slave); + + // Simulate ephemeral template removal (happens with idleMinutes config after build completion) + cloud.removeTemplate(ephemeralTemplate); + + // Remove the node (simulates agent deletion) + // The fix ensures counters are decremented using cloudName and templateId, + // even when template reference is null (as happens with ephemeral templates after removal/restart) + j.jenkins.removeNode(slave); + + // After deletion, counters should be decremented back to 0 + // This is the bug fix: should work even when template reference is null + assertEquals("Global count should be 0 after node deletion", 0, limits.getGlobalCount("test-cloud")); + assertEquals("Template count should be 0 after node deletion", 0, limits.getPodTemplateCount(templateId)); + } } diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/ResourceLimitPersistenceTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/ResourceLimitPersistenceTest.java new file mode 100644 index 000000000..8ee01b60a --- /dev/null +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/ResourceLimitPersistenceTest.java @@ -0,0 +1,134 @@ +package org.csanchez.jenkins.plugins.kubernetes; + +import static org.junit.Assert.*; + +import hudson.util.XStream2; +import java.io.StringReader; +import java.io.StringWriter; +import org.junit.Test; + +/** + * Test to verify resource limits persist across Jenkins restarts (XStream serialization/deserialization). + * Related to issue #2766. + */ +public class ResourceLimitPersistenceTest { + + @Test + public void testContainerTemplateResourceLimitMemoryPersistsAfterXStreamSerialization() { + // Create container with memory limit + ContainerTemplate original = new ContainerTemplate("jnlp", "jenkins/inbound-agent"); + original.setResourceLimitMemory("3072Mi"); + original.setResourceLimitCpu("2000m"); + original.setResourceRequestMemory("512Mi"); + original.setResourceRequestCpu("500m"); + + // Simulate XStream serialization/deserialization (what happens during Jenkins restart) + XStream2 xs = new XStream2(); + StringWriter writer = new StringWriter(); + xs.toXML(original, writer); + String xml = writer.toString(); + + System.out.println("Serialized XML:"); + System.out.println(xml); + + // Deserialize + ContainerTemplate deserialized = (ContainerTemplate) xs.fromXML(new StringReader(xml)); + + // Verify resource limits persisted + assertEquals( + "Memory limit should persist after deserialization", "3072Mi", deserialized.getResourceLimitMemory()); + assertEquals("CPU limit should persist after deserialization", "2000m", deserialized.getResourceLimitCpu()); + assertEquals( + "Memory request should persist after deserialization", + "512Mi", + deserialized.getResourceRequestMemory()); + assertEquals("CPU request should persist after deserialization", "500m", deserialized.getResourceRequestCpu()); + } + + @Test + public void testPodTemplateWithContainerResourceLimitsPersists() { + // Create pod template with container that has memory limits + ContainerTemplate container = new ContainerTemplate("jnlp", "jenkins/inbound-agent"); + container.setResourceLimitMemory("3072Mi"); + container.setResourceLimitCpu("2000m"); + + // Simulate XStream serialization/deserialization + XStream2 xs = new XStream2(); + StringWriter writer = new StringWriter(); + xs.toXML(container, writer); + String xml = writer.toString(); + + System.out.println("Container Serialized XML:"); + System.out.println(xml); + + // Deserialize + ContainerTemplate deserialized = (ContainerTemplate) xs.fromXML(new StringReader(xml)); + + // Verify container resource limits persisted + assertEquals("Container memory limit should persist", "3072Mi", deserialized.getResourceLimitMemory()); + assertEquals("Container CPU limit should persist", "2000m", deserialized.getResourceLimitCpu()); + } + + @Test + @SuppressWarnings("deprecation") + public void testPodTemplateLegacyFieldsMigrateToContainer() { + // This is the BUG scenario from #2766: + // 1. User sets memory limit using deprecated PodTemplate.setResourceLimitMemory() + // 2. At this point, containers list exists but is empty + // 3. Value gets stored in PodTemplate.resourceLimitMemory (deprecated field) + // 4. Jenkins restart -> readResolve() should migrate to container + + // Simulate: PodTemplate with deprecated fields set, containers list empty + String xml = "\n" + " test-id\n" + + " test-pod\n" + + " jenkins/inbound-agent\n" + + " false\n" + + " false\n" + + " false\n" + + " 2147483647\n" + + " 1000\n" + + " 0\n" + + " 0\n" + + " 3072Mi\n" + + " 2000m\n" + + " 512Mi\n" + + " 500m\n" + + " \n" + + " \n" + + // Empty containers list! + " \n" + + " \n" + + " \n" + + " false\n" + + ""; + + System.out.println("Simulating Jenkins restart with this XML (deprecated fields + empty containers):"); + + // Deserialize - readResolve() should migrate deprecated fields to container + XStream2 xs = new XStream2(); + PodTemplate deserialized = (PodTemplate) xs.fromXML(new StringReader(xml)); + + // After readResolve(), deprecated fields should be migrated to a NEW container + System.out.println("After readResolve(), containers: " + + deserialized.getContainers().size()); + + assertFalse( + "Container should be created from deprecated fields + image", + deserialized.getContainers().isEmpty()); + + ContainerTemplate container = deserialized.getContainers().get(0); + System.out.println("Container memory limit: " + container.getResourceLimitMemory()); + System.out.println("Container CPU limit: " + container.getResourceLimitCpu()); + System.out.println("Container memory request: " + container.getResourceRequestMemory()); + System.out.println("Container CPU request: " + container.getResourceRequestCpu()); + + assertEquals( + "Memory limit should be migrated from deprecated field", "3072Mi", container.getResourceLimitMemory()); + assertEquals("CPU limit should be migrated from deprecated field", "2000m", container.getResourceLimitCpu()); + assertEquals( + "Memory request should be migrated from deprecated field", + "512Mi", + container.getResourceRequestMemory()); + assertEquals("CPU request should be migrated from deprecated field", "500m", container.getResourceRequestCpu()); + } +}