diff --git a/src/dsmlp/app/config.py b/src/dsmlp/app/config.py
index 0f6aa24..81f0100 100644
--- a/src/dsmlp/app/config.py
+++ b/src/dsmlp/app/config.py
@@ -1,2 +1,4 @@
 GPU_LABEL = "nvidia.com/gpu"
-GPU_LIMIT_ANNOTATION = 'gpu-limit'
\ No newline at end of file
+GPU_LIMIT_ANNOTATION = 'gpu-limit'
+# Pods whose priorityClassName equals this value skip GPU quota validation.
+LOW_PRIORITY_CLASS = "low"
diff --git a/src/dsmlp/app/gpu_validator.py b/src/dsmlp/app/gpu_validator.py
index b568d97..bdb8eaf 100644
--- a/src/dsmlp/app/gpu_validator.py
+++ b/src/dsmlp/app/gpu_validator.py
@@ -25,6 +25,10 @@ def validate_pod(self, request: Request):
         Validate pods for namespaces with the 'k8s-sync' label
         """
 
+        # Low priority pods pass through: skip the quota check entirely.
+        if request.object.spec.priorityClassName == LOW_PRIORITY_CLASS:
+            return
+
         namespace = self.kube.get_namespace(request.namespace)
         curr_gpus = self.kube.get_gpus_in_namespace(request.namespace)
 
diff --git a/src/dsmlp/app/types.py b/src/dsmlp/app/types.py
index 8c65dc8..68211fb 100644
--- a/src/dsmlp/app/types.py
+++ b/src/dsmlp/app/types.py
@@ -47,6 +47,7 @@ class PodSpec:
     containers: List[Container]
     initContainers: Optional[List[Container]] = None
     securityContext: Optional[PodSecurityContext] = None
+    priorityClassName: Optional[str] = None
 
 
 @dataclass_json
diff --git a/src/dsmlp/ext/kube.py b/src/dsmlp/ext/kube.py
index 84998db..87780d4 100644
--- a/src/dsmlp/ext/kube.py
+++ b/src/dsmlp/ext/kube.py
@@ -18,10 +18,16 @@ def get_namespace(self, name: str) -> Namespace:
         api = self.get_policy_api()
         v1namespace: V1Namespace = api.read_namespace(name=name)
         metadata: V1ObjectMeta = v1namespace.metadata
+
+        # The Kubernetes client reports None (not {}) for a namespace with no
+        # annotations, so normalise first; a missing limit defaults to 1 GPU.
+        annotations = metadata.annotations or {}
+        gpu_quota = int(annotations.get(GPU_LIMIT_ANNOTATION, 1))
+
         return Namespace(
             name=metadata.name,
             labels=metadata.labels,
-            gpu_quota=metadata.annotations[GPU_LIMIT_ANNOTATION])
+            gpu_quota=gpu_quota)
 
     def get_gpus_in_namespace(self, name: str) -> int:
         api = self.get_policy_api()
diff --git a/tests/app/test_gpu_validator.py b/tests/app/test_gpu_validator.py
new file mode 100644
index 0000000..9c36f0b
--- /dev/null
+++ b/tests/app/test_gpu_validator.py
@@ -0,0 +1,112 @@
+import inspect
+from operator import contains
+from dsmlp.app.validator import Validator
+from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse
+from dsmlp.plugin.kube import Namespace
+from hamcrest import assert_that, contains_inanyorder, equal_to, has_item
+from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient
+
+
+class TestValidator:
+    """Admission tests for the per-namespace GPU quota check."""
+
+    # Every request reuses one uid so expected responses can echo it back.
+    REQUEST_UID = "705ab4f5-6393-11e8-b7cc-42010a800002"
+
+    def setup_method(self) -> None:
+        """Register user10 with a quota of 10 GPUs and none in use."""
+        self.logger = FakeLogger()
+        self.awsed_client = FakeAwsedClient()
+        self.kube_client = FakeKubeClient()
+
+        self.awsed_client.add_user('user10', UserResponse(uid=10))
+        self.awsed_client.add_teams('user10', ListTeamsResponse(
+            teams=[TeamJson(gid=1000)]
+        ))
+
+        self.kube_client.add_namespace('user10', Namespace(name='user10', labels={'k8s-sync': 'true'}, gpu_quota=10))
+        self.kube_client.set_existing_gpus('user10', 0)
+
+    def test_no_gpus_requested(self):
+        response = self.when_validate(self.pod_review())
+
+        assert_that(response, equal_to(self.allowed()))
+
+    def test_quota_not_reached(self):
+        # Requesting exactly the quota is still within bounds.
+        response = self.when_validate(self.pod_review(gpus=10))
+
+        assert_that(response, equal_to(self.allowed()))
+
+    def test_quota_exceeded(self):
+        response = self.when_validate(self.pod_review(gpus=11))
+
+        assert_that(response, equal_to(self.denied(
+            "GPU quota exceeded. Requested 11 but with 0 already in use, the quota of 10 would be exceeded.")))
+
+    def test_sum_exceeded(self):
+        self.kube_client.set_existing_gpus('user10', 5)
+
+        response = self.when_validate(self.pod_review(gpus=6))
+
+        assert_that(response, equal_to(self.denied(
+            "GPU quota exceeded. Requested 6 but with 5 already in use, the quota of 10 would be exceeded.")))
+
+    def test_low_priority(self):
+        # Same over-quota request as test_sum_exceeded, but low priority.
+        self.kube_client.set_existing_gpus('user10', 5)
+
+        response = self.when_validate(self.pod_review(gpus=6, priority_class="low"))
+
+        assert_that(response, equal_to(self.allowed()))
+
+    def pod_review(self, gpus=None, priority_class=None):
+        """Build an AdmissionReview request for a one-container pod in user10.
+
+        gpus: value for the container's "nvidia.com/gpu" request; None
+            leaves the container empty.
+        priority_class: value for spec.priorityClassName; None omits the key.
+        """
+        container = {}
+        if gpus is not None:
+            container["resources"] = {"requests": {"nvidia.com/gpu": gpus}}
+
+        spec = {"containers": [container]}
+        if priority_class is not None:
+            spec["priorityClassName"] = priority_class
+
+        return {
+            "request": {
+                "uid": self.REQUEST_UID,
+                "userInfo": {"username": "user10"},
+                "namespace": "user10",
+                "object": {"kind": "Pod", "spec": spec},
+            }
+        }
+
+    def review_response(self, allowed, message):
+        """Build the AdmissionReview response expected for REQUEST_UID."""
+        return {
+            "apiVersion": "admission.k8s.io/v1",
+            "kind": "AdmissionReview",
+            "response": {
+                "uid": self.REQUEST_UID,
+                "allowed": allowed,
+                "status": {"message": message},
+            },
+        }
+
+    def allowed(self):
+        """Expected response for an admitted pod."""
+        return self.review_response(True, "Allowed")
+
+    def denied(self, message):
+        """Expected response for a rejected pod carrying *message*."""
+        return self.review_response(False, message)
+
+    def when_validate(self, json):
+        """Validate *json* with a Validator built on the fakes."""
+        validator = Validator(self.awsed_client, self.kube_client, self.logger)
+        response = validator.validate_request(json)
+
+        return response
diff --git a/tests/app/test_validator.py b/tests/app/test_id_validator.py
similarity index 100%
rename from tests/app/test_validator.py
rename to tests/app/test_id_validator.py