diff --git a/azimuth_caas_operator/tests/utils/test_ansible_runner.py b/azimuth_caas_operator/tests/utils/test_ansible_runner.py index 78851c4..5349248 100644 --- a/azimuth_caas_operator/tests/utils/test_ansible_runner.py +++ b/azimuth_caas_operator/tests/utils/test_ansible_runner.py @@ -1,9 +1,12 @@ +import base64 import json import os import unittest from unittest import mock import yaml +from easykube.rest.util import PropertyDict + from azimuth_caas_operator.models.v1alpha1 import cluster as cluster_crd from azimuth_caas_operator.models.v1alpha1 import cluster_type as cluster_type_crd from azimuth_caas_operator.tests import async_utils @@ -23,7 +26,9 @@ def test_get_job_remove(self): cluster = cluster_crd.get_fake() cluster_type = cluster_type_crd.get_fake() - job = ansible_runner.get_job(cluster, cluster_type.spec, remove=True) + job = ansible_runner.get_job( + cluster, cluster_type.spec, "test1-tfstate", remove=True + ) expected = """\ apiVersion: batch/v1 @@ -138,6 +143,7 @@ def test_get_job_remove(self): fsGroup: 1000 runAsGroup: 1000 runAsUser: 1000 + serviceAccountName: test1-tfstate ttlSecondsAfterFinished: 3600 volumes: - emptyDir: {} @@ -165,7 +171,6 @@ def test_get_job_remove(self): @mock.patch.dict( os.environ, { - "CONSUL_HTTP_ADDR": "fakeconsulurl", "ARA_API_SERVER": "fakearaurl", }, clear=True, @@ -173,8 +178,14 @@ def test_get_job_remove(self): def test_get_job_env_configmap(self): cluster = cluster_crd.get_fake() cluster_type = cluster_type_crd.get_fake() + global_extravars = { + "global_extravar1": "value1", + "global_extravar2": "value2", + } - config = ansible_runner.get_env_configmap(cluster, cluster_type.spec, "fakekey") + config = ansible_runner.get_env_configmap( + cluster, cluster_type.spec, "fakekey", global_extravars + ) expected = """\ apiVersion: v1 data: @@ -182,14 +193,12 @@ def test_get_job_env_configmap(self): ARA_API_SERVER: fakearaurl - CONSUL_HTTP_ADDR: fakeconsulurl - ' extravars: "cluster_deploy_ssh_public_key: fakekey\\ncluster_id: fakeclusterID1\\n\\ cluster_image: testimage1\\ncluster_name: test1\\ncluster_ssh_private_key_file:\\ - \\ /var/lib/caas/ssh/id_ed25519\\ncluster_type: type1\\nfoo: bar\\nnested:\\n baz:\\ - \\ bob\\nrandom_bool: true\\nrandom_dict:\\n random_str: foo\\nrandom_int: 8\\nvery_random_int:\\ - \\ 42\\n" + \\ /var/lib/caas/ssh/id_ed25519\\ncluster_type: type1\\nfoo: bar\\nglobal_extravar1:\\ + \\ value1\\nglobal_extravar2: value2\\nnested:\\n baz: bob\\nrandom_bool: true\\nrandom_dict:\\n\\ + \\ random_str: foo\\nrandom_int: 8\\nvery_random_int: 42\\n" kind: ConfigMap metadata: name: test1-create @@ -204,6 +213,85 @@ def test_get_job_env_configmap(self): class TestAsyncUtils(unittest.IsolatedAsyncioTestCase): + @mock.patch.dict( + os.environ, {"GLOBAL_EXTRAVARS_SECRET": "ns-1/extravars"}, clear=True + ) + async def test_get_global_extravars(self): + mock_client = mock.Mock() + mock_api = mock.AsyncMock() + mock_client.api.return_value = mock_api + mock_resource = mock.AsyncMock() + mock_api.resource.return_value = mock_resource + secret_data = { + "extravars": { + "extravar_1": "value1", + "extravar_2": "value2", + }, + "moreextravars": { + "extravar_3": "value3", + }, + } + mock_resource.fetch.return_value = { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "extravars", + "namespace": "ns-1", + }, + "data": { + k: base64.b64encode(yaml.safe_dump(v).encode()).decode() + for k, v in secret_data.items() + }, + } + + global_extravars = await ansible_runner.get_global_extravars(mock_client) + + self.assertEqual( + { + "extravar_1": "value1", + "extravar_2": "value2", + "extravar_3": "value3", + }, + global_extravars, + ) + + async def test_get_global_extravars_no_secret(self): + mock_client = mock.AsyncMock() + global_extravars = await ansible_runner.get_global_extravars(mock_client) + self.assertEqual({}, global_extravars) + + @mock.patch.dict( + os.environ, + {"ANSIBLE_RUNNER_CLUSTER_ROLE": "azimuth-caas-operator:tfstate"}, + clear=True, + ) + async def test_ensure_service_account(self): + mock_client = mock.AsyncMock() + + def fake_apply_object(obj, force=False): + return PropertyDict(obj) + + mock_client.apply_object.side_effect = fake_apply_object + cluster = cluster_crd.get_fake() + + service_account_name = await ansible_runner.ensure_service_account( + mock_client, cluster + ) + + class KindMatcher: + def __init__(self, kind): + self._kind = kind + + def __eq__(self, actual): + return actual["kind"] == self._kind + + self.assertEqual("test1-tfstate", service_account_name) + self.assertEqual(2, mock_client.apply_object.call_count) + mock_client.apply_object.assert_any_call( + KindMatcher("ServiceAccount"), force=True + ) + mock_client.apply_object.assert_any_call(KindMatcher("RoleBinding"), force=True) + @mock.patch.object(ansible_runner, "get_job_resource") async def test_get_jobs_for_cluster_create(self, mock_job_resource): fake_job_list = ["fakejob1", "fakejob2"] diff --git a/azimuth_caas_operator/utils/ansible_runner.py b/azimuth_caas_operator/utils/ansible_runner.py index 71d183f..879103b 100644 --- a/azimuth_caas_operator/utils/ansible_runner.py +++ b/azimuth_caas_operator/utils/ansible_runner.py @@ -2,6 +2,7 @@ import json import logging import os +import typing import yaml from cryptography.hazmat.primitives.asymmetric import ed25519 @@ -80,14 +81,96 @@ async def ensure_deploy_key_secret(client, cluster: cluster_crd.Cluster): return base64.b64decode(secret.data["id_ed25519.pub"]).decode() +async def ensure_service_account(client, cluster: cluster_crd.Cluster): + """ + Ensures that a service account exists with the given name. + """ + service_account = await client.apply_object( + { + "apiVersion": "v1", + "kind": "ServiceAccount", + "metadata": { + "name": f"{cluster.metadata.name}-tfstate", + "namespace": cluster.metadata.namespace, + "ownerReferences": [ + { + "apiVersion": cluster.api_version, + "kind": cluster.kind, + "name": cluster.metadata.name, + "uid": cluster.metadata.uid, + }, + ], + }, + }, + force=True, + ) + # If there is a cluster role specified, bind it to the service account + if "ANSIBLE_RUNNER_CLUSTER_ROLE" in os.environ: + await client.apply_object( + { + "apiVersion": "rbac.authorization.k8s.io/v1", + "kind": "RoleBinding", + "metadata": { + "name": service_account.metadata.name, + "namespace": cluster.metadata.namespace, + "ownerReferences": [ + { + "apiVersion": cluster.api_version, + "kind": cluster.kind, + "name": cluster.metadata.name, + "uid": cluster.metadata.uid, + }, + ], + }, + "roleRef": { + "apiGroup": "rbac.authorization.k8s.io", + "kind": "ClusterRole", + "name": os.environ["ANSIBLE_RUNNER_CLUSTER_ROLE"], + }, + "subjects": [ + { + "kind": "ServiceAccount", + "name": service_account.metadata.name, + "namespace": service_account.metadata.namespace, + }, + ], + }, + force=True, + ) + return service_account.metadata.name + + +async def get_global_extravars(client): + """ + Retrieves the global extra vars from the specified secret. + """ + # The secret is specified in the form namespace/name + secret_info = os.environ.get("GLOBAL_EXTRAVARS_SECRET") + if not secret_info: + return {} + LOG.info("extracting global extravars from %s", secret_info) + secret_resource = await client.api("v1").resource("secrets") + secret_namespace, secret_name = secret_info.split("/", maxsplit=1) + secret = await secret_resource.fetch(secret_name, namespace=secret_namespace) + # We parse each value from the secret as YAML and merge them together + global_extravars = {} + for b64data in sorted(secret.get("data", {}).values()): + data = base64.b64decode(b64data) + global_extravars.update(yaml.safe_load(data)) + return global_extravars + + def get_env_configmap( cluster: cluster_crd.Cluster, cluster_type_spec: cluster_type_crd.ClusterTypeSpec, cluster_deploy_ssh_public_key: str, + global_extravars: typing.Dict[str, typing.Any], remove=False, update=False, ): - extraVars = dict(cluster_type_spec.extraVars, **cluster.spec.extraVars) + extraVars = dict(global_extravars) + extraVars.update(cluster_type_spec.extraVars) + extraVars.update(cluster.spec.extraVars) extraVars["cluster_name"] = cluster.metadata.name extraVars["cluster_id"] = cluster.status.clusterID extraVars["cluster_type"] = cluster.spec.clusterTypeName @@ -98,10 +181,8 @@ def get_env_configmap( extraVars["cluster_state"] = "absent" envvars = dict(cluster_type_spec.envVars) - try: + if "CONSUL_HTTP_ADDR" in os.environ: envvars["CONSUL_HTTP_ADDR"] = os.environ["CONSUL_HTTP_ADDR"] - except KeyError: - raise RuntimeError("CONSUL_HTTP_ADDR is not set") if "ARA_API_SERVER" in os.environ: envvars["ARA_API_CLIENT"] = "http" envvars["ARA_API_SERVER"] = os.environ["ARA_API_SERVER"] @@ -138,6 +219,7 @@ def get_env_configmap( def get_job( cluster: cluster_crd.Cluster, cluster_type_spec: cluster_type_crd.ClusterTypeSpec, + service_account_name: str, remove=False, update=False, ): @@ -173,6 +255,7 @@ def get_job( # auto-remove delete jobs after one hour ttlSecondsAfterFinished: 3600 ''' if remove else ''} + serviceAccountName: {service_account_name} securityContext: runAsUser: 1000 runAsGroup: 1000 @@ -568,6 +651,9 @@ async def start_job( namespace=namespace, ) + # Extract the global extravars from the specified configmap, if specified + global_extravars = await get_global_extravars(client) + # ensure that we have generated an SSH key for the cluster cluster_deploy_ssh_public_key = await ensure_deploy_key_secret(client, cluster) @@ -577,15 +663,25 @@ async def start_job( cluster, cluster_type_spec, cluster_deploy_ssh_public_key, + global_extravars, remove=remove, update=update, ), force=True, ) + # Ensure that the service account exists for the cluster + service_account_name = await ensure_service_account(client, cluster) + # create the job await client.create_object( - get_job(cluster, cluster_type_spec, remove=remove, update=update) + get_job( + cluster, + cluster_type_spec, + service_account_name, + remove=remove, + update=update, + ) ) diff --git a/charts/operator/templates/clusterrole-operator.yaml b/charts/operator/templates/clusterrole-operator.yaml index 0d010be..5832800 100644 --- a/charts/operator/templates/clusterrole-operator.yaml +++ b/charts/operator/templates/clusterrole-operator.yaml @@ -31,7 +31,14 @@ rules: - pods - pods/log - secrets + - serviceaccounts verbs: ["*"] - apiGroups: ["rbac.authorization.k8s.io"] - resources: ["clusterrolebindings"] + resources: + - clusterrolebindings + - rolebindings verbs: ["get", "list", "create", "update", "patch"] + # Required so that the CaaS operator can grant permissions on leases for the TF plugin + - apiGroups: [coordination.k8s.io] + resources: [leases] + verbs: [get, create, update] diff --git a/charts/operator/templates/clusterrole-tfstate.yaml b/charts/operator/templates/clusterrole-tfstate.yaml new file mode 100644 index 0000000..1526d91 --- /dev/null +++ b/charts/operator/templates/clusterrole-tfstate.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "azimuth-caas-operator.fullname" . }}:tfstate + labels: {{ include "azimuth-caas-operator.labels" . | nindent 4 }} +rules: + # This clusterrole is attached to cluster service accounts using a rolebinding + # That means when it is used, it is namespace-scoped + # It includes the permissions that are required to manage OpenTofu states in secrets + - apiGroups: [""] + resources: [secrets] + verbs: [list, get, create, update] + # Leases are used to implement state locking + - apiGroups: [coordination.k8s.io] + resources: [leases] + verbs: [get, create, update] diff --git a/charts/operator/templates/deployment.yaml b/charts/operator/templates/deployment.yaml index 13ff648..b97113b 100644 --- a/charts/operator/templates/deployment.yaml +++ b/charts/operator/templates/deployment.yaml @@ -25,14 +25,24 @@ spec: image: {{ printf "%s:%s" .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }} imagePullPolicy: {{ .Values.image.pullPolicy }} env: + {{- if .Values.config.consulUrl }} - name: CONSUL_HTTP_ADDR value: {{ tpl .Values.config.consulUrl . | quote }} + {{- end }} - name: ARA_API_SERVER value: {{ tpl .Values.config.araUrl . | quote }} - name: ANSIBLE_RUNNER_IMAGE_REPO value: {{ quote .Values.config.ansibleRunnerImage.repository }} - name: ANSIBLE_RUNNER_IMAGE_TAG value: {{ default .Chart.AppVersion .Values.config.ansibleRunnerImage.tag | quote }} + - name: GLOBAL_EXTRAVARS_SECRET + value: {{ + include "azimuth-caas-operator.fullname" . | + printf "%s/%s-extravars" .Release.Namespace | + quote + }} + - name: ANSIBLE_RUNNER_CLUSTER_ROLE + value: {{ include "azimuth-caas-operator.fullname" . }}:tfstate ports: - name: metrics containerPort: 8080 diff --git a/charts/operator/templates/secret-global-extravars.yaml b/charts/operator/templates/secret-global-extravars.yaml new file mode 100644 index 0000000..c6cb71d --- /dev/null +++ b/charts/operator/templates/secret-global-extravars.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "azimuth-caas-operator.fullname" . }}-extravars + labels: {{ include "azimuth-caas-operator.labels" . | nindent 4 }} +stringData: + extravars: | + {{- toYaml .Values.config.globalExtraVars | nindent 4 }} diff --git a/charts/operator/values.yaml b/charts/operator/values.yaml index 49a15ad..0cfbc44 100644 --- a/charts/operator/values.yaml +++ b/charts/operator/values.yaml @@ -1,13 +1,15 @@ # Config for the operator config: - # The URL for Consul - consulUrl: "{{ fail \"config.consulUrl is required\" }}" + # The URL for Consul, if using Consul + consulUrl: # The URL for ARA araUrl: "{{ printf \"http://%s-ara.%s:8000\" .Release.Name .Release.Namespace }}" # The image for ansible runner ansibleRunnerImage: repository: ghcr.io/stackhpc/azimuth-caas-operator-ee tag: "" # Defaults to appVersion + # Any global extravars to use + globalExtraVars: {} # The operator image to use image: diff --git a/tools/functional_test.sh b/tools/functional_test.sh index 2541547..58b4b4c 100755 --- a/tools/functional_test.sh +++ b/tools/functional_test.sh @@ -16,7 +16,8 @@ helm upgrade azimuth-caas-operator ./charts/operator \ --set-string image.tag=${GITHUB_SHA::7} \ --set-string config.ansibleRunnerImage.tag=${GITHUB_SHA::7} \ --set-string ara.image.tag=${GITHUB_SHA::7} \ - --set-string config.consulUrl=fakeconsul + --set-string config.globalExtraVars.extravar_1=value1 \ + --set-string config.globalExtraVars.extravar_2=value2 # add required secrets, not that they care used for this test echo "foo" >clouds.yaml diff --git a/tox.ini b/tox.ini index 4acf41e..3c6ad8b 100644 --- a/tox.ini +++ b/tox.ini @@ -57,7 +57,6 @@ commands = oslo_debug_helper {posargs} [testenv:kopf] passenv = - CONSUL_HTTP_ADDR ANSIBLE_RUNNER_IMAGE_TAG commands = pip install -U -e {tox_root}