Skip to content
This repository has been archived by the owner on Aug 5, 2024. It is now read-only.

Commit

Permalink
Support Kubernetes backend for OpenTofu states (azimuth-cloud#133)
Browse files Browse the repository at this point in the history
* Support global extravars for all appliances

* Fail more aggressively when extravars secret is badly formatted

* Add global extra vars to functional test

* Ensure service account for ansible-runner creating TF states

* Reinstate Consul URL for backwards compatibility

* Fix scoping issue

* Fix operator permissions
  • Loading branch information
mkjpryor committed Jul 6, 2024
1 parent 3f49207 commit cb6d1b1
Show file tree
Hide file tree
Showing 9 changed files with 245 additions and 18 deletions.
104 changes: 96 additions & 8 deletions azimuth_caas_operator/tests/utils/test_ansible_runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import base64
import json
import os
import unittest
from unittest import mock
import yaml

from easykube.rest.util import PropertyDict

from azimuth_caas_operator.models.v1alpha1 import cluster as cluster_crd
from azimuth_caas_operator.models.v1alpha1 import cluster_type as cluster_type_crd
from azimuth_caas_operator.tests import async_utils
Expand All @@ -23,7 +26,9 @@ def test_get_job_remove(self):
cluster = cluster_crd.get_fake()
cluster_type = cluster_type_crd.get_fake()

job = ansible_runner.get_job(cluster, cluster_type.spec, remove=True)
job = ansible_runner.get_job(
cluster, cluster_type.spec, "test1-tfstate", remove=True
)

expected = """\
apiVersion: batch/v1
Expand Down Expand Up @@ -138,6 +143,7 @@ def test_get_job_remove(self):
fsGroup: 1000
runAsGroup: 1000
runAsUser: 1000
serviceAccountName: test1-tfstate
ttlSecondsAfterFinished: 3600
volumes:
- emptyDir: {}
Expand Down Expand Up @@ -165,31 +171,34 @@ def test_get_job_remove(self):
@mock.patch.dict(
os.environ,
{
"CONSUL_HTTP_ADDR": "fakeconsulurl",
"ARA_API_SERVER": "fakearaurl",
},
clear=True,
)
def test_get_job_env_configmap(self):
cluster = cluster_crd.get_fake()
cluster_type = cluster_type_crd.get_fake()
global_extravars = {
"global_extravar1": "value1",
"global_extravar2": "value2",
}

config = ansible_runner.get_env_configmap(cluster, cluster_type.spec, "fakekey")
config = ansible_runner.get_env_configmap(
cluster, cluster_type.spec, "fakekey", global_extravars
)
expected = """\
apiVersion: v1
data:
envvars: 'ARA_API_CLIENT: http
ARA_API_SERVER: fakearaurl
CONSUL_HTTP_ADDR: fakeconsulurl
'
extravars: "cluster_deploy_ssh_public_key: fakekey\\ncluster_id: fakeclusterID1\\n\\
cluster_image: testimage1\\ncluster_name: test1\\ncluster_ssh_private_key_file:\\
\\ /var/lib/caas/ssh/id_ed25519\\ncluster_type: type1\\nfoo: bar\\nnested:\\n baz:\\
\\ bob\\nrandom_bool: true\\nrandom_dict:\\n random_str: foo\\nrandom_int: 8\\nvery_random_int:\\
\\ 42\\n"
\\ /var/lib/caas/ssh/id_ed25519\\ncluster_type: type1\\nfoo: bar\\nglobal_extravar1:\\
\\ value1\\nglobal_extravar2: value2\\nnested:\\n baz: bob\\nrandom_bool: true\\nrandom_dict:\\n\\
\\ random_str: foo\\nrandom_int: 8\\nvery_random_int: 42\\n"
kind: ConfigMap
metadata:
name: test1-create
Expand All @@ -204,6 +213,85 @@ def test_get_job_env_configmap(self):


class TestAsyncUtils(unittest.IsolatedAsyncioTestCase):
@mock.patch.dict(
    os.environ, {"GLOBAL_EXTRAVARS_SECRET": "ns-1/extravars"}, clear=True
)
async def test_get_global_extravars(self):
    """Every entry of the referenced secret is decoded and merged into one dict."""
    # Each secret entry holds one YAML document, base64-encoded
    documents = {
        "extravars": {
            "extravar_1": "value1",
            "extravar_2": "value2",
        },
        "moreextravars": {
            "extravar_3": "value3",
        },
    }
    encoded = {}
    for key, document in documents.items():
        encoded[key] = base64.b64encode(yaml.safe_dump(document).encode()).decode()

    # client.api(...) is a plain call; resource(...) and fetch(...) are awaited
    secrets = mock.AsyncMock()
    secrets.fetch.return_value = {
        "apiVersion": "v1",
        "kind": "Secret",
        "metadata": {
            "name": "extravars",
            "namespace": "ns-1",
        },
        "data": encoded,
    }
    api = mock.AsyncMock()
    api.resource.return_value = secrets
    client = mock.Mock()
    client.api.return_value = api

    merged = await ansible_runner.get_global_extravars(client)

    expected = {
        "extravar_1": "value1",
        "extravar_2": "value2",
        "extravar_3": "value3",
    }
    self.assertEqual(expected, merged)

@mock.patch.dict(os.environ, {}, clear=True)
async def test_get_global_extravars_no_secret(self):
    """With GLOBAL_EXTRAVARS_SECRET unset, {} is returned without using the client."""
    # The environment is cleared so that a GLOBAL_EXTRAVARS_SECRET leaking in
    # from the test runner cannot push the code down the secret-fetching path
    mock_client = mock.AsyncMock()
    global_extravars = await ansible_runner.get_global_extravars(mock_client)
    self.assertEqual({}, global_extravars)
    mock_client.api.assert_not_called()

@mock.patch.dict(
    os.environ,
    {"ANSIBLE_RUNNER_CLUSTER_ROLE": "azimuth-caas-operator:tfstate"},
    clear=True,
)
async def test_ensure_service_account(self):
    """A ServiceAccount and a RoleBinding are applied when a cluster role is set."""

    class HasKind:
        """Compares equal to any applied object whose "kind" matches."""

        def __init__(self, expected_kind):
            self._expected_kind = expected_kind

        def __eq__(self, candidate):
            return candidate["kind"] == self._expected_kind

    client = mock.AsyncMock()
    # Echo every applied manifest back, wrapped in a PropertyDict
    client.apply_object.side_effect = lambda obj, force=False: PropertyDict(obj)
    fake_cluster = cluster_crd.get_fake()

    returned_name = await ansible_runner.ensure_service_account(
        client, fake_cluster
    )

    self.assertEqual("test1-tfstate", returned_name)
    self.assertEqual(2, client.apply_object.call_count)
    for expected_kind in ("ServiceAccount", "RoleBinding"):
        client.apply_object.assert_any_call(HasKind(expected_kind), force=True)

@mock.patch.object(ansible_runner, "get_job_resource")
async def test_get_jobs_for_cluster_create(self, mock_job_resource):
fake_job_list = ["fakejob1", "fakejob2"]
Expand Down
106 changes: 101 additions & 5 deletions azimuth_caas_operator/utils/ansible_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import os
import typing
import yaml

from cryptography.hazmat.primitives.asymmetric import ed25519
Expand Down Expand Up @@ -80,14 +81,96 @@ async def ensure_deploy_key_secret(client, cluster: cluster_crd.Cluster):
return base64.b64decode(secret.data["id_ed25519.pub"]).decode()


async def ensure_service_account(client, cluster: cluster_crd.Cluster):
    """
    Ensures that the "<cluster name>-tfstate" service account exists for the cluster.

    The service account is applied in the cluster's namespace with an owner
    reference to the cluster. If ANSIBLE_RUNNER_CLUSTER_ROLE is present in the
    environment, a RoleBinding named after the service account is also applied,
    binding that cluster role to the service account.

    Returns the name of the service account as reported by the apply call.
    """

    def owned_metadata(name):
        # Built fresh for every manifest so the two objects never share state
        return {
            "name": name,
            "namespace": cluster.metadata.namespace,
            "ownerReferences": [
                {
                    "apiVersion": cluster.api_version,
                    "kind": cluster.kind,
                    "name": cluster.metadata.name,
                    "uid": cluster.metadata.uid,
                },
            ],
        }

    account_manifest = {
        "apiVersion": "v1",
        "kind": "ServiceAccount",
        "metadata": owned_metadata(f"{cluster.metadata.name}-tfstate"),
    }
    service_account = await client.apply_object(account_manifest, force=True)

    # Without a configured cluster role there is nothing to bind
    cluster_role = os.environ.get("ANSIBLE_RUNNER_CLUSTER_ROLE")
    if cluster_role is None:
        return service_account.metadata.name

    binding_manifest = {
        "apiVersion": "rbac.authorization.k8s.io/v1",
        "kind": "RoleBinding",
        "metadata": owned_metadata(service_account.metadata.name),
        "roleRef": {
            "apiGroup": "rbac.authorization.k8s.io",
            "kind": "ClusterRole",
            "name": cluster_role,
        },
        "subjects": [
            {
                "kind": "ServiceAccount",
                "name": service_account.metadata.name,
                "namespace": service_account.metadata.namespace,
            },
        ],
    }
    await client.apply_object(binding_manifest, force=True)
    return service_account.metadata.name


async def get_global_extravars(client):
    """
    Retrieves the global extra vars from the secret named by GLOBAL_EXTRAVARS_SECRET.

    The environment variable holds a reference of the form namespace/name.
    Each entry of the secret is base64-decoded, parsed as YAML and merged into
    a single dict. Entries are merged in order of their keys, so when two
    entries define the same variable the entry with the later key wins.

    Returns an empty dict when the environment variable is unset or empty.

    Raises ValueError when the reference contains no "/" or when an entry
    holds something other than a YAML mapping.
    """
    # The secret is specified in the form namespace/name
    secret_info = os.environ.get("GLOBAL_EXTRAVARS_SECRET")
    if not secret_info:
        return {}
    secret_namespace, separator, secret_name = secret_info.partition("/")
    if not separator:
        raise ValueError(
            "GLOBAL_EXTRAVARS_SECRET must be of the form namespace/name, "
            f"got {secret_info!r}"
        )
    LOG.info("extracting global extravars from %s", secret_info)
    secret_resource = await client.api("v1").resource("secrets")
    secret = await secret_resource.fetch(secret_name, namespace=secret_namespace)
    # We parse each value from the secret as YAML and merge them together
    # Sorting by key (not by encoded value) keeps precedence predictable
    global_extravars = {}
    for key, b64data in sorted((secret.get("data") or {}).items()):
        extravars = yaml.safe_load(base64.b64decode(b64data))
        if extravars is None:
            # An empty or null YAML document contributes no variables
            continue
        if not isinstance(extravars, dict):
            raise ValueError(
                f"entry {key!r} of global extravars secret {secret_info} "
                f"must be a YAML mapping, got {type(extravars).__name__}"
            )
        global_extravars.update(extravars)
    return global_extravars


def get_env_configmap(
cluster: cluster_crd.Cluster,
cluster_type_spec: cluster_type_crd.ClusterTypeSpec,
cluster_deploy_ssh_public_key: str,
global_extravars: typing.Dict[str, typing.Any],
remove=False,
update=False,
):
extraVars = dict(cluster_type_spec.extraVars, **cluster.spec.extraVars)
extraVars = dict(global_extravars)
extraVars.update(cluster_type_spec.extraVars)
extraVars.update(cluster.spec.extraVars)
extraVars["cluster_name"] = cluster.metadata.name
extraVars["cluster_id"] = cluster.status.clusterID
extraVars["cluster_type"] = cluster.spec.clusterTypeName
Expand All @@ -98,10 +181,8 @@ def get_env_configmap(
extraVars["cluster_state"] = "absent"

envvars = dict(cluster_type_spec.envVars)
try:
if "CONSUL_HTTP_ADDR" in os.environ:
envvars["CONSUL_HTTP_ADDR"] = os.environ["CONSUL_HTTP_ADDR"]
except KeyError:
raise RuntimeError("CONSUL_HTTP_ADDR is not set")
if "ARA_API_SERVER" in os.environ:
envvars["ARA_API_CLIENT"] = "http"
envvars["ARA_API_SERVER"] = os.environ["ARA_API_SERVER"]
Expand Down Expand Up @@ -138,6 +219,7 @@ def get_env_configmap(
def get_job(
cluster: cluster_crd.Cluster,
cluster_type_spec: cluster_type_crd.ClusterTypeSpec,
service_account_name: str,
remove=False,
update=False,
):
Expand Down Expand Up @@ -173,6 +255,7 @@ def get_job(
# auto-remove delete jobs after one hour
ttlSecondsAfterFinished: 3600
''' if remove else ''}
serviceAccountName: {service_account_name}
securityContext:
runAsUser: 1000
runAsGroup: 1000
Expand Down Expand Up @@ -568,6 +651,9 @@ async def start_job(
namespace=namespace,
)

# Extract the global extravars from the configured secret, if one is specified
global_extravars = await get_global_extravars(client)

# ensure that we have generated an SSH key for the cluster
cluster_deploy_ssh_public_key = await ensure_deploy_key_secret(client, cluster)

Expand All @@ -577,15 +663,25 @@ async def start_job(
cluster,
cluster_type_spec,
cluster_deploy_ssh_public_key,
global_extravars,
remove=remove,
update=update,
),
force=True,
)

# Ensure that the service account exists for the cluster
service_account_name = await ensure_service_account(client, cluster)

# create the job
await client.create_object(
get_job(cluster, cluster_type_spec, remove=remove, update=update)
get_job(
cluster,
cluster_type_spec,
service_account_name,
remove=remove,
update=update,
)
)


Expand Down
9 changes: 8 additions & 1 deletion charts/operator/templates/clusterrole-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ rules:
- pods
- pods/log
- secrets
- serviceaccounts
verbs: ["*"]
- apiGroups: ["rbac.authorization.k8s.io"]
resources: ["clusterrolebindings"]
resources:
- clusterrolebindings
- rolebindings
verbs: ["get", "list", "create", "update", "patch"]
# Required so that the CaaS operator can grant permissions on leases for the TF plugin
- apiGroups: [coordination.k8s.io]
resources: [leases]
verbs: [get, create, update]
16 changes: 16 additions & 0 deletions charts/operator/templates/clusterrole-tfstate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "azimuth-caas-operator.fullname" . }}:tfstate
  labels: {{ include "azimuth-caas-operator.labels" . | nindent 4 }}
rules:
  # This clusterrole is attached to cluster service accounts using a rolebinding,
  # so when it is used the permissions are namespace-scoped
  # OpenTofu states are managed in secrets
  - apiGroups:
      - ""
    resources:
      - secrets
    verbs:
      - list
      - get
      - create
      - update
  # State locking is implemented using leases
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - get
      - create
      - update
10 changes: 10 additions & 0 deletions charts/operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,24 @@ spec:
image: {{ printf "%s:%s" .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
{{- if .Values.config.consulUrl }}
- name: CONSUL_HTTP_ADDR
value: {{ tpl .Values.config.consulUrl . | quote }}
{{- end }}
- name: ARA_API_SERVER
value: {{ tpl .Values.config.araUrl . | quote }}
- name: ANSIBLE_RUNNER_IMAGE_REPO
value: {{ quote .Values.config.ansibleRunnerImage.repository }}
- name: ANSIBLE_RUNNER_IMAGE_TAG
value: {{ default .Chart.AppVersion .Values.config.ansibleRunnerImage.tag | quote }}
- name: GLOBAL_EXTRAVARS_SECRET
value: {{
include "azimuth-caas-operator.fullname" . |
printf "%s/%s-extravars" .Release.Namespace |
quote
}}
- name: ANSIBLE_RUNNER_CLUSTER_ROLE
value: {{ include "azimuth-caas-operator.fullname" . }}:tfstate
ports:
- name: metrics
containerPort: 8080
Expand Down
8 changes: 8 additions & 0 deletions charts/operator/templates/secret-global-extravars.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Secret holding the extra vars that the operator applies to all appliances
# The operator is pointed at it by the GLOBAL_EXTRAVARS_SECRET environment variable
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "azimuth-caas-operator.fullname" . }}-extravars
  labels: {{ include "azimuth-caas-operator.labels" . | nindent 4 }}
stringData:
  # Fall back to an empty mapping so that an unset value renders as {} rather
  # than null, which the operator cannot merge into the extra vars
  extravars: |
    {{- toYaml (default dict .Values.config.globalExtraVars) | nindent 4 }}
Loading

0 comments on commit cb6d1b1

Please sign in to comment.