diff --git a/_run_shellcheck_tests.sh b/_run_shellcheck_tests.sh
index 93cd382b8..b2c176144 100755
--- a/_run_shellcheck_tests.sh
+++ b/_run_shellcheck_tests.sh
@@ -19,7 +19,7 @@
# You should have received a copy of the GNU General Public License
# along with KuberDock; if not, see .
#
-ERRORS_TRESHOLD=950
+ERRORS_TRESHOLD=960
tmpfile=$(mktemp /tmp/shellcheck-parse.XXXXXX)
find -iname '*.sh' | xargs shellcheck -s bash | tee $tmpfile
diff --git a/aws-kd-deploy/aws-kd-ami.sh b/aws-kd-deploy/aws-kd-ami.sh
new file mode 100755
index 000000000..1bc7b3f35
--- /dev/null
+++ b/aws-kd-deploy/aws-kd-ami.sh
@@ -0,0 +1,916 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+
+KUBERDOCK_DIR=kuberdock-files
+KUBE_ROOT=$(dirname "${BASH_SOURCE}")
+
+if [ -f "${KUBE_ROOT}/cluster/env.sh" ]; then
+ source "${KUBE_ROOT}/cluster/env.sh"
+fi
+
+# AWS_CMD, AWS_SSH_KEY, AWS_SSH_KEY_NAME, SUBNET_CIDR, ssh-key-setup
+source "${KUBE_ROOT}/cluster/aws/util.sh"
+
+AMI_BASE_IMAGE=${AMI_BASE_IMAGE:-ami-66728470}
+AMI_INSTANCE_TYPE=${AMI_INSTANCE_TYPE:-t2.micro}
+AMI_PREFIX=${AMI_PREFIX:-kuberdock}
+AMI_CLUSTER_NAME=${AMI_PREFIX}-ami
+AMI_VERSION=${AMI_VERSION:-}
+AWS_ACCOUNT_ID=${AMI_AWS_ACCOUNT_ID:-256284446245}
+AWS_AVAILABILITY_ZONE=${AMI_AWS_AVAILABILITY_ZONE:-us-east-1b}
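+# The region is the availability zone minus its trailing letter,
+# e.g. us-east-1b -> us-east-1.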
+AWS_DEFAULT_REGION=${AWS_AVAILABILITY_ZONE%?}
+
+
+DO_CLEANUP=
+DO_DEPLOY="yes"
+DO_HELP=
+DO_IMAGE="yes"
+DO_INFO=
+DO_LIST=
+DO_RELEASE=
+DO_SETUP="yes"
+FORCED_VERSION=
+REGIONS="-"
+SKIP_CLEANUP=
+WITH_TESTING=${AMI_WITH_TESTING:-}
+
+
+ami_name() { echo "${AMI_PREFIX}-${1}-$(ami_version)" ;}
+aws_cmd() { ${AWS_CMD} "${@}" ;}
+aws_filter() { echo "Name=${1},Values=${2}" ;}
+check() { [ -n "${1:-}" ] ;}
+check_echo() { [ -n "${1:-}" ] && echo "${@}" ;}
+hash_file() { md5sum "${1}" | cut -d " " -f 1 ;}
+
+do_cleanup() { check "${DO_CLEANUP}" ;}
+do_deploy() { check "${DO_DEPLOY}" ;}
+do_help() { check "${DO_HELP}" ;}
+do_image() { check "${DO_IMAGE}" ;}
+do_info() { check "${DO_INFO}" ;}
+do_list() { check "${DO_LIST}" ;}
+do_release() { check "${DO_RELEASE}" ;}
+do_setup() { check "${DO_SETUP}" ;}
+
+
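+# Compute a short (8-character) md5 hash over the deploy-related files listed
+# below; it serves as the AMI version for non-release builds.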
+ami_hash()
+{
+ local data=
+ local files="
+ aws-kd-deploy/aws-kd-ami.sh \
+ deploy.sh \
+ kuberdock.spec \
+ node_install.sh \
+ node_install_ami.sh \
+ node_install_common.sh \
+ node_prepare_ami.sh \
+ "
+
+ check_echo "${AMI_HASH:-}" && return
+
+ pushd "${KUBE_ROOT}"/.. >> /dev/null
+ AMI_HASH=$(cat ${files} | md5sum | cut -c-8)
+ popd >> /dev/null
+
+ echo "${AMI_HASH}"
+}
+
+
+ami_regions()
+{
+ if [ "${REGIONS}" = "*" ]; then
+ echo \
+ us-east-1 \
+ us-east-2 \
+ us-west-1 \
+ us-west-2 \
+ ca-central-1 \
+ ap-south-1 \
+ ap-northeast-2 \
+ ap-southeast-1 \
+ ap-southeast-2 \
+ ap-northeast-1 \
+ eu-central-1 \
+ eu-west-1 \
+ eu-west-2 \
+ sa-east-1 \
+ ;
+ return
+ fi
+
+ echo "${REGIONS}" | tr ',' '\n'| sed "s/^-$/${AWS_DEFAULT_REGION}/" | sort -u | tr '\n' ' '
+}
+
+
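+# Version precedence: a forced version always wins; release builds use the
+# KuberDock package version (git tag), other builds use AMI_VERSION if set,
+# falling back to the file hash.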
+ami_version()
+{
+ check_echo "${FORCED_VERSION}" && return
+ do_release || { check_echo "${AMI_VERSION}" && return ;}
+
+ if do_release; then
+ kd_version
+ else
+ ami_hash
+ fi
+}
+
+
+aws_filter_version()
+{
+ aws_filter tag:KuberDockAmiVersion "$(ami_version)"
+}
+
+
+k8s_node_version()
+{
+ local version
+
+ check_echo "${K8S_NODE_VERSION:-}" && return
+ version=$(grep "Requires: kubernetes-node" "${KUBE_ROOT}"/../kuberdock.spec | cut -d ' ' -f 4 | cut -d ':' -f 2)
+ K8S_NODE_VERSION=kubernetes-node-${version}
+
+ echo "${K8S_NODE_VERSION}"
+}
+
+
+kd_version()
+{
+ local version
+
+ check_echo "${KD_VERSION:-}" && return
+ version=$(git tag -l --points-at HEAD | awk -F 'kuberdock@' '{print $2}')
+ if ! check "${version}"; then
+ >&2 echo "ERROR: there is no tag on the current commit"
+ return 1
+ fi
+ KD_VERSION=${version}
+
+ echo "${KD_VERSION}"
+}
+
+
+print_help()
+{
+ for help_line in "${@}"; do
+ echo "${help_line}" | sed -e 's/^[[:space:]]*//' -e 's/^:/ /'
+ done
+}
+
+
+scp_wrapper()
+{
+ scp -i "${AWS_SSH_KEY}" -o StrictHostKeyChecking=no -q "${@:3}" "${SSH_USER}@${1}:${2}"
+}
+
+
+ssh_wrapper()
+{
+ echo "ssh ${1}: ${*:2}"
+ # "${@:2}" should be expanded on client side
+ # shellcheck disable=SC2029
+ ssh -i "${AWS_SSH_KEY}" -o StrictHostKeyChecking=no -q "${SSH_USER}@${1}" "${@:2}"
+}
+
+
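+# Copy the AMI for role ${1} to region ${2}; reuses an image that already
+# exists in the target region and returns its id.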
+copy_image()
+{
+ local image_id
+ local source_image_id
+ local source_region
+
+ source_image_id=$(get_image "${1}")
+
+ if [ "${AWS_DEFAULT_REGION}" = "${2}" ]; then
+ echo "${source_image_id}"
+ return
+ fi
+
+ source_region=${AWS_DEFAULT_REGION}
+ local AWS_DEFAULT_REGION=${2}
+
+ image_id=$(get_image "${1}")
+ if ! check "${image_id}"; then
+ image_id=$(aws_cmd copy-image \
+ --name "$(ami_name "${1}")" \
+ --source-region "${source_region}" \
+ --source-image-id "${source_image_id}")
+ aws_cmd wait image-exists --image-ids "${image_id}"
+ create_tags "${image_id}" "$(ami_name "${1}")"
+ create_role_tag "${image_id}" "${1}"
+ fi
+ echo "${image_id}"
+}
+
+
+create_image()
+{
+ local image_id
+
+ image_id=$(aws_cmd create-image \
+ --instance-id "${instance_id}" \
+ --name "$(ami_name "${1}")" \
+ --query 'ImageId')
+ aws_cmd wait image-exists --image-ids "${image_id}"
+ create_tags "${image_id}" "$(ami_name "${1}")"
+ create_role_tag "${image_id}" "${1}"
+
+ echo "${image_id}"
+}
+
+
+create_internet_gateway()
+{
+ local internet_gateway_id
+
+ internet_gateway_id=$(aws_cmd create-internet-gateway \
+ --query 'InternetGateway.InternetGatewayId')
+ wait_internet_gateway "${internet_gateway_id}"
+ create_tags "${internet_gateway_id}" "${AMI_CLUSTER_NAME}"
+ aws_cmd attach-internet-gateway \
+ --internet-gateway-id "${internet_gateway_id}" \
+ --vpc-id "${1}" >> /dev/null
+
+ echo "${internet_gateway_id}"
+}
+
+
+create_role_tag()
+{
+ aws_cmd create-tags --resources "${1}" --tags Key=KuberDockClusterRole,Value="${2}"
+}
+
+
+create_route_table()
+{
+ local route_table_id
+
+ route_table_id=$(aws_cmd create-route-table \
+ --vpc-id "${1}" \
+ --query 'RouteTable.RouteTableId')
+ wait_route_table "${route_table_id}"
+ create_tags "${route_table_id}" "${AMI_CLUSTER_NAME}"
+ aws_cmd associate-route-table \
+ --route-table-id "${route_table_id}" \
+ --subnet-id "${2}" >> /dev/null
+ aws_cmd create-route \
+ --route-table-id "${route_table_id}" \
+ --destination-cidr-block 0.0.0.0/0 \
+ --gateway-id "${3}" >> /dev/null
+
+ echo "${route_table_id}"
+}
+
+
+create_security_group()
+{
+ local security_group_id
+
+ security_group_id=$(aws_cmd create-security-group \
+ --group-name "${AMI_CLUSTER_NAME}" \
+ --description "${AMI_CLUSTER_NAME}" \
+ --vpc-id "${1}" \
+ --query 'GroupId')
+ wait_security_group "${security_group_id}"
+ create_tags "${security_group_id}" "${AMI_CLUSTER_NAME}"
+ aws_cmd authorize-security-group-ingress \
+ --group-id "${security_group_id}" \
+ --protocol tcp \
+ --port 22 \
+ --cidr 0.0.0.0/0 >> /dev/null
+
+ echo "${security_group_id}"
+}
+
+
+create_subnet()
+{
+ local subnet_id
+
+ subnet_id=$(aws_cmd create-subnet \
+ --cidr-block "${SUBNET_CIDR}" \
+ --vpc-id "${1}" \
+ --availability-zone "${AWS_AVAILABILITY_ZONE}" \
+ --query 'Subnet.SubnetId')
+ aws_cmd wait subnet-available --subnet-ids "${subnet_id}"
+ create_tags "${subnet_id}" "${AMI_CLUSTER_NAME}"
+
+ echo "${subnet_id}"
+}
+
+
+create_tags()
+{
+ aws_cmd create-tags \
+ --resources "${1}" \
+ --tags \
+ Key=Name,Value="${2}" \
+ Key=KuberDockAmiVersion,Value="$(ami_version)"
+}
+
+
+create_vpc()
+{
+ local vpc_id
+
+ vpc_id=$(aws_cmd create-vpc \
+ --cidr-block "${VPC_CIDR}" \
+ --query 'Vpc.VpcId')
+ aws_cmd wait vpc-exists --vpc-ids "${vpc_id}"
+ create_tags "${vpc_id}" "${AMI_CLUSTER_NAME}"
+
+ echo "${vpc_id}"
+}
+
+
+delete_image()
+{
+ local snapshot_id
+
+ for image_id in $(get_image "${1}"); do
+ image_is_public "${image_id}" && continue
+ snapshot_id=$(get_image_snapshot "${image_id}")
+ echo "Deregister AMI: ${image_id}"
+ aws_cmd deregister-image --image-id "${image_id}"
+ echo "Delete Snapshot: ${snapshot_id}"
+ aws_cmd delete-snapshot --snapshot-id "${snapshot_id}"
+ done
+}
+
+
+
+delete_internet_gateway()
+{
+ local vpc_ids
+
+ for internet_gateway_id in $(get_internet_gateway); do
+ vpc_ids=$(aws_cmd describe-internet-gateways \
+ --internet-gateway-ids "${internet_gateway_id}" \
+ --query 'InternetGateways[].Attachments[].VpcId')
+ for vpc_id in ${vpc_ids}; do
+ aws_cmd detach-internet-gateway \
+ --internet-gateway-id "${internet_gateway_id}" \
+ --vpc-id "${vpc_id}"
+ done
+ echo "Delete Internet Gateway: ${internet_gateway_id}"
+ aws_cmd delete-internet-gateway --internet-gateway-id "${internet_gateway_id}"
+ done
+}
+
+
+delete_route_table()
+{
+ for route_table_id in $(get_route_table); do
+ echo "Delete Route Table: ${route_table_id}"
+ aws_cmd delete-route \
+ --route-table-id "${route_table_id}" \
+ --destination-cidr-block 0.0.0.0/0
+ aws_cmd delete-route-table --route-table-id "${route_table_id}"
+ done
+}
+
+
+delete_security_group()
+{
+ for security_group_id in $(get_security_group); do
+ echo "Delete Security Group: ${security_group_id}"
+ aws_cmd delete-security-group --group-id "${security_group_id}"
+ done
+}
+
+
+delete_subnet()
+{
+ for subnet_id in $(get_subnet); do
+ echo "Delete Subnet: ${subnet_id}"
+ aws_cmd delete-subnet --subnet-id "${subnet_id}"
+ done
+}
+
+
+delete_vpc()
+{
+ for vpc_id in $(get_vpc); do
+ echo "Delete VPC: ${vpc_id}"
+ aws_cmd delete-vpc --vpc-id "${vpc_id}"
+ done
+}
+
+
+get_image()
+{
+ check_echo "$(aws_cmd describe-images \
+ --filters \
+ "$(aws_filter owner-id "${AWS_ACCOUNT_ID}")" \
+ "$(aws_filter tag:KuberDockClusterRole "${1}")" \
+ "$(aws_filter_version)" \
+ --query 'Images[].ImageId')"
+}
+
+
+get_image_snapshot()
+{
+ root_device=$(aws_cmd describe-images \
+ --image-ids "${1}" \
+ --query 'Images[].RootDeviceName')
+ check_echo "$(aws_cmd describe-images \
+ --image-ids "${1}" \
+ --query 'Images[].BlockDeviceMappings[?DeviceName==`'"${root_device}"'`].Ebs[].SnapshotId')"
+}
+
+
+get_instance()
+{
+ check_echo "$(aws_cmd describe-instances \
+ --filters \
+ "$(aws_filter tag:KuberDockClusterRole "${1}")" \
+ "$(aws_filter_version)" \
+ --query 'Reservations[].Instances[].InstanceId')"
+}
+
+
+get_live_instance()
+{
+ aws_cmd describe-instances \
+ --filters \
+ "$(aws_filter instance-state-name pending,rebooting,running,stopped,stopping)" \
+ "$(aws_filter tag:KuberDockClusterRole "${1}")" \
+ "$(aws_filter_version)" \
+ --query 'Reservations[].Instances[].InstanceId'
+}
+
+
+get_internet_gateway()
+{
+ check_echo "$(aws_cmd describe-internet-gateways \
+ --filters "$(aws_filter_version)" \
+ --query 'InternetGateways[].InternetGatewayId')"
+}
+
+
+get_route_table()
+{
+ check_echo "$(aws_cmd describe-route-tables \
+ --filters "$(aws_filter_version)" \
+ --query 'RouteTables[].RouteTableId')"
+}
+
+
+get_public_ip()
+{
+ aws_cmd describe-instances \
+ --instance-ids "${1}" \
+ --query 'Reservations[].Instances[].PublicIpAddress'
+}
+
+
+get_security_group()
+{
+ check_echo "$(aws_cmd describe-security-groups \
+ --filters "$(aws_filter_version)" \
+ --query 'SecurityGroups[].GroupId')"
+}
+
+
+get_subnet()
+{
+ check_echo "$(aws_cmd describe-subnets \
+ --filters "$(aws_filter_version)" \
+ --query 'Subnets[].SubnetId')"
+}
+
+
+get_vpc()
+{
+ check_echo "$(aws_cmd describe-vpcs \
+ --filters "$(aws_filter_version)" \
+ --query 'Vpcs[].VpcId')"
+}
+
+
+image_is_public()
+{
+ check "$(aws_cmd describe-images \
+ --filters \
+ "$(aws_filter is-public true)" \
+ "$(aws_filter_version)" \
+ --image-ids "${1}" \
+ --query 'Images[].ImageId')"
+}
+
+
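+# Launch an instance of the base image for role ${1} in subnet ${2} with
+# security group ${3}; the root EBS volume is deleted on termination.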
+run_instance()
+{
+ local block_device_mappings
+ local instance_id
+ local root_device
+
+ root_device=$(aws_cmd describe-images \
+ --image-ids "${AMI_BASE_IMAGE}" \
+ --query 'Images[].RootDeviceName')
+ block_device_mappings='[{"DeviceName":"'${root_device}'","Ebs":{"DeleteOnTermination":true}}]'
+ instance_id=$(aws_cmd run-instances \
+ --image-id "${AMI_BASE_IMAGE}" \
+ --instance-type "${AMI_INSTANCE_TYPE}" \
+ --subnet-id "${2}" \
+ --key-name "${AWS_SSH_KEY_NAME}" \
+ --security-group-ids "${3}" \
+ --associate-public-ip-address \
+ --block-device-mappings "${block_device_mappings}" \
+ --query 'Instances[].InstanceId')
+ create_tags "${instance_id}" "$(ami_name "${1}")"
+ create_role_tag "${instance_id}" "${1}"
+
+ echo "${instance_id}"
+}
+
+
+tag_snapshot()
+{
+ local snapshot_id
+
+ aws_cmd wait image-available --image-ids "${2}"
+ snapshot_id=$(get_image_snapshot "${2}")
+ create_tags "${snapshot_id}" "$(ami_name "${1}")"
+ create_role_tag "${snapshot_id}" "${1}"
+
+ echo "${snapshot_id}"
+}
+
+
+terminate_instance()
+{
+ local instance_id
+
+ instance_id=$(get_live_instance "${1}")
+ if check "${instance_id}"; then
+ echo "Terminate Instance: ${instance_id}"
+ aws_cmd terminate-instances --instance-ids "${instance_id}" >> /dev/null
+ aws_cmd delete-tags --resources "${instance_id}"
+ if check "${2:-}"; then
+ aws_cmd wait instance-terminated --instance-ids "${instance_id}"
+ fi
+ fi
+}
+
+
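+# The wait_* helpers below poll up to 40 times with a 15-second pause
+# (about 10 minutes) before giving up.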
+wait_accessible()
+{
+ for _ in $(seq 40); do
+ ssh_wrapper "${1}" -o BatchMode=yes -o ConnectTimeout=1 true >> /dev/null && break
+ sleep 15
+ done
+}
+
+
+wait_image()
+{
+ for _ in $(seq 40); do
+ get_image "${1}" >> /dev/null && break
+ sleep 15
+ done
+}
+
+
+wait_internet_gateway()
+{
+ for _ in $(seq 40); do
+ check "$(aws_cmd describe-internet-gateways \
+ --internet-gateway-ids "${@}" 2> /dev/null)" && break
+ sleep 15
+ done
+}
+
+
+wait_route_table()
+{
+ for _ in $(seq 40); do
+ check "$(aws_cmd describe-route-tables \
+ --route-table-ids "${@}" 2> /dev/null)" && break
+ sleep 15
+ done
+}
+
+
+wait_security_group()
+{
+ for _ in $(seq 40); do
+ check "$(aws_cmd describe-security-groups \
+ --group-ids "${@}" 2> /dev/null)" && break
+ sleep 15
+ done
+}
+
+
+ami_cleanup()
+{
+ echo "* Cleanup *"
+
+ delete_image node
+ terminate_instance node wait
+ delete_security_group
+ delete_subnet
+ delete_internet_gateway
+ delete_route_table
+ delete_vpc
+}
+
+
+ami_deploy_node_copy_files()
+{
+ ssh_wrapper "${1}" rm -fr "${KUBERDOCK_DIR}"
+ ssh_wrapper "${1}" mkdir -p "${KUBERDOCK_DIR}"/node_storage_manage
+
+ pushd "${KUBE_ROOT}"/.. >> /dev/null
+
+ scp_wrapper "${1}" "${KUBERDOCK_DIR}" \
+ backup_node.py \
+ backup_node_merge.py \
+ fslimit.py \
+ kubelet_args.py \
+ node_install.sh \
+ node_install_ami.sh \
+ node_install_common.sh \
+ node_prepare_ami.sh \
+ pd.sh \
+ node_scripts/kd-docker-exec.sh \
+ node_scripts/kd-ssh-gc \
+ node_scripts/kd-ssh-user.sh \
+ node_scripts/kd-ssh-user-update.sh
+
+ scp_wrapper "${1}" "${KUBERDOCK_DIR}"/node_storage_manage \
+ node_storage_manage/__init__.py \
+ node_storage_manage/aws.py \
+ node_storage_manage/common.py \
+ node_storage_manage/manage.py \
+ node_storage_manage/node_lvm_manage.py \
+ node_storage_manage/node_zfs_manage.py
+
+ popd >> /dev/null
+
+ ssh_wrapper "${1}" -t sudo mv -f "${KUBERDOCK_DIR}"/backup_node.py /usr/bin/kd-backup-node
+ ssh_wrapper "${1}" -t sudo mv -f "${KUBERDOCK_DIR}"/backup_node_merge.py /usr/bin/kd-backup-node-merge
+ ssh_wrapper "${1}" -t sudo mv -f "${KUBERDOCK_DIR}"/* /
+ ssh_wrapper "${1}" rm -fr "${KUBERDOCK_DIR}"
+}
+
+
+ami_deploy_node_prepare_ami()
+{
+ node_k8s=$(k8s_node_version)
+ ssh_wrapper "${1}" -t "cd / && sudo AMI=True AWS=True NODE_KUBERNETES=${node_k8s} WITH_TESTING=${WITH_TESTING} ZFS=yes bash node_install.sh"
+ ssh_wrapper "${1}" -t "cd / && sudo bash node_prepare_ami.sh"
+}
+
+
+ami_deploy_node()
+{
+ echo "* Deploy Node *"
+
+ local node_instance_id
+ local node_public_ip
+
+ node_instance_id=$(get_instance node)
+ aws_cmd wait instance-running --instance-ids "${node_instance_id}"
+
+ node_public_ip=$(get_public_ip "${node_instance_id}")
+ wait_accessible "${node_public_ip}"
+
+ ami_deploy_node_copy_files "${node_public_ip}"
+ ami_deploy_node_prepare_ami "${node_public_ip}"
+}
+
+
+ami_help()
+{
+ print_help "\
+    bash ${BASH_SOURCE} [-h|--help] [-i|--info] [-l|--list] [-f|--force-version VERSION] [-r|--regions REGIONS] [-t|--with-testing] [-c|--cleanup] [--release] [--skip-setup] [--skip-deploy] [--skip-ami] [--skip-cleanup] [--use-ami AMI_ID]
+ : -h|--help : print this help
+    : -i|--info             : print AMI version and related AWS resource info
+ : -l|--list : list available AMIs
+    : -f|--force-version VERSION : force the AMI version
+ : -r|--regions REGIONS : comma-separated list of regions to use with
+ : : -l|--list or --release
+ : : default is '-' -- current region configured
+ : : to operate in
+ : -t|--with-testing : deploy with kube-testing repo
+ : -c|--cleanup : do cleanup
+ : --release : make release AMI
+ : --skip-setup : skip setup
+ : --skip-deploy : skip deploy
+ : --skip-ami : skip AMI creation
+ : --skip-cleanup : skip cleanup (skipped by default if --release
+ : : is not specified)
+ : --use-ami AMI_ID : use specified AMI to run instance
+ "
+}
+
+
+ami_image()
+{
+ echo "* Create AMI *"
+
+ delete_image "${1}"
+
+ local image_id
+ local instance_id
+ local snapshot_id
+
+ instance_id=$(get_instance "${1}")
+ if check "${instance_id}"; then
+ aws_cmd wait instance-running --instance-ids "${instance_id}"
+ image_id=$(get_image "${1}" || create_image "${1}")
+ echo "AMI: ${image_id}"
+ snapshot_id=$(tag_snapshot "${1}" "${image_id}")
+ echo "Snapshot: ${snapshot_id}"
+ fi
+}
+
+
+ami_info() {
+ local image_id
+ local internet_gateway_id
+ local node_instance_id
+ local node_public_ip
+ local route_table_id
+ local security_group_id
+ local snapshot_id
+ local subnet_id
+ local vpc_id
+
+ echo "AMI Version: $(ami_version)"
+ echo "AWS Region: ${AWS_DEFAULT_REGION}"
+ echo "AWS Availability Zone: ${AWS_AVAILABILITY_ZONE}"
+ echo "Base AMI: ${AMI_BASE_IMAGE}"
+ vpc_id=$(get_vpc) && \
+ echo "VPC: ${vpc_id}"
+ subnet_id=$(get_subnet) && \
+ echo "Subnet: ${subnet_id}"
+ internet_gateway_id=$(get_internet_gateway) && \
+ echo "Internet Gateway: ${internet_gateway_id}"
+ route_table_id=$(get_route_table) && \
+ echo "Route Table: ${route_table_id}"
+ security_group_id=$(get_security_group) && \
+ echo "Security Group: ${security_group_id}"
+ node_instance_id=$(get_instance node) && \
+ echo "Node Instance: ${node_instance_id}"
+ if check "${node_instance_id}"; then
+ node_public_ip=$(get_public_ip "${node_instance_id}") && \
+ echo "Node Public IP: ${node_public_ip}"
+ fi
+ image_id=$(get_image node) && \
+ echo "Node AMI: ${image_id}"
+ if check "${image_id}"; then
+ snapshot_id=$(get_image_snapshot "${image_id}") && \
+ echo "Node Snapshot: ${snapshot_id}"
+ fi
+}
+
+
+ami_list()
+{
+ for region in $(ami_regions); do
+ echo "${region}:"
+ check_echo "$(AWS_DEFAULT_REGION=${region} aws_cmd describe-images \
+ --filters \
+ "$(aws_filter owner-id "${AWS_ACCOUNT_ID}")" \
+ "$(aws_filter tag-key KuberDockAmiVersion)" \
+ --query 'Images[].[Name,ImageId,BlockDeviceMappings[0].Ebs.SnapshotId,Public]')" \
+ || echo "- no AMIs -"
+ done
+}
+
+
+ami_release()
+{
+ echo "* Release *"
+
+ local node_image_id
+ local node_snapshot_id
+
+ wait_image node
+
+ for region in $(ami_regions); do
+ echo "${region}:"
+ node_image_id=$(copy_image node "${region}")
+ echo "Node AMI: ${node_image_id}"
+ node_snapshot_id=$(AWS_DEFAULT_REGION=${region} tag_snapshot node "${node_image_id}")
+ echo "Node Snapshot: ${node_snapshot_id}"
+ AWS_DEFAULT_REGION=${region} aws_cmd modify-image-attribute \
+ --image-id "${node_image_id}" \
+ --launch-permission '{"Add":[{"Group":"all"}]}'
+ done
+}
+
+
+ami_setup()
+{
+ echo "* Setup *"
+
+ local internet_gateway_id
+ local node_instance_id
+ local node_public_ip
+ local route_table_id
+ local security_group_id
+ local subnet_id
+ local vpc_id
+
+ terminate_instance node
+
+ ssh-key-setup >> /dev/null
+
+ vpc_id=$(get_vpc || create_vpc)
+ echo "VPC: ${vpc_id}"
+
+ subnet_id=$(get_subnet || create_subnet "${vpc_id}")
+ echo "Subnet: ${subnet_id}"
+
+ internet_gateway_id=$(get_internet_gateway || create_internet_gateway "${vpc_id}")
+ echo "Internet Gateway: ${internet_gateway_id}"
+
+ route_table_id=$(get_route_table || create_route_table "${vpc_id}" "${subnet_id}" "${internet_gateway_id}")
+ echo "Route Table: ${route_table_id}"
+
+ security_group_id=$(get_security_group || create_security_group "${vpc_id}")
+ echo "Security Group: ${security_group_id}"
+
+ node_instance_id=$(run_instance node "${subnet_id}" "${security_group_id}")
+ echo "Node Instance: ${node_instance_id}"
+
+ node_public_ip=$(get_public_ip "${node_instance_id}")
+ echo "Node Public IP: ${node_public_ip}"
+}
+
+
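+# Parse command-line options; any unknown option prints the help and exits.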
+while [ $# -gt 0 ]; do
+ key=${1}
+ case ${key} in
+ -c|--cleanup)
+ DO_CLEANUP=yes
+ DO_DEPLOY=
+ DO_IMAGE=
+ DO_SETUP=
+ ;;
+ -f|--force-version)
+ FORCED_VERSION=${2}
+ shift
+ ;;
+ -h|--help)
+ DO_HELP=yes
+ ;;
+ -i|--info)
+ DO_INFO=yes
+ ;;
+ -l|--list)
+ DO_LIST=yes
+ ;;
+ -r|--regions)
+ REGIONS=${2}
+ shift
+ ;;
+ -t|--with-testing)
+ WITH_TESTING=yes
+ ;;
+ --release)
+ check "${SKIP_CLEANUP}" || DO_CLEANUP=yes
+ DO_RELEASE=yes
+ WITH_TESTING=${WITH_TESTING:-}
+ ;;
+ --skip-cleanup)
+ DO_CLEANUP=
+ SKIP_CLEANUP=yes
+ ;;
+ --skip-deploy)
+ DO_DEPLOY=
+ ;;
+ --skip-ami)
+ DO_IMAGE=
+ ;;
+ --skip-setup)
+ DO_SETUP=
+ ;;
+ --use-ami)
+ AMI_BASE_IMAGE=${2}
+ shift
+ ;;
+ *)
+ ami_help
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+
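+# Main flow. For a release build resolve the version first: ami_version fails
+# here if the current commit carries no kuberdock@ tag.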
+do_release && ami_version >> /dev/null
+
+do_help && { ami_help; exit; }
+do_info && { ami_info; exit; }
+do_list && { ami_list; exit; }
+do_setup && ami_setup
+do_deploy && ami_deploy_node
+do_image && ami_image node
+do_release && ami_release
+do_cleanup && ami_cleanup
diff --git a/aws-kd-deploy/cluster/aws/util.sh b/aws-kd-deploy/cluster/aws/util.sh
index a65382892..7149bdf19 100755
--- a/aws-kd-deploy/cluster/aws/util.sh
+++ b/aws-kd-deploy/cluster/aws/util.sh
@@ -1224,7 +1224,7 @@ function deploy-master(){
fi
ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" -tt "${SSH_USER}@${KUBE_MASTER_IP}" mkdir /home/${SSH_USER}/kuberdock-files 2>"$LOG"
- ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" -tt "${SSH_USER}@${KUBE_MASTER_IP}" cp /var/opt/kuberdock/{node_install.sh,pd.sh} /etc/pki/etcd/ca.crt /etc/pki/etcd/etcd-client.crt /etc/pki/etcd/etcd-client.key /home/${SSH_USER}/kuberdock-files 2>"$LOG"
+ ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" -tt "${SSH_USER}@${KUBE_MASTER_IP}" cp /var/opt/kuberdock/node_install.sh /var/opt/kuberdock/node_install_common.sh /var/opt/kuberdock/pd.sh /etc/pki/etcd/ca.crt /etc/pki/etcd/etcd-client.crt /etc/pki/etcd/etcd-client.key /home/${SSH_USER}/kuberdock-files 2>"$LOG"
ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" -tt "${SSH_USER}@${KUBE_MASTER_IP}" "sudo cp /var/lib/nginx/.ssh/id_rsa.pub /home/${SSH_USER}/kuberdock-files" 2>"$LOG"
ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" -tt "${SSH_USER}@${KUBE_MASTER_IP}" "sudo chown ${SSH_USER} /home/${SSH_USER}/kuberdock-files/*" < <(cat) 2>"$LOG"
diff --git a/kubedock/kapi/elasticsearch_utils.py b/kubedock/kapi/elasticsearch_utils.py
index 329971201..eefceabe6 100644
--- a/kubedock/kapi/elasticsearch_utils.py
+++ b/kubedock/kapi/elasticsearch_utils.py
@@ -26,7 +26,7 @@
from ..nodes.models import Node
from ..pods.models import Pod
from ..users.models import User
-from .nodes import get_kuberdock_logs_pod_name
+from .service_pods import get_kuberdock_logs_pod_name
from ..settings import (
ELASTICSEARCH_REST_PORT, KUBE_API_PORT, KUBE_API_HOST, KUBE_API_VERSION)
diff --git a/kubedock/kapi/es_logs.py b/kubedock/kapi/es_logs.py
index d92f4b9aa..68b2b5266 100644
--- a/kubedock/kapi/es_logs.py
+++ b/kubedock/kapi/es_logs.py
@@ -25,8 +25,8 @@
from dateutil.parser import parse as parse_dt
from .elasticsearch_utils import execute_es_query, LogsError
-from .nodes import get_kuberdock_logs_pod_name
from .podcollection import PodCollection, POD_STATUSES
+from .service_pods import get_kuberdock_logs_pod_name
from ..core import ConnectionPool
from ..exceptions import APIError
from ..nodes.models import Node
diff --git a/kubedock/kapi/migrate_pod_utils.py b/kubedock/kapi/migrate_pod_utils.py
new file mode 100644
index 000000000..082363cbb
--- /dev/null
+++ b/kubedock/kapi/migrate_pod_utils.py
@@ -0,0 +1,193 @@
+"""Utilities to migrate pods from one node to another if those pods are binded
+to a node.
+"""
+from flask import current_app
+
+from kubedock.core import db
+from kubedock.kapi import node_utils, node_utils_aws, service_pods
+from kubedock.kapi.podcollection import PodCollection
+from kubedock.nodes.models import Node
+from kubedock.pods.models import PersistentDisk, Pod
+from kubedock.settings import AWS, WITH_TESTING
+from kubedock.users.models import User
+from kubedock.utils import NODE_STATUSES, POD_STATUSES
+
+
+def change_pods_node_selectors(from_node, to_node):
+ """Changes node selector of pods from 'from_node' to 'to_node'
+ :param from_node: node which has binded pods (object of model Node)
+ :param to_node: to this node should be binded pods. Also object of model
+ Node
+ """
+ current_app.logger.debug(
+ "Starting pods migration from '{}' to '{}'".format(
+ from_node.hostname, to_node.hostname
+ )
+ )
+ pc = PodCollection()
+ internal_user = User.get_internal()
+ PersistentDisk.get_by_node_id(from_node.id).update(
+ {PersistentDisk.node_id: to_node.id},
+ synchronize_session=False
+ )
+ dbpods = Pod.query.filter(
+ Pod.owner_id != internal_user.id,
+ Pod.status != 'deleted'
+ ).all()
+
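+    # Only pods pinned to the failed node are re-pointed; pods that are
+    # stopped or stopping get the new selector but are not redeployed.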
+ for pod in dbpods:
+ if pod.pinned_node != from_node.hostname:
+ continue
+ current_app.logger.debug(
+ "Update pod ({0}) config to use new node '{1}'".format(
+ pod.id, to_node.hostname
+ )
+ )
+ pc.update(
+ pod.id, {'command': 'change_config', 'node': to_node.hostname}
+ )
+ k8s_pod = pc._get_by_id(pod.id)
+ if k8s_pod.status in (POD_STATUSES.stopping, POD_STATUSES.stopped):
+ continue
+
+ current_app.logger.debug(
+ "Restarting pod after migration: {}".format(pod.id))
+ try:
+ pc.update(pod.id, {'command': 'redeploy'})
+        except Exception:
+ current_app.logger.exception(
+ "Failed to redeploy pod after migration: {}".format(pod.id)
+ )
+
+
+def manage_failed_aws_node(node_hostname, ssh, deploy_func):
+ current_app.logger.debug(
+ "Try to migrate from failed node '{0}'".format(node_hostname)
+ )
+
+ failed_node = db.session.query(Node).filter(
+ Node.hostname == node_hostname
+ ).first()
+ if not failed_node:
+ current_app.logger.error(
+ "Node '{0}' not found in DB".format(node_hostname)
+ )
+ return
+
+ reserve_node_hostname, reserve_node_ip, reserve_node_fast = \
+ node_utils_aws.spawn_reserving_node(node_hostname)
+ current_app.logger.debug(
+ 'Created reserve node: {0}'.format(reserve_node_hostname)
+ )
+
+ current_app.logger.debug('Setting failed node ({0}) '
+ 'as unschedulable'.format(node_hostname))
+ node_utils.set_node_schedulable_flag(node_hostname, False)
+
+ current_app.logger.debug(
+ 'Add node to database {0}'.format(reserve_node_hostname)
+ )
+ reserve_node = Node(
+ ip=reserve_node_ip,
+ hostname=reserve_node_hostname,
+ kube_id=failed_node.kube_id,
+ state=NODE_STATUSES.pending,
+ )
+ reserve_node = node_utils.add_node_to_db(reserve_node)
+
+    current_app.logger.debug('Waiting for node {0} to enter the running '
+                             'state'.format(reserve_node_hostname))
+ node_utils_aws.wait_node_running(reserve_node_hostname)
+ current_app.logger.debug(
+ 'Node is running {0}'.format(reserve_node_hostname)
+ )
+    current_app.logger.debug('Waiting for node {0} to become '
+                             'accessible'.format(reserve_node_hostname))
+ target_ssh = node_utils_aws.wait_node_accessible(reserve_node_hostname)
+ current_app.logger.debug(
+ 'Node is accessible {0}'.format(reserve_node_hostname)
+ )
+
+ current_app.logger.debug(
+ "Create logging pod for '{0}'".format(reserve_node_hostname)
+ )
+ ku = User.get_internal()
+ log_pod = service_pods.create_logs_pod(reserve_node_hostname, ku)
+
+ current_app.logger.debug(
+ "Deploying reserving node '{0}'".format(reserve_node_hostname)
+ )
+ current_app.logger.debug(
+ 'Fast deploy' if reserve_node_fast else 'Slow deploy'
+ )
+ deploy_func(reserve_node_fast, reserve_node.id, log_pod['podIP'])
+
+ if ssh:
+ current_app.logger.debug(
+ "Stopping k8s services on node '{0}'".format(node_hostname)
+ )
+ node_utils.stop_node_k8_services(ssh)
+ else:
+ current_app.logger.warning(
+ "Failed node '{0}' is not accessible via ssh. "
+ "Can't stop k8s services on the node.".format(node_hostname))
+
+ current_app.logger.debug(
+ "Moving storage from '{0}' to '{1}'".format(
+ node_hostname, reserve_node_hostname)
+ )
+ if not node_utils.move_aws_node_storage(ssh, target_ssh,
+ failed_node, reserve_node):
+ current_app.logger.error(
+ "Failed to move aws storage from '{0}' to '{1}'".format(
+ node_hostname, reserve_node_hostname)
+ )
+ return
+
+ current_app.logger.debug(
+ "Changing pods selectors to node '{0}'".format(reserve_node_hostname)
+ )
+ change_pods_node_selectors(failed_node, reserve_node)
+
+ current_app.logger.debug(
+ 'Delete node from KuberDock {0}'.format(node_hostname)
+ )
+ try:
+ node_utils.revoke_celery_node_install_task(failed_node)
+ node_utils.delete_node_from_db(failed_node)
+ res = node_utils.remove_node_from_k8s(node_hostname)
+ if res['status'] == 'Failure' and res['code'] != 404:
+ raise Exception('Failed to remove node {0} '
+ 'from Kubernetes'.format(node_hostname))
+ node_utils.remove_node_install_log(node_hostname)
+ node_utils.cleanup_node_network_policies(node_hostname)
+ service_pods.delete_logs_pod(node_hostname)
+ node_utils_aws.terminate_node(node_hostname)
+ except Exception as e:
+ current_app.logger.error(
+ "Failed to delete node '{0}': {1}".format(node_hostname, e)
+ )
+
+ try:
+ db.session.commit()
+ except:
+ db.session.rollback()
+ raise
+
+ current_app.logger.error("Migration to node '{0}' completed "
+ "successfully".format(reserve_node_hostname))
+
+
+def manage_failed_node(node_hostname, ssh, deploy_func):
+ """Does some actions with the failed node if it is possible.
+ Now works only for AWS setup configured with reserved nodes:
+ if some node has failed, then take one from reserve, move there storage
+ of failed node.
+ """
+ if AWS:
+ manage_failed_aws_node(node_hostname, ssh, deploy_func)
+ else:
+ current_app.logger.warning(
+ 'Failed node detected ({0}), '
+ 'but no recovery procedure available.'.format(node_hostname))
diff --git a/kubedock/kapi/node_utils.py b/kubedock/kapi/node_utils.py
index 6d61c9fd9..c7b397776 100644
--- a/kubedock/kapi/node_utils.py
+++ b/kubedock/kapi/node_utils.py
@@ -23,6 +23,7 @@
import json
import uuid
import re
+import os
from flask import current_app
@@ -38,8 +39,11 @@
NODE_INSTALL_LOG_FILE, AWS, CEPH, PD_NAMESPACE, PD_NS_SEPARATOR,
NODE_STORAGE_MANAGE_CMD, ZFS, ETCD_CALICO_HOST_ENDPOINT_KEY_PATH_TEMPLATE,
ETCD_CALICO_HOST_CONFIG_KEY_PATH_TEMPLATE, ETCD_NETWORK_POLICY_NODES,
- KD_NODE_HOST_ENDPOINT_ROLE)
+ KD_NODE_HOST_ENDPOINT_ROLE, NODE_CEPH_AWARE_KUBERDOCK_LABEL,
+ NODE_INSTALL_TASK_ID)
+from ..kd_celery import celery
from .network_policies import get_node_host_endpoint_policy
+from .node import Node as K8SNode
def get_nodes_collection(kube_type=None):
@@ -759,3 +763,200 @@ def cleanup_node_network_policies(node_hostname):
Etcd(ETCD_NETWORK_POLICY_NODES).delete(
'{}-{}'.format(i, node_hostname)
)
+
+
+def stop_node_k8_services(ssh):
+ """Tries to stop docker daemon and kubelet service on the node
+ :param ssh: ssh connection to the node
+ """
+ services = ('kubelet', 'kube-proxy', 'docker')
+ cmd = 'systemctl stop '
+ for service in services:
+ _, o, e = ssh.exec_command(cmd + service)
+ if o.channel.recv_exit_status():
+ current_app.logger.warning(
+ u"Failed to stop service '{}' on a failed node: {}".format(
+ service, e.read())
+ )
+
+
+def stop_aws_storage(ssh):
+ """Stops aws storage (zpool or lvm export),
+ detaches ebs volumes of the storage.
+ """
+ cmd = NODE_STORAGE_MANAGE_CMD + ' export-storage'
+ from kubedock.settings import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+ cmd += ' --detach-ebs '\
+ '--aws-access-key-id {0} --aws-secret-access-key {1}'.format(
+ AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+ _, o, e = ssh.exec_command(cmd)
+ if o.channel.recv_exit_status():
+ current_app.logger.error(
+ 'Failed to stop zpool on AWS node: {0}'.format(e.read())
+ )
+ return False
+ result = o.read()
+ try:
+ data = json.loads(result)
+ except (ValueError, TypeError):
+ current_app.logger.error(
+ 'Unknown answer format from remote script: {0}\n'
+ '==================\nSTDERR:\n{1}'.format(result, e.read())
+ )
+ return False
+ if data['status'] != 'OK':
+ current_app.logger.error(
+ 'Failed to stop storage: {0}'.format(
+ data.get('data', {}).get('message')
+ )
+ )
+ return False
+ return True
+
+
+def import_aws_storage(ssh, volumes, force_detach=False):
+ """Imports aws storage to node given by ssh
+ """
+ cmd = NODE_STORAGE_MANAGE_CMD + ' import-aws-storage'
+ from kubedock.settings import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+ cmd += ' --aws-access-key-id {0} --aws-secret-access-key {1}'.format(
+ AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+ )
+ if force_detach:
+ cmd += ' --force-detach'
+ cmd += ' --ebs-volumes {0}'.format(' '.join(volumes))
+ current_app.logger.debug('Executing command: {}'.format(cmd))
+ _, o, e = ssh.exec_command(cmd)
+ if o.channel.recv_exit_status():
+ current_app.logger.error(
+ u"Failed to import zpool on AWS node: {}".format(e.read())
+ )
+ return False
+ result = o.read()
+ try:
+ data = json.loads(result)
+ except (ValueError, TypeError):
+ current_app.logger.error(
+ 'Unknown answer format from remote script: {0}\n'
+ '==================\nSTDERR:\n{1}'.format(result, e.read())
+ )
+ return False
+ if data['status'] != 'OK':
+ current_app.logger.error(
+ 'Failed to import storage: {0}'.format(
+ data.get('data', {}).get('message')
+ )
+ )
+ return False
+ current_app.logger.debug(
+ 'Result of importing storage: {0}'.format(o.read())
+ )
+ return True
+
+
+def move_aws_node_storage(from_node_ssh, to_node_ssh,
+ from_node, to_node):
+ """Moves EBS volumes with KD zpool from one node to another on AWS setup.
+ """
+    # Flag showing whether volumes should be force-detached before attaching
+    # them to the new instance. It is set when ssh to the source node is
+    # unavailable, so the volumes cannot be detached on that node.
+ force_detach = False
+ if from_node_ssh:
+ # If we can get ssh to the old node, then try to do some preparations
+ # before moving volumes:
+ # * export zpool (also should unmount volumes)
+ # * detach EBS volumes
+ current_app.logger.debug(
+ "Stopping node '{0}' storage".format(from_node.hostname)
+ )
+ if not stop_aws_storage(from_node_ssh):
+ force_detach = True
+ else:
+ current_app.logger.debug(
+ 'Failed node is inaccessible. EBS volumes will be force detached'
+ )
+ force_detach = True
+
+ ls_devices = db.session.query(LocalStorageDevices).filter(
+ LocalStorageDevices.node_id == from_node.id
+ ).all()
+ volumes = [item.volume_name for item in ls_devices]
+ current_app.logger.debug(
+ "Importing on node '{0}' volumes: {1}".format(to_node.hostname,
+ volumes)
+ )
+ if not import_aws_storage(to_node_ssh, volumes, force_detach=force_detach):
+ return False
+ for dev in ls_devices:
+ dev.node_id = to_node.id
+ return True
+
+
+def add_node_to_k8s(host, kube_type, is_ceph_installed=False):
+ """
+ :param host: Node hostname
+ :param kube_type: Kuberdock kube type (integer id)
+ :return: Error text if error else False
+ """
+ # TODO handle connection errors except requests.RequestException
+ data = {
+ 'metadata': {
+ 'name': host,
+ 'labels': {
+ 'kuberdock-node-hostname': host,
+ 'kuberdock-kube-type': 'type_' + str(kube_type)
+ },
+ 'annotations': {
+ K8SNode.FREE_PUBLIC_IP_COUNTER_FIELD: '0'
+ }
+ },
+ 'spec': {
+ 'externalID': host,
+ }
+ }
+ if is_ceph_installed:
+ data['metadata']['labels'][NODE_CEPH_AWARE_KUBERDOCK_LABEL] = 'True'
+ res = requests.post(get_api_url('nodes', namespace=False),
+ json=data)
+ return res.text if not res.ok else False
+
+
+def set_node_schedulable_flag(node_hostname, schedulable):
+ """Marks given node as schedulable or unschedulable depending of
+ 'schedulable' flag value.
+ :param node_hostname: name of the node in kubernetes
+ :param schedulable: bool flag, if it is True, then node will be marked as
+ schedulable, if Flase - unschedulable
+ """
+ url = get_api_url('nodes', node_hostname, namespace=False)
+ try_times = 100
+ for _ in range(try_times):
+ try:
+ node = requests.get(url).json()
+ node['spec']['unschedulable'] = not schedulable
+ res = requests.put(url, data=json.dumps(node))
+ except (requests.RequestException, ValueError, KeyError):
+ continue
+ if res.ok:
+ return True
+ return False
+
+
+def remove_node_from_k8s(host):
+ r = requests.delete(get_api_url('nodes', host, namespace=False))
+ return r.json()
+
+
+def remove_node_install_log(hostname):
+ try:
+ os.remove(NODE_INSTALL_LOG_FILE.format(hostname))
+ except OSError:
+ pass
+
+
+def revoke_celery_node_install_task(node):
+ celery.control.revoke(
+ task_id=NODE_INSTALL_TASK_ID.format(node.hostname, node.id),
+ terminate=True,
+ )
diff --git a/kubedock/kapi/node_utils_aws.py b/kubedock/kapi/node_utils_aws.py
new file mode 100644
index 000000000..4264776b0
--- /dev/null
+++ b/kubedock/kapi/node_utils_aws.py
@@ -0,0 +1,383 @@
+import hashlib
+import socket
+import time
+
+from boto import ec2
+from boto.ec2 import blockdevicemapping as ec2_bdm
+from boto.ec2 import networkinterface as ec2_ni
+
+from kubedock.core import db, ssh_connect
+from kubedock.kapi.node_utils import (
+ add_node_to_k8s,
+ complete_calico_node_config,
+)
+from kubedock.nodes.models import Node
+from kubedock.rbac.models import Role
+from kubedock.settings import (
+ AWS,
+ AWS_ACCESS_KEY_ID,
+ AWS_SECRET_ACCESS_KEY,
+ AWS_INSTANCE_RUNNING_INTERVAL,
+ AWS_INSTANCE_RUNNING_MAX_ATTEMTPS,
+ FIXED_IP_POOLS,
+ MASTER_IP,
+ NODE_INSTALL_LOG_FILE,
+ NODE_INSTALL_TIMEOUT_SEC,
+ NODE_SSH_COMMAND_SHORT_EXEC_TIMEOUT,
+ REGION,
+ SSH_PUB_FILENAME,
+ IS_PRODUCTION_PKG,
+)
+from kubedock.system_settings.models import SystemSettings
+from kubedock.users.models import User, SessionData
+from kubedock.utils import (
+ NODE_STATUSES,
+ get_timezone,
+ get_version,
+ send_event,
+ send_logs,
+)
+
+
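+# Stock CentOS AMI ids per region, used as a fallback base image when no
+# prebuilt KuberDock node AMI is found for the current version
+# (see get_node_data).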
+CENTOS_AMI = {
+ 'ap-northeast-1': 'ami-eec1c380',
+ 'ap-northeast-2': 'ami-c74789a9',
+ 'ap-southeast-1': 'ami-f068a193',
+ 'ap-southeast-2': 'ami-fedafc9d',
+ 'eu-west-1': 'ami-7abd0209',
+ 'eu-central-1': 'ami-9bf712f4',
+ 'sa-east-1': 'ami-26b93b4a',
+ 'us-east-1': 'ami-6d1c2007',
+ 'us-west-1': 'ami-af4333cf',
+ 'us-west-2': 'ami-d2c924b2',
+}
+
+
+HASH_FILES = [
+ 'aws-kd-deploy/aws-kd-ami.sh',
+ 'deploy.sh',
+ 'kuberdock.spec',
+ 'node_install.sh',
+ 'node_install_ami.sh',
+ 'node_install_common.sh',
+ 'node_prepare_ami.sh',
+]
+
+
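+# User data for a newly spawned instance: it writes the master's public key
+# (the nginx user's id_rsa.pub) into root's authorized_keys so the master can
+# reach the node over ssh.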
+USER_DATA_TEMPLATE = """\
+#!/bin/bash
+echo "{0}" > /root/.ssh/authorized_keys
+"""
+
+
+class AMIException(Exception):
+ pass
+
+
+def create_instance(image_id, node_root_disk_size, node_root_disk_type,
+ iam_profile_node_arn, node_size, subnet_id,
+ aws_ssh_key_name, security_group_ids, tags):
+ connection = get_connection()
+
+ image = connection.get_image(image_id=image_id)
+
+ block_device = ec2_bdm.EBSBlockDeviceType()
+ block_device.delete_on_termination = True
+ block_device.size = node_root_disk_size
+ block_device.volume_type = node_root_disk_type
+
+ block_device_map = ec2_bdm.BlockDeviceMapping()
+ block_device_map[image.root_device_name] = block_device
+
+ network_interface = ec2_ni.NetworkInterfaceSpecification(
+ subnet_id=subnet_id,
+ groups=security_group_ids,
+ associate_public_ip_address=True,
+ )
+ network_interfaces = ec2_ni.NetworkInterfaceCollection(network_interface)
+
+ user_data = get_user_data()
+
+ reservation = connection.run_instances(
+ image_id=image_id,
+ instance_profile_arn=iam_profile_node_arn,
+ instance_type=node_size,
+ key_name=aws_ssh_key_name,
+ block_device_map=block_device_map,
+ network_interfaces=network_interfaces,
+ user_data=user_data,
+ )
+
+ instance = reservation.instances[0]
+
+ connection.create_tags(resource_ids=[instance.id], tags=tags)
+ instance.modify_attribute(attribute='sourceDestCheck', value=False)
+
+ return instance
+
+
+def deploy_node(node_id, log_pod_ip):
+ db_node = Node.get_by_id(node_id)
+ admin_rid = Role.query.filter_by(rolename='Admin').one().id
+ channels = [i.id for i in SessionData.query.filter_by(role_id=admin_rid)]
+ initial_evt_sent = False
+ host = db_node.hostname
+ kube_type = db_node.kube_id
+ cpu_multiplier = SystemSettings.get_by_name('cpu_multiplier')
+ memory_multiplier = SystemSettings.get_by_name('memory_multiplier')
+ ku = User.get_internal()
+ token = ku.get_token()
+ with open(NODE_INSTALL_LOG_FILE.format(host), 'w') as log_file:
+ try:
+ timezone = get_timezone()
+ except OSError as e:
+ raise AMIException('Cannot get master timezone: {0}'.format(e))
+
+ send_logs(
+ db_node.id,
+ 'Connecting to {0} with ssh with user "root" ...'.format(host),
+ log_file,
+ channels
+ )
+
+ ssh, err = ssh_connect(host)
+ if err:
+ raise AMIException(err)
+
+ sftp = ssh.open_sftp()
+ sftp.get_channel().settimeout(NODE_SSH_COMMAND_SHORT_EXEC_TIMEOUT)
+ sftp.put('/etc/kubernetes/configfile_for_nodes',
+ '/etc/kubernetes/configfile')
+ sftp.put('/etc/pki/etcd/ca.crt', '/etc/pki/etcd/ca.crt')
+ sftp.put('/etc/pki/etcd/etcd-client.crt',
+ '/etc/pki/etcd/etcd-client.crt')
+ sftp.put('/etc/pki/etcd/etcd-client.key',
+ '/etc/pki/etcd/etcd-client.key')
+ sftp.put('/etc/pki/etcd/etcd-dns.crt', '/etc/pki/etcd/etcd-dns.crt')
+ sftp.put('/etc/pki/etcd/etcd-dns.key', '/etc/pki/etcd/etcd-dns.key')
+ sftp.close()
+
+ deploy_vars = {
+ 'NODE_IP': db_node.ip,
+ 'AWS': AWS,
+ 'MASTER_IP': MASTER_IP,
+ 'TZ': timezone,
+ 'NODENAME': host,
+ 'CPU_MULTIPLIER': cpu_multiplier,
+ 'MEMORY_MULTIPLIER': memory_multiplier,
+ 'FIXED_IP_POOLS': FIXED_IP_POOLS,
+ 'TOKEN': token,
+ 'LOG_POD_IP': log_pod_ip,
+ }
+ deploy_cmd = 'bash /node_install_ami.sh'
+
+ set_vars_str = ' '.join('{key}="{value}"'.format(key=k, value=v)
+ for k, v in deploy_vars.items())
+ cmd = '{set_vars} {deploy_cmd}'.format(set_vars=set_vars_str,
+ deploy_cmd=deploy_cmd)
+
+ s_time = time.time()
+ i, o, e = ssh.exec_command(cmd,
+ timeout=NODE_INSTALL_TIMEOUT_SEC,
+ get_pty=True)
+ try:
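+            # Stream installer output to the admin channels as it arrives,
+            # enforcing NODE_INSTALL_TIMEOUT_SEC as an overall time limit.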
+ while not o.channel.exit_status_ready():
+ data = o.channel.recv(1024)
+ while data:
+ if not initial_evt_sent:
+ send_event('node:change', {'id': db_node.id})
+ initial_evt_sent = True
+ for line in data.split('\n'):
+ send_logs(db_node.id, line, log_file, channels)
+ data = o.channel.recv(1024)
+ if (time.time() - s_time) > NODE_INSTALL_TIMEOUT_SEC:
+ raise socket.timeout()
+ time.sleep(0.2)
+ except socket.timeout:
+ raise AMIException('Timeout hit during node install '
+ '{0} cmd'.format(cmd))
+
+ ret_code = o.channel.recv_exit_status()
+ if ret_code != 0:
+ raise AMIException(
+ 'Node install failed cmd {0} '
+ 'with retcode {1}'.format(cmd, ret_code)
+ )
+
+ complete_calico_node_config(host, db_node.ip)
+
+ time.sleep(2)
+
+ err = add_node_to_k8s(host, kube_type)
+ if err:
+            raise AMIException('ERROR adding node: {0}'.format(err))
+
+        send_logs(db_node.id, 'Adding Node completed successfully.',
+ log_file, channels)
+ send_logs(db_node.id, '===================================',
+ log_file, channels)
+
+ ssh.close()
+
+ db_node.state = NODE_STATUSES.completed
+ db.session.commit()
+ send_event('node:change', {'id': db_node.id})
+
+
+def detect_ami_image(role):
+ connection = get_connection()
+
+ try:
+ images = connection.get_all_images(
+ filters={
+ 'tag:KuberDockAmiVersion': get_ami_version(),
+ 'tag:KuberDockClusterRole': role,
+ }
+ )
+ image = images[0]
+ except (AttributeError, IndexError, TypeError) as e:
+ raise AMIException(e)
+
+ return image
+
+
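+# Keep in sync with ami_hash() in aws-kd-deploy/aws-kd-ami.sh: the same file
+# list is hashed (md5, first 8 hex characters) to version non-release AMIs.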
+def get_ami_hash():
+ md5 = hashlib.md5()
+ for f in HASH_FILES:
+ with open(f) as fd:
+ md5.update(fd.read())
+ return md5.hexdigest()[:8]
+
+
+def get_ami_version():
+ return get_version('kuberdock') if IS_PRODUCTION_PKG else get_ami_hash()
+
+
+def get_connection():
+ connection = ec2.connect_to_region(
+ REGION,
+ aws_access_key_id=AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+ )
+ if connection is None:
+ raise AMIException('Unable to connect to region {0}'.format(REGION))
+ return connection
+
+
+def get_instance(hostname):
+ connection = get_connection()
+
+ try:
+ instances = connection.get_only_instances(
+ filters={'private_dns_name': hostname}
+ )
+ instance = instances[0]
+ except (AttributeError, IndexError, TypeError) as e:
+ raise AMIException(e)
+
+ return instance
+
+
+def get_instance_profile_arn(instance):
+ try:
+ instance_profile_arn = instance.instance_profile['arn']
+ except (AttributeError, KeyError) as e:
+ raise AMIException(e)
+ return instance_profile_arn
+
+
+def get_nginx_id_rsa_pub():
+ with open(SSH_PUB_FILENAME) as id_rsa_pub_file:
+ id_rsa_pub = id_rsa_pub_file.read()
+ return id_rsa_pub
+
+
+def get_node_data(hostname):
+ fast = True
+ try:
+ image = detect_ami_image('node')
+ except AMIException:
+ fast = False
+ image = CENTOS_AMI.get(REGION)
+ if image is None:
+ raise AMIException(
+ 'There is no CentOS AMI for region {0}'.format(REGION)
+ )
+ instance = get_instance(hostname)
+ iam_profile = get_instance_profile_arn(instance)
+ root_volume = get_root_volume(instance)
+
+ data = dict(
+ image_id=image.id,
+ node_root_disk_size=root_volume.size,
+ node_root_disk_type=root_volume.type,
+ iam_profile_node_arn=iam_profile,
+ node_size=instance.instance_type,
+ subnet_id=instance.subnet_id,
+ aws_ssh_key_name=instance.key_name,
+ security_group_ids=[group.id for group in instance.groups],
+ tags=instance.tags,
+ )
+
+ return data, fast
+
+
+def get_root_volume(instance):
+ connection = get_connection()
+
+ try:
+ block_device = instance.block_device_mapping[instance.root_device_name]
+ volumes = connection.get_all_volumes(
+ filters={'volume_id': block_device.volume_id}
+ )
+ volume = volumes[0]
+ except (AttributeError, IndexError, KeyError) as e:
+ raise AMIException(e)
+
+ return volume
+
+
+def get_user_data():
+ nginx_id_rsa_pub = get_nginx_id_rsa_pub()
+ user_data = USER_DATA_TEMPLATE.format(nginx_id_rsa_pub)
+ return user_data
+
+
+def spawn_reserving_node(hostname):
+ node_data, fast = get_node_data(hostname)
+ instance = create_instance(**node_data)
+ return instance.private_dns_name, instance.private_ip_address, fast
+
+
+def terminate_node(hostname):
+ instance = get_instance(hostname)
+ instance.connection.terminate_instances([instance.id])
+
+
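+# The wait_* helpers below poll up to AWS_INSTANCE_RUNNING_MAX_ATTEMTPS times
+# with AWS_INSTANCE_RUNNING_INTERVAL seconds between attempts before failing.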
+def wait_node_accessible(hostname):
+ attempt = AWS_INSTANCE_RUNNING_MAX_ATTEMTPS
+ while attempt > 0:
+ ssh, err = ssh_connect(hostname)
+ if err is None:
+ break
+ attempt -= 1
+ time.sleep(AWS_INSTANCE_RUNNING_INTERVAL)
+ else:
+ raise AMIException(
+ 'Timeout waiting node {0} to be accessible'.format(hostname)
+ )
+ return ssh
+
+
+def wait_node_running(hostname):
+ attempt = AWS_INSTANCE_RUNNING_MAX_ATTEMTPS
+ while attempt > 0:
+ instance = get_instance(hostname)
+ if instance.state == 'running':
+ break
+ attempt -= 1
+ time.sleep(AWS_INSTANCE_RUNNING_INTERVAL)
+ else:
+ raise AMIException(
+ 'Timeout waiting node {0} to be running'.format(hostname)
+ )
diff --git a/kubedock/kapi/nodes.py b/kubedock/kapi/nodes.py
index c103464d8..83122cff5 100644
--- a/kubedock/kapi/nodes.py
+++ b/kubedock/kapi/nodes.py
@@ -20,56 +20,45 @@
"""Management functions for nodes.
"""
-import math
import os
import socket
from flask import current_app
-from sqlalchemy.exc import IntegrityError
from kubedock.exceptions import InternalPodsCreationError
from . import ingress
from . import pstorage
-from .network_policies import get_dns_policy_config, get_logs_policy_config
from .node_utils import (
add_node_to_db, delete_node_from_db, remove_ls_storage,
- cleanup_node_network_policies)
-from .podcollection import PodCollection
+ cleanup_node_network_policies, revoke_celery_node_install_task,
+ remove_node_install_log, remove_node_from_k8s)
+from .service_pods import (
+ create_dns_pod,
+ create_logs_pod,
+ create_policy_pod,
+ delete_logs_pod,
+)
from .. import tasks
from ..billing import kubes_to_limits
from ..billing.models import Kube
from ..core import db, ExclusiveLockContextManager
from ..domains.models import BaseDomain
from ..exceptions import APIError
-from ..kd_celery import celery
from ..nodes.models import Node, NodeFlag, NodeFlagNames
-from ..pods.models import Pod, PodIP
+from ..pods.models import PodIP
from ..settings import (
- MASTER_IP, KUBERDOCK_SETTINGS_FILE, KUBERDOCK_INTERNAL_USER,
- ELASTICSEARCH_REST_PORT, NODE_INSTALL_LOG_FILE, NODE_INSTALL_TASK_ID,
- ETCD_NETWORK_POLICY_SERVICE, ELASTICSEARCH_PUBLISH_PORT, CALICO,
- ZFS, AWS, ETCD_CACERT, DNS_CLIENT_CRT, DNS_CLIENT_KEY, DNS_SERVICE_IP)
+ MASTER_IP, KUBERDOCK_SETTINGS_FILE, NODE_INSTALL_LOG_FILE,
+ NODE_INSTALL_TASK_ID, ZFS, AWS)
from ..users.models import User
from ..utils import (
- Etcd,
send_event_to_role,
run_ssh_command,
- retry,
NODE_STATUSES,
get_node_token,
ip2int,
get_current_host_ips,
get_node_interface,
)
-from ..validation import check_internal_pod_data
-
-KUBERDOCK_DNS_POD_NAME = 'kuberdock-dns'
-KUBERDOCK_POLICY_POD_NAME = 'kuberdock-policy-agent'
-KUBERDOCK_LOGS_MEMORY_LIMIT = 256 * 1024 * 1024
-
-
-def get_kuberdock_logs_pod_name(node):
- return 'kuberdock-logs-{0}'.format(node)
def create_node(ip, hostname, kube_id, public_interface=None,
@@ -168,120 +157,6 @@ def _create_internal_pods(hostname, token):
return log_pod
-def create_logs_pod(hostname, owner):
- def _create_pod():
- pod_name = get_kuberdock_logs_pod_name(hostname)
- dbpod = db.session.query(Pod).filter(Pod.name == pod_name,
- Pod.owner_id == owner.id).first()
- if dbpod:
- return PodCollection(owner).get(dbpod.id, as_json=False)
-
- try:
- logs_kubes = 1
- logcollector_kubes = logs_kubes
- logstorage_kubes = logs_kubes
- node_resources = kubes_to_limits(
- logs_kubes, Kube.get_internal_service_kube_type()
- )['resources']
- logs_memory_limit = node_resources['limits']['memory']
- if logs_memory_limit < KUBERDOCK_LOGS_MEMORY_LIMIT:
- logs_kubes = int(math.ceil(
- float(KUBERDOCK_LOGS_MEMORY_LIMIT) / logs_memory_limit
- ))
-
- if logs_kubes > 1:
- # allocate total log cubes to log collector and to log
- # storage/search containers as 1 : 3
- total_kubes = logs_kubes * 2
- logcollector_kubes = int(math.ceil(float(total_kubes) / 4))
- logstorage_kubes = total_kubes - logcollector_kubes
- internal_ku_token = owner.get_token()
-
- logs_config = get_kuberdock_logs_config(
- hostname, pod_name,
- Kube.get_internal_service_kube_type(),
- logcollector_kubes,
- logstorage_kubes,
- MASTER_IP,
- internal_ku_token
- )
- check_internal_pod_data(logs_config, owner)
- logs_pod = PodCollection(owner).add(logs_config, skip_check=True)
- pod_id = logs_pod['id']
- PodCollection(owner).update(
- pod_id, {'command': 'synchronous_start'}
- )
- if CALICO:
- logs_policy = get_logs_policy_config(
- owner.id, pod_id, pod_name)
- Etcd(ETCD_NETWORK_POLICY_SERVICE).put(
- pod_name, value=logs_policy)
- return PodCollection(owner).get(pod_id, as_json=False)
-
- except (IntegrityError, APIError):
- # Either pod already exists or an error occurred during it's
- # creation - log and retry
- current_app.logger.exception(
- 'During "{}" node creation tried to create a Logs service '
- 'pod but got an error.'.format(hostname))
-
- return retry(_create_pod, 1, 5, exc=APIError('Could not create Log '
- 'service POD'))
-
-
-def create_dns_pod(hostname, owner):
- def _create_pod():
- if db.session.query(Pod) \
- .filter_by(name=KUBERDOCK_DNS_POD_NAME, owner=owner).first():
- return True
-
- try:
- dns_config = get_dns_pod_config()
- check_internal_pod_data(dns_config, owner)
- dns_pod = PodCollection(owner).add(dns_config, skip_check=True)
- PodCollection(owner).update(dns_pod['id'],
- {'command': 'synchronous_start'})
- if CALICO:
- dns_policy = get_dns_policy_config(owner.id, dns_pod['id'])
- Etcd(ETCD_NETWORK_POLICY_SERVICE).put(
- KUBERDOCK_DNS_POD_NAME, value=dns_policy
- )
- return True
- except (IntegrityError, APIError):
- # Either pod already exists or an error occurred during it's
- # creation - log and retry
- current_app.logger.exception(
- 'During "{}" node creation tried to create a DNS service '
- 'pod but got an error.'.format(hostname))
-
- return retry(_create_pod, 1, 5, exc=APIError('Could not create DNS '
- 'service POD'))
-
-
-def create_policy_pod(hostname, owner, token):
- def _create_pod():
- if db.session.query(Pod).filter_by(
- name=KUBERDOCK_POLICY_POD_NAME, owner=owner).first():
- return True
-
- try:
- policy_conf = get_policy_agent_config(MASTER_IP, token)
- check_internal_pod_data(policy_conf, owner)
- policy_pod = PodCollection(owner).add(policy_conf, skip_check=True)
- PodCollection(owner).update(policy_pod['id'],
- {'command': 'synchronous_start'})
- return True
- except (IntegrityError, APIError):
- # Either pod already exists or an error occurred during it's
- # creation - log and retry
- current_app.logger.exception(
- 'During "{}" node creation tried to create a Network Policy '
- 'service pod but got an error.'.format(hostname))
-
- return retry(_create_pod, 1, 5, exc=APIError('Could not create Network '
- 'Policy service POD'))
-
-
def mark_node_as_being_deleted(node_id):
node = Node.query.filter(Node.id == node_id).first()
if node is None:
@@ -313,20 +188,11 @@ def delete_node(node_id=None, node=None, force=False, verbose=True):
_check_node_can_be_deleted(node)
try:
- celery.control.revoke(task_id=NODE_INSTALL_TASK_ID.format(
- node.hostname, node.id
- ),
- terminate=True)
+ revoke_celery_node_install_task(node)
except Exception as e:
raise APIError('Couldn\'t cancel node deployment. Error: {}'.format(e))
- ku = User.query.filter_by(username=KUBERDOCK_INTERNAL_USER).first()
-
- logs_pod_name = get_kuberdock_logs_pod_name(node.hostname)
- logs_pod = db.session.query(Pod).filter_by(name=logs_pod_name,
- owner=ku).first()
- if logs_pod:
- PodCollection(ku).delete(logs_pod.id, force=True)
+ delete_logs_pod(node.hostname)
try:
delete_node_from_db(node)
@@ -334,15 +200,13 @@ def delete_node(node_id=None, node=None, force=False, verbose=True):
raise APIError(u'Failed to delete node from DB: {}'.format(e))
ls_clean_error = remove_ls_storage(hostname, raise_on_error=False)
- res = tasks.remove_node_by_host(hostname)
+ res = remove_node_from_k8s(hostname)
if res['status'] == 'Failure' and res['code'] != 404:
raise APIError('Failed to delete node in k8s: {0}, code: {1}. \n'
'Please check and remove it manually if needed.'
.format(res['message'], res['code']))
- try:
- os.remove(NODE_INSTALL_LOG_FILE.format(hostname))
- except OSError:
- pass
+
+ remove_node_install_log(hostname)
try:
cleanup_node_network_policies(hostname)
@@ -478,326 +342,3 @@ def _deploy_node(dbnode, log_pod_ip, do_deploy, with_testing,
else:
dbnode.state = NODE_STATUSES.completed
db.session.commit()
-
-
-def get_kuberdock_logs_config(node, name, kube_type,
- collector_kubes, storage_kubes, master_ip,
- internal_ku_token):
- # Give 2/3 of elastic kubes limits to elastic heap. It's recommended do not
- # give all memory to the heap, and leave some to Lucene.
- es_memory_limit = kubes_to_limits(
- storage_kubes, kube_type
- )['resources']['limits']['memory']
- es_heap_limit = (es_memory_limit * 2) / 3
- return {
- "name": name,
- "replicas": 1,
- "kube_type": kube_type,
- "node": node,
- "restartPolicy": "Always",
- "volumes": [
- {
- "name": "docker-containers",
- # path is allowed only for kuberdock-internal
- "localStorage": {"path": "/var/lib/docker/containers"}
- },
- {
- "name": "es-persistent-storage",
- # path is allowed only for kuberdock-internal
- "localStorage": {"path": "/var/lib/elasticsearch"},
- }
- ],
- "containers": [
- {
- "command": ["./run.sh"],
- "kubes": collector_kubes,
- "image": "kuberdock/fluentd:1.8",
- "name": "fluentd",
- "env": [
- {
- "name": "NODENAME",
- "value": node
- },
- {
- "name": "ES_HOST",
- "value": "127.0.0.1"
- }
- ],
- "ports": [
- {
- "isPublic": False,
- "protocol": "UDP",
- "containerPort": 5140,
- "hostPort": 5140
- }
- ],
- "volumeMounts": [
- {
- "name": "docker-containers",
- "mountPath": "/var/lib/docker/containers"
- }
- ],
- "workingDir": "/root",
- "terminationMessagePath": None
- },
- {
- "kubes": storage_kubes,
- "image": "kuberdock/elasticsearch:2.2",
- "name": "elasticsearch",
- "env": [
- {
- "name": "MASTER",
- "value": master_ip
- },
- {
- "name": "TOKEN",
- "value": internal_ku_token
- },
- {
- "name": "NODENAME",
- "value": node
- },
- {
- "name": "ES_HEAP_SIZE",
- "value": "{}m".format(es_heap_limit / (1024 * 1024))
- }
- ],
- "ports": [
- {
- "isPublic": False,
- "protocol": "TCP",
- "containerPort": ELASTICSEARCH_REST_PORT,
- "hostPort": ELASTICSEARCH_REST_PORT
- },
- {
- "isPublic": False,
- "protocol": "TCP",
- "containerPort": ELASTICSEARCH_PUBLISH_PORT,
- "hostPort": ELASTICSEARCH_PUBLISH_PORT
- }
- ],
- "volumeMounts": [
- {
- "name": "es-persistent-storage",
- "mountPath": "/elasticsearch/data"
- }
- ],
- "workingDir": "",
- "terminationMessagePath": None
- }
- ]
- }
-
-
-def get_dns_pod_config(domain='kuberdock', ip=DNS_SERVICE_IP):
- """Returns config of k8s DNS service pod."""
- # Based on
- # https://github.com/kubernetes/kubernetes/blob/release-1.2/
- # cluster/addons/dns/skydns-rc.yaml.in
- # TODO AC-3377: migrate on yaml-based templates
- # TODO AC-3378: integrate exechealthz container
- return {
- "name": KUBERDOCK_DNS_POD_NAME,
- "podIP": ip,
- "replicas": 1,
- "kube_type": Kube.get_internal_service_kube_type(),
- "node": None,
- "restartPolicy": "Always",
- "dnsPolicy": "Default",
- "volumes": [
- {
- "name": "kubernetes-config",
- # path is allowed only for kuberdock-internal
- "localStorage": {"path": "/etc/kubernetes"}
- },
- {
- "name": "etcd-pki",
- # path is allowed only for kuberdock-internal
- "localStorage": {"path": "/etc/pki/etcd"}
- }
- ],
- "containers": [
- {
- "name": "etcd",
- "command": [
- "/usr/local/bin/etcd",
- "-data-dir",
- "/var/etcd/data",
- "-listen-client-urls",
- "https://0.0.0.0:2379,http://127.0.0.1:4001",
- "-advertise-client-urls",
- "https://0.0.0.0:2379,http://127.0.0.1:4001",
- "-initial-cluster-token",
- "skydns-etcd",
- "--ca-file",
- ETCD_CACERT,
- "--cert-file",
- "/etc/pki/etcd/etcd-dns.crt",
- "--key-file",
- "/etc/pki/etcd/etcd-dns.key"
- ],
- "kubes": 1,
- "image": "gcr.io/google_containers/etcd-amd64:2.2.1",
- "env": [],
- "ports": [
- {
- "isPublic": False,
- "protocol": "TCP",
- "containerPort": 2379
- }
- ],
- "volumeMounts": [
- {
- "name": "etcd-pki",
- "mountPath": "/etc/pki/etcd"
- }
- ],
- "workingDir": "",
- "terminationMessagePath": None
- },
- {
- "name": "kube2sky",
- "args": [
- "--domain={0}".format(domain),
- "--kubecfg-file=/etc/kubernetes/configfile",
- "--kube-master-url=https://10.254.0.1",
- ],
- "kubes": 1,
- "image": "kuberdock/kube2sky:1.2",
- "env": [],
- "ports": [],
- "volumeMounts": [
- {
- "name": "kubernetes-config",
- "mountPath": "/etc/kubernetes"
- }
- ],
- "workingDir": "",
- "terminationMessagePath": None,
- "readinessProbe": {
- "httpGet": {
- "path": "/readiness",
- "port": 8081,
- "scheme": "HTTP",
- },
- "initialDelaySeconds": 30,
- "timeoutSeconds": 5
- },
- "livenessProbe": {
- "httpGet": {
- "path": "/healthz",
- "port": 8080,
- "scheme": "HTTP"
- },
- "initialDelaySeconds": 60,
- "timeoutSeconds": 5,
- "successThreshold": 1,
- "failureThreshold": 5,
- }
- },
- {
- "name": "skydns",
- "args": [
- "-machines=http://127.0.0.1:4001",
- "-addr=0.0.0.0:53",
- "-ns-rotate=false",
- "-domain={0}.".format(domain)
- ],
- "kubes": 1,
- "image": "gcr.io/google_containers/skydns:2015-10-13-8c72f8c",
- "env": [],
- "ports": [
- {
- "isPublic": False,
- "protocol": "UDP",
- "containerPort": 53
- },
- {
- "isPublic": False,
- "protocol": "TCP",
- "containerPort": 53
- }
- ],
- "volumeMounts": [],
- "workingDir": "",
- "terminationMessagePath": None
- },
- {
- "name": "healthz",
- "image": "gcr.io/google_containers/exechealthz:1.0",
- "args": [
- "-cmd=nslookup {0} 127.0.0.1 >/dev/null".format(domain),
- "-port=8080"
- ],
- "ports": [{
- "protocol": "TCP",
- "containerPort": 8080
- }]
- }
- ]
- }
-
-
-def get_policy_agent_config(master, token):
- return {
- "name": "kuberdock-policy-agent",
- "replicas": 1,
- "kube_type": Kube.get_internal_service_kube_type(),
- "node": None,
- "restartPolicy": "Always",
- "hostNetwork": True,
- "volumes": [
- {
- "name": "etcd-pki",
- # path is allowed only for kuberdock-internal
- "localStorage": {"path": "/etc/pki/etcd"}
- }
- ],
- "containers": [
- {
- "command": [],
- "kubes": 1,
- "image": "kuberdock/k8s-policy-agent:v0.1.4-kd2",
- "name": "policy-agent",
- "env": [
- {
- "name": "ETCD_AUTHORITY",
- "value": "{0}:2379".format(master)
- },
- {
- "name": "ETCD_SCHEME",
- "value": "https"
- },
- {
- "name": "ETCD_CA_CERT_FILE",
- "value": ETCD_CACERT
- },
- {
- "name": "ETCD_CERT_FILE",
- "value": DNS_CLIENT_CRT
- },
- {
- "name": "ETCD_KEY_FILE",
- "value": DNS_CLIENT_KEY
- },
- {
- "name": "K8S_API",
- "value": "https://{0}:6443".format(master)
- },
- {
- "name": "K8S_AUTH_TOKEN",
- "value": token
- }
- ],
- "ports": [],
- "volumeMounts": [
- {
- "name": "etcd-pki",
- "mountPath": "/etc/pki/etcd"
- }
- ],
- "workingDir": "",
- "terminationMessagePath": None
- },
- ]
- }
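
The inline log-file cleanup removed from delete_node() above is now a single remove_node_install_log(hostname) call. The helper itself is not part of this hunk, so the following is only a minimal sketch of what it presumably does, mirroring the removed try/except (the import path is an assumption):

    import os
    from ..settings import NODE_INSTALL_LOG_FILE  # assumed location, as used by kubedock.tasks

    def remove_node_install_log(hostname):
        # Best-effort removal of the node install log; a missing file is not an error.
        try:
            os.remove(NODE_INSTALL_LOG_FILE.format(hostname))
        except OSError:
            pass
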
diff --git a/kubedock/kapi/service_pods.py b/kubedock/kapi/service_pods.py
new file mode 100644
index 000000000..dce7e113f
--- /dev/null
+++ b/kubedock/kapi/service_pods.py
@@ -0,0 +1,479 @@
+import math
+
+from flask import current_app
+from sqlalchemy.exc import IntegrityError
+
+from .network_policies import get_dns_policy_config, get_logs_policy_config
+from .podcollection import PodCollection
+from ..billing import kubes_to_limits
+from ..billing.models import Kube
+from ..core import db
+from ..exceptions import APIError
+from ..pods.models import Pod
+from ..settings import (
+ DNS_CLIENT_CRT,
+ DNS_CLIENT_KEY,
+ DNS_SERVICE_IP,
+ ELASTICSEARCH_PUBLISH_PORT,
+ ELASTICSEARCH_REST_PORT,
+ ETCD_CACERT,
+ ETCD_NETWORK_POLICY_SERVICE,
+ MASTER_IP,
+)
+from ..users.models import User
+from ..utils import Etcd, retry
+from ..validation import check_internal_pod_data
+
+
+KUBERDOCK_DNS_POD_NAME = 'kuberdock-dns'
+KUBERDOCK_POLICY_POD_NAME = 'kuberdock-policy-agent'
+KUBERDOCK_LOGS_MEMORY_LIMIT = 256 * 1024 * 1024
+
+
+def get_kuberdock_logs_pod_name(node):
+ return 'kuberdock-logs-{0}'.format(node)
+
+
+def create_logs_pod(hostname, owner):
+ def _create_pod():
+ pod_name = get_kuberdock_logs_pod_name(hostname)
+ dbpod = db.session.query(Pod).filter(Pod.name == pod_name,
+ Pod.owner_id == owner.id).first()
+ if dbpod:
+ return PodCollection(owner).get(dbpod.id, as_json=False)
+
+ try:
+ logs_kubes = 1
+ logcollector_kubes = logs_kubes
+ logstorage_kubes = logs_kubes
+ node_resources = kubes_to_limits(
+ logs_kubes, Kube.get_internal_service_kube_type()
+ )['resources']
+ logs_memory_limit = node_resources['limits']['memory']
+ if logs_memory_limit < KUBERDOCK_LOGS_MEMORY_LIMIT:
+ logs_kubes = int(math.ceil(
+ float(KUBERDOCK_LOGS_MEMORY_LIMIT) / logs_memory_limit
+ ))
+
+ if logs_kubes > 1:
+ # allocate the total log kubes between the log collector and the log
+ # storage/search containers in a 1:3 ratio
+ total_kubes = logs_kubes * 2
+ logcollector_kubes = int(math.ceil(float(total_kubes) / 4))
+ logstorage_kubes = total_kubes - logcollector_kubes
+ internal_ku_token = owner.get_token()
+
+ logs_config = get_kuberdock_logs_config(
+ hostname, pod_name,
+ Kube.get_internal_service_kube_type(),
+ logcollector_kubes,
+ logstorage_kubes,
+ MASTER_IP,
+ internal_ku_token
+ )
+ check_internal_pod_data(logs_config, owner)
+ logs_pod = PodCollection(owner).add(logs_config, skip_check=True)
+ pod_id = logs_pod['id']
+ PodCollection(owner).update(
+ pod_id, {'command': 'synchronous_start'}
+ )
+ logs_policy = get_logs_policy_config(
+ owner.id, pod_id, pod_name)
+ Etcd(ETCD_NETWORK_POLICY_SERVICE).put(
+ pod_name, value=logs_policy)
+ return PodCollection(owner).get(pod_id, as_json=False)
+
+ except (IntegrityError, APIError):
+ # Either the pod already exists or an error occurred during its
+ # creation - log and retry
+ current_app.logger.exception(
+ 'During "{}" node creation tried to create a Logs service '
+ 'pod but got an error.'.format(hostname))
+
+ return retry(_create_pod, 1, 5, exc=APIError('Could not create Log '
+ 'service POD'))
+
+
+def create_dns_pod(hostname, owner):
+ def _create_pod():
+ if db.session.query(Pod) \
+ .filter_by(name=KUBERDOCK_DNS_POD_NAME, owner=owner).first():
+ return True
+
+ try:
+ dns_config = get_dns_pod_config()
+ check_internal_pod_data(dns_config, owner)
+ dns_pod = PodCollection(owner).add(dns_config, skip_check=True)
+ PodCollection(owner).update(dns_pod['id'],
+ {'command': 'synchronous_start'})
+ dns_policy = get_dns_policy_config(owner.id, dns_pod['id'])
+ Etcd(ETCD_NETWORK_POLICY_SERVICE).put(
+ KUBERDOCK_DNS_POD_NAME, value=dns_policy
+ )
+ return True
+ except (IntegrityError, APIError):
+ # Either the pod already exists or an error occurred during its
+ # creation - log and retry
+ current_app.logger.exception(
+ 'During "{}" node creation tried to create a DNS service '
+ 'pod but got an error.'.format(hostname))
+
+ return retry(_create_pod, 1, 5, exc=APIError('Could not create DNS '
+ 'service POD'))
+
+
+def create_policy_pod(hostname, owner, token):
+ def _create_pod():
+ if db.session.query(Pod).filter_by(
+ name=KUBERDOCK_POLICY_POD_NAME, owner=owner).first():
+ return True
+
+ try:
+ policy_conf = get_policy_agent_config(MASTER_IP, token)
+ check_internal_pod_data(policy_conf, owner)
+ policy_pod = PodCollection(owner).add(policy_conf, skip_check=True)
+ PodCollection(owner).update(policy_pod['id'],
+ {'command': 'synchronous_start'})
+ return True
+ except (IntegrityError, APIError):
+ # Either the pod already exists or an error occurred during its
+ # creation - log and retry
+ current_app.logger.exception(
+ 'During "{}" node creation tried to create a Network Policy '
+ 'service pod but got an error.'.format(hostname))
+
+ return retry(_create_pod, 1, 5, exc=APIError('Could not create Network '
+ 'Policy service POD'))
+
+
+def delete_logs_pod(hostname):
+ ku = User.get_internal()
+
+ logs_pod_name = get_kuberdock_logs_pod_name(hostname)
+ logs_pod = db.session.query(Pod).filter_by(name=logs_pod_name,
+ owner=ku).first()
+ if logs_pod:
+ PodCollection(ku).delete(logs_pod.id, force=True)
+
+
+def get_kuberdock_logs_config(node, name, kube_type,
+ collector_kubes, storage_kubes, master_ip,
+ internal_ku_token):
+ # Give 2/3 of the elastic kubes' memory limit to the elasticsearch heap;
+ # it's recommended not to give all memory to the heap and to leave some for Lucene.
+ es_memory_limit = kubes_to_limits(
+ storage_kubes, kube_type
+ )['resources']['limits']['memory']
+ es_heap_limit = (es_memory_limit * 2) / 3
+ return {
+ "name": name,
+ "replicas": 1,
+ "kube_type": kube_type,
+ "node": node,
+ "restartPolicy": "Always",
+ "volumes": [
+ {
+ "name": "docker-containers",
+ # path is allowed only for kuberdock-internal
+ "localStorage": {"path": "/var/lib/docker/containers"}
+ },
+ {
+ "name": "es-persistent-storage",
+ # path is allowed only for kuberdock-internal
+ "localStorage": {"path": "/var/lib/elasticsearch"},
+ }
+ ],
+ "containers": [
+ {
+ "command": ["./run.sh"],
+ "kubes": collector_kubes,
+ "image": "kuberdock/fluentd:1.8",
+ "name": "fluentd",
+ "env": [
+ {
+ "name": "NODENAME",
+ "value": node
+ },
+ {
+ "name": "ES_HOST",
+ "value": "127.0.0.1"
+ }
+ ],
+ "ports": [
+ {
+ "isPublic": False,
+ "protocol": "UDP",
+ "containerPort": 5140,
+ "hostPort": 5140
+ }
+ ],
+ "volumeMounts": [
+ {
+ "name": "docker-containers",
+ "mountPath": "/var/lib/docker/containers"
+ }
+ ],
+ "workingDir": "/root",
+ "terminationMessagePath": None
+ },
+ {
+ "kubes": storage_kubes,
+ "image": "kuberdock/elasticsearch:2.2",
+ "name": "elasticsearch",
+ "env": [
+ {
+ "name": "MASTER",
+ "value": master_ip
+ },
+ {
+ "name": "TOKEN",
+ "value": internal_ku_token
+ },
+ {
+ "name": "NODENAME",
+ "value": node
+ },
+ {
+ "name": "ES_HEAP_SIZE",
+ "value": "{}m".format(es_heap_limit / (1024 * 1024))
+ }
+ ],
+ "ports": [
+ {
+ "isPublic": False,
+ "protocol": "TCP",
+ "containerPort": ELASTICSEARCH_REST_PORT,
+ "hostPort": ELASTICSEARCH_REST_PORT
+ },
+ {
+ "isPublic": False,
+ "protocol": "TCP",
+ "containerPort": ELASTICSEARCH_PUBLISH_PORT,
+ "hostPort": ELASTICSEARCH_PUBLISH_PORT
+ }
+ ],
+ "volumeMounts": [
+ {
+ "name": "es-persistent-storage",
+ "mountPath": "/elasticsearch/data"
+ }
+ ],
+ "workingDir": "",
+ "terminationMessagePath": None
+ }
+ ]
+ }
+
+
+def get_dns_pod_config(domain='kuberdock', ip=DNS_SERVICE_IP):
+ """Returns config of k8s DNS service pod."""
+ # Based on
+ # https://github.com/kubernetes/kubernetes/blob/release-1.2/
+ # cluster/addons/dns/skydns-rc.yaml.in
+ # TODO AC-3377: migrate on yaml-based templates
+ # TODO AC-3378: integrate exechealthz container
+ return {
+ "name": KUBERDOCK_DNS_POD_NAME,
+ "podIP": ip,
+ "replicas": 1,
+ "kube_type": Kube.get_internal_service_kube_type(),
+ "node": None,
+ "restartPolicy": "Always",
+ "dnsPolicy": "Default",
+ "volumes": [
+ {
+ "name": "kubernetes-config",
+ # path is allowed only for kuberdock-internal
+ "localStorage": {"path": "/etc/kubernetes"}
+ },
+ {
+ "name": "etcd-pki",
+ # path is allowed only for kuberdock-internal
+ "localStorage": {"path": "/etc/pki/etcd"}
+ }
+ ],
+ "containers": [
+ {
+ "name": "etcd",
+ "command": [
+ "/usr/local/bin/etcd",
+ "-data-dir",
+ "/var/etcd/data",
+ "-listen-client-urls",
+ "https://0.0.0.0:2379,http://127.0.0.1:4001",
+ "-advertise-client-urls",
+ "https://0.0.0.0:2379,http://127.0.0.1:4001",
+ "-initial-cluster-token",
+ "skydns-etcd",
+ "--ca-file",
+ ETCD_CACERT,
+ "--cert-file",
+ "/etc/pki/etcd/etcd-dns.crt",
+ "--key-file",
+ "/etc/pki/etcd/etcd-dns.key"
+ ],
+ "kubes": 1,
+ "image": "gcr.io/google_containers/etcd-amd64:2.2.1",
+ "env": [],
+ "ports": [
+ {
+ "isPublic": False,
+ "protocol": "TCP",
+ "containerPort": 2379
+ }
+ ],
+ "volumeMounts": [
+ {
+ "name": "etcd-pki",
+ "mountPath": "/etc/pki/etcd"
+ }
+ ],
+ "workingDir": "",
+ "terminationMessagePath": None
+ },
+ {
+ "name": "kube2sky",
+ "args": [
+ "--domain={0}".format(domain),
+ "--kubecfg-file=/etc/kubernetes/configfile",
+ "--kube-master-url=https://10.254.0.1",
+ ],
+ "kubes": 1,
+ "image": "kuberdock/kube2sky:1.2",
+ "env": [],
+ "ports": [],
+ "volumeMounts": [
+ {
+ "name": "kubernetes-config",
+ "mountPath": "/etc/kubernetes"
+ }
+ ],
+ "workingDir": "",
+ "terminationMessagePath": None,
+ "readinessProbe": {
+ "httpGet": {
+ "path": "/readiness",
+ "port": 8081,
+ "scheme": "HTTP",
+ },
+ "initialDelaySeconds": 30,
+ "timeoutSeconds": 5
+ },
+ "livenessProbe": {
+ "httpGet": {
+ "path": "/healthz",
+ "port": 8080,
+ "scheme": "HTTP"
+ },
+ "initialDelaySeconds": 60,
+ "timeoutSeconds": 5,
+ "successThreshold": 1,
+ "failureThreshold": 5,
+ }
+ },
+ {
+ "name": "skydns",
+ "args": [
+ "-machines=http://127.0.0.1:4001",
+ "-addr=0.0.0.0:53",
+ "-ns-rotate=false",
+ "-domain={0}.".format(domain)
+ ],
+ "kubes": 1,
+ "image": "gcr.io/google_containers/skydns:2015-10-13-8c72f8c",
+ "env": [],
+ "ports": [
+ {
+ "isPublic": False,
+ "protocol": "UDP",
+ "containerPort": 53
+ },
+ {
+ "isPublic": False,
+ "protocol": "TCP",
+ "containerPort": 53
+ }
+ ],
+ "volumeMounts": [],
+ "workingDir": "",
+ "terminationMessagePath": None
+ },
+ {
+ "name": "healthz",
+ "image": "gcr.io/google_containers/exechealthz:1.0",
+ "args": [
+ "-cmd=nslookup {0} 127.0.0.1 >/dev/null".format(domain),
+ "-port=8080"
+ ],
+ "ports": [{
+ "protocol": "TCP",
+ "containerPort": 8080
+ }]
+ }
+ ]
+ }
+
+
+def get_policy_agent_config(master, token):
+ return {
+ "name": "kuberdock-policy-agent",
+ "replicas": 1,
+ "kube_type": Kube.get_internal_service_kube_type(),
+ "node": None,
+ "restartPolicy": "Always",
+ "hostNetwork": True,
+ "volumes": [
+ {
+ "name": "etcd-pki",
+ # path is allowed only for kuberdock-internal
+ "localStorage": {"path": "/etc/pki/etcd"}
+ }
+ ],
+ "containers": [
+ {
+ "command": [],
+ "kubes": 1,
+ "image": "kuberdock/k8s-policy-agent:v0.1.4-kd2",
+ "name": "policy-agent",
+ "env": [
+ {
+ "name": "ETCD_AUTHORITY",
+ "value": "{0}:2379".format(master)
+ },
+ {
+ "name": "ETCD_SCHEME",
+ "value": "https"
+ },
+ {
+ "name": "ETCD_CA_CERT_FILE",
+ "value": ETCD_CACERT
+ },
+ {
+ "name": "ETCD_CERT_FILE",
+ "value": DNS_CLIENT_CRT
+ },
+ {
+ "name": "ETCD_KEY_FILE",
+ "value": DNS_CLIENT_KEY
+ },
+ {
+ "name": "K8S_API",
+ "value": "https://{0}:6443".format(master)
+ },
+ {
+ "name": "K8S_AUTH_TOKEN",
+ "value": token
+ }
+ ],
+ "ports": [],
+ "volumeMounts": [
+ {
+ "name": "etcd-pki",
+ "mountPath": "/etc/pki/etcd"
+ }
+ ],
+ "workingDir": "",
+ "terminationMessagePath": None
+ },
+ ]
+ }
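
A worked example of the sizing logic above: create_logs_pod() scales logs_kubes until the pod gets at least KUBERDOCK_LOGS_MEMORY_LIMIT of memory and then splits the doubled total between fluentd and elasticsearch roughly 1:3, while get_kuberdock_logs_config() gives 2/3 of the elasticsearch memory limit to the ES heap. The per-kube memory value below is hypothetical, purely for illustration:

    import math

    KUBERDOCK_LOGS_MEMORY_LIMIT = 256 * 1024 * 1024       # 256 MiB, as defined above
    memory_per_kube = 64 * 1024 * 1024                    # hypothetical: 64 MiB per internal-service kube

    logs_kubes = int(math.ceil(float(KUBERDOCK_LOGS_MEMORY_LIMIT) / memory_per_kube))  # 4
    total_kubes = logs_kubes * 2                           # 8
    logcollector_kubes = int(math.ceil(float(total_kubes) / 4))   # 2 kubes for fluentd
    logstorage_kubes = total_kubes - logcollector_kubes           # 6 kubes for elasticsearch (1:3)

    es_memory_limit = logstorage_kubes * memory_per_kube   # 384 MiB
    es_heap_limit = (es_memory_limit * 2) / 3               # 256 MiB goes to ES_HEAP_SIZE, the rest is left to Lucene
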
diff --git a/kubedock/kapi/tests/test_elasticsearch_utils.py b/kubedock/kapi/tests/test_elasticsearch_utils.py
index e0972dfcb..031b4cffa 100644
--- a/kubedock/kapi/tests/test_elasticsearch_utils.py
+++ b/kubedock/kapi/tests/test_elasticsearch_utils.py
@@ -33,7 +33,7 @@
from kubedock.settings import KUBE_API_HOST, KUBE_API_PORT
from kubedock.billing.models import Kube
from kubedock.nodes.models import Node
-from kubedock.kapi.nodes import get_kuberdock_logs_pod_name
+from kubedock.kapi.service_pods import get_kuberdock_logs_pod_name
class TestElasticsearchUtils(DBTestCase):
diff --git a/kubedock/kapi/tests/test_nodes.py b/kubedock/kapi/tests/test_nodes.py
index db2d57876..83a02ba1f 100644
--- a/kubedock/kapi/tests/test_nodes.py
+++ b/kubedock/kapi/tests/test_nodes.py
@@ -32,9 +32,8 @@
from kubedock.billing.models import Kube
from kubedock.core import db
from kubedock.exceptions import APIError
-from kubedock.kapi import nodes, network_policies
+from kubedock.kapi import nodes
from kubedock.nodes.models import Node
-from kubedock.users.models import User
from kubedock.pods.models import IPPool
from kubedock.testutils.testcases import DBTestCase
from kubedock.utils import NODE_STATUSES
@@ -74,19 +73,20 @@ def tearDown(self):
@mock.patch.object(nodes, 'get_current_host_ips')
@mock.patch.object(nodes, '_check_node_ip')
@mock.patch.object(nodes, 'PodIP')
+ @mock.patch.object(nodes, 'create_policy_pod')
@mock.patch.object(nodes, 'create_logs_pod')
- @mock.patch.object(nodes, 'Etcd')
+ @mock.patch.object(nodes, 'create_dns_pod')
@mock.patch.object(nodes, 'get_node_token')
@mock.patch.object(nodes, '_check_node_hostname')
@mock.patch.object(nodes, '_deploy_node')
- @mock.patch.object(nodes, 'PodCollection')
@mock.patch.object(nodes.socket, 'gethostbyname')
- def test_create_node(self, gethostbyname_mock, podcollection_mock,
+ def test_create_node(self, gethostbyname_mock,
deploy_node_mock,
check_node_hostname_mock,
get_node_token_mock,
- etcd_mock,
+ create_dns_pod_mock,
create_logs_pod_mock,
+ create_policy_pod_mock,
pod_ip_mock,
check_node_ip_mock,
get_current_host_ips_mock):
@@ -99,13 +99,10 @@ def test_create_node(self, gethostbyname_mock, podcollection_mock,
gethostbyname_mock.return_value = ip
get_current_host_ips_mock.return_value = fake_master_ips
- podcollection_mock.return_value.add.return_value = {'id': 1}
get_node_token_mock.return_value = 'some-token'
log_pod_ip = '123.123.123.123'
create_logs_pod_mock.return_value = {'podIP': log_pod_ip}
- nodes.CALICO = True
-
# add a Node with Pod IP
pod_ip = mock.Mock()
pod_ip.ip_address = 3232235778 # '192.168.1.2'
@@ -135,7 +132,9 @@ def test_create_node(self, gethostbyname_mock, podcollection_mock,
check_node_ip_mock.assert_called_once_with(node.ip, hostname)
get_node_token_mock.assert_called_once_with()
# one call for dns pod
- self.assertEqual(etcd_mock.call_count, 1)
+ self.assertEqual(create_dns_pod_mock.call_count, 1)
+ # one call for policy pod
+ self.assertEqual(create_policy_pod_mock.call_count, 1)
# add a node with the same IP
with self.assertRaises(APIError):
@@ -147,30 +146,9 @@ def test_create_node(self, gethostbyname_mock, podcollection_mock,
with self.assertRaises(APIError):
nodes.create_node(None, 'anotherhost', kube_id)
- @mock.patch.object(network_policies, 'get_calico_ip_tunnel_address')
- @mock.patch.object(nodes, 'Etcd')
- @mock.patch.object(nodes, 'PodCollection')
- def test_create_logs_pod(self, podcollection_mock, etcd_mock,
- get_calico_ip_mock):
- hostname = 'qwerty'
- test_result = 3131313
- pod_id = '424242'
- podcollection_mock.return_value.add.return_value = {'id': pod_id}
- podcollection_mock.return_value.get.return_value = test_result
- get_calico_ip_mock.return_value = '12.12.12.12'
- nodes.CALICO = True
- owner = User.get_internal()
- res = nodes.create_logs_pod(hostname, owner)
- self.assertEqual(res, test_result)
- self.assertEqual(etcd_mock.call_count, 1)
- get_calico_ip_mock.assert_called_once_with()
- podcollection_mock.return_value.update.assert_called_once_with(
- pod_id, {'command': 'synchronous_start'}
- )
-
@mock.patch.object(nodes, 'remove_ls_storage')
- @mock.patch.object(nodes.tasks, 'remove_node_by_host')
- def test_delete_node(self, remove_by_host_mock, remove_ls_storage_mock):
+ @mock.patch.object(nodes, 'remove_node_from_k8s')
+ def test_delete_node(self, remove_node_mock, remove_ls_storage_mock):
"""Test for kapi.nodes.delete_node function."""
node1, node2 = self.add_two_nodes()
id1 = node1.id
@@ -179,7 +157,7 @@ def test_delete_node(self, remove_by_host_mock, remove_ls_storage_mock):
nodes.delete_node(id1)
nodes_ = Node.get_all()
- remove_by_host_mock.assert_called_once_with(node1.hostname)
+ remove_node_mock.assert_called_once_with(node1.hostname)
remove_ls_storage_mock.assert_called_once_with(
node1.hostname, raise_on_error=False)
self.assertEqual(nodes_, [node2])
@@ -188,9 +166,9 @@ def test_delete_node(self, remove_by_host_mock, remove_ls_storage_mock):
nodes.delete_node(id1)
@mock.patch.object(nodes, 'remove_ls_storage')
- @mock.patch.object(nodes.tasks, 'remove_node_by_host')
+ @mock.patch.object(nodes, 'remove_node_from_k8s')
def test_node_cant_be_deleted_in_fixed_ip_pools_mode_with_active_ip_pools(
- self, remove_by_host_mock, remove_ls_storage_mock):
+ self, remove_node_mock, remove_ls_storage_mock):
remove_ls_storage_mock.return_value = ''
current_app.config['FIXED_IP_POOLS'] = True
diff --git a/kubedock/kapi/tests/test_service_pods.py b/kubedock/kapi/tests/test_service_pods.py
new file mode 100644
index 000000000..d2c715bd4
--- /dev/null
+++ b/kubedock/kapi/tests/test_service_pods.py
@@ -0,0 +1,28 @@
+import mock
+
+from kubedock.kapi import network_policies, service_pods
+from kubedock.users.models import User
+from kubedock.testutils.testcases import DBTestCase
+
+
+class TestServicePods(DBTestCase):
+
+ @mock.patch.object(network_policies, 'get_calico_ip_tunnel_address')
+ @mock.patch.object(service_pods, 'Etcd')
+ @mock.patch.object(service_pods, 'PodCollection')
+ def test_create_logs_pod(self, podcollection_mock, etcd_mock,
+ get_calico_ip_mock):
+ hostname = 'qwerty'
+ test_result = 3131313
+ pod_id = '424242'
+ podcollection_mock.return_value.add.return_value = {'id': pod_id}
+ podcollection_mock.return_value.get.return_value = test_result
+ get_calico_ip_mock.return_value = '12.12.12.12'
+ owner = User.get_internal()
+ res = service_pods.create_logs_pod(hostname, owner)
+ self.assertEqual(res, test_result)
+ self.assertEqual(etcd_mock.call_count, 1)
+ get_calico_ip_mock.assert_called_once_with()
+ podcollection_mock.return_value.update.assert_called_once_with(
+ pod_id, {'command': 'synchronous_start'}
+ )
diff --git a/kubedock/network_policies_utils.py b/kubedock/network_policies_utils.py
index 547220abc..04ca62293 100644
--- a/kubedock/network_policies_utils.py
+++ b/kubedock/network_policies_utils.py
@@ -32,7 +32,7 @@
get_tiers,
)
from .kapi.node_utils import complete_calico_node_config
-from .kapi.nodes import (
+from .kapi.service_pods import (
KUBERDOCK_DNS_POD_NAME,
get_kuberdock_logs_pod_name,
)
diff --git a/kubedock/settings.py b/kubedock/settings.py
index 2d8d614a5..a8e4b5ec0 100644
--- a/kubedock/settings.py
+++ b/kubedock/settings.py
@@ -191,6 +191,7 @@ def _get_remote_sentry_settings():
# If None, defaults will be used
SSH_KEY_FILENAME = '/var/lib/nginx/.ssh/id_rsa'
+SSH_PUB_FILENAME = '/var/lib/nginx/.ssh/id_rsa.pub'
INFLUXDB_HOST = os.environ.get('INFLUXDB_HOST', '127.0.0.1')
INFLUXDB_PORT = 8086
@@ -284,12 +285,15 @@ def _get_remote_sentry_settings():
ZFS = False
AWS = False
+AWS_ACCESS_KEY_ID = AWS_SECRET_ACCESS_KEY = REGION = ''
# Default EBS volume type for node storage on AWS.
# Available types are: 'standard', 'io1', 'gp2'
AWS_DEFAULT_EBS_VOLUME_TYPE = 'standard'
AWS_DEFAULT_EBS_VOLUME_IOPS = 1000
# AWS EBS volume types which support provisioned iops
AWS_IOPS_PROVISION_VOLUME_TYPES = ('io1',)
+AWS_INSTANCE_RUNNING_INTERVAL = 3
+AWS_INSTANCE_RUNNING_MAX_ATTEMTPS = 30
MASTER_IP = ''
MASTER_TOBIND_FLANNEL = 'enp0s5'
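
The two new AWS polling settings allow roughly 3 s x 30 = 90 s for an instance to reach the running state. They are presumably consumed by the new node_utils_aws module (not shown in this hunk); a generic sketch of how such a poll loop might use them, with the constant names exactly as defined above:

    import time

    def wait_for_instance_running(is_running,
                                  interval=AWS_INSTANCE_RUNNING_INTERVAL,
                                  max_attempts=AWS_INSTANCE_RUNNING_MAX_ATTEMTPS):
        # is_running is any zero-argument callable returning True once the instance is up.
        for _ in range(max_attempts):
            if is_running():
                return True
            time.sleep(interval)
        return False
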
diff --git a/kubedock/tasks.py b/kubedock/tasks.py
index fcf6f2f15..ec531546b 100644
--- a/kubedock/tasks.py
+++ b/kubedock/tasks.py
@@ -32,10 +32,12 @@
from . import dns_management
from .core import db, ssh_connect
+from .kapi import migrate_pod_utils, node_utils_aws
from .kapi.collect import collect, send
from .kapi.helpers import KubeQuery, raise_if_failure
from .kapi.node import Node as K8SNode
from .kapi.node_utils import (
+ add_node_to_k8s, remove_node_from_k8s,
setup_storage_to_aws_node, add_volume_to_node_ls,
complete_calico_node_config)
from .kapi.pstorage import (
@@ -48,15 +50,16 @@
from .rbac.models import Role
from .settings import (
NODE_INSTALL_LOG_FILE, MASTER_IP, AWS, NODE_INSTALL_TIMEOUT_SEC,
- NODE_CEPH_AWARE_KUBERDOCK_LABEL, CEPH, CEPH_KEYRING_PATH,
+ CEPH, CEPH_KEYRING_PATH,
CEPH_POOL_NAME, CEPH_CLIENT_USER,
KUBERDOCK_INTERNAL_USER, NODE_SSH_COMMAND_SHORT_EXEC_TIMEOUT,
- CALICO, NODE_STORAGE_MANAGE_DIR, ZFS)
+ CALICO, NODE_STORAGE_MANAGE_DIR, ZFS, WITH_TESTING)
from .system_settings.models import SystemSettings
+from .updates.helpers import get_maintenance
from .users.models import SessionData
from .utils import (
update_dict, get_api_url, send_event, send_event_to_role, send_logs,
- k8s_json_object_hook, get_timezone, NODE_STATUSES, POD_STATUSES
+ k8s_json_object_hook, get_timezone, NODE_STATUSES, POD_STATUSES,
)
@@ -125,43 +128,18 @@ def delete_service_nodelay(item, namespace=None):
return r.json()
-def remove_node_by_host(host):
- r = requests.delete(get_api_url('nodes', host, namespace=False))
- return r.json()
-
-
-def add_node_to_k8s(host, kube_type, is_ceph_installed=False):
- """
- :param host: Node hostname
- :param kube_type: Kuberdock kube type (integer id)
- :return: Error text if error else False
- """
- # TODO handle connection errors except requests.RequestException
- data = {
- 'metadata': {
- 'name': host,
- 'labels': {
- 'kuberdock-node-hostname': host,
- 'kuberdock-kube-type': 'type_' + str(kube_type)
- },
- 'annotations': {
- K8SNode.FREE_PUBLIC_IP_COUNTER_FIELD: '0'
- }
- },
- 'spec': {
- 'externalID': host,
- }
- }
- if is_ceph_installed:
- data['metadata']['labels'][NODE_CEPH_AWARE_KUBERDOCK_LABEL] = 'True'
- res = requests.post(get_api_url('nodes', namespace=False),
- json=data)
- return res.text if not res.ok else False
+def deploy_node(fast, node_id, log_pod_ip):
+ if fast:
+ return node_utils_aws.deploy_node(node_id, log_pod_ip)
+ else:
+ return add_new_node(node_id, log_pod_ip, with_testing=WITH_TESTING,
+ skip_storage=True)
@celery.task(base=AddNodeTask)
def add_new_node(node_id, log_pod_ip, with_testing=False, redeploy=False,
- ls_devices=None, ebs_volume=None, deploy_options=None):
+ ls_devices=None, ebs_volume=None, deploy_options=None,
+ skip_storage=False):
db_node = Node.get_by_id(node_id)
admin_rid = Role.query.filter_by(rolename="Admin").one().id
channels = [i.id for i in SessionData.query.filter_by(role_id=admin_rid)]
@@ -194,7 +172,7 @@ def add_new_node(node_id, log_pod_ip, with_testing=False, redeploy=False,
send_logs(node_id, 'Redeploy.', log_file, channels)
send_logs(node_id, 'Remove node {0} from kubernetes...'.format(
host), log_file, channels)
- result = remove_node_by_host(host)
+ result = remove_node_from_k8s(host)
send_logs(node_id, json.dumps(result, indent=2), log_file,
channels)
@@ -219,6 +197,7 @@ def add_new_node(node_id, log_pod_ip, with_testing=False, redeploy=False,
sftp.put('fslimit.py', '/fslimit.py')
sftp.put('make_elastic_config.py', '/make_elastic_config.py')
sftp.put('node_install.sh', '/node_install.sh')
+ sftp.put('node_install_common.sh', '/node_install_common.sh')
if not CALICO:
sftp.put('node_network_plugin.sh', '/node_network_plugin.sh')
sftp.put('node_network_plugin.py', '/node_network_plugin.py')
@@ -345,6 +324,8 @@ def add_new_node(node_id, log_pod_ip, with_testing=False, redeploy=False,
ssh.exec_command('rm /node_install.sh',
timeout=NODE_SSH_COMMAND_SHORT_EXEC_TIMEOUT)
+ ssh.exec_command('rm /node_install_common.sh',
+ timeout=NODE_SSH_COMMAND_SHORT_EXEC_TIMEOUT)
if CEPH:
NodeFlag.save_flag(
@@ -363,10 +344,14 @@ def add_new_node(node_id, log_pod_ip, with_testing=False, redeploy=False,
send_logs(node_id, err_message, log_file, channels)
raise NodeInstallException(err_message)
else:
- send_logs(node_id, 'Setup persistent storage...', log_file,
- channels)
- setup_node_storage(ssh, node_id, ls_devices, ebs_volume,
- channels)
+ if skip_storage:
+ send_logs(node_id, 'Skipping storage setup, the node is being reserved...',
+ log_file, channels)
+ else:
+ send_logs(node_id, 'Setup persistent storage...', log_file,
+ channels)
+ setup_node_storage(ssh, node_id, ls_devices, ebs_volume,
+ channels)
complete_calico_node_config(host, db_node.ip)
@@ -547,23 +532,37 @@ def clean_drives_for_deleted_users():
@celery.task(ignore_result=True)
def check_if_node_down(hostname):
- # In some cases kubelet doesn't post it's status, and restart may help
- # to make it alive. It's a workaround for kubelet bug.
- # TODO: research the bug and remove the workaround
+ """ In some cases kubelet doesn't post its status, and a restart may help
+ to make it alive. It's a workaround for a kubelet bug.
+ TODO: research the bug and remove the workaround
+ """
+ # Do not try to recover if maintenance mode is on
+ if get_maintenance():
+ current_app.logger.warning(
+ "Skipping kubelet recovery on '{}' because maintenance mode is on"
+ .format(hostname)
+ )
+ return
+ node_is_failed = False
ssh, error_message = ssh_connect(hostname, timeout=3)
if error_message:
current_app.logger.debug(
'Failed connect to node %s: %s',
hostname, error_message
)
- return
- i, o, e = ssh.exec_command('systemctl restart kubelet')
- exit_status = o.channel.recv_exit_status()
- if exit_status != 0:
- current_app.logger.debug(
- 'Failed to restart kubelet on node: %s, exit status: %s',
- hostname, exit_status
- )
+ node_is_failed = True
+ ssh = None
+ else:
+ i, o, e = ssh.exec_command('systemctl restart kubelet')
+ exit_status = o.channel.recv_exit_status()
+ if exit_status != 0:
+ node_is_failed = True
+ current_app.logger.debug(
+ 'Failed to restart kubelet on node: %s, exit status: %s',
+ hostname, exit_status
+ )
+ if node_is_failed:
+ migrate_pod_utils.manage_failed_node(hostname, ssh, deploy_node)
@celery.task()
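
deploy_node() above is a thin dispatcher: fast=True takes the AMI-based path in node_utils_aws, anything else falls back to the full add_new_node() install with persistent storage setup skipped, since such a node is apparently being brought up as a reserve. An illustrative pair of calls (the node id and log pod IP are placeholders):

    deploy_node(True, 42, '10.1.1.5')    # AMI fast path: node_utils_aws.deploy_node(42, '10.1.1.5')
    deploy_node(False, 42, '10.1.1.5')   # full install via add_new_node(), storage setup skipped
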
diff --git a/kubedock/updates/health_check.py b/kubedock/updates/health_check.py
index a7edbe59c..ffa450663 100644
--- a/kubedock/updates/health_check.py
+++ b/kubedock/updates/health_check.py
@@ -39,7 +39,7 @@
from kubedock.users.models import User
from kubedock.kapi.node_utils import get_nodes_collection
from kubedock.kapi.podcollection import PodCollection
-from kubedock.kapi.nodes import get_kuberdock_logs_pod_name
+from kubedock.kapi.service_pods import get_kuberdock_logs_pod_name
from kubedock.kapi.helpers import LocalService
from kubedock.kapi import pstorage
from kubedock.pods.models import Pod
diff --git a/node_install.sh b/node_install.sh
index 5d4964e53..f566bcb9d 100644
--- a/node_install.sh
+++ b/node_install.sh
@@ -24,34 +24,15 @@
# IMPORTANT: each package must be installed with separate command because of
# yum incorrect error handling!
-# ========================= DEFINED VARS ===============================
-AWS=${AWS}
-KUBERNETES_CONF_DIR='/etc/kubernetes'
-EXIT_MESSAGE="Installation error."
-KUBE_REPO='/etc/yum.repos.d/kube-cloudlinux.repo'
-KUBE_TEST_REPO='/etc/yum.repos.d/kube-cloudlinux-testing.repo'
-PLUGIN_DIR_BASE='/usr/libexec/kubernetes'
-KD_WATCHER_SERVICE='/etc/systemd/system/kuberdock-watcher.service'
-KD_KERNEL_VARS='/etc/sysctl.d/75-kuberdock.conf'
-KD_RSYSLOG_CONF='/etc/rsyslog.d/kuberdock.conf'
-KD_ELASTIC_LOGS='/var/lib/elasticsearch'
-FSTAB_BACKUP="/var/lib/kuberdock/backups/fstab.pre-swapoff"
-CEPH_VERSION=hammer
-CEPH_BASE='/etc/yum.repos.d/ceph-base'
-CEPH_REPO='/etc/yum.repos.d/ceph.repo'
-
-KD_SSH_GC_PATH="/var/lib/kuberdock/scripts/kd-ssh-gc"
-KD_SSH_GC_LOCK="/var/run/kuberdock-ssh-gc.lock"
-KD_SSH_GC_CMD="flock -n $KD_SSH_GC_LOCK -c '$KD_SSH_GC_PATH;rm $KD_SSH_GC_LOCK'"
-KD_SSH_GC_CRON="@hourly $KD_SSH_GC_CMD >/dev/null 2>&1"
-
-ZFS_MODULES_LOAD_CONF="/etc/modules-load.d/kuberdock-zfs.conf"
-ZFS_POOLS_LOAD_CONF="/etc/modprobe.d/kuberdock-zfs.conf"
-
-NODE_STORAGE_MANAGE_DIR=node_storage_manage
-# ======================= // DEFINED VARS ===============================
+source "$(dirname "${BASH_SOURCE}")/node_install_common.sh"
+ami()
+{
+ [ -n "${AMI}" ]
+}
+check_iface_and_ip()
+{
# check public interface and node ip
if [ -z "$NODE_IP" ]; then
>&2 echo "NODE_IP is not set"
@@ -76,11 +57,13 @@ else
fi
fi
# // check public interface and node ip
+}
+ami || check_iface_and_ip
echo "Set locale to en_US.UTF-8"
export LANG=en_US.UTF-8
-echo "Using MASTER_IP=${MASTER_IP}"
+ami || echo "Using MASTER_IP=${MASTER_IP}"
if [ "$ZFS" = yes ]; then
echo "Using ZFS as storage backend"
elif [ ! -z "$CEPH_CONF" ]; then
@@ -88,8 +71,7 @@ elif [ ! -z "$CEPH_CONF" ]; then
else
echo "Using LVM as storage backend"
fi
-echo "Set time zone to $TZ"
-timedatectl set-timezone "$TZ"
+ami || set_timezone
echo "Deploy started: $(date)"
# This should be as early as possible because outdated metadata (one that was
@@ -100,16 +82,6 @@ yum clean metadata
# ======================== JUST COMMON HELPERS ================================
# SHOULD BE DEFINED FIRST, AND BEFORE USE
# Most common helpers are defined first.
-check_status()
-{
- local temp=$?
- if [ $temp -ne 0 ];then
- echo $EXIT_MESSAGE
- exit $temp
- fi
-}
-
-
yum_wrapper()
{
if [ -z "$WITH_TESTING" ];then
@@ -363,7 +335,7 @@ check_xfs()
}
check_release
-check_mem
+ami || check_mem
check_selinux
check_xfs "/var/lib/docker/overlay"
@@ -378,7 +350,7 @@ if [[ $ERRORS ]]; then
exit 3
fi
-check_disk
+ami || check_disk
if [[ $ERRORS ]]; then
printf "Following noncompliances of KD cluster requirements have been detected:\n"
@@ -387,7 +359,7 @@ if [[ $ERRORS ]]; then
exit 3
fi
-check_disk_for_production
+ami || check_disk_for_production
if [[ $WARNS ]]; then
printf "Warning:\n"
@@ -411,11 +383,6 @@ setup_ntpd ()
local ntp_config="/etc/ntp.conf"
- # Backup ntp.conf before any modifications
- backup_ntp_config="${ntp_config}.kd.backup.$(date --iso-8601=ns --utc)"
- echo "Save current $ntp_config to $backup_ntp_config"
- cp "$ntp_config" "$backup_ntp_config"
-
_sync_time() {
grep '^server' "$ntp_config" | awk '{print $2}' | xargs ntpdate -u
}
@@ -431,11 +398,7 @@ setup_ntpd ()
_sync_time
check_status
- sed -i "/^server /d; /^tinker /d" "$ntp_config"
- # NTP on master server should work at least a few minutes before ntp
- # clients start trusting him. Thus we postpone the sync with it
- echo "server ${MASTER_IP} iburst minpoll 3 maxpoll 4" >> "$ntp_config"
- echo "tinker panic 0" >> "$ntp_config"
+ ami || configure_ntpd
systemctl daemon-reload
systemctl restart ntpd
@@ -667,9 +630,10 @@ fi
# 4 copy kubelet auth token and etcd certs
+copy_kubelet_auth_and_etcd_certs()
+{
echo "Copy certificates and tokens..."
mv /configfile $KUBERNETES_CONF_DIR/configfile
-mkdir -p /etc/pki/etcd
check_status
mv /ca.crt /etc/pki/etcd/
mv /etcd-client.crt /etc/pki/etcd/
@@ -677,6 +641,10 @@ mv /etcd-client.key /etc/pki/etcd/
mv /etcd-dns.crt /etc/pki/etcd/
mv /etcd-dns.key /etc/pki/etcd/
check_status
+}
+
+mkdir -p /etc/pki/etcd
+ami || copy_kubelet_auth_and_etcd_certs
# 4.1 create and populate scripts directory
# TODO refactor this staff to kdnode package or copy folder ones
@@ -734,72 +702,17 @@ setup_cron
mv /${NODE_STORAGE_MANAGE_DIR} /var/lib/kuberdock/scripts/
-
-if [ "$FIXED_IP_POOLS" = True ]; then
- fixed_ip_pools="yes"
-else
- fixed_ip_pools="no"
-fi
-cat < "/var/lib/kuberdock/kuberdock.json"
-{"fixed_ip_pools": "$fixed_ip_pools",
-"master": "$MASTER_IP",
-"node": "$NODENAME",
-"network_interface": "$PUBLIC_INTERFACE",
-"token": "$TOKEN"}
-EOF
+ami || kuberdock_json
# 5. configure Node config
-echo "Configuring kubernetes..."
-sed -i "/^KUBE_MASTER/ {s|http://127.0.0.1:8080|https://${MASTER_IP}:6443|}" $KUBERNETES_CONF_DIR/config
-sed -i '/^KUBELET_HOSTNAME/s/^/#/' $KUBERNETES_CONF_DIR/kubelet
-
-# Kubelet's 10255 port (built-in cadvisor) should be accessible from master,
-# because heapster.service use it to gather data for our "usage statistics"
-# feature. Master-only access is ensured by our cluster-wide firewall
-sed -i "/^KUBELET_ADDRESS/ {s|127.0.0.1|0.0.0.0|}" $KUBERNETES_CONF_DIR/kubelet
-check_status
-
-sed -i "/^KUBELET_API_SERVER/ {s|http://127.0.0.1:8080|https://${MASTER_IP}:6443|}" $KUBERNETES_CONF_DIR/kubelet
-if [ "$AWS" = True ];then
- sed -i '/^KUBELET_ARGS/ {s|""|"--cloud-provider=aws --kubeconfig=/etc/kubernetes/configfile --cluster_dns=10.254.0.10 --cluster_domain=kuberdock --register-node=false --network-plugin=kuberdock --maximum-dead-containers=1 --maximum-dead-containers-per-container=1 --minimum-container-ttl-duration=10s --cpu-cfs-quota=true --cpu-multiplier='${CPU_MULTIPLIER}' --memory-multiplier='${MEMORY_MULTIPLIER}' --node-ip='${NODE_IP}'"|}' $KUBERNETES_CONF_DIR/kubelet
-else
- sed -i '/^KUBELET_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile --cluster_dns=10.254.0.10 --cluster_domain=kuberdock --register-node=false --network-plugin=kuberdock --maximum-dead-containers=1 --maximum-dead-containers-per-container=1 --minimum-container-ttl-duration=10s --cpu-cfs-quota=true --cpu-multiplier='${CPU_MULTIPLIER}' --memory-multiplier='${MEMORY_MULTIPLIER}' --node-ip='${NODE_IP}'"|}' $KUBERNETES_CONF_DIR/kubelet
-fi
-sed -i '/^KUBE_PROXY_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile --proxy-mode iptables"|}' $KUBERNETES_CONF_DIR/proxy
-check_status
-
+ami || configure_kubelet
# 6a. configure Calico CNI plugin
echo "Enabling Calico CNI plugin ..."
yum_wrapper -y install calico-cni-1.3.1-3.el7
-echo >> $KUBERNETES_CONF_DIR/config
-echo "# Calico etcd authority" >> $KUBERNETES_CONF_DIR/config
-echo ETCD_AUTHORITY="$MASTER_IP:2379" >> $KUBERNETES_CONF_DIR/config
-echo ETCD_SCHEME="https" >> $KUBERNETES_CONF_DIR/config
-echo ETCD_CA_CERT_FILE="/etc/pki/etcd/ca.crt" >> $KUBERNETES_CONF_DIR/config
-echo ETCD_CERT_FILE="/etc/pki/etcd/etcd-client.crt" >> $KUBERNETES_CONF_DIR/config
-echo ETCD_KEY_FILE="/etc/pki/etcd/etcd-client.key" >> $KUBERNETES_CONF_DIR/config
-
-TOKEN=$(grep token /etc/kubernetes/configfile | grep -oP '[a-zA-Z0-9]+$')
-
-mkdir -p /etc/cni/net.d
-cat > /etc/cni/net.d/10-calico.conf << EOF
-{
- "name": "calico-k8s-network",
- "type": "calico",
- "log_level": "info",
- "ipam": {
- "type": "calico-ipam"
- },
- "policy": {
- "type": "k8s",
- "k8s_api_root": "https://$MASTER_IP:6443/api/v1/",
- "k8s_auth_token": "$TOKEN"
- }
-}
-EOF
+ami || configure_cni
pushd /var/lib/kuberdock/scripts
python kubelet_args.py --network-plugin=
@@ -829,13 +742,7 @@ EOF
# 8. setup rsyslog forwarding
-echo "Reconfiguring rsyslog..."
-cat > $KD_RSYSLOG_CONF << EOF
-\$LocalHostName $NODENAME
-\$template LongTagForwardFormat,"<%PRI%>%TIMESTAMP:::date-rfc3339% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%"
-*.* @${LOG_POD_IP}:5140;LongTagForwardFormat
-EOF
-
+ami || configure_rsyslog
echo 'Configuring docker...'
@@ -917,7 +824,7 @@ prjquota_enable "/var/lib/kuberdock/storage"
# 10. enable services
echo "Enabling services..."
systemctl daemon-reload
-systemctl reenable kubelet
+ami || systemctl reenable kubelet
check_status
mkdir -p /etc/systemd/system/kube-proxy.service.d
@@ -929,7 +836,7 @@ Restart=always
RestartSec=5s" > /etc/systemd/system/kube-proxy.service.d/restart.conf
systemctl daemon-reload
-systemctl reenable kube-proxy
+ami || systemctl reenable kube-proxy
check_status
# 11. disable swap for best performance
@@ -1039,9 +946,7 @@ docker pull "$CALICO_NODE_IMAGE" > /dev/null
time sync
#sleep 10 # even harder workaround
-echo "Starting Calico node..."
-ETCD_AUTHORITY="$MASTER_IP:2379" ETCD_SCHEME=https ETCD_CA_CERT_FILE=/etc/pki/etcd/ca.crt ETCD_CERT_FILE=/etc/pki/etcd/etcd-client.crt ETCD_KEY_FILE=/etc/pki/etcd/etcd-client.key HOSTNAME="$NODENAME" /opt/bin/calicoctl node --ip="$NODE_IP" --node-image="$CALICO_NODE_IMAGE"
-check_status
+ami || start_calico_node
# 16. Reboot will be executed in python function
echo "Node deploy script finished: $(date)"
diff --git a/node_install_ami.sh b/node_install_ami.sh
new file mode 100644
index 000000000..d8c2c15f0
--- /dev/null
+++ b/node_install_ami.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+source "$(dirname "${BASH_SOURCE}")/node_install_common.sh"
+
+set_timezone
+configure_ntpd
+configure_rsyslog
+configure_cni
+configure_kubelet
+kuberdock_json
+
+for service in ntpd rsyslog kubelet kube-proxy
+do
+ systemctl restart ${service}
+done
+
+start_calico_node
+
+rm -f /node_install_ami.sh
+rm -f /node_install_common.sh
diff --git a/node_install_common.sh b/node_install_common.sh
new file mode 100644
index 000000000..9583d6239
--- /dev/null
+++ b/node_install_common.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+# ========================= DEFINED VARS ===============================
+export AWS=${AWS}
+export KUBERNETES_CONF_DIR='/etc/kubernetes'
+export EXIT_MESSAGE="Installation error."
+export KUBE_REPO='/etc/yum.repos.d/kube-cloudlinux.repo'
+export KUBE_TEST_REPO='/etc/yum.repos.d/kube-cloudlinux-testing.repo'
+export PLUGIN_DIR_BASE='/usr/libexec/kubernetes'
+export KD_WATCHER_SERVICE='/etc/systemd/system/kuberdock-watcher.service'
+export KD_KERNEL_VARS='/etc/sysctl.d/75-kuberdock.conf'
+export KD_RSYSLOG_CONF='/etc/rsyslog.d/kuberdock.conf'
+export KD_ELASTIC_LOGS='/var/lib/elasticsearch'
+export FSTAB_BACKUP="/var/lib/kuberdock/backups/fstab.pre-swapoff"
+export CEPH_VERSION=hammer
+export CEPH_BASE='/etc/yum.repos.d/ceph-base'
+export CEPH_REPO='/etc/yum.repos.d/ceph.repo'
+
+export KD_SSH_GC_PATH="/var/lib/kuberdock/scripts/kd-ssh-gc"
+export KD_SSH_GC_LOCK="/var/run/kuberdock-ssh-gc.lock"
+export KD_SSH_GC_CMD="flock -n ${KD_SSH_GC_LOCK} -c '${KD_SSH_GC_PATH}; rm ${KD_SSH_GC_LOCK}'"
+export KD_SSH_GC_CRON="@hourly ${KD_SSH_GC_CMD} > /dev/null 2>&1"
+
+export ZFS_MODULES_LOAD_CONF="/etc/modules-load.d/kuberdock-zfs.conf"
+export ZFS_POOLS_LOAD_CONF="/etc/modprobe.d/kuberdock-zfs.conf"
+
+export NODE_STORAGE_MANAGE_DIR=node_storage_manage
+# ======================= // DEFINED VARS ===============================
+
+
+check_status()
+{
+ local temp=$?
+ if [ "${temp}" -ne 0 ];then
+ echo "${EXIT_MESSAGE}"
+ exit "${temp}"
+ fi
+}
+
+
+configure_ntpd()
+{
+ local ntp_config="/etc/ntp.conf"
+
+ # Backup ntp.conf before any modifications
+ backup_ntp_config="${ntp_config}.kd.backup.$(date --iso-8601=ns --utc)"
+ echo "Save current ${ntp_config} to ${backup_ntp_config}"
+ cp "${ntp_config}" "${backup_ntp_config}"
+
+ sed -i "/^server /d; /^tinker /d" "${ntp_config}"
+ # NTP on master server should work at least a few minutes before ntp
+ # clients start trusting him. Thus we postpone the sync with it
+ echo "server ${MASTER_IP} iburst minpoll 3 maxpoll 4" >> "${ntp_config}"
+ echo "tinker panic 0" >> "${ntp_config}"
+}
+
+
+kuberdock_json()
+{
+if [ "${FIXED_IP_POOLS}" = True ]; then
+ fixed_ippools="yes"
+else
+ fixed_ippools="no"
+fi
+cat << EOF > "/var/lib/kuberdock/kuberdock.json"
+{"fixed_ip_pools": "${fixed_ippools}",
+"master": "${MASTER_IP}",
+"node": "${NODENAME}",
+"network_interface": "${PUBLIC_INTERFACE}",
+"token": "${TOKEN}"}
+EOF
+}
+
+
+configure_kubelet()
+{
+echo "Configuring kubernetes..."
+sed -i "/^KUBE_MASTER/ {s|http://127.0.0.1:8080|https://${MASTER_IP}:6443|}" "${KUBERNETES_CONF_DIR}"/config
+sed -i '/^KUBELET_HOSTNAME/s/^/#/' "${KUBERNETES_CONF_DIR}"/kubelet
+
+# Kubelet's 10255 port (built-in cadvisor) should be accessible from master,
+# because heapster.service use it to gather data for our "usage statistics"
+# feature. Master-only access is ensured by our cluster-wide firewall
+sed -i "/^KUBELET_ADDRESS/ {s|127.0.0.1|0.0.0.0|}" "${KUBERNETES_CONF_DIR}"/kubelet
+check_status
+
+sed -i "/^KUBELET_API_SERVER/ {s|http://127.0.0.1:8080|https://${MASTER_IP}:6443|}" "${KUBERNETES_CONF_DIR}"/kubelet
+if [ "${AWS}" = True ];then
+ sed -i '/^KUBELET_ARGS/ {s|""|"--cloud-provider=aws --kubeconfig=/etc/kubernetes/configfile --cluster_dns=10.254.0.10 --cluster_domain=kuberdock --register-node=false --network-plugin=kuberdock --maximum-dead-containers=1 --maximum-dead-containers-per-container=1 --minimum-container-ttl-duration=10s --cpu-cfs-quota=true --cpu-multiplier='"${CPU_MULTIPLIER}"' --memory-multiplier='"${MEMORY_MULTIPLIER}"' --node-ip='"${NODE_IP}"'"|}' "${KUBERNETES_CONF_DIR}"/kubelet
+else
+ sed -i '/^KUBELET_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile --cluster_dns=10.254.0.10 --cluster_domain=kuberdock --register-node=false --network-plugin=kuberdock --maximum-dead-containers=1 --maximum-dead-containers-per-container=1 --minimum-container-ttl-duration=10s --cpu-cfs-quota=true --cpu-multiplier='"${CPU_MULTIPLIER}"' --memory-multiplier='"${MEMORY_MULTIPLIER}"' --node-ip='"${NODE_IP}"'"|}' "${KUBERNETES_CONF_DIR}"/kubelet
+fi
+sed -i '/^KUBE_PROXY_ARGS/ {s|""|"--kubeconfig=/etc/kubernetes/configfile --proxy-mode iptables"|}' "$KUBERNETES_CONF_DIR"/proxy
+check_status
+}
+
+
+configure_cni()
+{
+{
+echo
+echo "# Calico etcd authority"
+echo ETCD_AUTHORITY="${MASTER_IP}:2379"
+echo ETCD_SCHEME="https"
+echo ETCD_CA_CERT_FILE="/etc/pki/etcd/ca.crt"
+echo ETCD_CERT_FILE="/etc/pki/etcd/etcd-client.crt"
+echo ETCD_KEY_FILE="/etc/pki/etcd/etcd-client.key"
+} >> "${KUBERNETES_CONF_DIR}"/config
+
+K8S_TOKEN=$(grep token /etc/kubernetes/configfile | grep -oP '[a-zA-Z0-9]+$')
+
+mkdir -p /etc/cni/net.d
+cat > /etc/cni/net.d/10-calico.conf << EOF
+{
+ "name": "calico-k8s-network",
+ "type": "calico",
+ "log_level": "info",
+ "ipam": {
+ "type": "calico-ipam"
+ },
+ "policy": {
+ "type": "k8s",
+ "k8s_api_root": "https://${MASTER_IP}:6443/api/v1/",
+ "k8s_auth_token": "${K8S_TOKEN}"
+ }
+}
+EOF
+}
+
+
+configure_rsyslog()
+{
+echo "Reconfiguring rsyslog..."
+cat > "${KD_RSYSLOG_CONF}" << EOF
+\$LocalHostName ${NODENAME}
+\$template LongTagForwardFormat,"<%PRI%>%TIMESTAMP:::date-rfc3339% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg%"
+*.* @${LOG_POD_IP}:5140;LongTagForwardFormat
+EOF
+}
+
+
+set_timezone()
+{
+echo Set time zone to "${TZ}"
+timedatectl set-timezone "${TZ}"
+}
+
+
+start_calico_node()
+{
+echo "Starting Calico node..."
+ETCD_AUTHORITY="${MASTER_IP}:2379" ETCD_SCHEME=https ETCD_CA_CERT_FILE=/etc/pki/etcd/ca.crt ETCD_CERT_FILE=/etc/pki/etcd/etcd-client.crt ETCD_KEY_FILE=/etc/pki/etcd/etcd-client.key HOSTNAME="${NODENAME}" /opt/bin/calicoctl node --ip="${NODE_IP}" --node-image="${CALICO_NODE_IMAGE}"
+check_status
+}
diff --git a/node_prepare_ami.sh b/node_prepare_ami.sh
new file mode 100644
index 000000000..fe02cfc1f
--- /dev/null
+++ b/node_prepare_ami.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+for image in \
+ gcr.io/google_containers/etcd-amd64:2.2.1 \
+ gcr.io/google_containers/exechealthz:1.0 \
+ gcr.io/google_containers/skydns:2015-10-13-8c72f8c \
+ gcr.io/google_containers/pause:2.0 \
+ jetstack/kube-lego:0.1.3 \
+ kuberdock/calico-node:0.22.0-kd2 \
+ kuberdock/defaultbackend:0.0.1 \
+ kuberdock/elasticsearch:2.2 \
+ kuberdock/fluentd:1.8 \
+ kuberdock/k8s-policy-agent:v0.1.4-kd2 \
+ kuberdock/kube2sky:1.2 \
+ kuberdock/nginx-ingress-controller:0.2.0 \
+ ; do
+ docker pull ${image}
+done
+
+systemctl disable rsyslog
+systemctl stop rsyslog
+
+rm -f /node_install.sh
+
+yum clean packages
diff --git a/node_storage_manage/aws.py b/node_storage_manage/aws.py
index 5b8305290..766c6f131 100644
--- a/node_storage_manage/aws.py
+++ b/node_storage_manage/aws.py
@@ -39,6 +39,7 @@
def get_aws_instance_meta(utils):
+ """Returns some metadata for the instance."""
identity = utils.get_instance_identity()
meta = utils.get_instance_metadata()
return {
@@ -49,6 +50,14 @@ def get_aws_instance_meta(utils):
def get_aws_block_device_mapping(connection, instance_id):
+ """Returns a dict of {<device name>: <block device type>},
+ where device name is a string in the form '/dev/xvdX' and
+ block device type is an object of class
+ boto.ec2.blockdevicemapping.BlockDeviceType
+ (we need only volume_id from it).
+ """
return connection.get_instance_attribute(
instance_id=instance_id,
attribute='blockDeviceMapping'
@@ -56,6 +65,11 @@ def get_aws_block_device_mapping(connection, instance_id):
def detach_ebs(aws_access_key_id, aws_secret_access_key, devices):
+ """Detaches the volumes behind the given devices if they are attached to
+ this instance, and waits until all of them reach the 'detached' state.
+ Returns True if all devices were successfully detached, False otherwise.
+ """
import boto
import boto.ec2
meta = get_aws_instance_meta(boto.utils)
@@ -67,9 +81,99 @@ def detach_ebs(aws_access_key_id, aws_secret_access_key, devices):
bd_mapping = get_aws_block_device_mapping(
connection, meta['instance-id']
)
- for key, bd in bd_mapping.iteritems():
- if key in devices:
- connection.detach_volume(bd.volume_id)
+ volume_ids = [bd.volume_id for key, bd in bd_mapping.iteritems()
+ if key in devices]
+ return _detach_ebs_volumes(connection, volume_ids, force=False)
+
+
+def _detach_ebs_volumes(connection, volume_ids, force=False):
+ for volume_id in volume_ids:
+ connection.detach_volume(volume_id, force=force)
+ if not wait_for_detached_state(connection, volume_ids):
+ return False
+ return True
+
+
+def _volumes_are_available(connection, volume_ids):
+ volumes = connection.get_all_volumes(volume_ids)
+ # In AWS documentation there are the following states:
+ # (attaching | attached | detaching | detached)
+ # But boto actually returns None for a detached volume, so check for
+ # both here.
+ return all(
+ (
+ item.attachment_state() == 'detached' or
+ item.attachment_state() is None
+ ) and
+ item.status == 'available' for item in volumes
+ )
+
+
+def wait_for_detached_state(connection, volume_ids):
+ """Waits until the volumes with the given ids become detached.
+ It checks the volumes' state 180 times with a 1-second pause, so the max
+ wait time is approximately 3 minutes.
+ Returns True if all volumes become detached, False if one or more do not.
+ """
+ retry_count = 60 * 3
+ pause = 1
+ for _ in xrange(retry_count):
+ if _volumes_are_available(connection, volume_ids):
+ return True
+ time.sleep(pause)
+ return False
+
+
+def attach_existing_volumes(call_args):
+ """Attaches volumes to this instance. Volume names must be passed via the
+ call_args.ebs_volumes list.
+ Returns a tuple of a success flag and a list of dicts describing the
+ attached volumes. Each element of this list includes the fields:
+ 'name': <volume name>,
+ 'instance_id': <instance id>,
+ 'device': <device name>
+ """
+ import boto
+ import boto.ec2
+ meta = get_aws_instance_meta(boto.utils)
+ region = meta['region']
+ instance_id = meta['instance-id']
+ connection = boto.ec2.connect_to_region(
+ region,
+ aws_access_key_id=call_args.aws_access_key_id,
+ aws_secret_access_key=call_args.aws_secret_access_key
+ )
+ names_to_attach = call_args.ebs_volumes
+ force_detach = call_args.force_detach
+
+ existing_volumes = connection.get_all_volumes()
+ volumes = []
+ for item in existing_volumes:
+ if item.tags.get('Name', 'Nameless') in names_to_attach:
+ volumes.append(item)
+ if len(volumes) != len(names_to_attach):
+ return False, 'Not all EBS volumes were found'
+
+ volume_ids = [vol.id for vol in volumes]
+ if not _volumes_are_available(connection, volume_ids):
+ if force_detach:
+ _detach_ebs_volumes(connection, volume_ids, force=True)
+ # refresh the volumes, because their state may have changed
+ volumes = connection.get_all_volumes(volume_ids)
+
+ dev_list = []
+ for volume in volumes:
+ try:
+ ok, result = attach_ebs_volume(connection, instance_id, volume)
+ if ok != OK:
+ return False, result
+ dev_list.append(result)
+ except (boto.exception.BotoClientError,
+ boto.exception.BotoServerError) as err:
+ return False, 'Failed to attach volume: {}'.format(err)
+ return True, dev_list
def do_ebs_attach(call_args):
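
attach_existing_volumes() above is driven by an argparse-style namespace carrying AWS credentials, the Name tags of the EBS volumes, and the force_detach flag. A minimal usage sketch (credentials and volume names are placeholders):

    from argparse import Namespace

    args = Namespace(aws_access_key_id='AKIA...', aws_secret_access_key='...',
                     ebs_volumes=['kd-node1-ls-1', 'kd-node1-ls-2'],
                     force_detach=True)
    ok, result = attach_existing_volumes(args)
    if ok:
        # result is a list of dicts with 'name', 'instance_id' and 'device' per attached volume
        print([item['device'] for item in result])
    else:
        print('attach failed: {}'.format(result))
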
diff --git a/node_storage_manage/manage.py b/node_storage_manage/manage.py
index f42328d6d..8d4df6d21 100644
--- a/node_storage_manage/manage.py
+++ b/node_storage_manage/manage.py
@@ -98,7 +98,8 @@
from .storage import (
do_get_info, do_add_volume, do_remove_storage, VOLUME_MANAGE_NAME,
- do_create_volume, do_remove_volume, do_resize_volume
+ do_create_volume, do_remove_volume, do_resize_volume,
+ do_export_storage, do_import_storage,
)
@@ -113,6 +114,28 @@ def _do_remove_storage(call_args):
return ERROR, {'message': u'Remove storage failed: {}'.format(result)}
# we expect device list in result in case of successful storage removing
devices = result
+ if result and call_args.detach_ebs:
+ if not aws.detach_ebs(call_args.aws_access_key_id,
+ call_args.aws_secret_access_key,
+ devices):
+ return ERROR, {'message': 'Failed to detach EBS volumes'}
+ return OK, {
+ 'message': 'Localstorage ({}) has been deleted'
+ .format(VOLUME_MANAGE_NAME)
+ }
+
+
+def _do_export_storage(call_args):
+ """Prepare storage to be exported to another host.
+ Now supported only by ZFS storage backend.
+ TODO: implement for LVM.
+ """
+ ok, result = do_export_storage(call_args)
+ if not ok:
+ return ERROR, {
+ 'message': u'Export storage failed: {}'.format(result)}
+ # we expect device list in result in case of successful storage export
+ devices = result
if result and call_args.detach_ebs:
aws.detach_ebs(
call_args.aws_access_key_id,
@@ -120,10 +143,28 @@ def _do_remove_storage(call_args):
devices
)
return OK, {
- 'message': 'Localstorage ({}) has been deleted'
+ 'message': 'Localstorage ({}) has been exported'
.format(VOLUME_MANAGE_NAME)
}
+
+def _do_import_aws_storage(call_args):
+ """Imports storage on AWS.
+ """
+ ok, result = aws.attach_existing_volumes(call_args)
+ if not ok:
+ return ERROR, {
+ 'message': u'Failed to attach EBS volumes: {}'.format(result)
+ }
+ ok, import_result = do_import_storage(result)
+ if not ok:
+ return ERROR, {
+ 'message': u'Failed to import storage: {}'.format(
+ import_result)
+ }
+ return OK, result
+
+
def _do_resize_volume(call_args):
"""Calls do_resize_volume of current storage.
Before calling the method of current storage checks if path exists, and
@@ -146,9 +187,11 @@ def _do_resize_volume(call_args):
'add-volume': do_add_volume,
'get-info': do_get_info,
'remove-storage': _do_remove_storage,
+ 'export-storage': _do_export_storage,
'create-volume': do_create_volume,
'remove-volume': do_remove_volume,
'resize-volume': _do_resize_volume,
+ 'import-aws-storage': _do_import_aws_storage,
}
@@ -232,6 +275,27 @@ def process_args():
dest='aws_secret_access_key', required=False,
help='AWS secret access key'
)
+
+ export_storage = subparsers.add_parser(
+ 'export-storage',
+ help='Unmount and export localstorage volume group from node.'
+ )
+ export_storage.add_argument(
+ '--detach-ebs', dest='detach_ebs',
+ default=False, action='store_true',
+ help='Detach EBS volumes that were in LS volume group'
+ )
+ export_storage.add_argument(
+ '--aws-access-key-id',
+ dest='aws_access_key_id', required=False,
+ help='AWS access key ID'
+ )
+ export_storage.add_argument(
+ '--aws-secret-access-key',
+ dest='aws_secret_access_key', required=False,
+ help='AWS secret access key'
+ )
+
create_volume = subparsers.add_parser(
'create-volume',
help='Create persistent volume with current localstorage backend'
@@ -264,6 +328,37 @@ def process_args():
help='New size quota (GB) of the volume'
)
+ import_aws_storage = subparsers.add_parser(
+ 'import-aws-storage',
+ help='Import previously exported storage on an AWS node'
+ )
+ import_aws_storage.add_argument(
+ '--aws-access-key-id',
+ dest='aws_access_key_id',
+ required=True,
+ help='AWS access key ID'
+ )
+ import_aws_storage.add_argument(
+ '--aws-secret-access-key',
+ dest='aws_secret_access_key',
+ required=True,
+ help='AWS secret access key'
+ )
+ import_aws_storage.add_argument(
+ '--force-detach',
+ dest='force_detach',
+ required=False, default=False,
+ help='Try to force detach volumes before attaching',
+ action='store_true'
+ )
+ import_aws_storage.add_argument(
+ '--ebs-volumes',
+ dest='ebs_volumes',
+ nargs='+',
+ required=True,
+ help='List of EBS volume names that contain the storage being imported'
+ )
+
return parser.parse_args()
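
Together the two new subcommands form a storage migration path between AWS nodes: export-storage on the node being drained exports the zpool and optionally detaches its EBS volumes, and import-aws-storage on the receiving node re-attaches those volumes by Name tag and imports the pool. A sketch at the handler level, with namespaces standing in for parsed CLI arguments (credentials and volume names are placeholders):

    from argparse import Namespace

    # on the node being drained
    status, payload = _do_export_storage(Namespace(
        detach_ebs=True, aws_access_key_id='AKIA...', aws_secret_access_key='...'))

    # on the receiving node
    status, payload = _do_import_aws_storage(Namespace(
        aws_access_key_id='AKIA...', aws_secret_access_key='...',
        force_detach=False, ebs_volumes=['kd-ls-vol-1', 'kd-ls-vol-2']))

Both handlers follow the module's (OK|ERROR, payload) convention, so callers can treat them like the existing remove-storage command.
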
diff --git a/node_storage_manage/node_lvm_manage.py b/node_storage_manage/node_lvm_manage.py
index f515d791c..3c5e88008 100644
--- a/node_storage_manage/node_lvm_manage.py
+++ b/node_storage_manage/node_lvm_manage.py
@@ -108,6 +108,20 @@ def do_remove_storage(_):
vg.close()
+def do_export_storage(_):
+ """Prepares the storage to be used on another host.
+ Not implemented for LVM.
+ """
+ return False, u'The operation is not implemented for LVM'
+
+
+def do_import_storage(_):
+ """Imports a storage that was detached from another node.
+ Not implemented for LVM.
+ """
+ return False, u'The operation is not implemented for LVM'
+
+
def remove_ls_mount():
save_file = '/etc/fstab.kdsave'
fstab = '/etc/fstab'
diff --git a/node_storage_manage/node_zfs_manage.py b/node_storage_manage/node_zfs_manage.py
index 341f63db3..d1210e746 100644
--- a/node_storage_manage/node_zfs_manage.py
+++ b/node_storage_manage/node_zfs_manage.py
@@ -172,10 +172,64 @@ def do_remove_storage(_):
return readable statuses of performed operation.
"""
+ return _perform_zpool_stop_operation('destroy')
+
+
+def do_export_storage(_):
+ """Prepares the storage to be used on another host by running the
+ 'zpool export' command.
+ Returns a success flag and the list of devices used by the zpool, or an
+ error message in case of an error.
+ """
+ return _perform_zpool_stop_operation('export')
+
+
+def do_import_storage(_):
+ """Imports a storage that was detached from another node by executing
+ 'zpool import' and mounting its datasets.
+ """
+ try:
+ all_names = _list_zpools()
+ except Exception:
+ return False, ('Unable to list ZFS pools. Maybe ZFS is not properly '
+ 'installed yet, '
+ 'skip this if this is during node cleanup process')
+ if KD_ZPOOL_NAME in all_names:
+ return False, 'Zpool {} already exists.'.format(KD_ZPOOL_NAME)
+ try:
+ silent_call(['zpool', 'import', '-f', KD_ZPOOL_NAME])
+ except Exception:
+ return False, 'Failed to import zpool'
+
+ try:
+ silent_call(['zfs', 'mount', '-a'])
+ except Exception:
+ return False, 'Failed to mount zfs volumes'
+
+ try:
+ devices = get_device_list(KD_ZPOOL_NAME)
+ except Exception:
+ return (
+ False,
+ 'Failed to get device list in zpool "{}"'.format(KD_ZPOOL_NAME)
+ )
+ return True, devices
+
+
+def _perform_zpool_stop_operation(operation):
+ """Performs one of the allowed operations ('destroy', 'export') on the
+ existing zpool. Returns the list of device names that were used by the
+ zpool.
+ """
+ allowed_operations = ('destroy', 'export')
+ if operation not in allowed_operations:
+ return False, u'Invalid operation name: {}'.format(operation)
+
try:
all_names = _list_zpools()
except Exception:
- return False, ('Unable to list ZFS pools. Maybe ZFS is not properly installed yet, '
+ return False, ('Unable to list ZFS pools. Maybe ZFS is not properly '
+ 'installed yet, '
'skip this if this is during node cleanup process')
if KD_ZPOOL_NAME not in all_names:
return True, []
@@ -187,10 +241,10 @@ def do_remove_storage(_):
'Failed to get device list in zpool "{}"'.format(KD_ZPOOL_NAME)
)
try:
- silent_call(['zpool', 'destroy', '-f', KD_ZPOOL_NAME])
+ silent_call(['zpool', operation, '-f', KD_ZPOOL_NAME])
return True, devices
except:
- return False, 'Failed to delete zpool {}'.format(KD_ZPOOL_NAME)
+ return False, 'Failed to {} zpool {}'.format(operation, KD_ZPOOL_NAME)
def add_devices_to_localstorage(devices):
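
On the ZFS backend both stop operations now funnel through _perform_zpool_stop_operation(), so export and destroy differ only in the zpool subcommand, and do_import_storage() is the inverse step on the target node. A minimal sketch of the round trip (the ignored positional argument mirrors the handlers' signatures):

    # node being drained: stop using the pool but keep the data on its devices
    ok, devices = do_export_storage(None)    # runs 'zpool export -f <KD_ZPOOL_NAME>'

    # receiving node, after the backing EBS volumes have been attached there
    ok, devices = do_import_storage(None)    # 'zpool import -f' followed by 'zfs mount -a'
    # on success, devices is the list of block devices backing the imported pool
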