Merge pull request #19 from mapuri/multimaster
Vendor in recent changes to contiv build for multiple master instance support
mapuri committed Nov 25, 2015
2 parents 315bae4 + 8730e19 commit a94e4c4
Showing 14 changed files with 184 additions and 64 deletions.
42 changes: 23 additions & 19 deletions Vagrantfile
@@ -11,6 +11,7 @@ end

service_init = false
if ENV['CONTIV_SRV_INIT'] then
# in demo mode we initialize and bring up the services
service_init = true
end

@@ -35,7 +36,8 @@ ceph_vars = {
ansible_groups = { }
ansible_playbook = "./vendor/configuration/ansible/site.yml"
ansible_extra_vars = {
"env" => host_env
"env" => host_env,
"service_vip" => "#{base_ip}252"
}
ansible_extra_vars = ansible_extra_vars.merge(ceph_vars)

@@ -56,7 +58,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
node_name = node_names[n]
node_addr = node_ips[n]
node_vars = {
"online_master_addr" => node_ips[0],
"etcd_master_addr" => node_ips[0],
"etcd_master_name" => node_names[0],
}
config.vm.define node_name do |node|
node.vm.hostname = node_name
@@ -92,6 +95,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
if n == 0 then
# mount vagrant directory such that symbolic links are copied
#node.vm.synced_folder ".", "/vagrant", type: "rsync", rsync__args: ["--verbose", "-rLptgoD", "--delete", "-z"]

# mount the host's gobin path for cluster related binaries to be available
node.vm.synced_folder "#{ENV['GOPATH']}/bin", gobin_dir

@@ -100,28 +104,28 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

# add this node to cluster-control host group
ansible_groups["cluster-control"] = [node_name]

if service_init then
# if we are bringing up services as part of the cluster, then start
# master services on first vm
ansible_groups["service-master"] = [node_name]
ansible_groups["ceph-hosts"] = [node_name]
ansible_extra_vars = ansible_extra_vars.merge(node_vars)
end
elsif service_init then
# if we are bringing up services as part of the cluster, then start
# worker services on rest of the vms
if ansible_groups["service-worker"] == nil then
ansible_groups["service-worker"] = [ ]
end
ansible_groups["service-worker"] << node_name
ansible_groups["ceph-hosts"] << node_name
ansible_extra_vars = ansible_extra_vars.merge(node_vars)
end

if service_init
# Share anything in `shared` to '/shared' on the cluster hosts.
node.vm.synced_folder "shared", "/shared"

ansible_extra_vars = ansible_extra_vars.merge(node_vars)
if n == 0 then
# if we are bringing up services as part of the cluster, then start
# master services on the first vm
if ansible_groups["service-master"] == nil then
ansible_groups["service-master"] = [ ]
end
ansible_groups["service-master"] << node_name
else
# if we are bringing up services as part of the cluster, then start
# worker services on rest of the vms
if ansible_groups["service-worker"] == nil then
ansible_groups["service-worker"] = [ ]
end
ansible_groups["service-worker"] << node_name
end
end

# Run the provisioner after all machines are up
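With service bring-up enabled, the loop above places the first VM in the cluster-control and service-master groups, puts the remaining VMs in service-worker, and hands every node the shared service_vip plus the etcd master name and address. A hedged sketch of a roughly equivalent manual run is shown below; the node names, the ./hosts inventory path and the 192.168.24.x values are illustrative assumptions, not values taken from this commit.

# Rough manual equivalent of the groups and extra vars the Vagrant provisioner
# builds for a three-node bring-up with CONTIV_SRV_INIT set (all names and
# addresses are illustrative):
#
#   cluster-control : node1
#   service-master  : node1
#   service-worker  : node2, node3
#
ansible-playbook ./vendor/configuration/ansible/site.yml \
  -i ./hosts \
  --extra-vars '{"env": {},
                 "service_vip": "192.168.24.252",
                 "etcd_master_addr": "192.168.24.10",
                 "etcd_master_name": "node1"}'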
10 changes: 7 additions & 3 deletions vendor/configuration/ansible/group_vars/all
@@ -1,9 +1,13 @@
---
# Variables here are applicable to all host groups

# define an empty environment that can be overridden from command line
env:

node_name: "{{ ansible_hostname }}"
node_addr: "{{ hostvars[ansible_hostname]['ansible_' + monitor_interface]['ipv4']['address'] }}"
online_master_addr: "192.168.24.10"

# The following variables are used in one or more roles, but have no good default value to pick from.
# They are left commented so that playbooks fail early with a variable-not-defined error.

# env:
# service_vip:
# monitor_interface:
@@ -19,3 +19,14 @@
group=root
mode=600
when: cephx

- name: copy client.admin key to `osd` only nodes as well to allow volplugin to run rbd commands
copy: >
src=fetch/{{ fsid }}/etc/ceph/ceph.client.admin.keyring
dest=/etc/ceph/ceph.client.admin.keyring
owner=root
group=root
mode=600
when:
cephx and
'{{ mon_group_name }}' not in {{ group_names }}
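A quick, hedged way to confirm that the copied keyring actually lets rbd commands run on an osd-only node is sketched below; the pool name "rbd" is the Ceph default and an assumption here, not something set by this playbook.

# Run on an osd-only node after the play; lists images in the default "rbd"
# pool using the admin keyring copied by the task above (illustrative check).
rbd --keyring /etc/ceph/ceph.client.admin.keyring ls rbd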
@@ -34,13 +34,8 @@
- name: start netplugin
shell: systemctl daemon-reload && systemctl start netplugin

- name: setup netmaster host alias on master
shell: echo "{{ node_addr }} netmaster" >> /etc/hosts
when: run_as == "master"

- name: setup netmaster host alias on workers
shell: echo "{{ online_master_addr }} netmaster" >> /etc/hosts
when: run_as == "worker"
- name: setup netmaster host alias
shell: echo "{{ service_vip }} netmaster" >> /etc/hosts

# XXX: need to move the following to correct roles
- name: install contivctl
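Because the alias now points at the virtual IP on masters and workers alike, the single task above replaces the earlier master/worker split. With the illustrative VIP used in the Vagrantfile sketch earlier, the rendered command on every node is roughly:

# Effect of the "setup netmaster host alias" task on every node
# (the VIP value is illustrative).
echo "192.168.24.252 netmaster" >> /etc/hosts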
@@ -1,5 +1 @@
{% if online_master_addr == node_addr or online_master_addr == "" %}
VOLPLUGIN_ARGS='--debug'
{% else %}
VOLPLUGIN_ARGS='--debug --master {{ online_master_addr }}:9005'
{% endif %}
VOLPLUGIN_ARGS='--debug --master {{ service_vip }}:9005'
2 changes: 1 addition & 1 deletion vendor/configuration/ansible/roles/etcd/tasks/main.yml
@@ -8,4 +8,4 @@
copy: src=etcd.service dest=/etc/systemd/system/etcd.service

- name: start etcd
shell: systemctl daemon-reload && systemctl start etcd
service: name=etcd state=started
61 changes: 45 additions & 16 deletions vendor/configuration/ansible/roles/etcd/templates/etcd.j2
@@ -8,47 +8,76 @@ fi

export ETCD_NAME={{ node_name }}
export ETCD_DATA_DIR=/var/lib/etcd
export ETCD_INITIAL_CLUSTER_STATE=new
export ETCD_INITIAL_CLUSTER_TOKEN=contiv-cluster
export ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:{{ etcd_client_port1 }},http://0.0.0.0:{{ etcd_client_port2 }}
export ETCD_ADVERTISE_CLIENT_URLS=http://{{ node_addr }}:{{ etcd_client_port1 }},http://{{ node_addr }}:{{ etcd_client_port2 }}
export ETCD_INITIAL_ADVERTISE_PEER_URLS=http://{{ node_addr }}:{{ etcd_peer_port1 }},http://{{ node_addr }}:{{ etcd_peer_port2 }}
export ETCD_LISTEN_PEER_URLS=http://{{ node_addr }}:{{ etcd_peer_port1 }}
export ETCD_INITIAL_CLUSTER="{{ node_name }}=http://{{ node_addr }}:{{ etcd_peer_port1 }},{{ node_name }}=http://{{ node_addr }}:{{ etcd_peer_port2 }}"

case $1 in
start)
ONLINE_MASTER_ADDR={{ online_master_addr }}
# if a master address is provided then we need to add the node to the existing cluster
if [ "$ONLINE_MASTER_ADDR" != "" -a "$ONLINE_MASTER_ADDR" != "{{ node_addr }}" ]; then
# XXX: There seems to be an issue using etcdctl with ETCD_INITIAL_ADVERTISE_PEER_URLS so passing
# ETCD_LISTEN_PEER_URLS for now
out=`etcdctl --peers="{{ online_master_addr }}:{{ etcd_client_port1 }},{{ online_master_addr }}:{{ etcd_client_port2 }}" \
member add {{ node_name }} "$ETCD_LISTEN_PEER_URLS"`
if [ $? -ne 0 ]; then
echo "failed to add member {{ node_name }}"
exit 1
fi
# parse and export the environment returned by member add
export `echo $out | awk -F 'ETCD_' '{print "ETCD_"$2 "ETCD_"$3 "ETCD_"$4}' | sed s/\"//g`
{% macro add_proxy() -%}
export ETCD_PROXY=on
export ETCD_INITIAL_CLUSTER="{{ etcd_master_name }}=http://{{ etcd_master_addr }}:{{ etcd_peer_port1 }},{{ etcd_master_name }}=http://{{ etcd_master_addr }}:{{ etcd_peer_port2 }}"
{% endmacro -%}

{% macro add_member() -%}
# XXX: There seems to be an issue using etcdctl with ETCD_INITIAL_ADVERTISE_PEER_URLS so passing
# ETCD_LISTEN_PEER_URLS for now
out=`etcdctl --peers="{{ etcd_master_addr }}:{{ etcd_client_port1 }},{{ etcd_master_addr }}:{{ etcd_client_port2 }}" \
member add {{ node_name }} "$ETCD_LISTEN_PEER_URLS"`
if [ $? -ne 0 ]; then
echo "failed to add member {{ node_name }}"
exit 1
fi
# parse and export the environment returned by member add
export `echo $out | awk -F 'ETCD_' '{print "ETCD_"$2 "ETCD_"$3 "ETCD_"$4}' | sed s/\"//g`
{% endmacro -%}

{% macro init_cluster() -%}
export ETCD_INITIAL_CLUSTER_STATE=new
export ETCD_INITIAL_CLUSTER="
{%- for host in groups[etcd_peers_group] -%}
{%- if loop.last -%}
{{ hostvars[host]['ansible_hostname'] }}=http://{{ hostvars[host]['ansible_' + etcd_peer_interface]['ipv4']['address'] }}:{{ etcd_peer_port1 }},{{ hostvars[host]['ansible_hostname'] }}=http://{{ hostvars[host]['ansible_' + etcd_peer_interface]['ipv4']['address'] }}:{{ etcd_peer_port2 }}
{%- else -%}
{{ hostvars[host]['ansible_hostname'] }}=http://{{ hostvars[host]['ansible_' + etcd_peer_interface]['ipv4']['address'] }}:{{ etcd_peer_port1 }},{{ hostvars[host]['ansible_hostname'] }}=http://{{ hostvars[host]['ansible_' + etcd_peer_interface]['ipv4']['address'] }}:{{ etcd_peer_port2 }},
{%- endif -%}
{% endfor -%}
"
{% endmacro -%}

{% if run_as == "worker" -%}
# on worker nodes, run etcd in proxy mode
{{ add_proxy() }}
{% elif etcd_init_cluster -%}
# on master nodes, if the cluster is being initialized for the first time then initialize it
{{ init_cluster() }}
{% else -%}
# if a new master node is being commissioned then add it to the existing cluster
{{ add_member() }}
{% endif -%}

#start etcd
echo "==> starting etcd with environment:" `env`
/usr/bin/etcd
;;

stop)
{% if run_as == "worker" -%}
echo "==> no 'stop' action for proxy"
{% else -%}
#XXX: do better cleanup like remove the member from the cluster only if it was started
out=`etcdctl member list | grep {{ node_name }} | awk -F ':' '{print $1}'`
if [ "$out" != "" ]; then
echo "==> removing member: " $out
etcdctl member remove $out
fi
{% endif -%}
;;

post-stop)
#XXX: is there a case whe we should not cleanup the data dir on stop?
#XXX: is there a case when we should not cleanup the data dir on stop?
rm -rf $ETCD_DATA_DIR
;;

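For context on the add_member path above: in etcd 2.x, etcdctl member add prints the environment a new member needs in order to join an existing cluster, which the script then parses and exports before starting etcd. A hedged illustration of that exchange, with hypothetical node names, member ID and addresses:

# Hypothetical etcd 2.x "member add" exchange for a newly commissioned master
# (node names, member ID and addresses are illustrative).
$ etcdctl --peers="192.168.24.10:2379,192.168.24.10:4001" \
    member add node2 "http://192.168.24.11:2380"
Added member named node2 with ID 9bf1b35fc7761a23 to cluster

ETCD_NAME="node2"
ETCD_INITIAL_CLUSTER="node1=http://192.168.24.10:2380,node2=http://192.168.24.11:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"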
5 changes: 5 additions & 0 deletions vendor/configuration/ansible/roles/etcd/vars/main.yml
@@ -5,3 +5,8 @@ etcd_client_port1: 2379
etcd_client_port2: 4001
etcd_peer_port1: 2380
etcd_peer_port2: 7001
etcd_master_addr: "{{ node_addr }}"
etcd_master_name: "{{ node_name }}"
etcd_peers_group: "service-master"
etcd_peer_interface: "{{ monitor_interface }}"
etcd_init_cluster: true
7 changes: 7 additions & 0 deletions vendor/configuration/ansible/roles/ucarp/files/ucarp.service
@@ -0,0 +1,7 @@
[Unit]
Description=Ucarp (Virtual IP service)
After=auditd.service systemd-user-sessions.service time-sync.target

[Service]
ExecStart=/usr/bin/ucarp.sh start
KillMode=control-group
13 changes: 13 additions & 0 deletions vendor/configuration/ansible/roles/ucarp/files/ucarp/vip_down.sh
@@ -0,0 +1,13 @@
#!/bin/bash

usage="$0 <ifname> <vip>"
if [ $# -ne 2 ]; then
echo USAGE: $usage
exit 1
fi

set -x -e

intf=$1

/sbin/ip link del dev ${intf}_0
19 changes: 19 additions & 0 deletions vendor/configuration/ansible/roles/ucarp/files/ucarp/vip_up.sh
@@ -0,0 +1,19 @@
#!/bin/bash

usage="$0 <ifname> <vip>"
if [ $# -ne 2 ]; then
echo USAGE: $usage
exit 1
fi

set -x -e

intf=$1
vip=$2

/sbin/ip link add name ${intf}_0 type dummy

# XXX: the subnet needs to be derived from underlying parent interface
/sbin/ip addr add ${vip}/24 dev ${intf}_0

/sbin/ip link set dev ${intf}_0 up
22 changes: 22 additions & 0 deletions vendor/configuration/ansible/roles/ucarp/tasks/main.yml
@@ -0,0 +1,22 @@
---
# This role contains tasks for configuring and starting ucarp service

- name: download and install ucarp service (Redhat)
yum: name=ucarp state=present
when: ansible_os_family == "RedHat"

- name: download and install ucarp service (Ubuntu)
apt: name=ucarp state=present
when: ansible_os_family == "Debian"

- name: copy the ucarp start/stop script
template: src=ucarp.sh.j2 dest=/usr/bin/ucarp.sh mode=u=rwx,g=rx,o=rx

- name: copy the vip up and down scripts used by ucarp
copy: src=ucarp/ dest=/usr/bin/ucarp/ mode=u=rwx,g=rx,o=rx

- name: copy systemd units for ucarp
copy: src=ucarp.service dest=/etc/systemd/system/ucarp.service

- name: start ucarp
service: name=ucarp state=started
22 changes: 22 additions & 0 deletions vendor/configuration/ansible/roles/ucarp/templates/ucarp.sh.j2
@@ -0,0 +1,22 @@
#!/bin/bash

usage="$0 <start>"
if [ $# -ne 1 ]; then
echo USAGE: $usage
exit 1
fi

set -x -e

case $1 in
start)
/sbin/ucarp --shutdown --interface={{ monitor_interface }} \
--srcip={{ hostvars[ansible_hostname]['ansible_' + monitor_interface]['ipv4']['address'] }} \
--vhid=1 --pass=cluster_secret --addr={{ service_vip }} \
--upscript="/usr/bin/ucarp/vip_up.sh" --downscript="/usr/bin/ucarp/vip_down.sh"
;;

*)
echo USAGE: $usage
exit 1
esac
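ucarp runs on every service-master host and elects one of them to hold service_vip; the elected node runs vip_up.sh to attach the VIP to a dummy interface, and vip_down.sh removes it again on failover. A hedged way to see which node currently owns the VIP is shown below; the eth1_0 interface name and the address are illustrative, derived from monitor_interface and service_vip.

# Run on each service-master node; only the current VIP holder shows the
# address on the dummy interface created by vip_up.sh (values illustrative).
ip addr show dev eth1_0 | grep 192.168.24.252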
19 changes: 6 additions & 13 deletions vendor/configuration/ansible/site.yml
@@ -24,21 +24,17 @@
- { role: docker, etcd_client_port1: 2379 }
- { role: contiv_cluster }

- hosts: ceph-hosts
sudo: true
environment: env
roles:
- { role: ceph-mon, mon_group_name: ceph-hosts }
- { role: ceph-osd, osd_group_name: ceph-hosts }

# service-master hosts correspond to cluster machines that run the master/controller
# logic of the infra services
- hosts: service-master
sudo: true
environment: env
roles:
- { role: ucarp }
- { role: docker }
- { role: etcd }
- { role: etcd, run_as: master }
- { role: ceph-mon, mon_group_name: service-master }
- { role: ceph-osd, mon_group_name: service-master, osd_group_name: service-master }
- { role: swarm, run_as: master }
- { role: contiv_network, run_as: master }
- { role: contiv_storage, run_as: master }
@@ -48,13 +44,10 @@
- hosts: service-worker
sudo: true
environment: env
# XXX: there is a limitation with starting etcd on multiple workers at the same
# time, so limiting to one host at a time for now. Revisit once etcd limitation
# is addressed.
serial: 1
roles:
- { role: docker }
- { role: etcd }
- { role: etcd, run_as: worker }
- { role: ceph-osd, mon_group_name: service-master, osd_group_name: service-worker }
- { role: swarm, run_as: worker }
- { role: contiv_network, run_as: worker }
- { role: contiv_storage, run_as: worker }
