From deb8a1ef8b3f4eadf70c15738e2e3bb89b673389 Mon Sep 17 00:00:00 2001
From: Ryan Beck-Buysse
Date: Tue, 7 Jun 2022 11:46:55 -0500
Subject: [PATCH 1/2] Add ec2-runners action

This was copied from https://github.com/Cargill/splinter

Signed-off-by: Ryan Beck-Buysse
---
 .github/actions/ec2-runners/Dockerfile |  27 ++++
 .github/actions/ec2-runners/README.md  | 171 ++++++++++++++++++++++++
 .github/actions/ec2-runners/action.yml |  78 +++++++++++
 .github/actions/ec2-runners/aws.py     | 177 +++++++++++++++++++++++++
 4 files changed, 453 insertions(+)
 create mode 100644 .github/actions/ec2-runners/Dockerfile
 create mode 100644 .github/actions/ec2-runners/README.md
 create mode 100644 .github/actions/ec2-runners/action.yml
 create mode 100755 .github/actions/ec2-runners/aws.py

diff --git a/.github/actions/ec2-runners/Dockerfile b/.github/actions/ec2-runners/Dockerfile
new file mode 100644
index 0000000000..dfaea33e97
--- /dev/null
+++ b/.github/actions/ec2-runners/Dockerfile
@@ -0,0 +1,27 @@
+# Copyright 2018-2022 Cargill Incorporated
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:focal
+
+RUN apt-get update && apt-get install -yq --no-install-recommends \
+    python3 \
+    python3-pip \
+    && pip3 install \
+    botocore \
+    boto3 \
+    requests
+
+COPY aws.py /aws.py
+
+ENTRYPOINT ["/aws.py"]
diff --git a/.github/actions/ec2-runners/README.md b/.github/actions/ec2-runners/README.md
new file mode 100644
index 0000000000..1ee2c27b0f
--- /dev/null
+++ b/.github/actions/ec2-runners/README.md
@@ -0,0 +1,171 @@
+# ec2-runners
+
+Creates a self-hosted runner for GitHub Actions on EC2. Useful for when
+GitHub hosted runners are too slow.
+This is exposed as a GitHub Actions self-hosted runner scoped to the repo where
+this action is run from.
+
+Provides two actions:
+
+`start`:
+
+  * Creates two instances
+  * Bootstraps the buildx cluster
+  * Installs GHA runner software with the `--ephemeral` option
+
+`stop`:
+
+  * Terminates any instances whose name matches the label provided
+
+# Example usage
+
+```yaml
+name: GHA Buildx
+on:
+  - push
+  - workflow_dispatch
+jobs:
+  start_cluster:
+    name: Start buildx cluster
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start_buildx_cluster.outputs.label }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Start EC2 runners
+        id: start_buildx_cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: start
+          amd_ami_id: ${{ secrets.AMD_AMI_ID }}
+          amd_instance_type: t2.nano
+          arm_ami_id: ${{ secrets.ARM_AMI_ID }}
+          arm_instance_type: t4g.nano
+          gh_personal_access_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          mode: buildx
+          security_group_id: ${{ secrets.SECURITY_GROUP_ID }}
+          subnet: ${{ secrets.SUBNET }}
+
+  build_docker:
+    name: Build docker
+    needs: start_cluster
+    runs-on: ${{ needs.start_cluster.outputs.label }}
+    steps:
+      - name: Debug
+        run: docker buildx ls
+
+  stop_cluster:
+    name: Stop buildx cluster
+    needs:
+      - start_cluster
+      - build_docker
+    runs-on: ubuntu-latest
+    if: ${{ always() }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Destroy cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: stop
+          label: ${{ needs.start_cluster.outputs.label }}
+```
+
+# Configuration
+
+## Inputs
+
+`action`
+
+  * `start` - deploy a new cluster
+  * `stop` - destroy a running cluster
+
+`amd_ami_id`
+
+AMI ID for the AMD instance. Should have Docker installed.
+
+`amd_instance_type`
+
+Instance Type for the AMD instance
+
+`arm_ami_id`
+
+AMI ID for the ARM instance. Should have Docker installed and daemon exposed on
+port `2375`.
+
+`arm_instance_type`
+
+Instance Type for the ARM instance
+
+`gh_personal_access_token`
+
+GitHub Personal Access Token with "repo" permissions
+
+`label`
+
+Label applied to the created EC2 instances during creation.
+No effect during `start`.
+This is required when running the `stop` action.
+
+`mode`
+
+  * `buildx` - start a two node buildx cluster for multi-arch builds
+  * `single` - start a single self-hosted AMD runner
+
+Defaults to `buildx`
+
+`security_group_id`
+
+Must allow inbound traffic from the local subnet to port `2375`.
+Must allow outbound traffic to connect to GitHub.
+
+`subnet`
+
+Subnet to apply to the instances
+
+## Outputs
+
+`label`
+
+Random value generated when creating a new cluster.
+This is used for job isolation.
+Capture this output in the `start` action to provide to the `stop` action so
+the instances are terminated.
+
+# Setup
+
+Assumes you have two pre-built AMIs
+
+AMD runner: Docker installed
+
+ARM runner: Docker installed and daemon exposed on port `2375`
+
+Steps to expose docker daemon:
+
+```
+sudo vi /etc/docker/daemon.json
+
+{
+  "hosts": ["unix:///var/run/docker.sock", "tcp://0.0.0.0:2375"]
+}
+```
+```
+sudo vi /lib/systemd/system/docker.service
+ExecStart=/usr/bin/dockerd --containerd=/run/containerd/containerd.sock
+```
+```
+sudo systemctl daemon-reload
+
+sudo systemctl restart docker.service
+```
diff --git a/.github/actions/ec2-runners/action.yml b/.github/actions/ec2-runners/action.yml
new file mode 100644
index 0000000000..5e6c8f051a
--- /dev/null
+++ b/.github/actions/ec2-runners/action.yml
@@ -0,0 +1,78 @@
+name: GHA Buildx
+description: Provision a self-hosted buildx cluster for GHA
+inputs:
+  action:
+    description: >-
+      - 'start' - deploy a new cluster
+      - 'stop' - destroy a running cluster
+    required: true
+
+  amd_ami_id:
+    description: >-
+      AMI ID for the AMD instance
+    required: false
+
+  amd_instance_type:
+    description: >-
+      Instance Type for the AMD instance
+    required: false
+
+  arm_ami_id:
+    description: >-
+      AMI ID for the ARM instance
+    required: false
+
+  arm_instance_type:
+    description: >-
+      Instance Type for the ARM instance
+    required: false
+
+  gh_personal_access_token:
+    description: >-
+      GitHub Personal Access Token. Only needed for the 'start' action.
+    required: false
+
+  label:
+    description: >-
+      Label applied to the created EC2 instances.
+      This is required when running the 'stop' action.
+    required: false
+
+  mode:
+    description: >-
+      'buildx' - start a two node buildx cluster for multi-arch builds.
+      'single' - start a single self-hosted AMD runner.
+      Defaults to 'buildx'.
+    required: false
+    default: 'buildx'
+
+  security_group_id:
+    description: >-
+      Must allow outbound traffic to connect to GitHub
+    required: false
+
+  subnet:
+    description: >-
+      Subnet to apply to the instances
+    required: false
+
+outputs:
+  label:
+    description: >-
+      Random value generated when creating a new cluster.
+      Used to make sure jobs only run on the clusters they create.
+
+runs:
+  using: 'docker'
+  image: 'Dockerfile'
+  args:
+    - ${{ inputs.action }}
+    - ${{ inputs.amd_ami_id }}
+    - ${{ inputs.amd_instance_type }}
+    - ${{ inputs.arm_ami_id }}
+    - ${{ inputs.arm_instance_type }}
+    - ${{ inputs.gh_personal_access_token }}
+    - ${{ inputs.label }}
+    - ${{ inputs.mode }}
+    - ${{ inputs.security_group_id }}
+    - ${{ inputs.subnet }}
diff --git a/.github/actions/ec2-runners/aws.py b/.github/actions/ec2-runners/aws.py
new file mode 100755
index 0000000000..91ac64f994
--- /dev/null
+++ b/.github/actions/ec2-runners/aws.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+# Copyright 2018-2022 Cargill Incorporated
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import boto3
+import botocore
+import os
+import requests
+import sys
+import time
+import uuid
+
+action = os.environ["INPUT_ACTION"]
+amd_ami = os.environ["INPUT_AMD_AMI_ID"]
+amd_instancetype = os.environ["INPUT_AMD_INSTANCE_TYPE"]
+arm_ami = os.environ["INPUT_ARM_AMI_ID"]
+arm_instancetype = os.environ["INPUT_ARM_INSTANCE_TYPE"]
+github_pat = os.environ["INPUT_GH_PERSONAL_ACCESS_TOKEN"]
+mode = os.environ["INPUT_MODE"]
+region = os.environ["AWS_REGION"]
+repo = os.environ["GITHUB_REPOSITORY"]
+securitygroup = os.environ["INPUT_SECURITY_GROUP_ID"]
+subnet = os.environ["INPUT_SUBNET"]
+
+ec2client = boto3.client('ec2', region_name=region)
+
+def create_instance(ami, instancetype, label, userdata):
+    """Launch one EC2 instance tagged Name=label; returns the run_instances response."""
+    response = ec2client.run_instances(
+        BlockDeviceMappings=[
+            {
+                'DeviceName': '/dev/xvda',
+                'Ebs': {
+                    'DeleteOnTermination': True,
+                    'VolumeSize': 30,
+                    'VolumeType': 'gp2'
+                },
+            },
+        ],
+        ImageId=ami,
+        InstanceType=instancetype,
+        MaxCount=1,
+        MinCount=1,
+        Monitoring={
+            'Enabled': False
+        },
+        SecurityGroupIds=(securitygroup,),
+        SubnetId=subnet,
+        TagSpecifications=[
+            {
+                "ResourceType": "instance",
+                'Tags': [
+                    {'Key': 'Name', 'Value': label },
+                ]
+            },
+        ],
+        UserData=userdata,
+    )
+    return response
+
+def get_instances_from_tag(tag):
+    """Return the IDs of all instances whose Name tag equals `tag`."""
+    instance_ids = []
+    tag_filter = [
+        {'Name':'tag:Name','Values': [tag]},
+    ]
+
+    response = ec2client.describe_instances(Filters=tag_filter)
+
+    for reservation in response["Reservations"]:
+        for instance in reservation["Instances"]:
+            instance_ids.append(instance["InstanceId"])
+    return instance_ids
+
+def get_regtoken():
+    """Request a self-hosted runner registration token for `repo`; exits 1 on failure."""
+    try:
+        headers = {'Authorization': "token {}".format(github_pat.strip())}
+        r = requests.post(f"https://api.github.com/repos/{repo}/actions/runners/registration-token", headers=headers, timeout=30)
+        # Surface HTTP errors (bad token, missing scope) instead of a KeyError below
+        r.raise_for_status()
+        return r.json()["token"]
+    except Exception as e:
+        print(f"ERROR: Unable to get GHA self-hosted registration token: {e}")
+        sys.exit(1)
+
+def make_label():
+    """Return the first segment of a time-based UUID, used as the runner label."""
+    return str(uuid.uuid1()).split("-")[0]
+
+def set_output(name, value):
+    """Publish a step output, preferring the GITHUB_OUTPUT file when available."""
+    output_file = os.environ.get("GITHUB_OUTPUT")
+    if output_file:
+        with open(output_file, "a") as f:
+            f.write(f"{name}={value}\n")
+    else:
+        # Older runners only understand the (deprecated) workflow command
+        print(f"::set-output name={name}::{value}")
+
+def terminate_instances(tag):
+    """Terminate every instance whose Name tag equals `tag`; exits 1 on failure."""
+    # An empty label (e.g. the start job failed before producing one) must
+    # not be used as a filter or passed on as an empty instance list.
+    if not tag:
+        print("No label provided; nothing to terminate")
+        return
+    instances_to_terminate = get_instances_from_tag(tag)
+    print(instances_to_terminate)
+    if not instances_to_terminate:
+        print("No instances found; nothing to terminate")
+        return
+    try:
+        ec2client.terminate_instances(
+            InstanceIds=instances_to_terminate, DryRun=True
+        )
+    except botocore.exceptions.ClientError as e:
+        # A permitted dry run is reported as a DryRunOperation error
+        if 'DryRunOperation' not in str(e):
+            raise
+    try:
+        ec2client.terminate_instances(
+            InstanceIds=instances_to_terminate, DryRun=False
+        )
+        print("Termination was successful")
+    except botocore.exceptions.ClientError as e:
+        # Fail the step so leaked instances are noticed
+        print(e)
+        sys.exit(1)
+
+if action == "start":
+    reg_token = get_regtoken()
+    label = make_label()
+    print(f"Creating instances with tag {label}")
+    if mode == "buildx":
+        arm_userdata=""
+        arminstance=create_instance(arm_ami, arm_instancetype, label, arm_userdata)
+        arm_private_ip = arminstance['Instances'][0]['PrivateIpAddress']
+        print("Started ARM instance %s at %s" % (arminstance['Instances'][0]['InstanceId'], arm_private_ip))
+        print("Sleeping for 20s so %s will be ready" % arminstance['Instances'][0]['InstanceId'])
+        time.sleep(20)
+
+        buildx_userdata=f"""
+        echo "{arm_private_ip} buildx" >> /etc/hosts
+        DOCKER_HOST=tcp://buildx:2375 docker buildx create --name cluster
+        docker buildx create --name cluster --append
+        docker buildx use cluster
+        docker buildx inspect --bootstrap
+        """
+    else:
+        buildx_userdata=""
+
+    amd_userdata=f"""#!/bin/bash
+    {buildx_userdata}
+    mkdir /tmp/actions-runner && cd /tmp/actions-runner
+    curl -o actions-runner-linux-x64-2.288.1.tar.gz -L https://github.com/actions/runner/releases/download/v2.288.1/actions-runner-linux-x64-2.288.1.tar.gz
+    tar xzf ./actions-runner-linux-x64-2.288.1.tar.gz
+    RUNNER_ALLOW_RUNASROOT=1 ./config.sh --url https://github.com/{repo} --token {reg_token} --labels {label} --ephemeral --unattended
+    RUNNER_ALLOW_RUNASROOT=1 ./run.sh
+    """
+
+    set_output("label", label)
+    amdinstance=create_instance(amd_ami, amd_instancetype, label, amd_userdata)
+
+if action == "stop":
+    terminate_instances(os.environ.get("INPUT_LABEL", "").strip())
From a5805c4c6e046a17eded567ae0bb9bff56947249 Mon Sep 17 00:00:00 2001
From: Ryan Beck-Buysse
Date: Wed, 8 Jun 2022 13:58:16 -0500
Subject: [PATCH 2/2] Update GHA workflows to use ephemeral ec2 runners

The hardware we were previously using for multi-arch builds is being
end-of-lifed so this is the next best solution.

Signed-off-by: Ryan Beck-Buysse
---
 .github/workflows/0-3-grid-dev.yaml        | 80 +++++++++++++++++++-
 .github/workflows/0-3-publish-release.yaml | 86 +++++++++++++++++++++-
 2 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/0-3-grid-dev.yaml b/.github/workflows/0-3-grid-dev.yaml
index cd78ef092d..380f024195 100644
--- a/.github/workflows/0-3-grid-dev.yaml
+++ b/.github/workflows/0-3-grid-dev.yaml
@@ -3,10 +3,54 @@ on:
   - push
   - workflow_dispatch
 jobs:
+  start_cluster:
+    if: github.repository == 'hyperledger/grid'
+    name: Start buildx cluster
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start_buildx_cluster.outputs.label }}
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: ${{ secrets.AWS_BUILDX_REGION }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+
+      - uses: actions/checkout@v2
+
+      - name: Start EC2 runner
+        id: start_buildx_cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: start
+          amd_ami_id: ${{ secrets.AMD_AMI_ID }}
+          amd_instance_type: ${{ secrets.AMD_INSTANCE_TYPE }}
+          arm_ami_id: ${{ secrets.ARM_AMI_ID }}
+          arm_instance_type: ${{ secrets.ARM_INSTANCE_TYPE }}
+          gh_personal_access_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          security_group_id: ${{ secrets.SECURITY_GROUP_ID }}
+          subnet: ${{ secrets.SUBNET }}
+
+      - name: Output label
+        run: echo ${{ steps.start_buildx_cluster.outputs.label }}
+
+      - name: Notify Slack of Failure
+        if: cancelled() || failure()
+        uses: 8398a7/action-slack@v3
+        with:
+          status: ${{ job.status }}
+          fields: repo,message,author,job
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+
   build_grid_dev:
     if: github.repository == 'hyperledger/grid'
     name: Build grid-dev
-    runs-on: macos-arm
+    needs: start_cluster
+    runs-on: ${{ needs.start_cluster.outputs.label }}
     steps:
       - name: Login to DockerHub
         uses: docker/login-action@v1
@@ -31,3 +75,37 @@ jobs:
           fields: repo,message,author,job
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+
+  stop_cluster:
+    name: Stop buildx cluster
+    needs:
+      - start_cluster
+      - build_grid_dev
+    runs-on: ubuntu-latest
+    if: ${{ github.repository == 'hyperledger/grid' && always() }}
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: ${{ secrets.AWS_BUILDX_REGION }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+
+      - uses: actions/checkout@v2
+
+      - name: Destroy cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: stop
+          label: ${{ needs.start_cluster.outputs.label }}
+
+      - name: Notify Slack of Failure
+        if: cancelled() || failure()
+        uses: 8398a7/action-slack@v3
+        with:
+          status: ${{ job.status }}
+          fields: repo,message,author,job
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
diff --git a/.github/workflows/0-3-publish-release.yaml b/.github/workflows/0-3-publish-release.yaml
index 1b5481eaa1..27bd30ed66 100644
--- a/.github/workflows/0-3-publish-release.yaml
+++ b/.github/workflows/0-3-publish-release.yaml
@@ -28,11 +28,59 @@ jobs:
       - name: Run tests
         run: just ci-test
 
+  start_cluster:
+    needs:
+      - unit_test_grid
+    if: >-
+      github.repository_owner == 'hyperledger'
+    name: Start buildx cluster
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start-buildx-cluster.outputs.label }}
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: ${{ secrets.AWS_BUILDX_REGION }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+
+      - uses: actions/checkout@v2
+
+      - name: Start EC2 runner
+        id: start-buildx-cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: start
+          amd_ami_id: ${{ secrets.AMD_AMI_ID }}
+          amd_instance_type: c6i.4xlarge
+          arm_ami_id: ${{ secrets.ARM_AMI_ID }}
+          arm_instance_type: c6g.4xlarge
+          gh_personal_access_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          security_group_id: ${{ secrets.SECURITY_GROUP_ID }}
+          subnet: ${{ secrets.SUBNET }}
+
+      - name: Output label
+        run: echo ${{ steps.start-buildx-cluster.outputs.label }}
+
+      - name: Notify Slack of Failure
+        if: cancelled() || failure()
+        uses: 8398a7/action-slack@v3
+        with:
+          status: ${{ job.status }}
+          fields: repo,message,author,job
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+
   publish_docker:
-    needs: unit_test_grid
+    needs:
+      - start_cluster
+      - unit_test_grid
     if: >-
       github.repository_owner == 'hyperledger'
-    runs-on: macos-arm
+    runs-on: ${{ needs.start_cluster.outputs.label }}
     steps:
       - name: Display envvars
         run: env
@@ -65,6 +113,40 @@ jobs:
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
 
+  stop_cluster:
+    name: Stop buildx cluster
+    needs:
+      - start_cluster
+      - publish_docker
+    runs-on: ubuntu-latest
+    if: ${{ github.repository_owner == 'hyperledger' && always() }}
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: ${{ secrets.AWS_BUILDX_REGION }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+
+      - uses: actions/checkout@v2
+
+      - name: Destroy cluster
+        uses: ./.github/actions/ec2-runners
+        with:
+          action: stop
+          label: ${{ needs.start_cluster.outputs.label }}
+
+      - name: Notify Slack of Failure
+        if: cancelled() || failure()
+        uses: 8398a7/action-slack@v3
+        with:
+          status: ${{ job.status }}
+          fields: repo,message,author,job
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+
   publish_to_crates:
     needs: unit_test_grid
     if: >-