diff --git a/.env-dummy b/.env-dummy index 6c0f5a5..964dc4d 100644 --- a/.env-dummy +++ b/.env-dummy @@ -1,6 +1,18 @@ # Go to smee.io to generate a URL here SMEE_URL=https://smee.io/CHANGEME +# No need to change this for testing +REDIS_HOST=rq-server + +# Optionally customize port redis listens on +REDIS_PORT=6379 + +# Base url of pr binaries mirror +PR_BINARIES_MIRROR_BASE_URL=s3://spack-binaries-prs + +# Name of expected base branch (we react to PRs merged to this branch) +PR_BINARIES_BASE_BRANCH=develop + # You don't need to change this unless you change the docker-compose volumes GITHUB_PRIVATE_KEY=/app/spackbot/spack-bot-develop.private-key.pem diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml index 57b687d..d316cdb 100644 --- a/.github/workflows/build-deploy.yaml +++ b/.github/workflows/build-deploy.yaml @@ -12,19 +12,25 @@ on: jobs: deploy-test-containers: runs-on: ubuntu-latest - name: Build Spackbot Container + strategy: + fail-fast: false + # matrix: [tag, path to Dockerfile, label] + matrix: + dockerfile: [[spack-bot, ./Dockerfile, Spackbot], + [spackbot-workers, ./workers/Dockerfile, "Spackbot Workers"]] + name: Build ${{matrix.dockerfile[2]}} Container steps: - name: Checkout uses: actions/checkout@v2 - name: Build and Run Test Container run: | - docker build -t ghcr.io/spack/spack-bot:latest . - docker tag ghcr.io/spack/spack-bot:latest ghcr.io/spack/spack-bot:${GITHUB_SHA::8} + docker build -f ${{matrix.dockerfile[1]}} -t ghcr.io/spack/${{matrix.dockerfile[0]}}:latest . + docker tag ghcr.io/spack/${{matrix.dockerfile[0]}}:latest ghcr.io/spack/${{matrix.dockerfile[0]}}:${GITHUB_SHA::8} - name: Login and Deploy Test Container if: (github.event_name != 'pull_request') run: | docker images echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ secrets.GHCR_USERNAME }} --password-stdin - docker push --all-tags ghcr.io/spack/spack-bot + docker push --all-tags ghcr.io/spack/${{matrix.dockerfile[0]}} diff --git a/docker-compose.yml b/docker-compose.yml index c376ff6..93aa305 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,25 @@ services: context: . dockerfile: smee/Dockerfile + rq-worker: + build: + context: . + dockerfile: workers/Dockerfile + env_file: + - ./.env + deploy: + replicas: 1 + + rq-server: + env_file: + - ./.env + image: redis:alpine + expose: + - ${REDIS_PORT} + volumes: + - redis-data:/data + - redis-conf:/usr/local/etc/redis/redis.conf + spackbot: build: context: . @@ -29,3 +48,7 @@ services: - ./.env links: - smee + +volumes: + redis-data: + redis-conf: diff --git a/requirements.txt b/requirements.txt index 977d905..3ebe3e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ aiohttp gidgethub python_dotenv +rq sh # Add these so we don't wait for install diff --git a/spackbot/handlers/__init__.py b/spackbot/handlers/__init__.py index 92f2ddc..c7da1d2 100644 --- a/spackbot/handlers/__init__.py +++ b/spackbot/handlers/__init__.py @@ -2,3 +2,4 @@ from .labels import add_labels # noqa from .reviewers import add_reviewers # noqa from .style import style_comment, fix_style # noqa +from .mirrors import graduate_pr_binaries # noqa diff --git a/spackbot/handlers/mirrors.py b/spackbot/handlers/mirrors.py new file mode 100644 index 0000000..7add1f8 --- /dev/null +++ b/spackbot/handlers/mirrors.py @@ -0,0 +1,52 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import logging + +from spackbot.helpers import pr_expected_base, pr_mirror_base_url +from spackbot.workers import copy_pr_binaries, update_mirror_index, work_queue + +# If we don't provide a timeout, the default in RQ is 180 seconds +WORKER_JOB_TIMEOUT = 6 * 60 * 60 + +logger = logging.getLogger(__name__) + + +async def graduate_pr_binaries(event, gh): + payload = event.data + + base_branch = payload["pull_request"]["base"]["ref"] + is_merged = payload["pull_request"]["merged"] + + if is_merged and base_branch == pr_expected_base: + pr_number = payload["number"] + pr_branch = payload["pull_request"]["head"]["ref"] + + shared_mirror_url = f"{pr_mirror_base_url}/shared_pr_mirror" + + logger.info( + f"PR {pr_number}/{pr_branch} merged to develop, graduating binaries" + ) + + copy_q = work_queue.get_copy_queue() + copy_job = copy_q.enqueue( + copy_pr_binaries, + pr_number, + pr_branch, + shared_mirror_url, + job_timeout=WORKER_JOB_TIMEOUT, + ) + logger.info(f"Copy job queued: {copy_job.id}") + + # If the index job queue has a job queued already, there is no need to + # schedule another one + index_q = work_queue.get_index_queue() + if len(index_q.get_job_ids()) <= 0: + update_job = index_q.enqueue( + update_mirror_index, shared_mirror_url, job_timeout=WORKER_JOB_TIMEOUT + ) + logger.info(f"update-index job queued: {update_job.id}") + else: + logger.info("skipped queuing redundant update-index job") diff --git a/spackbot/helpers.py b/spackbot/helpers.py index 51ff1ef..765534c 100644 --- a/spackbot/helpers.py +++ b/spackbot/helpers.py @@ -36,6 +36,12 @@ aliases = ["spack-bot", "spackbot", "spack-bot-develop", botname] alias_regex = "(%s)" % "|".join(aliases) +# Information needed for graduating PR binaries +pr_mirror_base_url = os.environ.get( + "PR_BINARIES_MIRROR_BASE_URL", "s3://spack-binaries-prs" +) +pr_expected_base = os.environ.get("PR_BINARIES_BASE_BRANCH", "develop") + @contextlib.contextmanager def temp_dir(): diff --git a/spackbot/routes.py b/spackbot/routes.py index 6dc1e9a..9636e17 100644 --- a/spackbot/routes.py +++ b/spackbot/routes.py @@ -62,6 +62,14 @@ async def on_pull_request(event, gh, *args, session, **kwargs): await handlers.add_reviewers(event, gh) +@router.register("pull_request", action="closed") +async def on_closed_pull_request(event, gh, *args, session, **kwargs): + """ + Respond to the pull request closed + """ + await handlers.graduate_pr_binaries(event, gh) + + @router.register("issue_comment", action="created") async def add_comments(event, gh, *args, session, **kwargs): """ diff --git a/spackbot/workers.py b/spackbot/workers.py new file mode 100644 index 0000000..22ff744 --- /dev/null +++ b/spackbot/workers.py @@ -0,0 +1,202 @@ +import aiohttp +import asyncio +import logging +import os +import tempfile +import zipfile + +from datetime import datetime + +from redis import Redis +from rq import Queue + +from spackbot.helpers import gitlab_spack_project_url, pr_mirror_base_url + +logger = logging.getLogger(__name__) + +REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") +REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379")) +GITLAB_TOKEN = os.environ.get("GITLAB_TOKEN") +QUERY_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + + +async def find_latest_pipeline(url, headers, session): + async with session.get(url, headers=headers) as response: + pipeline_objects = await response.json() + + latest_p_obj = None + + if pipeline_objects: + latest_p_obj = pipeline_objects[0] + latest_time = 
datetime.strptime(latest_p_obj["updated_at"], QUERY_TIME_FORMAT) + + for i in range(1, len(pipeline_objects)): + p_obj = pipeline_objects[i] + updated = datetime.strptime(p_obj["updated_at"], QUERY_TIME_FORMAT) + if updated > latest_time: + latest_time = updated + latest_p_obj = p_obj + + return latest_p_obj + + +async def retrieve_artifacts(url, headers, dl_folder, session): + save_path = os.path.join(dl_folder, "artifacts.zip") + + async with session.get(url, headers=headers) as response: + if not os.path.exists(dl_folder): + os.makedirs(dl_folder) + + with open(save_path, "wb") as fd: + async for chunk in response.content.iter_chunked(65536): + fd.write(chunk) + + zip_file = zipfile.ZipFile(save_path) + zip_file.extractall(dl_folder) + zip_file.close() + + os.remove(save_path) + + +async def download_spack_lock_files(url, headers, download_dir, session): + async with session.get(url, headers=headers) as response: + job_objects = await response.json() + + folder_list = [] + + if job_objects: + for job in job_objects: + artifacts_url = f"{gitlab_spack_project_url}/jobs/{job['id']}/artifacts" + dl_folder = os.path.join(download_dir, job["name"]) + + await retrieve_artifacts(artifacts_url, headers, dl_folder, session) + + for root, _, files in os.walk(dl_folder): + if "spack.lock" in files: + folder_list.append(root) + break + else: + print( + f"Error: unable to find spack.lock in download folder {dl_folder}" + ) + + return folder_list + + +class WorkQueue: + def __init__(self): + logger.info(f"WorkQueue creating redis connection ({REDIS_HOST}, {REDIS_PORT})") + self.redis_conn = Redis(host=REDIS_HOST, port=REDIS_PORT) + logger.info(f"WorkQueue creating redis connection ({REDIS_HOST}, {REDIS_PORT})") + self.copy_q = Queue(name="copy", connection=self.redis_conn) + self.index_q = Queue(name="index", connection=self.redis_conn) + + def get_copy_queue(self): + return self.copy_q + + def get_index_queue(self): + return self.index_q + + +work_queue = WorkQueue() + + +async def run_in_subprocess(cmd_string): + proc = await asyncio.create_subprocess_shell( + cmd_string, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + + stdout, stderr = await proc.communicate() + + print(f"[{cmd_string!r} exited with {proc.returncode}]") + if stdout: + print(f"[stdout]\n{stdout.decode()}") + if stderr: + print(f"[stderr]\n{stderr.decode()}") + + +async def copy_pr_binaries(pr_number, pr_branch, shared_pr_mirror_url): + """Find the latest gitlab pipeline for the PR, get the spack.lock + for each child pipeline, and for each one, activate the environment + and issue the spack buildcache sync command to copy between the + per-pr mirror and the shared pr mirror. 
+ """ + pipeline_ref = f"github/pr{pr_number}_{pr_branch}" + pr_mirror_url = f"{pr_mirror_base_url}/{pipeline_ref}" + pipelines_url = ( + f"{gitlab_spack_project_url}/pipelines?ref={pipeline_ref}&per_page=100" + ) + headers = {"PRIVATE-TOKEN": GITLAB_TOKEN} + + # Create single new session for gitlab requests + async with aiohttp.ClientSession() as session: + latest_pipeline = await find_latest_pipeline(pipelines_url, headers, session) + + if not latest_pipeline: + print(f"Unable to find latest pipeline for {pipeline_ref}") + return + + print(f"found latest pipeline for {pipeline_ref}:") + print(latest_pipeline) + + p_id = latest_pipeline["id"] + + jobs_url = f"{gitlab_spack_project_url}/pipelines/{p_id}/jobs" + + with tempfile.TemporaryDirectory() as tmp_dir_path: + print(f"Downloading spack.lock files under: {tmp_dir_path}") + folders = await download_spack_lock_files( + jobs_url, headers, tmp_dir_path, session + ) + + for env_dir in folders: + print( + f"Copying binaries from {pr_mirror_url} to {shared_pr_mirror_url}" + ) + print(f" using spack environment: {env_dir}") + + cmd_elements = [ + "spack", + "-e", + env_dir, + "-d", + "buildcache", + "sync", + "--src-mirror-url", + pr_mirror_url, + "--dest-mirror-url", + shared_pr_mirror_url, + ] + + await run_in_subprocess(" ".join(cmd_elements)) + + # Clean up the per-pr mirror + print(f"Deleting mirror: {pr_mirror_url}") + + cmd_elements = ["spack", "mirror", "destroy", "--mirror-url", pr_mirror_url] + + await run_in_subprocess(" ".join(cmd_elements)) + + +async def update_mirror_index(mirror_url): + """Use spack buildcache command to update index on remote mirror""" + print(f"Updating binary index at {mirror_url}") + + cmd_elements = [ + "spack", + "-d", + "buildcache", + "update-index", + "--mirror-url", + f"'{mirror_url}'", + ] + + await run_in_subprocess(" ".join(cmd_elements)) + + +async def test_job(): + print("Running a test spack command") + + cmd_elements = ["spack", "help", "--all"] + + await run_in_subprocess(" ".join(cmd_elements)) diff --git a/workers/Dockerfile b/workers/Dockerfile new file mode 100644 index 0000000..e939053 --- /dev/null +++ b/workers/Dockerfile @@ -0,0 +1,11 @@ +FROM ghcr.io/spack/ubuntu-bionic:latest + +COPY workers/requirements.txt /source/requirements.txt +COPY workers/entrypoint.sh /source/entrypoint.sh +COPY spackbot /source/spackbot + +RUN pip3 install --upgrade pip setuptools wheel && \ + pip3 install -r /source/requirements.txt + +WORKDIR /source +ENTRYPOINT ["/bin/bash", "/source/entrypoint.sh"] diff --git a/workers/entrypoint.sh b/workers/entrypoint.sh new file mode 100755 index 0000000..68f0f97 --- /dev/null +++ b/workers/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Make sure spack can be found by worker jobs +. $SPACK_ROOT/share/spack/setup-env.sh + +# Define REDIS_HOST and REDIS_PORT in .env file or k8s deployment. Workers +# always take jobs from the "copy" queue first and then the "index" queue +# when passed in this order. +rq worker -u redis://${REDIS_HOST}:${REDIS_PORT} --with-scheduler copy index diff --git a/workers/requirements.txt b/workers/requirements.txt new file mode 100644 index 0000000..71a8b11 --- /dev/null +++ b/workers/requirements.txt @@ -0,0 +1,4 @@ +aiohttp +boto3 +gidgethub +rq
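
A note on the queueing flow introduced above: `graduate_pr_binaries` only enqueues work; the actual copying and index updates run in the rq-worker containers, which drain the "copy" queue before the "index" queue because of the argument order in workers/entrypoint.sh. One way to exercise that path locally, with no GitHub or GitLab involved, is to enqueue the `test_job` defined in spackbot/workers.py against a running rq-server. The snippet below is an illustrative sketch only, not part of this change; the script name and timeout value are made up, while `work_queue`, `test_job`, and the queue names come from the code above.

    # smoke_test_queue.py -- hypothetical helper; run where REDIS_HOST/REDIS_PORT
    # point at the rq-server service from docker-compose.yml.
    from spackbot.workers import test_job, work_queue

    # Enqueue the no-op test job on the "copy" queue, the same way
    # graduate_pr_binaries enqueues copy_pr_binaries.
    copy_q = work_queue.get_copy_queue()
    job = copy_q.enqueue(test_job, job_timeout=600)  # timeout value is arbitrary

    print(f"queued {job.id} on '{copy_q.name}', pending: {copy_q.get_job_ids()}")

If a worker started with `rq worker -u redis://${REDIS_HOST}:${REDIS_PORT} copy index` is listening, the job should leave the pending list and the worker log should show the `spack help --all` output produced by `test_job`.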
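
For reference, the new `pull_request`/`closed` route only acts when the event payload shows the PR was actually merged into the expected base branch (`PR_BINARIES_BASE_BRANCH`, "develop" by default). A trimmed-down payload showing just the fields `graduate_pr_binaries` reads is sketched below; the PR number and branch name are made up for illustration, and GitHub sends many more fields that the handler ignores.

    # Hypothetical minimal payload for a PR merged into develop.
    payload = {
        "number": 1234,                      # -> pr_number
        "pull_request": {
            "merged": True,                  # merged, not merely closed
            "base": {"ref": "develop"},      # compared against pr_expected_base
            "head": {"ref": "my-feature"},   # -> pr_branch
        },
    }

With these values (and the defaults from helpers.py) the copy job syncs binaries from s3://spack-binaries-prs/github/pr1234_my-feature into s3://spack-binaries-prs/shared_pr_mirror, and an update-index job for the shared mirror is queued only if the "index" queue is currently empty.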