# CML #64 - workflow file for this run.
# NOTE: the GitHub viewer flagged this file as containing bidirectional Unicode
# text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Display name of this workflow.
name: CML
# Triggers: manual dispatch (with debugging / force-rerun switches) and pushes
# to experiment branches that touch the package or the figure pipeline.
on:
  workflow_dispatch:
    # All inputs are booleans defaulting to false, so a manual run with
    # nothing ticked passes no force flags and opens no debug sessions.
    inputs:
      deploy_debug_enabled:
        description: "Run the deployment with tmate.io debugging enabled"
        required: true
        type: boolean
        default: false
      model_debug_enabled:
        description: "Run the model with tmate.io debugging enabled"
        required: true
        type: boolean
        default: false
      end_debug_enabled:
        description: "Run tmate.io debugging at the end of the workflow"
        required: true
        type: boolean
        default: false
      force_retraining:
        description: "Force retraining in model execution"
        required: true
        type: boolean
        default: false
      force_postprocessing:
        description: "Force postprocessing in model execution"
        required: true
        type: boolean
        default: false
      force_summarize:
        description: "Force summarization in model execution"
        required: true
        type: boolean
        default: false
      force_all:
        description: "Force all steps in model execution"
        required: true
        type: boolean
        default: false
  push:
    # Only branches whose name starts with "exp" trigger a run.
    branches:
      - "exp*"
    # ...and only when one of these paths changed.
    paths:
      - "pyrovelocity/**"
      - "reproducibility/figures/*.py"
      - "reproducibility/figures/dvc.*"
      - "reproducibility/figures/config.yaml"
# Review/set variables via gh CLI:
#
#   gh secret list --repo=$(GH_REPO)
#   gh secret set PERSONAL_ACCESS_TOKEN --repo="$(GH_REPO)" --body="$(GH_PAT)"
#
# see also:
#   https://cml.dev/doc/self-hosted-runners?tab=GitHub#personal-access-token
# test machine: n1-standard-32+nvidia-tesla-t4*1
# large machine: n1-standard-64+nvidia-tesla-t4*4
# large machine: a2-highgpu-2g+nvidia-tesla-a100*2
# Workflow-level environment, visible to every step of every job.
# All credentials are read from repository secrets; none are stored here.
env:
  # Personal access token; presumably what CML uses to register the
  # self-hosted runner - confirm against the CML docs.
  REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
  GOOGLE_APPLICATION_CREDENTIALS_DATA: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS_DATA }}
  # Referenced by the deploy job's --cloud-permission-set argument.
  GCP_SERVICE_ACCOUNT: ${{ secrets.GCP_SERVICE_ACCOUNT }}
  MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLFLOW_TRACKING_PASSWORD }}
  MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_TRACKING_URI }}
  MLFLOW_TRACKING_USERNAME: ${{ secrets.MLFLOW_TRACKING_USERNAME }}
  # NOTE(review): looks like verbose provider logging for the provisioning
  # tooling - confirm it is still wanted outside of debugging.
  TF_LOG_PROVIDER: DEBUG
jobs:
  # Launch (or reuse) a GCP GPU runner labelled `cml-gpu`; the `pipeline` job
  # below is scheduled onto that label.
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: iterative/setup-cml@v1
      # Optional interactive debugging. `inputs` is only populated for manual
      # (workflow_dispatch) runs, so the event is checked explicitly.
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_debug_enabled }}
      - name: Deploy cloud runner
        run: |
          # Zones are tried in this order, up to three passes over the list;
          # the first successful launch ends the search.
          GCP_ZONES=( "us-central1-b" "us-central1-f"
                      "us-central1-a" "us-central1-c"
                      "us-east1-b"
                      "us-west1-b" "us-west4-b" )
          launched=false
          for trial in {1..3}; do
            for zone in "${GCP_ZONES[@]}"; do
              echo "Trial $trial in zone $zone"
              # --cloud-type is quoted so the `*` is never treated as a glob.
              if cml runner launch \
                --labels=cml-gpu \
                --reuse-idle \
                --cloud=gcp \
                --cloud-type="n1-highmem-96+nvidia-tesla-t4*4" \
                --cloud-gpu=tesla \
                --cloud-hdd-size=500 \
                --cloud-region="$zone" \
                --cloud-permission-set="${GCP_SERVICE_ACCOUNT},scopes=storage-rw"; then
                launched=true
                break 2
              fi
            done
          done
          # Fail loudly instead of relying on the loop's leftover exit status.
          if [ "$launched" != "true" ]; then
            echo "Failed to launch a cloud runner in any zone after 3 trials" >&2
            exit 1
          fi

  # Run the experiment pipeline inside the project container on the GPU
  # runner provisioned by `deploy`.
  pipeline:
    needs: deploy
    runs-on: [self-hosted, cml-gpu]
    # 24 hours.
    timeout-minutes: 1440
    container:
      image: ghcr.io/pinellolab/pyrovelocity:master
      options: --gpus all
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history rather than a shallow clone.
          fetch-depth: 0
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read it as the float 3.1.
          python-version: "3.10"
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          # Keeps the venv at ./.venv, which is the path cached below.
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.model_debug_enabled }}
      # Dependencies are installed only on a cache miss; the project itself
      # is installed on every run.
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install -E dev -E plotting --no-interaction --no-root
      - name: Install project
        run: poetry install -E dev -E plotting --no-interaction
      - name: CML setup
        run: cml ci
      - name: Setup rclone
        env:
          # Passed through the environment rather than interpolated into the
          # script text, so quotes or shell metacharacters inside the secret
          # cannot break or alter the command.
          RCLONE_CONF: ${{ secrets.RCLONE_CONF }}
        run: |
          sudo -v ; curl https://rclone.org/install.sh | sudo bash
          which rclone
          # -y: never wait for a confirmation prompt in a non-interactive job.
          sudo apt-get update && sudo apt-get install -y tree
          which tree
          mkdir -p "$HOME/.config/rclone/"
          printf '%s\n' "$RCLONE_CONF" > "$HOME/.config/rclone/rclone.conf"
      - name: Run experiment and submit report
        run: |
          source .venv/bin/activate
          # Translate the manual-run switches into pipeline.sh flags. On push
          # events the inputs are empty, so no flag is added.
          flags=()
          if [ "${{ github.event.inputs.force_retraining }}" = "true" ]; then
            flags+=(-ft)
          fi
          if [ "${{ github.event.inputs.force_postprocessing }}" = "true" ]; then
            flags+=(-fp)
          fi
          if [ "${{ github.event.inputs.force_summarize }}" = "true" ]; then
            flags+=(-fs)
          fi
          if [ "${{ github.event.inputs.force_all }}" = "true" ]; then
            flags+=(-f)
          fi
          ./.github/pipeline.sh "${flags[@]}"
        shell: bash
      # NOTE(review): this step is skipped when an earlier step fails; if the
      # end-of-run session is meant for post-mortems, consider `always()`.
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.end_debug_enabled }}