From 53cd43664e7a529a369e7dd875d8f1df0684aeaf Mon Sep 17 00:00:00 2001 From: Ofey Chan Date: Thu, 3 Aug 2023 14:09:41 +0800 Subject: [PATCH] feat: polish starter project code style (#8) * feat: polish project template code style * fix: pre-commit * stage: step by step * stage: description --- .style.yapf | 2 +- README.md | 8 +- colossalai_platform/cli/cli.py | 3 +- .../cli/commands/projects/DataSchema.json | 1 - .../commands/projects/HyperParameters.json | 15 ++-- .../cli/commands/projects/patch.py | 6 +- .../cli/commands/projects/train.py | 86 +++++++++++++------ .../cli/commands/projects/train.sh | 24 +++++- requirements.txt | 2 +- setup.py | 10 +-- version.txt | 2 +- 11 files changed, 108 insertions(+), 51 deletions(-) delete mode 100644 colossalai_platform/cli/commands/projects/DataSchema.json diff --git a/.style.yapf b/.style.yapf index 418a2ab..05be0dc 100644 --- a/.style.yapf +++ b/.style.yapf @@ -2,4 +2,4 @@ based_on_style = google spaces_before_comment = 4 split_before_logical_operator = true -column_limit = 120 \ No newline at end of file +column_limit = 120 diff --git a/README.md b/README.md index e77546f..6ce4d22 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ ## 📚 Overview -This repository contains the Command Line Tools for the ColossalAI Platform. +This repository contains the Command Line Tools for the ColossalAI Platform. The CLI is called `cap` which stands for `C(olossal)A(I) P(latform)`. -This CLI aims to provide an interface for users to access the various functions provided on the cloud platform and boost their productivity. +This CLI aims to provide an interface for users to access the various functions provided on the cloud platform and boost their productivity. The CLI is built on top of [Click](https://click.palletsprojects.com/en/8.0.x/), a Python package for creating beautiful command line interfaces in a composable way with as little code as necessary. @@ -32,7 +32,7 @@ pip install git+https://github.com/hpcaitech/ColossalAI-Platform-CLI@main pip install colossalai-platform ``` -## ⌨️ Usage +## ⌨️ Usage A documentation website will be built when this CLI is mature. Before that, you can refer to the following sections to use the CLI. @@ -41,4 +41,4 @@ Before that, you can refer to the following sections to use the CLI. ```bash cap template init -``` \ No newline at end of file +``` diff --git a/colossalai_platform/cli/cli.py b/colossalai_platform/cli/cli.py index 7b20c19..71845b8 100644 --- a/colossalai_platform/cli/cli.py +++ b/colossalai_platform/cli/cli.py @@ -1,7 +1,6 @@ import click -from colossalai_platform.cli.aliased_group import (CONTEXT_SETTINGS, - AliasedGroup) +from colossalai_platform.cli.aliased_group import (CONTEXT_SETTINGS, AliasedGroup) from .commands import project diff --git a/colossalai_platform/cli/commands/projects/DataSchema.json b/colossalai_platform/cli/commands/projects/DataSchema.json deleted file mode 100644 index 0967ef4..0000000 --- a/colossalai_platform/cli/commands/projects/DataSchema.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/colossalai_platform/cli/commands/projects/HyperParameters.json b/colossalai_platform/cli/commands/projects/HyperParameters.json index b7346ff..75795bd 100644 --- a/colossalai_platform/cli/commands/projects/HyperParameters.json +++ b/colossalai_platform/cli/commands/projects/HyperParameters.json @@ -1,9 +1,14 @@ { "HyperParameters": [ - { - "name": "epoch", - "types": "string", - "defaultValue": "10" - } + // TODO: Add your parameters like this: + // { + // "name": "epoch", + // "types": "string", + // "defaultValue": "10", + // "description": "Number of epochs for training" + // } + // + // At our platform, it would be injected into `train.sh` + // as environment variable ${epoch} ] } diff --git a/colossalai_platform/cli/commands/projects/patch.py b/colossalai_platform/cli/commands/projects/patch.py index 01afe01..7ac09e2 100644 --- a/colossalai_platform/cli/commands/projects/patch.py +++ b/colossalai_platform/cli/commands/projects/patch.py @@ -1,8 +1,12 @@ # Code generated by ColossalAI-Platform-CLI. DO NOT EDIT. -def patch_tensorboard(): +def _patch_tensorboard(): try: from tensorboard.compat.tensorflow_stub.io.gfile import LocalFileSystem # hack to make tensorboard compatible with Cloud File System delattr(LocalFileSystem, "append") except ImportError: print("Tensorboard patch failed. Please check your tensorboard version.") + + +def patch_platform_specific_dependencies(): + _patch_tensorboard() diff --git a/colossalai_platform/cli/commands/projects/train.py b/colossalai_platform/cli/commands/projects/train.py index 20b1ca0..e32534f 100644 --- a/colossalai_platform/cli/commands/projects/train.py +++ b/colossalai_platform/cli/commands/projects/train.py @@ -1,38 +1,68 @@ import os -from patch import patch_tensorboard +import argparse +import types +from patch import patch_platform_specific_dependencies -# ================================== -# Code generated by ColossalAI-Platform-CLI. DO NOT EDIT. +# Please do not remove this call, +# the platform's runtime environment needs it. +patch_platform_specific_dependencies() + +# =================================================================== +# Example code generated by ColossalAI-Platform-CLI. # -# ColossalAI Platform Specification -# Start -# ================================== +# Modify it to fit your needs. +# =================================================================== -# patch dependencies -patch_tensorboard() -# Environment variables for your data access -PROJECT_DIR = os.environ['PROJECT_DIR'] -DATASET_DIR = os.environ['DATASET_DIR'] -OUTPUT_DIR = os.environ['OUTPUT_DIR'] -INPUT_MODEL_DIR = os.environ['MODEL_DIR'] -CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, 'checkpoint') -TENSORBOARD_DIR = os.path.join(OUTPUT_DIR, 'tensorboard') +def add_platform_args(parser: argparse.ArgumentParser): + # required arguments + parser.add_argument( + "--project_dir", + type=str, + required=True, + help="The directory contains the project code.", + ) + parser.add_argument( + "--dataset_dir", + type=str, + required=True, + help="The directory contains the training dataset.", + ) + parser.add_argument( + "--output_dir", + type=str, + required=True, + help="The directory project would write output into.", + ) + # optional arguments, add more if you need + parser.add_argument( + "--model_dir", + type=str, + default=None, + help="The directory contains the model to finetune.", + ) -# create directories -def create_if_not_exist(path): - # create if not exists - if not os.path.exists(path): - os.makedirs(path, exist_ok=True) +def main(): + parser = argparse.ArgumentParser(description="training script") + add_platform_args(parser) + args = parser.parse_args() -create_if_not_exist(CHECKPOINT_DIR) -create_if_not_exist(TENSORBOARD_DIR) + # There are some path conventions: + # + # $OUTPUT_DIR/tensorboard: + # The platform-builtin tensorboard expects events to be here. + # $OUTPUT_DIR/checkpoint + # The platform-builtin checkpoint recovery feature + # expects the checkpoint to be here. + tensorboard_dir = os.path.join(args.output_dir, "tensorboard") + os.mkdir(tensorboard_dir, exist_ok=True) + checkpoint_dir = os.path.join(args.output_dir, "checkpoint") + os.mkdir(checkpoint_dir, exist_ok=True) -# ================================== -# Code generated by ColossalAI-Platform-CLI. DO NOT EDIT. -# -# ColossalAI Platform Specification -# End -# ================================== + # TODO: your training code here + + +if __name__ == "__main__": + main() diff --git a/colossalai_platform/cli/commands/projects/train.sh b/colossalai_platform/cli/commands/projects/train.sh index d0e5206..dc0f935 100644 --- a/colossalai_platform/cli/commands/projects/train.sh +++ b/colossalai_platform/cli/commands/projects/train.sh @@ -1,10 +1,32 @@ #!/usr/bin/env bash SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# =================================================================== +# Welcome to ColossalAI Platform! +# =================================================================== +# Those environment variables would be injected by the runner: +# +# 1. ColossalAI Platform defined ones: +# PROJECT_DIR, DATASET_DIR, MODEL_DIR, OUTPUT_DIR, SCRIPT_DIR +# +# 2. Required by torchrun: +# NNODES, NPROC_PER_NODE, NODE_RANK, MASTER_ADDR, MASTER_PORT +# +# 3. Hyperparameters from configuration UI: +# (check HyperParameters.json for more details) +# +# After that, the runner would execute `train.sh`, this script. +# =================================================================== + torchrun --nnodes ${NNODES} \ --nproc_per_node ${NPROC_PER_NODE} \ --node_rank ${NODE_RANK} \ --master_addr ${MASTER_ADDR} \ --master_port ${MASTER_PORT} \ ${SCRIPT_DIR}/train.py \ - --epoch ${epoch} # add your argument passing here + --project_dir ${PROJECT_DIR} \ + --dataset_dir ${DATASET_DIR} \ + --model_dir ${MODEL_DIR} \ + --output_dir ${OUTPUT_DIR} + +# TODO: add more argument passing here diff --git a/requirements.txt b/requirements.txt index b98f660..dca9a90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -click \ No newline at end of file +click diff --git a/setup.py b/setup.py index 0609fdf..0565356 100644 --- a/setup.py +++ b/setup.py @@ -23,10 +23,10 @@ def get_project_files(): # the templates files are located in current_file_path/templates current_file_path = os.path.dirname(os.path.abspath(__file__)) project_file_dir = os.path.join(current_file_path, 'colossalai_platform/cli/commands/projects') - + # get all the files in project_file_dir project_files = [] - + for file_name in os.listdir(project_file_dir): project_files.append(os.path.join(project_file_dir, file_name)) return project_files @@ -60,9 +60,7 @@ def get_project_files(): cap=colossalai_platform.cli:cli ''', classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: Apache Software License', - 'Topic :: Software Development :: Build Tools', - "Topic :: System :: Distributed Computing" + 'Programming Language :: Python :: 3', 'License :: OSI Approved :: Apache Software License', + 'Topic :: Software Development :: Build Tools', "Topic :: System :: Distributed Computing" ], ) diff --git a/version.txt b/version.txt index 8a9ecc2..8acdd82 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.0.1 \ No newline at end of file +0.0.1