Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: polish starter project code style #8

Merged
merged 4 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .style.yapf
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
based_on_style = google
spaces_before_comment = 4
split_before_logical_operator = true
column_limit = 120
column_limit = 120
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

## 📚 Overview

This repository contains the Command Line Tools for the ColossalAI Platform.
This repository contains the Command Line Tools for the ColossalAI Platform.
The CLI is called `cap` which stands for `C(olossal)A(I) P(latform)`.
This CLI aims to provide an interface for users to access the various functions provided on the cloud platform and boost their productivity.
This CLI aims to provide an interface for users to access the various functions provided on the cloud platform and boost their productivity.


The CLI is built on top of [Click](https://click.palletsprojects.com/en/8.0.x/), a Python package for creating beautiful command line interfaces in a composable way with as little code as necessary.
Expand All @@ -32,7 +32,7 @@ pip install git+https://github.com/hpcaitech/ColossalAI-Platform-CLI@main
pip install colossalai-platform
```

## ⌨️ Usage
## ⌨️ Usage

A documentation website will be built when this CLI is mature.
Before that, you can refer to the following sections to use the CLI.
Expand All @@ -41,4 +41,4 @@ Before that, you can refer to the following sections to use the CLI.

```bash
cap template init <template-name>
```
```
3 changes: 1 addition & 2 deletions colossalai_platform/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import click

from colossalai_platform.cli.aliased_group import (CONTEXT_SETTINGS,
AliasedGroup)
from colossalai_platform.cli.aliased_group import (CONTEXT_SETTINGS, AliasedGroup)

from .commands import project

Expand Down
1 change: 0 additions & 1 deletion colossalai_platform/cli/commands/projects/DataSchema.json

This file was deleted.

15 changes: 10 additions & 5 deletions colossalai_platform/cli/commands/projects/HyperParameters.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
{
"HyperParameters": [
{
"name": "epoch",
"types": "string",
"defaultValue": "10"
}
// TODO: Add your parameters like this:
// {
// "name": "epoch",
// "types": "string",
// "defaultValue": "10",
// "description": "Number of epochs for training"
// }
//
// On our platform, each parameter is injected into `train.sh`
// as an environment variable, e.g. ${epoch}
]
}
6 changes: 5 additions & 1 deletion colossalai_platform/cli/commands/projects/patch.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Code generated by ColossalAI-Platform-CLI. DO NOT EDIT.
def patch_tensorboard():
def _patch_tensorboard():
try:
from tensorboard.compat.tensorflow_stub.io.gfile import LocalFileSystem
# hack to make tensorboard compatible with Cloud File System
delattr(LocalFileSystem, "append")
except ImportError:
print("Tensorboard patch failed. Please check your tensorboard version.")


def patch_platform_specific_dependencies():
    """Apply every dependency patch defined in this module.

    Currently this only runs the tensorboard patch.
    """
    _patch_tensorboard()
86 changes: 58 additions & 28 deletions colossalai_platform/cli/commands/projects/train.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,68 @@
import os
from patch import patch_tensorboard
import argparse
import types
from patch import patch_platform_specific_dependencies

# ==================================
# Code generated by ColossalAI-Platform-CLI. DO NOT EDIT.
# Please do not remove this call,
# the platform's runtime environment needs it.
patch_platform_specific_dependencies()

# ===================================================================
# Example code generated by ColossalAI-Platform-CLI.
#
# ColossalAI Platform Specification
# Start
# ==================================
# Modify it to fit your needs.
# ===================================================================

# patch dependencies
patch_tensorboard()

# Environment variables for your data access
PROJECT_DIR = os.environ['PROJECT_DIR']
DATASET_DIR = os.environ['DATASET_DIR']
OUTPUT_DIR = os.environ['OUTPUT_DIR']
INPUT_MODEL_DIR = os.environ['MODEL_DIR']
CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, 'checkpoint')
TENSORBOARD_DIR = os.path.join(OUTPUT_DIR, 'tensorboard')
def add_platform_args(parser: argparse.ArgumentParser):
    """Register the platform's directory options on ``parser``.

    Adds the required ``--project_dir``, ``--dataset_dir`` and
    ``--output_dir`` options plus the optional ``--model_dir`` option
    (default ``None``). The parser is modified in place.
    """
    # required arguments
    required_dirs = (
        ("--project_dir", "The directory contains the project code."),
        ("--dataset_dir", "The directory contains the training dataset."),
        ("--output_dir", "The directory project would write output into."),
    )
    for flag, help_text in required_dirs:
        parser.add_argument(flag, type=str, required=True, help=help_text)

    # optional arguments, add more if you need
    parser.add_argument(
        "--model_dir",
        type=str,
        default=None,
        help="The directory contains the model to finetune.",
    )

# create directories
def create_if_not_exist(path):
    """Create the directory ``path``, with any missing parents, if nothing exists there yet."""
    if os.path.exists(path):
        # Something is already present at this location; leave it untouched.
        return
    os.makedirs(path, exist_ok=True)

def main():
    """Parse the platform arguments and prepare the conventional output folders.

    Reads the command line via ``add_platform_args`` and makes sure that
    ``<output_dir>/tensorboard`` and ``<output_dir>/checkpoint`` exist.
    """
    parser = argparse.ArgumentParser(description="training script")
    add_platform_args(parser)
    args = parser.parse_args()

    # There are some path conventions:
    #
    # $OUTPUT_DIR/tensorboard:
    #   The platform-builtin tensorboard expects events to be here.
    # $OUTPUT_DIR/checkpoint
    #   The platform-builtin checkpoint recovery feature
    #   expects the checkpoint to be here.
    #
    # os.makedirs is used because os.mkdir() has no `exist_ok` parameter
    # (passing it raises TypeError); makedirs also creates missing parents.
    tensorboard_dir = os.path.join(args.output_dir, "tensorboard")
    os.makedirs(tensorboard_dir, exist_ok=True)
    checkpoint_dir = os.path.join(args.output_dir, "checkpoint")
    os.makedirs(checkpoint_dir, exist_ok=True)

# ==================================
# Code generated by ColossalAI-Platform-CLI. DO NOT EDIT.
#
# ColossalAI Platform Specification
# End
# ==================================
# TODO: your training code here


if __name__ == "__main__":
main()
24 changes: 23 additions & 1 deletion colossalai_platform/cli/commands/projects/train.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
#!/usr/bin/env bash
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# ===================================================================
# Welcome to ColossalAI Platform!
# ===================================================================
# The following environment variables are injected by the runner:
#
# 1. ColossalAI Platform defined ones:
# PROJECT_DIR, DATASET_DIR, MODEL_DIR, OUTPUT_DIR, SCRIPT_DIR
#
# 2. Required by torchrun:
# NNODES, NPROC_PER_NODE, NODE_RANK, MASTER_ADDR, MASTER_PORT
#
# 3. Hyperparameters from configuration UI:
# (check HyperParameters.json for more details)
#
# The runner then executes this script (`train.sh`).
# ===================================================================

# Launch train.py through torchrun.
# Every expansion is quoted so values containing spaces are not word-split.
# --model_dir is optional in train.py, so it is passed only when MODEL_DIR is
# set and non-empty; otherwise the flag would swallow the next argument.
torchrun --nnodes "${NNODES}" \
    --nproc_per_node "${NPROC_PER_NODE}" \
    --node_rank "${NODE_RANK}" \
    --master_addr "${MASTER_ADDR}" \
    --master_port "${MASTER_PORT}" \
    "${SCRIPT_DIR}/train.py" \
    --project_dir "${PROJECT_DIR}" \
    --dataset_dir "${DATASET_DIR}" \
    ${MODEL_DIR:+--model_dir "${MODEL_DIR}"} \
    --output_dir "${OUTPUT_DIR}"

# TODO: add more argument passing here
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
click
click
10 changes: 4 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def get_project_files():
# the templates files are located in current_file_path/templates
current_file_path = os.path.dirname(os.path.abspath(__file__))
project_file_dir = os.path.join(current_file_path, 'colossalai_platform/cli/commands/projects')

# get all the files in project_file_dir
project_files = []

for file_name in os.listdir(project_file_dir):
project_files.append(os.path.join(project_file_dir, file_name))
return project_files
Expand Down Expand Up @@ -60,9 +60,7 @@ def get_project_files():
cap=colossalai_platform.cli:cli
''',
classifiers=[
'Programming Language :: Python :: 3',
'License :: OSI Approved :: Apache Software License',
'Topic :: Software Development :: Build Tools',
"Topic :: System :: Distributed Computing"
'Programming Language :: Python :: 3', 'License :: OSI Approved :: Apache Software License',
'Topic :: Software Development :: Build Tools', "Topic :: System :: Distributed Computing"
],
)
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.1
0.0.1
Loading