Finish mlora.py #16

Merged (3 commits) on Aug 30, 2023
16 changes: 8 additions & 8 deletions README.md
@@ -20,7 +20,7 @@ Support
## Installation
```bash
# Optional but recommended
-conda create -n aspen_env python=3.6
+conda create -n aspen_env python=3.8
conda activate aspen_env
# Install requirements
pip install -r requirements.txt
@@ -31,18 +31,18 @@ import aspen
```
## Quickstart

-The `mlora.py` code is a starting point for finetuning and inference on various datasets.
+The `mlora.py` code is a starting point for finetuning on various datasets.
Basic command for finetuning a baseline model on the Alpaca dataset:
```bash
-python mlora.py --model_name_or_path <path_or_name>
+python mlora.py \
+--base_model decapoda-research/llama-7b-hf \
+--config ./config/alpaca.json \
+--load_8bit true
```

-For models larger than 13B, we recommend adjusting the learning rate:
-```bash
-python mlora.py --learning_rate 0.0001 --model_name_or_path <path_or_name>
-```
+You can check the template finetune configuration in the [template](./template/) folder.

-You can check detailed usage information by `--help` option:
+For detailed usage information, use the `--help` option:
```bash
python mlora.py --help
```
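The multi-adapter demo configuration added by this pull request, `config/finetune.json` (next file below), is consumed through the same `--config` flag, for example `python mlora.py --base_model decapoda-research/llama-7b-hf --config ./config/finetune.json` (reusing the base model from the README example above).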
48 changes: 48 additions & 0 deletions config/finetune.json
@@ -0,0 +1,48 @@
{
    "cutoff_len": 256,
    "group_by_length": false,
    "expand_right": true,
    "save_step": 200,
    "lora": [
        {
            "name": "lora_0",
            "output": "lora_0",
            "batch_size": 16,
            "num_epochs": 3,
            "r": 8,
            "alpha": 16,
            "dropout": 0.05,
            "target_modules": {
                "q_proj": true,
                "k_proj": true,
                "v_proj": true,
                "o_proj": true,
                "w1_proj": false,
                "w2_proj": false,
                "w3_proj": false
            },
            "data": "data/data_demo.json",
            "prompt": "template/template_demo.json"
        },
        {
            "name": "lora_1",
            "output": "lora_1",
            "batch_size": 16,
            "num_epochs": 3,
            "r": 8,
            "alpha": 16,
            "dropout": 0.05,
            "target_modules": {
                "q_proj": true,
                "k_proj": true,
                "v_proj": true,
                "o_proj": true,
                "w1_proj": false,
                "w2_proj": false,
                "w3_proj": false
            },
            "data": "data/data_demo.json",
            "prompt": "template/template_demo.json"
        }
    ]
}
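As a quick sanity check before launching a run, a configuration like the one above can be loaded and summarized with nothing but the standard library. This is only an illustrative sketch: the path and the printed fields simply mirror the demo file above, and none of it is part of the `aspen` API.

```python
import json

# Demo configuration added in this pull request (path relative to the repo root).
CONFIG_PATH = "config/finetune.json"

with open(CONFIG_PATH, "r", encoding="utf8") as fp:
    config = json.load(fp)

print(f"cutoff_len={config['cutoff_len']} save_step={config['save_step']}")
for adapter in config["lora"]:
    # Projection modules that actually receive LoRA weights for this adapter.
    targets = [name for name, enabled in adapter["target_modules"].items() if enabled]
    print(f"{adapter['name']}: r={adapter['r']}, alpha={adapter['alpha']}, "
          f"dropout={adapter['dropout']}, batch_size={adapter['batch_size']}, "
          f"targets={', '.join(targets)}, data={adapter['data']}")
```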
111 changes: 100 additions & 11 deletions mlora.py
@@ -20,20 +20,32 @@
import argparse
import torch
import aspen
+import json
+import os

+# Command Line Arguments

parser = argparse.ArgumentParser(description='ASPEN main program')
-parser.add_argument('--model_name_or_path', type=str, help='Path to or name of base model')
-parser.add_argument('--load_in_8bit', type=bool, default=False, help='Load model in 8bit mode')
-parser.add_argument('--device', type=str, default='cuda:0', help='Specify which GPU to be used, default is cuda:0')
-parser.add_argument('--log', type=bool, default=True, help='Turn on or off log, default is true')
+parser.add_argument('--base_model', type=str,
+                    help='Path to or name of base model')
+parser.add_argument('--load_8bit', type=bool, default=False,
+                    help='Load model in 8bit mode')
+parser.add_argument('--device', type=str, default='cuda:0',
+                    help='Specify which GPU to be used, default is cuda:0')
+parser.add_argument('--config', type=str,
+                    help='Path to finetune configuration')
+parser.add_argument('--seed', type=int, default=42,
+                    help='Random seed in integer, default is 42')
+parser.add_argument('--log', type=bool, default=True,
+                    help='Turn on or off log, default is true')

args = parser.parse_args()


def log(msg: str):
    if args.log:
-        print('[%s] ASPEN: %s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))
+        print('[%s] ASPEN: %s' %
+              (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))


if torch.cuda.is_available():
@@ -44,25 +56,102 @@ def log(msg: str):
    exit(-1)


-if args.model_name_or_path is None:
-    print('error: Argument --model_name_or_path are required.')
+if args.base_model is None:
+    print('error: Argument --base_model is required.')
    parser.print_help()
    exit(-1)


+if args.config is None:
+    print('error: Argument --config is required.')
+    parser.print_help()
+    exit(-1)


-def prep_llm():
+# Functions


+def setup_seed(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)


+def load_base_model():
    llama_args = aspen.LlamaModelArgs()
-    tokenizer = aspen.Tokenizer(args.model_name_or_path + os.sep + 'tokenizer.model')
+    tokenizer = aspen.Tokenizer(args.base_model + os.sep + 'tokenizer.model')
    tokenizer.pad_id_ = 0
    llama_args.max_seq_len_ = 4096
    llama_args.device = args.device
    llama_args.vocab_size_ = tokenizer.n_words_
    llama_args.pad_id_ = tokenizer.pad_id_
    llama_args.n_heads_ = 32
    model = aspen.LlamaModel(llama_args)
-    aspen.load_llama_tf_weight(model, args.model_name_or_path, args.device, args.load_in_8bit)
+    aspen.load_llama_tf_weight(
+        model, args.base_model, args.device, args.load_8bit)
    return tokenizer, model


+def init_lora_model(config: dict, llama_model: aspen.LlamaModel):
+    for lora_config in config["lora"]:
+        aspen.load_random_lora_7b_weight(
+            llama_model,
+            lora_config["name"],
+            lora_config["r"],
+            llama_model.dim_,
+            lora_config["target_modules"],
+            llama_model.device_)
+        llama_model.update_lora_configure(
+            lora_config["name"], lora_config["r"], lora_config["alpha"], lora_config["dropout"])


+def train(config: dict, llama_model: aspen.LlamaModel):
+    torch.cuda.empty_cache()
+    optimizer = torch.optim.AdamW(llama_model.get_train_paramas(config))
+    step_cnt = 0
+    while not data_set.check_done():
+        optimizer.zero_grad()
+        loss_fn = torch.nn.CrossEntropyLoss()
+        input: aspen.MultiLoraBatchData = data_set.get_batch_data()

+        step_cnt += 1

+        output = llama_model.forward(input)
+        labels = torch.tensor(input.batch_tokens_,
+                              dtype=torch.long).to(args.device)

+        total_loss = None
+        for lora_config in input.lora_batch_data_config_:
+            start_idx = lora_config.batch_start_idx_
+            end_idx = lora_config.batch_end_idx_
+            loss_input = output[start_idx:end_idx][..., :-1,
+                                                   :].contiguous().view(-1, llama_model.vocab_size_)
+            loss_target = labels[start_idx:end_idx][...,
+                                                    1:].contiguous().view(-1)
+            loss = loss_fn(loss_input, loss_target)
+            print(
+                f" adapter: {lora_config.adapter_name_} loss: {loss}")
+            if total_loss is None:
+                total_loss = loss
+            else:
+                total_loss += loss

+        total_loss.backward()
+        optimizer.step()

+        if step_cnt % config["save_step"] == 0:
+            aspen.save_lora_model(llama_model, config, f"{step_cnt}")

+    aspen.save_lora_model(llama_model, config)


+# Main Function


if __name__ == "__main__":
-    tokenizer, model = prep_llm()
+    setup_seed(args.seed)
+    with open(args.config, 'r', encoding='utf8') as fp:
+        config = json.load(fp)
+    tokenizer, model = load_base_model()
+    data_set = aspen.DataSet(config, tokenizer)
+    init_lora_model(config, model)
+    train(config, model)
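The heart of the new `train` loop is that a single forward pass serves several adapters at once: each adapter owns a contiguous slice of the batch, a shifted cross-entropy loss is computed per slice, and the per-adapter losses are summed before one backward pass. The sketch below reproduces just that bookkeeping on synthetic tensors; the shapes, adapter names, and batch split are invented for illustration, and nothing here comes from the `aspen` API.

```python
import torch

# Synthetic stand-ins: a batch of 4 sequences, length 8, vocabulary of 100.
# Rows 0-1 are assumed to belong to "lora_0", rows 2-3 to "lora_1".
vocab_size = 100
logits = torch.randn(4, 8, vocab_size, requires_grad=True)  # model output
labels = torch.randint(0, vocab_size, (4, 8))                # input token ids
adapter_ranges = {"lora_0": (0, 2), "lora_1": (2, 4)}

loss_fn = torch.nn.CrossEntropyLoss()
total_loss = None
for name, (start, end) in adapter_ranges.items():
    # Next-token prediction: the logit at position t is scored against the
    # token at position t + 1, hence the [:-1] / [1:] shift.
    sliced_logits = logits[start:end, :-1, :].contiguous().view(-1, vocab_size)
    sliced_labels = labels[start:end, 1:].contiguous().view(-1)
    loss = loss_fn(sliced_logits, sliced_labels)
    print(f"adapter: {name} loss: {loss.item():.4f}")
    total_loss = loss if total_loss is None else total_loss + loss

# A single backward pass accumulates gradients for every adapter's slice.
total_loss.backward()
```

In `mlora.py` the slice boundaries come from `input.lora_batch_data_config_` and the logits from `llama_model.forward(input)`, but the loss bookkeeping is the same.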