Skip to content

Commit

Permalink
added template json file
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoffatt2 committed Mar 14, 2024
1 parent c3259c2 commit 6c8d3db
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 10 deletions.
32 changes: 32 additions & 0 deletions curriculum/curriculum.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"config": "template.json",
"max_iters": 3000,
"dataset": "shakespeare_char",
"block_size": 256
},
{
"config": "template.json",
"max_iters": 3000,
"dataset": "wikitext103",
"block_size": 256
},
{
"config": "template.json",
"max_iters": 3000,
"dataset": "xsum",
"block_size": 256
},
{
"config": "template.json",
"max_iters": 3000,
"dataset": "billsum",
"block_size": 256
},
{
"config": "template.json",
"max_iters": 3000,
"dataset": "cnn_dailymail",
"block_size": 256
}
]
28 changes: 28 additions & 0 deletions explorations/template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[
{
"max_iters": ["3000"],
"n_layer": ["6"],
"n_head": ["6"],
"n_embd": ["384"],
"block_size": ["256"],
"eval_iters": ["200"],
"eval_interval": ["250"],
"log_interval": ["10"],
"device": ["cuda"],
"dataset": [""],
"compile": [true],
"seed": {
"range": {
"start": 1,
"end": 5,
"step": 1
}
},
"use_post_ln": [true],
"softmax_variant_attn": ["softmax"],
"use_abs_pos_embeddings": [true],
"use_rotary_embeddings": [true],
"tensorboard_run_name": ["curriculum"]
}
]

25 changes: 18 additions & 7 deletions run_curriculum_learning.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import subprocess
import argparse
import os
import sys
import json

prev_csv_dir = ""
prev_output_dir = ""

def run_experiments_command(training_stage, config):
def run_experiments_command(training_stage, config, **kwargs):
global prev_csv_dir
global prev_output_dir

Expand All @@ -24,14 +24,25 @@ def run_experiments_command(training_stage, config):
command.extend(["--use-best-val-loss-from", "csv_logs/" + prev_csv_dir, prev_output_dir])
prev_csv_dir = csv_dir
prev_output_dir = output_dir

for key, val in kwargs.items():
command.extend([f"--override_{key}", str(val)])
return command

def main(config_file):
    """Run each curriculum stage described by *config_file* as a subprocess.

    Two config formats are supported, selected by file extension:

    * ``.py``   — the file is read line by line; each non-structured line is
      passed to ``run_experiments_command`` as that stage's config argument.
    * ``.json`` — the file is a JSON list of objects. Each object MUST
      contain a ``"config"`` key (it becomes the positional ``config``
      argument of ``run_experiments_command`` via ``**`` expansion); all
      remaining keys (e.g. ``max_iters``, ``dataset``, ``block_size``) are
      forwarded as override kwargs.

    Stages are numbered from 1; the stage index is used by
    ``run_experiments_command`` to chain checkpoints between stages.

    Raises:
        ValueError: if the file extension is neither ``.py`` nor ``.json``
            (previously this case was silently ignored).
    """
    ext = os.path.splitext(config_file)[1]
    if ext == ".py":
        with open(config_file, "r") as f:
            configs = f.read().splitlines()

        for i, config in enumerate(configs):
            subprocess.run(run_experiments_command(i + 1, config))
    elif ext == ".json":
        with open(config_file, "r") as f:
            configs = json.load(f)

        for i, config in enumerate(configs):
            # Each JSON object supplies "config" plus per-stage overrides;
            # ** expansion maps "config" onto the positional parameter.
            subprocess.run(run_experiments_command(i + 1, **config))
    else:
        raise ValueError(f"Unsupported curriculum config extension: {ext!r}")

if __name__ == "__main__":
parser = argparse.ArgumentParser(
Expand Down
18 changes: 15 additions & 3 deletions run_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ def parse_args():
parser.add_argument("--add_names", action="store_true", help="Include names of values of the configuration parameters in addition to values (may cause too long a file name).")
parser.add_argument("--use-best-val-loss-from", nargs=2, metavar=('csv_dir', 'output_dir'), type=str, default=['', ''],
help="Grab the best val loss of the run given by the csv_dir. Then, use the corresponding ckpt from the matching output_dir")
parser.add_argument('--override_max_iters', default=None, type=int)
parser.add_argument('--override_dataset', default=None, type=str)
parser.add_argument('--override_block_size', default=None, type=int)
return parser.parse_args()

def find_best_val_loss(csv_dir, output_dir):
Expand Down Expand Up @@ -94,10 +97,18 @@ def format_config_name(config, config_basename, prefix, add_names):

return f"{prefix}{config_basename}-{'-'.join(config_items)}"

def run_command(config, config_basename, output_dir, csv_ckpt_dir, prefix, add_names, best_val_loss_from):
def run_command(config, config_basename, output_dir, csv_ckpt_dir, prefix, add_names,
best_val_loss_from, override_max_iters, override_dataset, override_block_size):
formatted_name = format_config_name(config, config_basename, prefix, add_names)
config['tensorboard_run_name'] = formatted_name
config['out_dir'] = os.path.join(output_dir, f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{formatted_name}")

if override_max_iters:
config['max_iters'] = str(override_max_iters)
if override_dataset:
config['dataset'] = override_dataset
if override_block_size:
config['block_size'] = str(override_block_size)

base_command = ["python3", "train.py"]
for key, value in config.items():
Expand Down Expand Up @@ -129,8 +140,9 @@ def main():

for config in original_configurations:
for combination in generate_combinations(config):
run_command(combination, config_basename, args.output_dir,
args.csv_ckpt_dir, args.prefix, args.add_names, args.use_best_val_loss_from)
run_command(combination, config_basename, args.output_dir, args.csv_ckpt_dir,
args.prefix, args.add_names, args.use_best_val_loss_from,
args.override_max_iters, args.override_dataset, args.override_block_size)

if __name__ == "__main__":
main()
Expand Down

0 comments on commit 6c8d3db

Please sign in to comment.