Skip to content

Commit

Permalink
add install script
Browse files Browse the repository at this point in the history
  • Loading branch information
samsja committed Dec 4, 2024
1 parent c8b2664 commit 6e5c511
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.vscode/*
logs/*
wandb/*
datasets/*

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
64 changes: 64 additions & 0 deletions scripts/install/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
#
# Install script: sets the shell's error-handling mode and defines the ANSI
# colour constants used by the logging helper below.

# Abort on the first failing command. `pipefail` extends that to pipelines, so
# a failed download that is piped into a shell (`curl ... | sh`) is no longer
# masked by the exit status of the last command in the pipe.
# NOTE: `-u` is deliberately left off; this script sources third-party
# env/activate files that may reference unset variables.
set -eo pipefail

# Colors for output (escape sequences are expanded at print time)
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Print an informational message to stdout, prefixed with a green "[INFO]" tag.
#   $1 - the message text (backslash escapes in it are expanded, as before)
log_info() {
    local message="$1"
    # %b expands backslash escapes in its argument the same way `echo -e` does.
    printf '%b\n' "${GREEN}[INFO]${NC} ${message}"
}

# Run the whole installation: system packages, repository checkout, uv-managed
# virtual environment, Python dependencies, git submodules, and a dataset subset.
# Takes no arguments. Any failing step aborts the script (see `set -e` above).
# Safe to re-run: an existing checkout and an existing dataset are left in place.
main() {
    # Check if sudo is installed; bootstrap it otherwise (this only works when
    # already running as root, presumably a minimal container image).
    # apt-get is used instead of apt because apt's CLI is not stable for scripts.
    if ! command -v sudo &> /dev/null; then
        apt-get update
        apt-get install sudo -y
    fi

    log_info "Updating apt..."
    sudo apt-get update

    log_info "Installing cmake python3-dev..."
    sudo apt-get install python3-dev cmake -y

    log_info "Installing iperf..."
    sudo apt-get install iperf -y

    # git and curl are both required below; an image bare enough to lack sudo
    # cannot be assumed to ship them.
    log_info "Installing git curl..."
    sudo apt-get install git curl -y

    log_info "Cloning repository..."
    if [ -d prime/.git ]; then
        # A previous run already cloned the project; cloning again would fail.
        log_info "Repository already present, skipping clone."
    else
        git clone https://github.com/PrimeIntellect-ai/prime.git
    fi

    log_info "Entering project directory..."
    cd prime

    log_info "Installing uv..."
    curl -LsSf https://astral.sh/uv/install.sh | sh

    log_info "Sourcing uv environment..."
    # Quoted so a home directory containing spaces does not split the path.
    source "$HOME/.local/bin/env"

    log_info "Creating virtual environment..."
    uv venv

    log_info "Activating virtual environment..."
    source .venv/bin/activate

    log_info "Installing dependencies..."
    uv sync --extra all

    log_info "Installing flash-attn..."
    uv pip install flash-attn --no-build-isolation

    log_info "Updating git submodules..."
    git submodule update --init --recursive

    log_info "Downloading data..."
    mkdir -p datasets
    if [ -e datasets/fineweb-edu ]; then
        # Moving onto an existing directory would nest the data at
        # datasets/fineweb-edu/fineweb-edu, so keep what is already there.
        log_info "datasets/fineweb-edu already exists, skipping download."
    else
        # The subset script presumably writes to ./fineweb-edu, which is then
        # moved to the location the training config expects.
        uv run python scripts/subset_data.py --dataset_name PrimeIntellect/fineweb-edu --data_world_size 1 --data_rank 0 --max_shards 128
        mv fineweb-edu/ datasets/fineweb-edu/
    fi

    log_info "Installation completed! You can double check that everything is install correctly by running 'GLOO_SOCKET_IFNAME=lo GLOBAL_ADDR=localhost GLOBAL_RANK=0 GLOBAL_UNIQUE_ID=0 GLOBAL_WORLD_SIZE=1 GLOBAL_PORT=8989 uv run torchrun --nproc_per_node=2 src/zeroband/train.py @configs/debug/diloco.toml'"
}

# Entry point: run the installation steps defined in main.
main
2 changes: 1 addition & 1 deletion src/zeroband/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class DataConfig(BaseConfig):
dataset_name_or_paths: str = "/data/datasets/fineweb-edu"
dataset_name_or_paths: str = "datasets/fineweb-edu"
val_dataset_name_or_paths: Optional[str] = None
seq_length: int = 1024
fake: bool = False
Expand Down

0 comments on commit 6e5c511

Please sign in to comment.