Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions scripts/cluster/remote_evaluate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,18 @@ echo "Num samples: $NUM_SAMPLES"
echo "Device: $DEVICE"
echo ""

# Upload config file to cluster
echo "Step 1: Uploading config file..."
# Step 1: sync code on the cluster.
# Over SSH: cd into ~/giblet-responses, source ~/.bashrc and
# scripts/cluster/setup_cluster.sh, then call setup_cluster_environment
# (presumably the function that pulls the latest code from GitHub — confirm
# against setup_cluster.sh). Aborts the whole script if the remote command fails.
#
# The password is handed to sshpass through the SSHPASS environment variable
# (-e) instead of -p, so it does not appear in the local process list.
# NOTE(review): StrictHostKeyChecking=no with UserKnownHostsFile=/dev/null
# disables host key verification; confirm this is acceptable when
# authenticating with a password.
echo "Step 1: Syncing code from GitHub..."
SSHPASS="$PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"$HOST" "cd ~/giblet-responses && source ~/.bashrc && source scripts/cluster/setup_cluster.sh && setup_cluster_environment" || {
# Diagnostics go to stderr so they are not mixed into captured stdout.
echo "Error: Failed to sync code from GitHub" >&2
exit 1
}
echo "✓ Code synced from GitHub"

# Upload config file to cluster (config files may not be committed to git)
echo ""
echo "Step 2: Uploading config file..."
sshpass -p "$PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"$CONFIG" "$HOST:~/giblet-responses/$(basename $CONFIG)" || {
echo "Error: Failed to upload config file"
Expand All @@ -113,7 +123,7 @@ echo "✓ Config uploaded"

# Create evaluation script on cluster
echo ""
echo "Step 2: Creating evaluation script on cluster..."
echo "Step 3: Creating evaluation script on cluster..."

EVAL_SCRIPT=$(cat <<'EOF'
#!/bin/bash
Expand Down Expand Up @@ -166,7 +176,7 @@ echo "✓ Evaluation script created"

# Run evaluation
echo ""
echo "Step 3: Running evaluation on $CLUSTER..."
echo "Step 4: Running evaluation on $CLUSTER..."
echo "This may take several minutes..."
echo ""

Expand All @@ -181,7 +191,7 @@ sshpass -p "$PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/de
# Sync results back if requested
if [ "$SYNC_RESULTS" = true ]; then
echo ""
echo "Step 4: Syncing results back to local machine..."
echo "Step 5: Syncing results back to local machine..."

LOCAL_OUTPUT_DIR="$OUTPUT_DIR"
mkdir -p "$LOCAL_OUTPUT_DIR"
Expand Down
39 changes: 8 additions & 31 deletions scripts/cluster/remote_train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
#
# This script handles:
# - SSH authentication via sshpass using cluster credentials
# - Code synchronization via rsync
# - Code synchronization via GitHub (git pull)
# - Remote environment setup verification
# - Training job launch in persistent screen sessions (NO SLURM)
# - Multi-GPU training via torchrun (handled by run_giblet.sh)
Expand Down Expand Up @@ -247,40 +247,17 @@ if $RESUME; then
echo ""
fi

# Sync code to remote server
echo -e "${BLUE}=== Synchronizing Code ===${NC}"
# Sync code from GitHub instead of rsync
echo -e "${BLUE}=== Synchronizing Code via GitHub ===${NC}"

# Build rsync exclude list
EXCLUDE_ARGS="--exclude=.git --exclude=data/sherlock_nii --exclude=__pycache__ --exclude=*.pyc --exclude=venv* --exclude=*.log --exclude=checkpoints_local"
if ! $RESUME; then
EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude=checkpoints --exclude=logs"
fi

RSYNC_CMD="rsync -avz --progress $EXCLUDE_ARGS ./ $USERNAME@$SERVER:$BASE_PATH/"
SETUP_CMD="cd $BASE_PATH && source ~/.bashrc && source ~/giblet-responses/scripts/cluster/setup_cluster.sh && setup_cluster_environment"

if $DRY_RUN; then
echo -e "${BLUE}[DRY RUN]${NC} Would run: sshpass -p [PASSWORD] $RSYNC_CMD"
else
echo -e "Running: ${GREEN}rsync${NC}"
sshpass -p "$PASSWORD" $RSYNC_CMD
fi
echo ""

# Check/setup remote environment
echo -e "${BLUE}=== Verifying Remote Environment ===${NC}"
ENV_CHECK=$(run_ssh "cd $BASE_PATH && if [ -f setup_environment.sh ]; then echo 'setup_script_exists'; fi")

if [[ "$ENV_CHECK" == *"setup_script_exists"* ]]; then
echo -e "${GREEN}Found setup_environment.sh${NC}"
if ! $DRY_RUN; then
echo -e "${YELLOW}Checking if environment setup is needed...${NC}"
run_ssh "cd $BASE_PATH && if ! conda env list | grep -q giblet-py311; then bash setup_environment.sh; fi"
else
echo -e "${BLUE}[DRY RUN]${NC} Would check and setup environment if needed"
fi
echo -e "${BLUE}[DRY RUN]${NC} Would run GitHub sync on cluster"
echo -e "${BLUE}[DRY RUN]${NC} Command: $SETUP_CMD"
else
echo -e "${YELLOW}Warning: setup_environment.sh not found on remote${NC}"
echo -e "${YELLOW}Environment setup may need to be done manually${NC}"
echo -e "Running: ${GREEN}GitHub sync${NC}"
run_ssh "$SETUP_CMD"
fi
echo ""

Expand Down
Loading
Loading