Skip to content

Commit 84f72cf

Browse files
ekzhu and pan-x-c authored
Fix installation script and improve CLI configuration (#63)
* fix installation script
* fix installation script
* fix pre-commit
* fix tests fix install
* fix docker compose
* misc improvements
* remove options
* up version

---------

Co-authored-by: pxc <panxuchen.pxc@alibaba-inc.com>
1 parent 51d2ef5 commit 84f72cf

20 files changed

Lines changed: 288 additions & 281 deletions

.github/workflows/docker/docker-compose.yml

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
11
services:
22
# use 2 nodes to simulate a cluster environment
33
tuft-node-1:
4-
image: tuft-unittest:20260123
4+
image: nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
55
pull_policy: never
6-
command: bash -c "source /opt/venv/bin/activate && uv pip install -e .[dev,backend,persistence] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
6+
command: bash -c "
7+
chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
8+
build-essential \
9+
curl git wget vim tmux net-tools \
10+
python3 python3-pip python3-dev python3-packaging python3-venv \
11+
libomp-dev infiniband-diags libibverbs-dev librdmacm-dev rdma-core perftest \
12+
&& rm -rf /var/lib/apt/lists/* \
13+
&& ln -sf /usr/bin/python3 /usr/bin/python \
14+
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
15+
&& bash /workspace/scripts/install.sh --local-source /workspace \
16+
&& source $HOME/.local/bin/env \
17+
&& source /root/.tuft/venv/bin/activate \
18+
&& uv pip install .[dev] \
19+
&& ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
720
environment:
821
- HF_ENDPOINT=https://hf-mirror.com
922
- RAY_ADDRESS=auto
@@ -12,7 +25,7 @@ services:
1225
- TUFT_TEST_MODEL_1=/mnt/models/Qwen3-0.6B
1326
- TUFT_TEST_MODEL_2=/mnt/models/Qwen3-1.7B
1427
- TEST_REDIS_URL=redis://tuft-redis:6379
15-
- VIRTUAL_ENV=/opt/venv
28+
- VIRTUAL_ENV=/root/.tuft/venv
1629
working_dir: /workspace
1730
networks:
1831
- tuft-network
@@ -25,36 +38,7 @@ services:
2538
reservations:
2639
devices:
2740
- driver: nvidia
28-
device_ids: ['0', '1']
29-
capabilities: [gpu]
30-
31-
tuft-node-2:
32-
image: tuft-unittest:20260123
33-
pull_policy: never
34-
command: bash -c "source /opt/venv/bin/activate && uv pip install -e .[dev,backend,persistence] && ray start --address=tuft-node-1:6379 --block"
35-
environment:
36-
- HF_ENDPOINT=https://hf-mirror.com
37-
- TUFT_CHECKPOINT_DIR=/mnt/checkpoints
38-
- TUFT_TEST_MODEL=/mnt/models/Qwen3-0.6B
39-
- TUFT_TEST_MODEL_1=/mnt/models/Qwen3-0.6B
40-
- TUFT_TEST_MODEL_2=/mnt/models/Qwen3-1.7B
41-
- TEST_REDIS_URL=redis://tuft-redis:6379
42-
- VIRTUAL_ENV=/opt/venv
43-
working_dir: /workspace
44-
volumes:
45-
- tuft-volume:/mnt
46-
- ../../..:/workspace
47-
depends_on:
48-
- tuft-node-1
49-
networks:
50-
- tuft-network
51-
shm_size: "64G"
52-
deploy:
53-
resources:
54-
reservations:
55-
devices:
56-
- driver: nvidia
57-
device_ids: ['2', '3']
41+
device_ids: ['0', '1', '2', '3']
5842
capabilities: [gpu]
5943

6044
tuft-redis:

.github/workflows/install-script.yml

Lines changed: 51 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -59,36 +59,42 @@ jobs:
5959
env:
6060
TUFT_HOME: ${{ runner.temp }}/tuft
6161

62-
- name: Test tuft (dry run - check config error)
62+
- name: Test tuft launch --help
6363
run: |
6464
export PATH="${TUFT_HOME}/bin:$PATH"
65-
# Should fail with config error, not import error
66-
tuft 2>&1 | grep -q "\-\-config" || tuft 2>&1 | grep -q "config"
65+
tuft launch --help
6766
env:
6867
TUFT_HOME: ${{ runner.temp }}/tuft
6968

70-
- name: Clean up installation
71-
run: rm -rf "${TUFT_HOME}"
72-
env:
73-
TUFT_HOME: ${{ runner.temp }}/tuft
74-
75-
test-install-default-with-backend:
76-
runs-on: ubuntu-latest
77-
78-
steps:
79-
- name: Checkout code
80-
uses: actions/checkout@v4
81-
82-
- name: Run install script (default includes backend)
69+
- name: Test tuft launch requires config
8370
run: |
84-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
71+
export PATH="${TUFT_HOME}/bin:$PATH"
72+
# Should fail with config error when no config provided
73+
if tuft launch 2>&1; then
74+
echo "Expected tuft launch to fail without config"
75+
exit 1
76+
fi
77+
# Verify error message mentions config
78+
tuft launch 2>&1 | grep -qi "config"
8579
env:
8680
TUFT_HOME: ${{ runner.temp }}/tuft
8781

88-
- name: Verify backend dependencies installed
82+
- name: Test tuft launch with config file
8983
run: |
90-
"${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
91-
"${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
84+
export PATH="${TUFT_HOME}/bin:$PATH"
85+
# Create a minimal config file
86+
cat > "${TUFT_HOME}/configs/tuft_config.yaml" << 'EOF'
87+
model_owner: test
88+
supported_models:
89+
- model_name: test-model
90+
model_path: /nonexistent/path
91+
max_model_len: 1024
92+
authorized_users:
93+
test-key: test-user
94+
EOF
95+
# Launch should fail due to missing model, but get past config validation
96+
# We just verify it doesn't fail on config parsing
97+
tuft launch 2>&1 | grep -v "Configuration file must be provided" || true
9298
env:
9399
TUFT_HOME: ${{ runner.temp }}/tuft
94100

@@ -97,25 +103,23 @@ jobs:
97103
env:
98104
TUFT_HOME: ${{ runner.temp }}/tuft
99105

100-
test-install-without-backend:
106+
test-backend-dependencies:
101107
runs-on: ubuntu-latest
102108

103109
steps:
104110
- name: Checkout code
105111
uses: actions/checkout@v4
106112

107-
- name: Run install script without backend
113+
- name: Run install script
108114
run: |
109-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
115+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
110116
env:
111117
TUFT_HOME: ${{ runner.temp }}/tuft
112118

113-
- name: Verify minimal install (no peft)
119+
- name: Verify backend dependencies installed
114120
run: |
115-
# peft should NOT be installed in minimal mode
116-
"${TUFT_HOME}/venv/bin/python" -c "import peft" 2>&1 && exit 1 || echo "peft not installed (expected)"
117-
# tuft should still be importable
118-
"${TUFT_HOME}/venv/bin/python" -c "import tuft; print('tuft imported successfully')"
121+
"${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
122+
"${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
119123
env:
120124
TUFT_HOME: ${{ runner.temp }}/tuft
121125

@@ -150,13 +154,20 @@ jobs:
150154
env:
151155
TUFT_HOME: ${{ runner.temp }}/tuft
152156

153-
- name: Test upgrade command
157+
- name: Test upgrade command (from PyPI)
154158
run: |
155159
export PATH="${TUFT_HOME}/bin:$PATH"
156160
tuft upgrade
157161
env:
158162
TUFT_HOME: ${{ runner.temp }}/tuft
159163

164+
- name: Test upgrade command (from local source)
165+
run: |
166+
export PATH="${TUFT_HOME}/bin:$PATH"
167+
tuft upgrade --local-source "$GITHUB_WORKSPACE"
168+
env:
169+
TUFT_HOME: ${{ runner.temp }}/tuft
170+
160171
- name: Clean up installation
161172
run: rm -rf "${TUFT_HOME}"
162173
env:
@@ -171,7 +182,7 @@ jobs:
171182

172183
- name: Initial install
173184
run: |
174-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
185+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
175186
env:
176187
TUFT_HOME: ${{ runner.temp }}/tuft
177188

@@ -184,7 +195,7 @@ jobs:
184195

185196
- name: Reinstall with --clean
186197
run: |
187-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend --clean
198+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --clean
188199
env:
189200
TUFT_HOME: ${{ runner.temp }}/tuft
190201

@@ -202,36 +213,30 @@ jobs:
202213
env:
203214
TUFT_HOME: ${{ runner.temp }}/tuft
204215

205-
test-install-backend-command:
216+
test-upgrade-from-source:
206217
runs-on: ubuntu-latest
207218

208219
steps:
209220
- name: Checkout code
210221
uses: actions/checkout@v4
211222

212-
- name: Install without backend first
213-
run: |
214-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
215-
env:
216-
TUFT_HOME: ${{ runner.temp }}/tuft
217-
218-
- name: Verify peft is NOT installed
219-
run: |
220-
"${TUFT_HOME}/venv/bin/python" -c "import peft" 2>&1 && exit 1 || echo "peft not installed (expected)"
223+
- name: Install tuft
224+
run: bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
221225
env:
222226
TUFT_HOME: ${{ runner.temp }}/tuft
223227

224-
- name: Run install-backend command
228+
- name: Test upgrade --from-source
225229
run: |
226230
export PATH="${TUFT_HOME}/bin:$PATH"
227-
tuft install-backend
231+
tuft upgrade --from-source
228232
env:
229233
TUFT_HOME: ${{ runner.temp }}/tuft
230234

231-
- name: Verify backend dependencies now installed
235+
- name: Verify tuft still works after upgrade
232236
run: |
233-
"${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
234-
"${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
237+
export PATH="${TUFT_HOME}/bin:$PATH"
238+
tuft version
239+
tuft launch --help
235240
env:
236241
TUFT_HOME: ${{ runner.temp }}/tuft
237242

.github/workflows/unittest.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ jobs:
3434
MAX_RETRIES=20
3535
RETRY_INTERVAL=5
3636
for i in $(seq 1 $MAX_RETRIES); do
37-
if docker compose exec tuft-node-1 bash -c "source /opt/venv/bin/activate && ray status" \
38-
&& docker compose exec tuft-node-2 bash -c "source /opt/venv/bin/activate && ray status"; then
37+
if docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && ray status"; then
3938
break
4039
fi
4140
echo "Waiting for ray cluster to be ready... ($i/$MAX_RETRIES)"
@@ -51,7 +50,7 @@ jobs:
5150
# set a github env variable to indicate tests were run, so that subsequent steps can check it
5251
run: |
5352
echo "tests_run=true" >> $GITHUB_ENV
54-
docker compose exec tuft-node-1 bash -c "source /opt/venv/bin/activate && pytest tests -v -s --gpu --basetemp /mnt/checkpoints --ctrf report.json"
53+
docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && pytest tests -v -s --gpu --basetemp /mnt/checkpoints --ctrf report.json"
5554
5655
- name: Convert report.json time to ms
5756
working-directory: tuft-${{ github.run_id }}

docker/Dockerfile

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#
88
# Note:
99
# This Dockerfile uses 'uv' to create a virtual environment for better package management.
10-
# The uv virtual environment is created at `/opt/venv`, use `source /opt/venv/bin/activate` to activate it.
10+
# The uv virtual environment is created at `/root/.tuft/venv`, use `source /root/.tuft/venv/bin/activate` to activate it.
1111
# Make sure to use `uv pip` to install packages within the virtual environment.
1212

1313
FROM nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
@@ -23,28 +23,26 @@ RUN chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
2323
&& ln -sf /usr/bin/python3 /usr/bin/python \
2424
&& ln -sf /usr/bin/pip3 /usr/bin/pip
2525

26-
ENV VIRTUAL_ENV=/opt/venv
26+
ENV VIRTUAL_ENV=/root/.tuft/venv
2727

2828
# copy the TuFT dir into the workspace
2929
COPY ./pyproject.toml .
3030
COPY ./LICENSE .
3131
COPY ./README.md .
3232
COPY ./src ./src
33+
COPY ./scripts ./scripts
3334

3435
# Uncomment the following line if you want to use AliCloud Mirror to speed up pip install
3536
# ENV UV_DEFAULT_INDEX=http://mirrors.cloud.aliyuncs.com/pypi/simple/
3637

3738
# Uncomment the following line to use a Hugging Face mirror if you have network connection problem with Hugging Face
3839
# ENV HF_ENDPOINT=https://hf-mirror.com
3940

40-
# Install uv
41-
RUN pip install uv && uv venv ${VIRTUAL_ENV} --python=python3.12
42-
43-
# Install minimal TuFT
44-
RUN . ${VIRTUAL_ENV}/bin/activate && uv pip install -e .[dev,backend,persistence]
45-
46-
# Install flash_attn
47-
RUN . ${VIRTUAL_ENV}/bin/activate && uv pip install flash_attn==2.8.1 --no-build-isolation
41+
# Install
42+
RUN bash ./scripts/install.sh --local-source /workspace \
43+
&& . $HOME/.local/bin/env \
44+
&& . /root/.tuft/venv/bin/activate \
45+
&& uv pip install .[dev]
4846

4947
ENTRYPOINT ["/bin/bash", "-c", "source ${VIRTUAL_ENV}/bin/activate && exec \"$@\"", "--"]
5048
CMD ["bash"]

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "tuft"
3-
version = "0.1.1"
3+
version = "0.1.2"
44
description = "A multi-tenant fine-tuning platform for LLMs with Tinker-compatible API"
55
authors = [
66
{ name = "TuFT Developers", email = "tuft@list.alibaba-inc.com" }
@@ -24,7 +24,7 @@ dependencies = [
2424
"opentelemetry-instrumentation-fastapi>=0.41b0",
2525
"opentelemetry-instrumentation-logging>=0.41b0",
2626
"psutil>=5.9.0",
27-
"pynvml>=11.5.0",
27+
"nvidia-ml-py>=13.0.0",
2828
]
2929

3030
[project.scripts]

0 commit comments

Comments
 (0)