diff --git a/.github/workflows/pr-clean-code-test.yml b/.github/workflows/pr-clean-code-test.yml
index 64c37dd6..08ee94ea 100644
--- a/.github/workflows/pr-clean-code-test.yml
+++ b/.github/workflows/pr-clean-code-test.yml
@@ -26,22 +26,27 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install .[ci_test]
+          pip install lizard flake8 black isort mypy pytest  # pytest: run by the final test step
       - name: CCN with Lizard
         run: |
           lizard -l python ./mlora -C 12
+          lizard -l python ./mlora_cli -C 12
       - name: Lint with flake8
         run: |
           flake8 ./mlora --count --show-source --statistics --max-line-length=88 --max-complexity 15 --ignore=E203,W503,E704
+          flake8 ./mlora_cli --count --show-source --statistics --max-line-length=88 --max-complexity 15 --ignore=E203,W503,E704
       - name: Lint with black
         run: |
           black --check ./mlora
+          black --check ./mlora_cli
       - name: Lint with isort
         run: |
           isort ./mlora --check --profile black
+          isort ./mlora_cli --check --profile black
       - name: Static code check with mypy
         run: |
           mypy ./mlora --ignore-missing-imports --non-interactive --install-types --check-untyped-defs
+          mypy ./mlora_cli --ignore-missing-imports --non-interactive --install-types --check-untyped-defs
       - name: Test with pytest
         run: |
           pytest
diff --git a/.github/workflows/pre-commit b/.github/workflows/pre-commit
index 49f04528..34d4c359 100755
--- a/.github/workflows/pre-commit
+++ b/.github/workflows/pre-commit
@@ -1,13 +1,21 @@
 #!/bin/bash
 
-lizard -l python ./mlora -C 12
+check_dir=(
+    "./mlora"
+    "./mlora_cli"
+)
 
-black --check ./mlora
+for dir in "${check_dir[@]}"; do
+    # Quoted expansions keep every configured path a single argument.
+    # Complexity gate, then formatting / import order, then lint / typing.
+    lizard -l python "${dir}" -C 12
+
+    black --check "${dir}"
+    isort "${dir}" --check --profile black
+
+    flake8 "${dir}" --count --show-source --statistics --max-line-length=88 --max-complexity 15 --ignore=E203,W503,E704
+    mypy "${dir}" --ignore-missing-imports --non-interactive --install-types --check-untyped-defs
+done
 
-isort ./mlora --check --profile black
-
-flake8 ./mlora --count --show-source --statistics --max-line-length=88 --max-complexity 15 --ignore=E203,W503,E704
-
-mypy ./mlora --ignore-missing-imports --non-interactive --install-types --check-untyped-defs
 
 pytest
diff --git a/Dockerfile.deploy b/Dockerfile.deploy
new file mode 100644
index 00000000..ec210471
--- /dev/null
+++ b/Dockerfile.deploy
@@ -0,0 +1,13 @@
+FROM yezhengmaolove/mlora:latest
+LABEL maintainer="YeZhengMao <yezhengmaolove@gmail.com>"
+# NOTE(review): `git pull` below presumes the base image ships a git checkout in /mLoRA.
+WORKDIR /mLoRA
+# Single quotes keep $BASE_MODEL / $STORAGE_DIR literal: the launcher reads them at container start.
+RUN git pull \
+    && /usr/bin/echo -e '#!/bin/bash\neval "$(pyenv init -)"\neval "$(pyenv virtualenv-init -)"\nexec python mlora_server.py --base_model "$BASE_MODEL" --root "$STORAGE_DIR"' > /opt/deploy.sh
+# Put pyenv on PATH so the `pyenv init` calls in /opt/deploy.sh resolve.
+ENV PYENV_ROOT=/root/.pyenv
+ENV PATH="$PYENV_ROOT/bin/:$PATH"
+# mlora_server.py binds 0.0.0.0:8000; exec-form CMD skips the extra `/bin/sh -c` wrapper.
+EXPOSE 8000
+CMD ["/bin/bash", "/opt/deploy.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index e70de5b2..b3e18d25 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ mLoRA (a.k.a Multi-LoRA Fine-Tune) is an open-source framework designed for effi
 The end-to-end architecture of the mLoRA is shown in the figure:
 
 <div align="center">
-<img src="./docs/assets/architecture.jpg" width=50%">
+<img src="./docs/assets/architecture.jpg" width="70%">
 </div>
 
 
@@ -44,9 +44,9 @@ cd mLoRA
 pip install .
 ```
 
-The `mlora.py` code is a starting point for batch fine-tuning LoRA adapters.
+The `mlora_train.py` code is a starting point for batch fine-tuning LoRA adapters.
 ```bash
-python mlora.py \
+python mlora_train.py \
   --base_model TinyLlama/TinyLlama-1.1B-Chat-v0.4 \
   --config demo/lora/lora_case_1.yaml
 ```
@@ -55,7 +55,7 @@ You can check the adapters' configuration in [demo](./demo/) folder, there are s
 
 For further detailed usage information, please use `--help` option:
 ```bash
-python mlora.py --help
+python mlora_train.py --help
 ```
 
 ## Quickstart with Docker
@@ -79,34 +79,37 @@ ssh root@localhost -p <host_port>
 # pull the latest code and run the mlora
 cd /mLoRA
 git pull
-python mlora.py \
+python mlora_train.py \
   --base_model TinyLlama/TinyLlama-1.1B-Chat-v0.4 \
   --config demo/lora/lora_case_1.yaml
 ```
 
-## Deploy as service
+## Deploy as service with Docker
 We can deploy mLoAR as a service to continuously receive user requests and perform fine-tuning task.
 
-[![asciicast](https://asciinema.org/a/IifqdtBoJAVP4r8wg1lrcm9LI.svg)](https://asciinema.org/a/IifqdtBoJAVP4r8wg1lrcm9LI)
+First, pull the latest deployment image:
 
 ```bash
-# Install requirements for deploy
-pip install .[deploy]
-# Start the server
-python mlora_server.py \
-  --base_model /data/TinyLlama-1.1B-Chat-v1.0/ \
-  --root /tmp/mlora
+docker pull yezhengmaolove/mlora:deploy_latest
 ```
-For further detailed usage information, please use `--help` option:
 
+Deploy our mLoRA server:
 ```bash
-python mlora_server.py --help
+docker run -itd --runtime nvidia --gpus all \
+    -v ~/your_dataset_cache_dir:/cache \
+    -v ~/your_model_dir:/model \
+    -p <host_port>:8000 \
+    --name mlora_server \
+    -e "BASE_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v0.4" \
+    -e "STORAGE_DIR=/cache" \
+    yezhengmaolove/mlora:deploy_latest
 ```
 
-Once the service is deployed, use `mlora_cli.py` to interact with the server.
+Once the service is deployed, install the client tools and use the `mlora_cli` command to interact with the server.
 
 ```bash
-python mlora_cli.py
+# install the client tools
+pip install mlora-cli
 ```
 
 ## Why you should use mLoRA
diff --git a/build.sh b/build.sh
new file mode 100755
index 00000000..7d78d209
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -e  # stop at the first failure instead of building with the wrong config
+trap 'ln -sf pyproject.mlora.toml pyproject.toml' EXIT  # always restore the committed link
+ln -sf pyproject.cli.toml pyproject.toml
+python -m build .  # mlora_cli distribution
+ln -sf pyproject.mlora.toml pyproject.toml
+python -m build .  # mlora distribution
\ No newline at end of file
diff --git a/docs/assets/architecture.jpg b/docs/assets/architecture.jpg
index becdcb94..c238dd2d 100644
Binary files a/docs/assets/architecture.jpg and b/docs/assets/architecture.jpg differ
diff --git a/mlora/cli/setting.py b/mlora/cli/setting.py
deleted file mode 100644
index e179dec4..00000000
--- a/mlora/cli/setting.py
+++ /dev/null
@@ -1,9 +0,0 @@
-G_PORT = 8000
-G_HOST = "http://127.0.0.1"
-
-
-def url() -> str:
-    global G_HOST
-    global G_PORT
-
-    return G_HOST + ":" + str(G_PORT)
diff --git a/mlora/utils/setup.py b/mlora/utils/setup.py
index f8ab2421..17ba1919 100644
--- a/mlora/utils/setup.py
+++ b/mlora/utils/setup.py
@@ -4,7 +4,6 @@
 
 import torch
 
-import mlora
 import mlora.profiler
 
 
diff --git a/mlora_cli.py b/mlora_cli.py
deleted file mode 100644
index bf2d8f97..00000000
--- a/mlora_cli.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import mlora.cli
-
-import cmd
-
-
-def help_quit(_):
-    print("Quit the cli")
-
-
-def do_quit(*_):
-    exit(0)
-
-
-class mLoRAShell(cmd.Cmd):
-    intro = 'Welcome to the mLoRA CLI. Type help or ? to list commands.\n'
-    prompt = '(mLoRA) '
-
-    help_quit = help_quit
-    do_quit = do_quit
-
-    help_dispatcher = mlora.cli.help_dispatcher
-    do_dispatcher = mlora.cli.do_dispatcher
-
-    help_file = mlora.cli.help_file
-    do_file = mlora.cli.do_file
-
-    help_dataset = mlora.cli.help_dataset
-    do_dataset = mlora.cli.do_dataset
-
-    help_adapter = mlora.cli.help_adapter
-    do_adapter = mlora.cli.do_adapter
-
-    help_task = mlora.cli.help_task
-    do_task = mlora.cli.do_task
-
-
-if __name__ == '__main__':
-    mLoRAShell().cmdloop()
diff --git a/mlora/cli/__init__.py b/mlora_cli/__init__.py
similarity index 100%
rename from mlora/cli/__init__.py
rename to mlora_cli/__init__.py
diff --git a/mlora/cli/adapter.py b/mlora_cli/adapter.py
similarity index 100%
rename from mlora/cli/adapter.py
rename to mlora_cli/adapter.py
diff --git a/mlora/cli/dataset.py b/mlora_cli/dataset.py
similarity index 100%
rename from mlora/cli/dataset.py
rename to mlora_cli/dataset.py
diff --git a/mlora/cli/dispatcher.py b/mlora_cli/dispatcher.py
similarity index 100%
rename from mlora/cli/dispatcher.py
rename to mlora_cli/dispatcher.py
diff --git a/mlora/cli/file.py b/mlora_cli/file.py
similarity index 100%
rename from mlora/cli/file.py
rename to mlora_cli/file.py
diff --git a/mlora_cli/pyproject.toml b/mlora_cli/pyproject.toml
new file mode 100644
index 00000000..8b39bb04
--- /dev/null
+++ b/mlora_cli/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "mlora_cli"
+version = "0.2"
+description = "A cli tool for mLoRA system."
+readme = "README.md"
+requires-python = ">=3.12"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+]
+dependencies = ["rich", "InquirerPy"]
+
+[project.urls]
+"Homepage" = "https://github.com/TUDB-Labs/mLoRA"
+"Bug Tracker" = "https://github.com/TUDB-Labs/mLoRA/issues"
+
+[tool.setuptools.packages.find]
+include = [".*"]
diff --git a/mlora_cli/setting.py b/mlora_cli/setting.py
new file mode 100644
index 00000000..9e9b7da5
--- /dev/null
+++ b/mlora_cli/setting.py
@@ -0,0 +1,32 @@
+G_PORT = 8000
+G_HOST = "http://127.0.0.1"
+
+
+def url() -> str:
+    """Return the server base URL, "http://127.0.0.1:8000" unless reconfigured."""
+    host, port = G_HOST, G_PORT
+
+    return f"{host}:{port}"
+
+
+def help_set(*_):
+    """Print the usage text of the `set` command."""
+    # `*_` absorbs the shell instance: cmd.Cmd calls help_* as a bound method.
+    print("Usage of a set:")
+    print("  host\n    set the host.")
+    print("  port\n    set the port.")
+
+
+def do_set(_, args):
+    """Handle `set host <value>` and `set port <number>`."""
+    global G_HOST, G_PORT
+
+    # Pad with empty strings so a missing key/value never raises IndexError.
+    key, value = (args.split() + ["", ""])[:2]
+    if key == "host" and value:
+        G_HOST = value
+    elif key == "port" and value.isdecimal():
+        # isdecimal() guarantees int() succeeds; bad input shows the usage.
+        G_PORT = int(value)
+    else:
+        help_set()
diff --git a/mlora_cli/shell.py b/mlora_cli/shell.py
new file mode 100644
index 00000000..cfa65544
--- /dev/null
+++ b/mlora_cli/shell.py
@@ -0,0 +1,46 @@
+import cmd
+
+from .adapter import do_adapter, help_adapter
+from .dataset import do_dataset, help_dataset
+from .dispatcher import do_dispatcher, help_dispatcher
+from .file import do_file, help_file
+from .setting import do_set, help_set
+from .task import do_task, help_task
+
+
+def help_quit(_):  # `_`: shell instance passed by the bound-method call; unused
+    print("Quit the cli")
+
+
+def do_quit(*_):
+    raise SystemExit(0)  # like exit(0), but `exit` only exists when site is loaded
+
+
+class mLoRAShell(cmd.Cmd):
+    """Interactive client shell; cmd.Cmd dispatches input to do_*/help_*."""
+
+    intro = "Welcome to the mLoRA CLI. Type help or ? to list commands.\n"
+    prompt = "(mLoRA) "
+
+    help_quit = help_quit
+    do_quit = do_quit
+    # Ctrl-D / end of input arrives as "EOF"; without do_EOF it is unknown syntax.
+    help_EOF = help_quit
+    do_EOF = do_quit
+
+    help_dispatcher = help_dispatcher
+    do_dispatcher = do_dispatcher
+    help_file = help_file
+    do_file = do_file
+    help_dataset = help_dataset
+    do_dataset = do_dataset
+    help_adapter = help_adapter
+    do_adapter = do_adapter
+    help_task = help_task
+    do_task = do_task
+    help_set = help_set
+    do_set = do_set
+
+
+def cmd_loop():  # console-script entry point (`mlora_cli` in pyproject.cli.toml)
+    mLoRAShell().cmdloop()
diff --git a/mlora/cli/task.py b/mlora_cli/task.py
similarity index 100%
rename from mlora/cli/task.py
rename to mlora_cli/task.py
diff --git a/mlora_client.py b/mlora_client.py
new file mode 100644
index 00000000..9daea1c9
--- /dev/null
+++ b/mlora_client.py
@@ -0,0 +1,4 @@
+from mlora_cli.shell import cmd_loop
+
+if __name__ == "__main__":
+    cmd_loop()
diff --git a/mlora_server.py b/mlora_server.py
index 3e1db8e7..9c32b55f 100644
--- a/mlora_server.py
+++ b/mlora_server.py
@@ -93,7 +93,11 @@ def backend_server_run_fn(args):
     mLoRAServer.include_router(mlora.server.adapter_router)
     mLoRAServer.include_router(mlora.server.task_router)
 
-    web_thread = threading.Thread(target=uvicorn.run, args=(mLoRAServer,))
+    web_thread = threading.Thread(
+        target=uvicorn.run,
+        args=(mLoRAServer,),
+        kwargs={"host": "0.0.0.0", "port": 8000},
+    )
 
     logging.info("Start the backend web server run thread")
     web_thread.start()
diff --git a/mlora.py b/mlora_train.py
similarity index 100%
rename from mlora.py
rename to mlora_train.py
diff --git a/pyproject.cli.toml b/pyproject.cli.toml
new file mode 100644
index 00000000..eeecfa08
--- /dev/null
+++ b/pyproject.cli.toml
@@ -0,0 +1,26 @@
+[project]
+name = "mlora_cli"
+version = "0.2.1"
+description = "The cli tools for mLoRA system."
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+]
+dependencies = ["requests", "rich", "InquirerPy"]
+
+[project.urls]
+"Homepage" = "https://github.com/TUDB-Labs/mLoRA"
+"Bug Tracker" = "https://github.com/TUDB-Labs/mLoRA/issues"
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["mlora_cli", "mlora_cli.*"]
+
+[project.scripts]
+mlora_cli = "mlora_cli.shell:cmd_loop"
diff --git a/pyproject.mlora.toml b/pyproject.mlora.toml
new file mode 100644
index 00000000..d59df97e
--- /dev/null
+++ b/pyproject.mlora.toml
@@ -0,0 +1,42 @@
+[project]
+name = "mlora"
+version = "0.2"
+description = "A system for fine-tuning multiple large language models (LLMs) using the LoRA and its variants more efficiently."
+readme = "README.md"
+requires-python = ">=3.12"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "torch==2.2.1",
+    "einops==0.6.1",
+    "datasets==2.14.5",
+    "accelerate==0.24.1",
+    "transformers==4.38.2",
+    "bitsandbytes==0.41.1",
+    "sentencepiece==0.1.99",
+    "protobuf==3.20.2",
+    "scipy==1.11.4",
+]
+
+[project.optional-dependencies]
+ci_test = ["pytest", "flake8", "lizard", "black", "isort", "mypy"]
+test = ["peft", "setuptools"]
+debug = ["graphviz"]
+deploy = ["fastapi", "plyvel", "uvicorn"]
+
+[project.urls]
+"Homepage" = "https://github.com/TUDB-Labs/mLoRA"
+"Bug Tracker" = "https://github.com/TUDB-Labs/mLoRA/issues"
+
+[tool.pytest.ini_options]
+pythonpath = [".", "mlora"]
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["mlora", "mlora.*"]
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 64010fad..00000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,42 +0,0 @@
-[build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "mlora"
-version = "0.2"
-description = "A system for fine-tuning multiple large language models (LLMs) using the LoRA and its variants more efficiently."
-readme = "README.md"
-requires-python = ">=3.12"
-classifiers = [
-    "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: Apache Software License",
-    "Operating System :: OS Independent",
-]
-dependencies = [
-    "torch==2.2.1",
-    "einops==0.6.1",
-    "datasets==2.14.5",
-    "accelerate==0.24.1",
-    "transformers==4.38.2",
-    "bitsandbytes==0.41.1",
-    "sentencepiece==0.1.99",
-    "protobuf==3.20.2",
-    "scipy==1.11.4",
-]
-
-[project.optional-dependencies]
-ci_test = ["pytest", "flake8", "lizard", "black", "isort", "mypy"]
-test = ["peft", "setuptools"]
-debug = ["graphviz"]
-deploy = ["rich", "fastapi", "plyvel", "uvicorn", "InquirerPy"]
-
-[tool.pytest.ini_options]
-pythonpath = [".", "mlora"]
-
-[project.urls]
-"Homepage" = "https://github.com/TUDB-Labs/mLoRA"
-"Bug Tracker" = "https://github.com/TUDB-Labs/mLoRA/issues"
-
-[tool.setuptools.packages.find]
-include = ["mlora"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 120000
index 00000000..90a50f91
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1 @@
+pyproject.mlora.toml
\ No newline at end of file
diff --git a/tests/finetune_all_case.sh b/tests/finetune_all_case.sh
index 13b73917..caacf5de 100755
--- a/tests/finetune_all_case.sh
+++ b/tests/finetune_all_case.sh
@@ -12,5 +12,5 @@ declare -a test_case_yamls=(
 set -x
 for test_case in "${test_case_yamls[@]}"
 do
-    python mlora.py --base_model $1 --config ${test_case}
+    python mlora_train.py --base_model $1 --config ${test_case}
 done
\ No newline at end of file