feat: adding rpc_servers parameter to Llama class (#1477)
* passthru rpc_servers params

wip

* enable llama rpc by default

* convert string to byte

* add rpc package

* Revert "enable llama rpc by default"

This reverts commit 832c6dd.

* update readme

* Only set rpc_servers when provided

* Add rpc servers to server options

---------

Co-authored-by: Andrei Betlen <[email protected]>
chraac and abetlen authored Jun 4, 2024
1 parent 6e0642c commit d634efc
Showing 5 changed files with 26 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -45,6 +45,9 @@ build.kompute:
build.sycl:
	CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .

build.rpc:
	CMAKE_ARGS="-DLLAMA_RPC=on" python3 -m pip install --verbose -e .

build.sdist:
	python3 -m build --sdist

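For reference, the new target can be exercised like this (a sketch run from a repository checkout; the second command is the recipe the target wraps):

```bash
# Build llama-cpp-python with the llama.cpp RPC backend enabled,
# via the Makefile target added in this commit:
make build.rpc

# Or equivalently, run the recipe directly:
CMAKE_ARGS="-DLLAMA_RPC=on" python3 -m pip install --verbose -e .
```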
11 changes: 11 additions & 0 deletions README.md
@@ -221,6 +221,17 @@ CMAKE_ARGS="-DLLAMA_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pi
```
</details>

<details>
<summary>RPC</summary>

To install with RPC support, set the `LLAMA_RPC=on` CMake argument via the `CMAKE_ARGS` environment variable before installing:

```bash
CMAKE_ARGS="-DLLAMA_RPC=on" pip install llama-cpp-python
```
</details>


### Windows Notes

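Once built with RPC support, the new `rpc_servers` parameter (added to `Llama` below) points the model at remote workers. A hedged sketch — the model path and addresses are placeholders, and the `rpc-server` worker binary is assumed to come from a llama.cpp build with `LLAMA_RPC=on`:

```python
from llama_cpp import Llama

# Each address is a running llama.cpp rpc-server worker, e.g. started
# on the remote host with something like: rpc-server -p 50052
llm = Llama(
    model_path="models/7B/llama-model.gguf",  # placeholder path
    rpc_servers="192.168.1.10:50052,192.168.1.11:50052",  # placeholder hosts
    n_gpu_layers=-1,  # offload all layers; they are split across backends
)
```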
7 changes: 7 additions & 0 deletions llama_cpp/llama.py
@@ -72,6 +72,7 @@ def __init__(
        split_mode: int = llama_cpp.LLAMA_SPLIT_MODE_LAYER,
        main_gpu: int = 0,
        tensor_split: Optional[List[float]] = None,
        rpc_servers: Optional[str] = None,
        vocab_only: bool = False,
        use_mmap: bool = True,
        use_mlock: bool = False,
@@ -150,6 +151,7 @@ def __init__(
            split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options.
            main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_LAYER: ignored
            tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split.
            rpc_servers: Comma-separated list of RPC servers to use for offloading
            vocab_only: Only load the vocabulary, no weights.
            use_mmap: Use mmap if possible.
            use_mlock: Force the system to keep the model in RAM.
@@ -221,6 +223,11 @@ def __init__(
        ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers
        self.model_params.split_mode = split_mode
        self.model_params.main_gpu = main_gpu
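        # model_params.rpc_servers is a C string (bytes) on the llama.cpp side,
        # so the comma-separated host:port list is encoded before assignment.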
        if rpc_servers is not None:
            self.model_params.rpc_servers = rpc_servers.encode('utf-8')
            self._rpc_servers = rpc_servers
        else:
            self._rpc_servers = None
        self.tensor_split = tensor_split
        self._c_tensor_split = None
        if self.tensor_split is not None:
1 change: 1 addition & 0 deletions llama_cpp/server/model.py
@@ -226,6 +226,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
        use_mmap=settings.use_mmap,
        use_mlock=settings.use_mlock,
        kv_overrides=kv_overrides,
        rpc_servers=settings.rpc_servers,
        # Context Params
        seed=settings.seed,
        n_ctx=settings.n_ctx,
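The wiring above can also be driven programmatically; a hedged sketch with placeholder values:

```python
# Settings parsed from CLI/env are forwarded verbatim to the Llama
# constructor, including the new rpc_servers field.
from llama_cpp.server.settings import ModelSettings
from llama_cpp.server.model import load_llama_from_model_settings

settings = ModelSettings(
    model="models/7B/llama-model.gguf",  # placeholder path
    rpc_servers="192.168.1.10:50052",    # placeholder worker
)
llm = load_llama_from_model_settings(settings)
```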
4 changes: 4 additions & 0 deletions llama_cpp/server/settings.py
@@ -58,6 +58,10 @@ class ModelSettings(BaseSettings):
        default=None,
        description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.",
    )
    rpc_servers: Optional[str] = Field(
        default=None,
        description="Comma separated list of RPC servers for offloading",
    )
    # Context Params
    seed: int = Field(
        default=llama_cpp.LLAMA_DEFAULT_SEED, description="Random seed. -1 for random."
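Because `ModelSettings` is a pydantic-settings model, the new field should surface as a server CLI flag; a hedged sketch with a placeholder model path and addresses:

```bash
# Start the OpenAI-compatible server and offload layers to two remote
# rpc-server workers (placeholder addresses).
python3 -m llama_cpp.server \
  --model models/7B/llama-model.gguf \
  --rpc_servers 192.168.1.10:50052,192.168.1.11:50052
```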
