Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
source .venv/bin/activate
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,7 @@ htmlcov/
# Windows
Thumbs.db
ehthumbs.db
desktop.ini
desktop.ini

# model
DeepSeek-R1-Distill-Qwen-1.5B/
26 changes: 17 additions & 9 deletions include/llaisys/models/qwen2.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
#include "../tensor.h"

__C {
struct LlaisysQwen2Meta {
typedef struct LlaisysQwen2Meta_ {
llaisysDataType_t dtype;
size_t nlayer, hs, nh, nkvh, dh, di, maxseq, voc;
float epsilon, theta;
int64_t end_token;
};
}LlaisysQwen2Meta;

struct LlaisysQwen2Weights {
typedef struct LlaisysQwen2Weights_ {
llaisysTensor_t in_embed;
llaisysTensor_t out_embed;
llaisysTensor_t out_norm_w; // a.k.a. model.norm.weight
Expand All @@ -27,16 +27,24 @@ __C {
llaisysTensor_t *mlp_gate_w;
llaisysTensor_t *mlp_up_w;
llaisysTensor_t *mlp_down_w;
};
}LlaisysQwen2Weights;

struct LlaisysQwen2Model;
typedef struct LlaisysQwen2Model_ {
LlaisysQwen2Meta* meta;
LlaisysQwen2Weights* weights = nullptr;
void *impl = nullptr; // Opaque pointer to the actual model implementation (e.g., a C++ class instance).
}LlaisysQwen2Model;

__export struct LlaisysQwen2Model *llaisysQwen2ModelCreate(const LlaisysQwen2Meta *meta, llaisysDeviceType_t device, int *device_ids, int ndevice);

__export void llaisysQwen2ModelDestroy(struct LlaisysQwen2Model * model);

__export struct LlaisysQwen2Weights *llaisysQwen2ModelWeights(struct LlaisysQwen2Model * model);
__export LlaisysQwen2Model *llaisysQwen2ModelCreate(const LlaisysQwen2Meta *meta, llaisysDeviceType_t device, int *device_ids, int ndevice);

__export int64_t llaisysQwen2ModelInfer(struct LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken);
__export void llaisysQwen2ModelDestroy(LlaisysQwen2Model * model);

__export void llaisysQwen2modelLoadWeight(LlaisysQwen2Model * model, const void *weight_data, const char *weight_name);

__export LlaisysQwen2Weights *llaisysQwen2ModelWeights(LlaisysQwen2Model * model);

__export int64_t llaisysQwen2ModelInfer(LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken);
}
#endif // LLAISYS_MODELS_QWEN2_H
4 changes: 4 additions & 0 deletions make.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
xmake
xmake install
pip install ./python

8 changes: 7 additions & 1 deletion python/llaisys/libllaisys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
from .llaisys_types import llaisysMemcpyKind_t, MemcpyKind
from .llaisys_types import llaisysStream_t
from .tensor import llaisysTensor_t
from .models.qwen2 import Qwen2Meta, LlaisysQwen2Meta_t, LlaisysQwen2Model_t, LlaisysQwen2Weights_t
from .tensor import load_tensor
from .ops import load_ops

from .models.qwen2 import load_qwen2

def load_shared_library():
lib_dir = Path(__file__).parent
Expand All @@ -38,6 +39,7 @@ def load_shared_library():
load_runtime(LIB_LLAISYS)
load_tensor(LIB_LLAISYS)
load_ops(LIB_LLAISYS)
load_qwen2(LIB_LLAISYS)


__all__ = [
Expand All @@ -46,6 +48,10 @@ def load_shared_library():
"llaisysStream_t",
"llaisysTensor_t",
"llaisysDataType_t",
"Qwen2Meta",
"LlaisysQwen2Meta_t",
"LlaisysQwen2Model_t",
"LlaisysQwen2Weights_t",
"DataType",
"llaisysDeviceType_t",
"DeviceType",
Expand Down
10 changes: 10 additions & 0 deletions python/llaisys/libllaisys/llaisys_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ class MemcpyKind(IntEnum):

llaisysMemcpyKind_t = ctypes.c_int

'''
struct LlaisysQwen2Meta_ {
llaisysDataType_t dtype;
size_t nlayer, hs, nh, nkvh, dh, di, maxseq, voc;
float epsilon, theta;
int64_t end_token;
}
'''


# Stream type (opaque pointer)
llaisysStream_t = ctypes.c_void_p

Expand Down
Empty file.
59 changes: 59 additions & 0 deletions python/llaisys/libllaisys/models/qwen2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from ctypes import POINTER, c_void_p, c_size_t, c_int, c_char_p, Structure, c_float, c_int64
from ..llaisys_types import *
from ..tensor import llaisysTensor_t

class Qwen2Meta(Structure):
    """ctypes mirror of the C ``struct LlaisysQwen2Meta_``.

    Field order and types must match the C header exactly, since this
    structure is passed by pointer across the FFI boundary.
    """

    # dtype first, then the eight size_t hyperparameters in declaration
    # order, then the two floats and the end-of-sequence token id.
    _fields_ = (
        [("dtype", llaisysDataType_t)]
        + [(field_name, c_size_t)
           for field_name in ("nlayer", "hs", "nh", "nkvh",
                              "dh", "di", "maxseq", "voc")]
        + [("epsilon", c_float), ("theta", c_float), ("end_token", c_int64)]
    )

class LlaisysQwen2Weights(Structure):
    """ctypes mirror of the C ``struct LlaisysQwen2Weights_``.

    The first three entries are single tensor handles; the remaining
    entries are per-layer arrays (pointers to tensor handles). Order
    must match the C header exactly.
    """

    _fields_ = (
        [(field_name, llaisysTensor_t)
         for field_name in ("in_embed", "out_embed", "out_norm_w")]
        + [(field_name, POINTER(llaisysTensor_t))
           for field_name in (
               "attn_norm_w",
               "attn_q_w", "attn_q_b",
               "attn_k_w", "attn_k_b",
               "attn_v_w", "attn_v_b",
               "attn_o_w",
               "mlp_norm_w", "mlp_gate_w", "mlp_up_w", "mlp_down_w",
           )]
    )

# ctypes aliases for the pointer types used in the C API signatures.
LlaisysQwen2Meta_t = POINTER(Qwen2Meta)  # const LlaisysQwen2Meta *
LlaisysQwen2Model_t = c_void_p  # opaque LlaisysQwen2Model * handle
LlaisysQwen2Weights_t = POINTER(LlaisysQwen2Weights)  # LlaisysQwen2Weights *


def load_qwen2(lib):
    """Attach argtypes/restype signatures for the Qwen2 C entry points to *lib*.

    Must be called once on the loaded shared library before any of the
    ``llaisysQwen2*`` functions are invoked.
    """
    # symbol name -> (argtypes, restype); note the lowercase 'm' in
    # llaisysQwen2modelLoadWeight, matching the C header's spelling.
    signatures = {
        "llaisysQwen2ModelCreate": (
            [LlaisysQwen2Meta_t, llaisysDeviceType_t, POINTER(c_int), c_int],
            LlaisysQwen2Model_t,
        ),
        "llaisysQwen2ModelDestroy": ([LlaisysQwen2Model_t], None),
        "llaisysQwen2modelLoadWeight": (
            [LlaisysQwen2Model_t, c_void_p, c_char_p],
            None,
        ),
        "llaisysQwen2ModelInfer": (
            [LlaisysQwen2Model_t, POINTER(c_int64), c_size_t],
            c_int64,
        ),
        "llaisysQwen2ModelWeights": ([LlaisysQwen2Model_t], LlaisysQwen2Weights_t),
    }
    for symbol, (argtypes, restype) in signatures.items():
        fn = getattr(lib, symbol)
        fn.argtypes = argtypes
        fn.restype = restype
2 changes: 1 addition & 1 deletion python/llaisys/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .qwen2 import Qwen2
from .qwen2 import Qwen2
70 changes: 57 additions & 13 deletions python/llaisys/models/qwen2.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
import ctypes
import numpy as np
import gc
from enum import IntEnum
from typing import Sequence
from ..libllaisys import LIB_LLAISYS
from ..libllaisys import DeviceType
from ..libllaisys import *
from ..tensor import Tensor
import torch


from pathlib import Path
import safetensors
import json


class Qwen2:

def __init__(self, model_path, device: DeviceType = DeviceType.CPU):
# TODO: Implement model constructor

model_path = Path(model_path)

for file in sorted(model_path.glob("*.safetensors")):
data_ = safetensors.safe_open(file, framework="numpy", device="cpu")
for name_ in data_.keys():
## TODO: load the model weights
pass
self.model_path = Path(model_path)
self.device = device
self._load_config()
self._load_weights()

def __delete__(self):
LIB_LLAISYS.llaisysQwen2ModelDestroy(self.model)

def generate(
self,
Expand All @@ -28,6 +33,45 @@ def generate(
temperature: float = 0.8,
):

# TODO: Implement generate function
ptr = np.array(inputs, dtype=np.int64).ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
l = len(inputs)
ret = list(inputs)
id = 0
while id != self.config["eos_token_id"]:
id = int(LIB_LLAISYS.llaisysQwen2ModelInfer(self.model, ptr, ctypes.c_size_t(l)))
ret.append(id)
ptr = ctypes.byref(ctypes.c_int64(id))
l = 1
return ret

def _load_config(self):
config_file = self.model_path / "config.json"
with open(config_file, "r") as f:
self.config = json.load(f)
meta = Qwen2Meta()
meta.dtype = ctypes.c_int(DataType.BF16)
meta.nlayer = ctypes.c_size_t(self.config["num_hidden_layers"])
meta.hs = ctypes.c_size_t(self.config["hidden_size"])
meta.nh = ctypes.c_size_t(self.config["num_attention_heads"])
meta.nkvh = ctypes.c_size_t(self.config["num_key_value_heads"])
meta.dh = ctypes.c_size_t(self.config["hidden_size"] // self.config["num_attention_heads"])
meta.di = ctypes.c_size_t(self.config["intermediate_size"])
meta.maxseq = ctypes.c_size_t(self.config["max_position_embeddings"])
meta.voc = ctypes.c_size_t(self.config["vocab_size"])
meta.epsilon = ctypes.c_float(self.config["rms_norm_eps"])
meta.theta = ctypes.c_float(self.config["rope_theta"])
meta.end_token = ctypes.c_int64(self.config["eos_token_id"])


return []
id = ctypes.c_int(0)
self.model = LIB_LLAISYS.llaisysQwen2ModelCreate(ctypes.byref(meta), self.device, ctypes.byref(id), 1)

def _load_weights(self):
for file in sorted(self.model_path.glob("*.safetensors")):
data_ = safetensors.safe_open(file, framework="torch", device="cpu")
for name_ in data_.keys():
tensor = data_.get_tensor(name_)
name_c = ctypes.c_char_p(name_.encode('utf-8'))
LIB_LLAISYS.llaisysQwen2modelLoadWeight(self.model, ctypes.c_void_p(tensor.data_ptr()), name_c)
del tensor
gc.collect()
24 changes: 24 additions & 0 deletions python/llaisys/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DataType,
)
from ctypes import c_size_t, c_int, c_ssize_t, c_void_p
import torch


class Tensor:
Expand Down Expand Up @@ -95,3 +96,26 @@ def slice(self, dim: int, start: int, end: int):
self._tensor, c_size_t(dim), c_size_t(start), c_size_t(end)
)
)

@staticmethod
def from_torch(torch_tensor: torch.Tensor):
assert torch_tensor.is_contiguous(), "Only contiguous tensors are supported"
assert torch_tensor.device.type in ["cpu", "cuda"], "Only CPU and CUDA devices are supported"

device_type = DeviceType.CPU if torch_tensor.device.type == "cpu" else DeviceType.NVIDIA
dtype = DataType.F32
if torch_tensor.dtype == torch.float16:
dtype = DataType.F16
elif torch_tensor.dtype == torch.bfloat16:
dtype = DataType.BF16
else:
raise ValueError(f"Unsupported data type: {torch_tensor.dtype}")
_tensor = Tensor(
shape=torch_tensor.shape,
dtype=dtype,
device=device_type,
device_id=torch_tensor.device.index if torch_tensor.device.type == "cuda" else 0,
)

_tensor.load(torch_tensor.data_ptr())
return _tensor
1 change: 0 additions & 1 deletion src/core/runtime/runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ class Runtime {
const LlaisysRuntimeAPI *api() const;

storage_t allocateDeviceStorage(size_t size);
;
storage_t allocateHostStorage(size_t size);
void freeStorage(Storage *storage);

Expand Down
Loading