Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DeepSeek-R1-Distill-Qwen-1.5B
Submodule DeepSeek-R1-Distill-Qwen-1.5B added at ad9f0a
2 changes: 2 additions & 0 deletions include/llaisys/models/qwen2.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,7 @@ __C {
__export struct LlaisysQwen2Weights *llaisysQwen2ModelWeights(struct LlaisysQwen2Model * model);

__export int64_t llaisysQwen2ModelInfer(struct LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken);

__export void llaisysQwen2ModelResetCache(struct LlaisysQwen2Model * model);
}
#endif // LLAISYS_MODELS_QWEN2_H
6 changes: 6 additions & 0 deletions python/llaisys/libllaisys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from .tensor import llaisysTensor_t
from .tensor import load_tensor
from .ops import load_ops
from .models import load_models
from .models import LlaisysQwen2Meta, LlaisysQwen2Weights, llaisysQwen2Model_t


def load_shared_library():
Expand All @@ -38,6 +40,7 @@ def load_shared_library():
load_runtime(LIB_LLAISYS)
load_tensor(LIB_LLAISYS)
load_ops(LIB_LLAISYS)
load_models(LIB_LLAISYS)


__all__ = [
Expand All @@ -52,4 +55,7 @@ def load_shared_library():
"llaisysMemcpyKind_t",
"MemcpyKind",
"llaisysStream_t",
"LlaisysQwen2Meta",
"LlaisysQwen2Weights",
"llaisysQwen2Model_t",
]
74 changes: 74 additions & 0 deletions python/llaisys/libllaisys/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from ctypes import POINTER, c_void_p, c_size_t, c_int64, c_int, c_float, Structure
from .llaisys_types import llaisysDataType_t, llaisysDeviceType_t
from .tensor import llaisysTensor_t


class LlaisysQwen2Meta(Structure):
    """ctypes layout of the Qwen2 model metadata record.

    A pointer to this structure is the first argument declared for
    ``llaisysQwen2ModelCreate``. Field order and types define the binary
    layout, so they must stay in step with the native definition
    (presumably ``struct LlaisysQwen2Meta`` in the C headers -- verify
    there before editing).
    """

    _fields_ = (
        [("dtype", llaisysDataType_t)]
        # Eight consecutive size_t members.
        + [
            (name, c_size_t)
            for name in (
                "nlayer",
                "hs",
                "nh",
                "nkvh",
                "dh",
                "di",
                "maxseq",
                "voc",
            )
        ]
        # Two consecutive float members.
        + [(name, c_float) for name in ("epsilon", "theta")]
        + [("end_token", c_int64)]
    )


class LlaisysQwen2Weights(Structure):
    """ctypes layout of the Qwen2 weight table.

    ``llaisysQwen2ModelWeights`` is declared to return a pointer to this
    structure. The first three members are single tensor handles; the
    remaining members are pointers to tensor handles (presumably arrays
    with one entry per layer -- confirm against the native code).
    Field order defines the binary layout and must not be rearranged.
    """

    _fields_ = (
        # Single tensor handles.
        [
            (name, llaisysTensor_t)
            for name in ("in_embed", "out_embed", "out_norm_w")
        ]
        # Pointer-to-tensor-handle members.
        + [
            (name, POINTER(llaisysTensor_t))
            for name in (
                "attn_norm_w",
                "attn_q_w",
                "attn_q_b",
                "attn_k_w",
                "attn_k_b",
                "attn_v_w",
                "attn_v_b",
                "attn_o_w",
                "mlp_norm_w",
                "mlp_gate_w",
                "mlp_up_w",
                "mlp_down_w",
            )
        ]
    )


# Untyped (void *) model handle: load_models declares it as the return type
# of llaisysQwen2ModelCreate and as the first argument of the other
# llaisysQwen2Model* functions.
llaisysQwen2Model_t = c_void_p


def load_models(lib):
    """Declare the ctypes signatures of the Qwen2 model entry points.

    Args:
        lib: The loaded shared-library object exposing the
            ``llaisysQwen2Model*`` symbols as attributes.

    Each listed symbol gets its ``argtypes`` and ``restype`` assigned so
    that ctypes converts arguments and return values with the declared
    types instead of its defaults.
    """
    # (symbol name, argument types, return type), in declaration order.
    signatures = (
        (
            "llaisysQwen2ModelCreate",
            [
                POINTER(LlaisysQwen2Meta),
                llaisysDeviceType_t,
                POINTER(c_int),
                c_int,
            ],
            llaisysQwen2Model_t,
        ),
        (
            "llaisysQwen2ModelDestroy",
            [llaisysQwen2Model_t],
            None,
        ),
        (
            "llaisysQwen2ModelWeights",
            [llaisysQwen2Model_t],
            POINTER(LlaisysQwen2Weights),
        ),
        (
            "llaisysQwen2ModelInfer",
            [
                llaisysQwen2Model_t,
                POINTER(c_int64),
                c_size_t,
            ],
            c_int64,
        ),
        (
            "llaisysQwen2ModelResetCache",
            [llaisysQwen2Model_t],
            None,
        ),
    )

    for symbol, arg_types, result_type in signatures:
        func = getattr(lib, symbol)
        func.argtypes = arg_types
        func.restype = result_type
Loading