Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7caa504
Pass the test; may need more review and checks
usersforsomebody Jan 22, 2026
0ac6892
Finish the task 2.1 Argmax
usersforsomebody Jan 24, 2026
f5e61c2
use the std::move
usersforsomebody Jan 25, 2026
3a1ca5c
Finish Task 2.2 Embedding
usersforsomebody Jan 25, 2026
4ed8b98
Pass Task 2-3 linear
usersforsomebody Jan 25, 2026
75306f7
Finish the Task-2.4 RMS
usersforsomebody Jan 26, 2026
e9a4937
Finish the task 2-5 RoPE and fix the kernel
usersforsomebody Jan 27, 2026
f3220e0
Finish the Task 2-6: attention is all you need
usersforsomebody Jan 28, 2026
29bd22f
Finish all the Task 1 Task 2
usersforsomebody Jan 29, 2026
5f8a982
Finish the bind
usersforsomebody Jan 31, 2026
840d134
Finish forward init
usersforsomebody Feb 1, 2026
424234f
kv_cache
usersforsomebody Feb 2, 2026
123f48f
Fix the bug
usersforsomebody Feb 4, 2026
d99d6cd
Fix another bug
usersforsomebody Feb 4, 2026
687a55a
fix: remove Windows-invalid path core.hpp:Zone.Identifier and ignore …
usersforsomebody Feb 4, 2026
702a9d9
fix(win): C4267 size_t to long in tensor view() and add utils_stub fo…
usersforsomebody Feb 4, 2026
a10c6da
fix(win): C4267 size_t to int in rearrange loop; cast argmax index to…
usersforsomebody Feb 4, 2026
5ffe1c0
fix(windows): 导出 Qwen2 推理 API 以修复 Windows DLL 符号未找到
usersforsomebody Feb 4, 2026
78ea945
perf: enable OpenMP and parallelize linear operator (4x-5x speedup)
usersforsomebody Mar 8, 2026
7c00cf3
perf(ops): 分割 linear 循环块 (Tiling) 试图优化缓存命中,但因内部 utils::cast 阻断 SIMD 导…
usersforsomebody Mar 9, 2026
fb003e3
perf(linear): AVX2 SIMD 优化 f32/bf16/fp16 矩阵乘法,提取共享 SIMD 工具函数
usersforsomebody Mar 13, 2026
2380d3e
feat(linear): 添加OpenMP多线程并行化,6线程加速约3x
usersforsomebody Mar 14, 2026
f10b579
fix(linear): 用C级if替代OpenMP if子句,消除M=1时GOMP运行时开销
usersforsomebody Mar 14, 2026
acc5d75
feat(linear): 集成 OpenBLAS sgemm 加速 linear 算子
usersforsomebody Mar 15, 2026
08b7fc7
fix(qwen2): 修复 generate 函数多生成 1 个 token 的 bug
usersforsomebody Mar 16, 2026
00c00c8
feat(linear): 运行时 BLAS 检测, 支持 MKL/OpenBLAS dlopen 加载
usersforsomebody Mar 16, 2026
88b8d9b
feat(linear): N维度并行化加速M=1解码, 添加AVX-512内核与--native编译选项
usersforsomebody Mar 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,7 @@ htmlcov/
# Windows
Thumbs.db
ehthumbs.db
desktop.ini
desktop.ini
# Windows Zone.Identifier (invalid path on Windows if committed)
*:Zone.Identifier
*.Identifier
18 changes: 16 additions & 2 deletions include/llaisys/models/qwen2.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,26 @@ __C {

struct LlaisysQwen2Model;

/* Create a Qwen2 model instance.
 * meta       -- model hyperparameters (must match LlaisysQwen2Meta on the Python side)
 * device     -- target device type
 * device_ids -- device index array (may be NULL for default selection)
 * ndevice    -- number of entries in device_ids
 * dtype      -- data type used for the model weights
 *
 * NOTE(review): the stale 4-argument declaration of this function was removed;
 * two conflicting prototypes for the same symbol do not compile in C. */
__export struct LlaisysQwen2Model *llaisysQwen2ModelCreate(const LlaisysQwen2Meta *meta, llaisysDeviceType_t device, int *device_ids, int ndevice, llaisysDataType_t dtype);

/* Release the model and all resources owned by it. */
__export void llaisysQwen2ModelDestroy(struct LlaisysQwen2Model * model);

/* Access the model's weight table. */
__export struct LlaisysQwen2Weights *llaisysQwen2ModelWeights(struct LlaisysQwen2Model * model);

/* Copy one named weight tensor (host memory at `data`, layout described by
 * shape/ndim/dtype) into the model. */
__export void llaisysQwen2LoadWeight(
    struct LlaisysQwen2Model * model,
    const char * name,
    void * data,
    size_t * shape,
    size_t ndim,
    llaisysDataType_t dtype);

/* Run a forward pass over `seq_len` tokens starting at KV-cache position
 * `start_pos`; returns an opaque pointer to the logits buffer. */
__export void *llaisysQwen2ModelForward(
    struct LlaisysQwen2Model * model,
    int64_t * token_ids,
    size_t seq_len,
    size_t start_pos);

/* Sample the next token id from the logits buffer returned by Forward. */
__export int llaisysQwen2Sample(void * logits_ptr);
}
#endif // LLAISYS_MODELS_QWEN2_H
Binary file added linear.prof
Binary file not shown.
44 changes: 44 additions & 0 deletions python/llaisys/libllaisys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .llaisys_types import llaisysDataType_t, DataType
from .llaisys_types import llaisysMemcpyKind_t, MemcpyKind
from .llaisys_types import llaisysStream_t
from .llaisys_types import LlaisysQwen2Meta
from .tensor import llaisysTensor_t
from .tensor import load_tensor
from .ops import load_ops
Expand All @@ -33,11 +34,51 @@ def load_shared_library():

return ctypes.CDLL(str(lib_path))

def load_qwen2_api(lib):
    """Attach ctypes signatures for the Qwen2 model C API, if present.

    Best-effort: a library built without the Qwen2 symbols is left
    untouched, and any failure only emits a warning instead of aborting
    the package import.
    """
    try:
        create_fn = getattr(lib, 'llaisysQwen2ModelCreate', None)
        if create_fn is not None:
            create_fn.argtypes = [
                ctypes.POINTER(LlaisysQwen2Meta),  # meta
                ctypes.c_int,                      # device type
                ctypes.POINTER(ctypes.c_int),      # device ids (may be NULL)
                ctypes.c_int,                      # ndevice
                ctypes.c_int,                      # weight dtype
            ]
            create_fn.restype = ctypes.c_void_p
        load_fn = getattr(lib, 'llaisysQwen2LoadWeight', None)
        if load_fn is not None:
            load_fn.argtypes = [
                ctypes.c_void_p,                   # model handle
                ctypes.c_char_p,                   # weight name
                ctypes.c_void_p,                   # raw data pointer
                ctypes.POINTER(ctypes.c_size_t),   # shape array
                ctypes.c_size_t,                   # ndim
                ctypes.c_int,                      # dtype
            ]
            load_fn.restype = None
    except Exception as e:
        print(f"Warning: Failed to load Qwen2 API signatures. {e}")
def llaisys_qwen2_create(meta, device_id, dtype=13):
    """Create a Qwen2 model handle via the C API.

    Bug fix: ``llaisysQwen2ModelCreate`` declares five argtypes (the last
    being the weight data type), but this wrapper previously passed only
    four arguments, so ctypes raised at every call. The dtype is now
    forwarded; the default of 13 is F32 per the llaisysDataType_t mapping
    (see TYPE_MAP in qwen2.py), keeping existing callers working.

    meta      -- LlaisysQwen2Meta instance (passed by reference)
    device_id -- llaisysDeviceType_t value
    dtype     -- llaisysDataType_t value for the model weights
    """
    return LIB_LLAISYS.llaisysQwen2ModelCreate(
        ctypes.byref(meta),
        device_id,
        None,  # device_ids: NULL -> default device selection
        0,     # ndevice
        dtype,
    )
def llaisys_qwen2_load_weight(model_handle, name, data_ptr, shape, ndim, dtype):
    """Copy one named weight tensor into the C-side model.

    model_handle -- opaque model pointer from llaisys_qwen2_create
    name         -- weight name as bytes (C string)
    data_ptr     -- void pointer to the host tensor data
    shape        -- ctypes c_size_t array of dimension sizes
    ndim         -- number of dimensions in `shape`
    dtype        -- llaisysDataType_t value of the data
    """
    args = (model_handle, name, data_ptr, shape, ndim, dtype)
    LIB_LLAISYS.llaisysQwen2LoadWeight(*args)

LIB_LLAISYS = load_shared_library()
load_runtime(LIB_LLAISYS)
load_tensor(LIB_LLAISYS)
load_ops(LIB_LLAISYS)
load_qwen2_api(LIB_LLAISYS)


__all__ = [
Expand All @@ -52,4 +93,7 @@ def load_shared_library():
"llaisysMemcpyKind_t",
"MemcpyKind",
"llaisysStream_t",
"LlaisysQwen2Meta",
"llaisys_qwen2_create",
"llaisys_qwen2_load_weight"
]
17 changes: 17 additions & 0 deletions python/llaisys/libllaisys/llaisys_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,20 @@ class MemcpyKind(IntEnum):
"MemcpyKind",
"llaisysStream_t",
]


class LlaisysQwen2Meta(ctypes.Structure):
    """ctypes mirror of the C struct ``LlaisysQwen2Meta``.

    Field order and types must stay in sync with
    include/llaisys/models/qwen2.h.
    """

    _fields_ = [
        ("dtype", ctypes.c_int),        # llaisysDataType_t of the weights
        ("nlayer", ctypes.c_size_t),    # num_hidden_layers
        ("hs", ctypes.c_size_t),        # hidden_size
        ("nh", ctypes.c_size_t),        # num_attention_heads
        ("nkvh", ctypes.c_size_t),      # num_key_value_heads
        ("dh", ctypes.c_size_t),        # presumably head dimension — confirm vs C header
        ("di", ctypes.c_size_t),        # presumably intermediate (FFN) size — confirm vs C header
        ("maxseq", ctypes.c_size_t),    # max_position_embeddings
        ("voc", ctypes.c_size_t),       # vocab_size
        ("epsilon", ctypes.c_float),    # rms_norm_eps
        ("theta", ctypes.c_float),      # rope_theta
        ("end_token", ctypes.c_int64),  # EOS token id
    ]
166 changes: 157 additions & 9 deletions python/llaisys/models/qwen2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,141 @@

from pathlib import Path
import safetensors
import ctypes
import numpy as np
import struct
import json
import mmap
import os


from ..libllaisys import(
DeviceType,
LlaisysQwen2Meta,
llaisys_qwen2_create,
llaisys_qwen2_load_weight
)
# Map safetensors dtype strings to llaisysDataType_t enum values.
TYPE_MAP={
# Must match the C++ enum llaisysDataType_t exactly.
# See include/llaisys.h and python/llaisys/libllaisys/llaisys_types.py
"F32":13,
"F16":12, # Fix: was previously mis-mapped to 11 (F8)
"BF16":19
}
class Qwen2:

def __init__(self, model_path, device: DeviceType = DeviceType.CPU):
    """Load a Qwen2 model: parse config.json, create the C-side model,
    register inference signatures, then stream every safetensors weight
    into it.

    model_path -- directory containing config.json and *.safetensors
    device     -- target device (CPU by default)

    Raises FileNotFoundError if config.json is missing.
    """
    self.lib = LIB_LLAISYS

    model_path = Path(model_path)
    config_path = model_path / "config.json"
    if not config_path.exists():
        raise FileNotFoundError(f"Config not found at {config_path}")
    with open(config_path, "r") as f:
        config = json.load(f)

    # Resolve the weight data type first so it can go both into the meta
    # struct and into the create call.
    config_dtype_str = config.get("torch_dtype", "float16")
    dtype_keys = {"float32": "F32", "bfloat16": "BF16", "float16": "F16"}
    if config_dtype_str not in dtype_keys:
        print(f"Warning: Unknown dtype {config_dtype_str}, using F16")
    target_dtype = TYPE_MAP[dtype_keys.get(config_dtype_str, "F16")]

    meta = LlaisysQwen2Meta()
    meta.dtype = target_dtype  # fix: was never populated before
    meta.nlayer = config.get("num_hidden_layers", 28)
    meta.hs = config.get("hidden_size", 1536)
    meta.nh = config.get("num_attention_heads", 12)
    meta.nkvh = config.get("num_key_value_heads", 2)
    # Fix: the struct field is `voc`; the previous `meta.vocab_size = ...`
    # silently created a plain Python attribute and left the C field at 0.
    meta.voc = config.get("vocab_size", 151936)
    # Fix: dh/di/end_token were never populated (C side saw zeros).
    meta.dh = config.get("head_dim", meta.hs // meta.nh)
    meta.di = config.get("intermediate_size", 8960)
    meta.maxseq = config.get("max_position_embeddings", 32768)
    meta.epsilon = config.get("rms_norm_eps", 1e-6)
    meta.theta = config.get("rope_theta", 10000.0)
    meta.end_token = config.get("eos_token_id", 151643)

    self.model = self.lib.llaisysQwen2ModelCreate(
        ctypes.byref(meta),
        device.value,
        None,  # device_ids: NULL -> default device selection
        0,     # ndevice
        target_dtype,
    )

    # ctypes signatures for the inference entry points.
    self.lib.llaisysQwen2ModelForward.restype = ctypes.c_void_p
    self.lib.llaisysQwen2ModelForward.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ctypes.c_int64),
        ctypes.c_size_t,
        ctypes.c_size_t,
    ]
    if hasattr(self.lib, 'llaisysQwen2Sample'):
        self.lib.llaisysQwen2Sample.restype = ctypes.c_int
        self.lib.llaisysQwen2Sample.argtypes = [ctypes.c_void_p]

    # Stream weights straight out of the memory-mapped safetensors files.
    # File layout: 8-byte little-endian header length, JSON header, then
    # the raw tensor bytes addressed by each entry's data_offsets.
    for file in sorted(model_path.glob("*.safetensors")):
        with open(file, 'rb') as f_obj:
            header_size = struct.unpack('<Q', f_obj.read(8))[0]
            header_data = json.loads(f_obj.read(header_size))
            data_start = 8 + header_size

            with mmap.mmap(f_obj.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                for name_, info in header_data.items():
                    if name_ == "__metadata__":
                        continue
                    dtype_str = info['dtype']
                    if dtype_str not in TYPE_MAP:
                        continue  # unsupported dtype: skip rather than crash
                    dtype = TYPE_MAP[dtype_str]

                    shape = info['shape']
                    start, end = info['data_offsets']

                    # F32 is read natively; F16/BF16 as raw 16-bit words
                    # (the C side reinterprets them by dtype tag).
                    np_dtype = np.float32 if dtype_str == 'F32' else np.uint16
                    itemsize = np.dtype(np_dtype).itemsize
                    tensor_np = np.frombuffer(
                        mm,
                        dtype=np_dtype,
                        count=(end - start) // itemsize,
                        offset=data_start + start,
                    )

                    ndim = len(shape)
                    c_shape = (ctypes.c_size_t * ndim)(*shape)
                    llaisys_qwen2_load_weight(
                        self.model,
                        name_.encode('utf-8'),
                        ctypes.c_void_p(tensor_np.ctypes.data),
                        c_shape,
                        ndim,
                        dtype,
                    )
                    # Drop the zero-copy view before the mmap closes.
                    del tensor_np

def forward(self, input_ids: Sequence[int], start_pos: int):
    """Run one forward pass over `input_ids` at KV-cache position
    `start_pos`; returns the raw C pointer to the logits buffer."""
    n = len(input_ids)

    # Marshal the Python token ids into a C int64 array.
    token_buf = (ctypes.c_int64 * n)(*input_ids)
    token_ptr = ctypes.cast(token_buf, ctypes.POINTER(ctypes.c_int64))

    return self.lib.llaisysQwen2ModelForward(
        self.model,
        token_ptr,
        n,
        start_pos,
    )

def generate(
self,
inputs: Sequence[int],
Expand All @@ -27,7 +147,35 @@ def generate(
top_p: float = 0.8,
temperature: float = 0.8,
):
if max_new_tokens is None:
max_new_tokens=100

tokens=list(inputs)
start_pos=0

eos_token_id=151643

logits_ptr=self.forward(tokens,start_pos)

next_token=self.lib.llaisysQwen2Sample(logits_ptr)
tokens.append(next_token)
if next_token ==eos_token_id:
return tokens

start_pos=len(inputs)

for i in range(max_new_tokens - 1):
input_step=[tokens[-1]]

logits_ptr=self.forward(input_step,start_pos)

next_token=self.lib.llaisysQwen2Sample(logits_ptr)

tokens.append(next_token)
# 与 HF 一样,将 EOS token 也包含在输出序列中
if next_token == eos_token_id:
break

# TODO: Implement generate function
start_pos+=1

return []
return tokens
Binary file added report_linear.pdf
Binary file not shown.
Loading
Loading