spike-zhu · spike-zhu · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -23,6 +23,7 @@ jobs:
 
     - name: Install Python dependencies
       run: |
+        pip install numpy
         pip install torch
 
     - name: Install xmake
@@ -33,15 +34,15 @@ jobs:
     - name: configure xmake
       run: xmake f --cpu=true -cv
 
-    - name: Build with XMake
-      run: xmake
-
-    - name: Find and Set INFINI_ROOT
-      id: set_infini_root
+    - name: Set INFINI_ROOT
       run: |
-        export INFINI_ROOT=$GITHUB_WORKSPACE
+        export INFINI_ROOT=$GITHUB_WORKSPACE/.infini
+        mkdir -p $INFINI_ROOT
         echo "INFINI_ROOT=$INFINI_ROOT" >> $GITHUB_ENV
 
+    - name: Build with XMake
+      run: xmake build && xmake install
+
     - name: Run Python Tests
       run: |
         GREEN='\033[0;32m'

diff --git a/README.md b/README.md
@@ -71,7 +71,7 @@ infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc
 
 ## 一、使用说明
 
-### 配置
+### 1. 配置
 
 #### 查看当前配置
 
@@ -99,23 +99,27 @@ xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
 xmake f --cambricon-mlu=true -cv
 ```
 
-### 编译
+#### 配置 NPU
+
+```xmake
+xmake f --ascend-npu=true -cv
+```
+
+### 2. 编译安装
 
 ```xmake
-xmake
+xmake build && xmake install
 ```
 
-### 将编译好的算子库添加至环境变量 `INFINI_ROOT`
+### 3. 设置环境变量
 
-```bash
-export INFINI_ROOT=[PATH_TO_LIBRARY]
-```
+按输出提示设置 `INFINI_ROOT` 和 `LD_LIBRARY_PATH` 环境变量。
 
-### 运行算子测试
+### 4. 运行算子测试
 
 ```bash
 cd operatorspy/tests
-python operator_name.py
+python operator_name.py [--cpu | --cuda | --cambricon | --ascend]
 ```
 
 ## 二、开发说明

diff --git a/include/data_type.h b/include/data_type.h
@@ -9,6 +9,7 @@ typedef struct DataLayout {
         mantissa : 8,
         exponent : 8;
 
+#ifdef __cplusplus
     bool operator==(const DataLayout &other) const {
         union TypePun {
             DataLayout layout;
@@ -24,12 +25,13 @@ typedef struct DataLayout {
     bool operator!=(const DataLayout &other) const {
         return !(*this == other);
     }
+#endif
 } DataLayout;
 
 typedef struct DataLayout DT;
 
 // clang-format off
-constexpr static struct DataLayout
+const static struct DataLayout
     I8   = {1, 1, 1,  7,  0},
     I16  = {1, 1, 2, 15,  0},
     I32  = {1, 1, 4, 31,  0},

diff --git a/include/ops/concat/concat.h b/include/ops/concat/concat.h
@@ -0,0 +1,27 @@
+#ifndef CONCAT_H
+#define CONCAT_H
+
+#include "../../export.h"
+#include "../../operators.h"
+// Descriptor for the concat operator. Only the device field is declared in this header.
+typedef struct ConcatDescriptor {
+    Device device;  // Device is declared in the included headers; presumably the backend tag — TODO confirm
+} ConcatDescriptor;
+
+typedef ConcatDescriptor *infiniopConcatDescriptor_t;  // descriptor pointer type used by the infiniopConcat* functions below
+// Create: takes a handle, a descriptor y, descriptors x with a count, and an axis. Semantics live in the implementation, not visible here.
+__C __export infiniopStatus_t infiniopCreateConcatDescriptor(infiniopHandle_t handle,
+                                                             infiniopConcatDescriptor_t *desc_ptr,  // out-parameter by signature; presumably receives the new descriptor
+                                                             infiniopTensorDescriptor_t y,  // presumably the output tensor descriptor — TODO confirm
+                                                             infiniopTensorDescriptor_t *x,  // presumably an array of num_inputs descriptors — verify against the implementation
+                                                             uint64_t num_inputs,
+                                                             int64_t axis);  // signed type; NOTE(review): whether negative axes are accepted is not visible here
+// Run: y is non-const (presumably the output buffer); x points to const input pointers. NOTE(review): no input count is passed here, presumably it is taken from desc.
+__C __export infiniopStatus_t infiniopConcat(infiniopConcatDescriptor_t desc,
+                                             void *y,
+                                             void const **x,
+                                             void *stream);  // untyped stream argument; presumably backend-specific — TODO confirm
+// Destroy: takes the descriptor pointer; presumably releases what Create allocated — TODO confirm.
+__C __export infiniopStatus_t infiniopDestroyConcatDescriptor(infiniopConcatDescriptor_t desc);
+
+#endif // CONCAT_H
diff --git a/include/ops/gemm/gemm.h b/include/ops/gemm/gemm.h
@@ -18,8 +18,8 @@ __C __export infiniopStatus_t infiniopCreateGEMMDescriptor(infiniopHandle_t hand
                                                            infiniopTensorDescriptor_t c_desc,
                                                            float alpha,
                                                            float beta,
-                                                           bool transA,
-                                                           bool transB);
+                                                           char transA,
+                                                           char transB);
 
 __C __export infiniopStatus_t infiniopGetGEMMWorkspaceSize(infiniopGEMMDescriptor_t desc, uint64_t *size);
 

diff --git a/include/ops/mlp/mlp.h b/include/ops/mlp/mlp.h
@@ -19,7 +19,7 @@ __C __export infiniopStatus_t infiniopCreateMLPDescriptor(infiniopHandle_t handl
                                                           infiniopTensorDescriptor_t w12_desc,
                                                           infiniopTensorDescriptor_t w3_desc,
                                                           float alpha,
-                                                          bool residual);
+                                                          char residual);
 
 __C __export infiniopStatus_t infiniopGetMLPWorkspaceSize(infiniopMLPDescriptor_t desc, uint64_t *size);
 

diff --git a/operatorspy/liboperators.py b/operatorspy/liboperators.py
@@ -10,7 +10,6 @@
 
 LIB_OPERATORS_DIR = os.path.join(os.environ.get("INFINI_ROOT"), "lib")
 
-
 class TensorDescriptor(Structure):
     _fields_ = [
         ("dt", DataLayout),
@@ -19,10 +18,8 @@ class TensorDescriptor(Structure):
         ("pattern", POINTER(c_int64)),
     ]
 
-
 infiniopTensorDescriptor_t = ctypes.POINTER(TensorDescriptor)
 
-
 class CTensor:
     def __init__(self, desc, data):
         self.descriptor = desc

diff --git a/operatorspy/tests/avg_pool.py b/operatorspy/tests/avg_pool.py
@@ -152,10 +152,6 @@ def test(
         elapsed = (time.time() - start_time) / NUM_ITERATIONS
         print(f"    lib time: {elapsed :6f}")
 
-
-    print(x)
-    print(y)
-    print(ans)
     assert torch.allclose(y, ans, atol=0, rtol=1e-3)
     check_error(lib.infiniopDestroyAvgPoolDescriptor(descriptor))
 
@@ -205,7 +201,7 @@ def test_musa(lib, test_cases):
         # ((1, 1, 10), (3,), (1,), (1,)),
         ((1, 1, 2, 2), (2, 2), (1, 1), (1, 1)),
         ((32, 4, 224, 224), (3, 3), (1, 1), (2, 2)),
-        ((1, 1, 16, 16, 16), (5, 5, 5), (2, 2, 2), (2, 2, 2)),
+        # ((1, 1, 16, 16, 16), (5, 5, 5), (2, 2, 2), (2, 2, 2)),
     ]
     args = get_args()
     lib = open_lib()