forked from secretflow/spu
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlinalg.h
94 lines (82 loc) · 3.16 KB
/
linalg.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Copyright 2021 Ant Group Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#define EIGEN_HAS_OPENMP
#include "Eigen/Core"
namespace spu::mpc::linalg {
/**
* @brief C := op( A )*op( B )
*
* @tparam T Type of A, B, C
* @param M Number of rows in A
* @param N Number of columns in B
* @param K Number of columns in A and number of rows in B
* @param A Pointer to A
* @param LDA Leading dimension stride of A
* @param IDA Inner dimension stride of A
* @param B Pointer to B
* @param LDB Leading dimension stride of B
* @param IDB Inner dimension stride of B
* @param C Pointer to C
* @param LDC Leading dimension stride of C
* @param IDC Inner dimension stride of C
*/
template <typename T>
void matmul(int64_t M, int64_t N, int64_t K, const T* A, int64_t LDA,
int64_t IDA, const T* B, int64_t LDB, int64_t IDB, T* C,
int64_t LDC, int64_t IDC) {
using StrideT = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
using MapMatrixConstT = Eigen::Map<
const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>,
Eigen::Unaligned, StrideT>;
using MapMatrixT = Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>,
Eigen::Unaligned, StrideT>;
MapMatrixConstT a(A, M, K, StrideT(LDA, IDA));
MapMatrixConstT b(B, K, N, StrideT(LDB, IDB));
MapMatrixT c(C, M, N, StrideT(LDC, IDC));
// if (M == 1) {
// // GEMV case 1*K * K*N -> 1*N
// auto work_load_size = computeTaskSize(N);
// yacl::parallel_for(0, N, work_load_size, [&](int64_t begin, int64_t end)
// {
// auto block_size = end - begin;
// c.block(0, begin, 1, block_size) =
// a.row(0) * b.block(0, begin, K, block_size);
// });
// return;
// } else if (N == 1) {
// // GEMV case M*K * K*1 -> M*1
// auto work_load_size = computeTaskSize(M);
// yacl::parallel_for(0, M, work_load_size, [&](int64_t begin, int64_t end)
// {
// auto block_size = end - begin;
// c.block(begin, 0, block_size, 1) =
// a.block(begin, 0, block_size, K) * b.col(0);
// });
// return;
// }
// If we don't limit # threads, eigen may overloading omp tasks (especially
// under relative small tasks, MLP for example)
//
// FIXME: Investigate what can happen once we support ILP
// The performance is extremely bad when multi-process all tries to use
// num_cores.
// auto expected_num_threads = std::max((M * K + kMinTaskSize) / kMinTaskSize,
// (N * K + kMinTaskSize) / kMinTaskSize);
Eigen::setNbThreads(4);
c.noalias() = a * b;
}
} // namespace spu::mpc::linalg