-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8fefd70
commit 6b29767
Showing
3 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# | ||
# Copyright 2023 Benjamin Worpitz, Jan Stephan | ||
# SPDX-License-Identifier: ISC | ||
# | ||
|
||
################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.25)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME useBLASInAlpaka)

project(${_TARGET_NAME} LANGUAGES CXX)

#-------------------------------------------------------------------------------
# Find cuBLAS.
#
# The legacy FindCUDA module is deprecated (and removed under policy CMP0146).
# FindCUDAToolkit provides the imported target CUDA::cublas, which carries the
# include directories and link dependencies, so no bare library name has to be
# appended to a variable by hand.

find_package(CUDAToolkit REQUIRED)

#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
    option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

    if(alpaka_USE_SOURCE_TREE)
        # Don't build the examples recursively
        set(alpaka_BUILD_EXAMPLES OFF)
        add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
    else()
        find_package(alpaka REQUIRED)
    endif()
endif()

#-------------------------------------------------------------------------------
# Add executable.

alpaka_add_executable(
    ${_TARGET_NAME}
    src/useBLASInAlpaka.cpp)
target_link_libraries(
    ${_TARGET_NAME}
    PUBLIC alpaka::alpaka CUDA::cublas)

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)
# alpaka requires C++17, so the CUDA standard must not be lower than that.
set_target_properties(${_TARGET_NAME} PROPERTIES CUDA_STANDARD 17)
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
#include <alpaka/alpaka.hpp> | ||
#include <cublas_v2.h> | ||
#include <iostream> | ||
#include <cmath> | ||
|
||
// Index type used for extents and loop counters.
using Idx = std::size_t;
// Element type stored in the matrices.
using DataType = float;

// Assigns `value` (converted to DataType) to every element of a two-dimensional
// view. The view only has to provide `extent(dim)` and `operator()(row, col)`.
// Elements are visited one column at a time: the outer loop runs over
// extent(1), the inner loop over extent(0).
template<typename TMdSpan>
inline void initializeMatrix(TMdSpan& span, int value) {
    auto const rowCount = span.extent(0);
    auto const colCount = span.extent(1);
    DataType const fill = static_cast<DataType>(value);

    for (Idx col = 0; col < colCount; ++col) {
        for (Idx row = 0; row < rowCount; ++row) {
            span(row, col) = fill;
        }
    }
}
|
||
int main() { | ||
using Dim = alpaka::DimInt<2>; | ||
|
||
// Define matrix dimensions, A is MxK and B is KxN | ||
Idx const M = 3; // Rows in A and C | ||
Idx const N = 2; // Columns in B and C | ||
Idx const K = 1; // Columns in A and rows in B | ||
|
||
// Define device and queue | ||
using Acc = alpaka::AccGpuCudaRt<Dim, Idx>; | ||
using Queue = alpaka::Queue<Acc, alpaka::Blocking>; | ||
using Vec = alpaka::Vec<Dim, Idx>; | ||
|
||
auto const platformHost = alpaka::PlatformCpu{}; | ||
auto const devHost = alpaka::getDevByIdx(platformHost, 0); | ||
auto const platformAcc = alpaka::Platform<Acc>{}; | ||
auto const devAcc = alpaka::getDevByIdx(platformAcc, 0); | ||
|
||
Queue queue(devAcc); | ||
|
||
// Define the 2D extents (dimensions) | ||
Vec const extentA(static_cast<Idx>(M), static_cast<Idx>(K)); | ||
Vec const extentB(static_cast<Idx>(K), static_cast<Idx>(N)); | ||
Vec const extentC(static_cast<Idx>(M), static_cast<Idx>(N)); | ||
|
||
// Allocate host memory | ||
auto bufHostA = alpaka::allocBuf<DataType, Idx>(devHost, extentA); | ||
auto bufHostB = alpaka::allocBuf<DataType, Idx>(devHost, extentB); | ||
auto bufHostC = alpaka::allocBuf<DataType, Idx>(devHost, extentC); | ||
|
||
// Create mdspan views for host buffers | ||
auto mdHostA = alpaka::experimental::getMdSpan(bufHostA); | ||
auto mdHostB = alpaka::experimental::getMdSpan(bufHostB); | ||
auto mdHostC = alpaka::experimental::getMdSpan(bufHostC); | ||
|
||
// Initialize host matrices | ||
initializeMatrix(mdHostA, 1); // All elements in A are 1 | ||
initializeMatrix(mdHostB, 2); // All elements in B are 2 | ||
|
||
// Print initialized matrices on the host | ||
std::cout << "Matrix A (Host):" << std::endl; | ||
for (Idx i = 0; i < M; ++i) { | ||
for (Idx j = 0; j < K; ++j) { | ||
std::cout << mdHostA(i, j) << ""; | ||
} | ||
std::cout << std::endl; | ||
} | ||
|
||
std::cout << "Matrix B (Host):" << std::endl; | ||
for (Idx i = 0; i < K; ++i) { | ||
for (Idx j = 0; j < N; ++j) { | ||
std::cout << mdHostB(i, j) << ""; | ||
} | ||
std::cout << std::endl; | ||
} | ||
|
||
// Allocate device memory | ||
auto bufDevA = alpaka::allocBuf<DataType, Idx>(devAcc, extentA); | ||
auto bufDevB = alpaka::allocBuf<DataType, Idx>(devAcc, extentB); | ||
auto bufDevC = alpaka::allocBuf<DataType, Idx>(devAcc, extentC); | ||
|
||
// Copy data to device | ||
alpaka::memcpy(queue, bufDevA, bufHostA); | ||
alpaka::memcpy(queue, bufDevB, bufHostB); | ||
alpaka::wait(queue); | ||
|
||
|
||
|
||
|
||
std::cout << "Copied matrices A and B to the device." << std::endl; | ||
|
||
// Get the native CUDA stream from Alpaka queue | ||
auto alpakaStream = alpaka::getNativeHandle(queue); | ||
|
||
// cuBLAS setup | ||
cublasHandle_t cublasHandle; | ||
cublasCreate(&cublasHandle); | ||
cublasSetStream(cublasHandle, alpakaStream); | ||
auto pitchA = alpaka::getPitchesInBytes(bufDevA); | ||
auto pitchB = alpaka::getPitchesInBytes(bufDevB); | ||
auto pitchC = alpaka::getPitchesInBytes(bufDevC); | ||
|
||
std::cout << "pitchA" << pitchA << std::endl; | ||
std::cout << "pitchB" << pitchB << std::endl; | ||
std::cout << "pitchC" << pitchC << std::endl; | ||
|
||
// Perform matrix multiplication: C = A * B | ||
// Perform matrix multiplication: C = A * B | ||
float alpha = 1.0f, beta = 0.0f; // Set beta to 0.0f to overwrite C | ||
cublasSgemm( | ||
cublasHandle, | ||
CUBLAS_OP_N, CUBLAS_OP_N, // No transpose | ||
M, N, K, // Dimensions | ||
&alpha, | ||
alpaka::getPtrNative(bufDevA), M, // Leading dimension (rows of A) | ||
alpaka::getPtrNative(bufDevB), K, // Leading dimension (rows of B) | ||
&beta, | ||
alpaka::getPtrNative(bufDevC), M // Leading dimension (rows of C) | ||
); | ||
|
||
|
||
|
||
|
||
alpaka::wait(queue); // Wait for multiplication to complete | ||
std::cout << "Matrix multiplication completed." << std::endl; | ||
|
||
// Copy result back to host | ||
alpaka::memcpy(queue, bufHostC, bufDevC); | ||
alpaka::wait(queue); | ||
std::cout << "Copied result matrix C back to the host." << std::endl; | ||
|
||
// Print result matrix C | ||
std::cout << "Matrix C (Host):" << std::endl; | ||
for (Idx i = 0; i < M; ++i) { | ||
for (Idx j = 0; j < N; ++j) { | ||
std::cout << mdHostC(i, j) << " "; | ||
} | ||
std::cout << std::endl; | ||
} | ||
|
||
// Verify the result | ||
bool success = true; | ||
DataType expectedValue = 2 * K; // Expected value for all elements in C | ||
for (Idx i = 0; i < M; ++i) { | ||
for (Idx j = 0; j < N; ++j) { | ||
if (std::fabs(mdHostC(i, j) - expectedValue) > 1e-5f) { // Allow small floating-point errors | ||
std::cout << "Mismatch at (" << i << ", " << j << "): " | ||
<< mdHostC(i, j) << " != " << expectedValue << std::endl; | ||
success = false; | ||
} | ||
} | ||
} | ||
|
||
std::cout << "Multiplication of matrices of size " << M << "x" << K << " and " << K << "x" << N | ||
<< " using mdspan " << (success ? "succeeded" : "failed") << "!" << std::endl; | ||
|
||
if (!success) { | ||
return EXIT_FAILURE; | ||
} | ||
|
||
// Cleanup cuBLAS | ||
cublasDestroy(cublasHandle); | ||
|
||
return EXIT_SUCCESS; | ||
} |