initial trial

alpaka-group · Nov 21, 2024 · 6b29767 · 6b29767
1 parent 8fefd70
commit 6b29767
Show file tree

Hide file tree

Showing 3 changed files with 222 additions and 0 deletions.
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -36,3 +36,5 @@ add_subdirectory("randomCells2D/")
 add_subdirectory("reduce/")
 add_subdirectory("tagSpecialization/")
 add_subdirectory("vectorAdd/")
+add_subdirectory("useBLASInAlpaka/")
+
diff --git a/example/useBLASInAlpaka/CMakeLists.txt b/example/useBLASInAlpaka/CMakeLists.txt
@@ -0,0 +1,53 @@
+#
+# Copyright 2023 Benjamin Worpitz, Jan Stephan
+# SPDX-License-Identifier: ISC
+#
+
+################################################################################
+# Required CMake version.
+
+cmake_minimum_required(VERSION 3.25)
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+################################################################################
+# Project.
+
+set(_TARGET_NAME useBLASInAlpaka)
+
+project(${_TARGET_NAME} LANGUAGES CXX)
+
+#-------------------------------------------------------------------------------
+# Find alpaka.
+
+if(NOT TARGET alpaka::alpaka)
+    option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)
+
+    if(alpaka_USE_SOURCE_TREE)
+        # Don't build the examples recursively
+        set(alpaka_BUILD_EXAMPLES OFF)
+        add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
+    else()
+        find_package(alpaka REQUIRED)
+    endif()
+endif()
+
+#-------------------------------------------------------------------------------
+# Find cuBLAS.
+
+# The example calls cuBLAS directly and uses alpaka::AccGpuCudaRt, so it can only
+# be built when alpaka's CUDA back-end is enabled. Skip it otherwise instead of
+# failing the configuration of every other example.
+if(NOT alpaka_ACC_GPU_CUDA_ENABLE)
+    message(STATUS "Skipping ${_TARGET_NAME}: it requires alpaka_ACC_GPU_CUDA_ENABLE=ON.")
+    return()
+endif()
+
+# FindCUDAToolkit replaces the deprecated FindCUDA module and provides the
+# CUDA::cublas imported target (include directories and library in one target).
+find_package(CUDAToolkit REQUIRED)
+
+#-------------------------------------------------------------------------------
+# Add executable.
+
+alpaka_add_executable(
+    ${_TARGET_NAME}
+    src/useBLASInAlpaka.cpp)
+target_link_libraries(
+    ${_TARGET_NAME}
+    PUBLIC alpaka::alpaka CUDA::cublas)
+
+set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)
+# NOTE(review): alpaka itself needs a newer C++ standard than 14; confirm whether
+# this property is still wanted or should simply be dropped.
+set_target_properties(${_TARGET_NAME} PROPERTIES CUDA_STANDARD 14)
+add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
diff --git a/example/useBLASInAlpaka/src/useBLASInAlpaka.cpp b/example/useBLASInAlpaka/src/useBLASInAlpaka.cpp
@@ -0,0 +1,167 @@
+#include <alpaka/alpaka.hpp>
+
+#include <cublas_v2.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <type_traits>
+
+// Index type: used for the alpaka extents, the accelerator's index type and the
+// loop counters throughout this example.
+using Idx = std::size_t;
+// Element type of the matrices A, B and C.
+using DataType = float;
+
+// Fill every element of a two-dimensional view with one constant.
+//
+// The elements are visited column by column (outer loop over the second index,
+// inner loop over the first). This is only the traversal order: where an element
+// ends up in memory is determined by the view itself, not by this function.
+//
+// TMdSpan: any type offering extent(dimension) and a two-index call operator
+//          that yields an assignable element.
+// span:    view whose elements are overwritten.
+// value:   integer that is converted to DataType and stored in each element.
+template<typename TMdSpan>
+inline void initializeMatrix(TMdSpan& span, int value) {
+    auto const rowCount = span.extent(0);
+    auto const colCount = span.extent(1);
+    for (Idx col = 0; col < colCount; ++col)
+        for (Idx row = 0; row < rowCount; ++row)
+            span(row, col) = static_cast<DataType>(value);
+}
+
+// Deleter that lets std::unique_ptr own a cuBLAS handle, so the handle is
+// released on every path out of main (early return or exception).
+struct CublasHandleDeleter {
+    void operator()(cublasHandle_t handle) const noexcept {
+        cublasDestroy(handle);
+    }
+};
+
+// Multiplies two small matrices with cuBLAS on buffers managed by alpaka.
+//
+// Steps: allocate and fill A (MxK) and B (KxN) on the host, copy them to the
+// CUDA device, run cublasSgemm on the native stream of the alpaka queue, copy
+// C (MxN) back and check every element against the expected value.
+//
+// Returns EXIT_SUCCESS when all elements of C match, EXIT_FAILURE when a cuBLAS
+// call fails or the verification finds a mismatch.
+int main() {
+    // cublasSgemm is the single-precision routine.
+    static_assert(std::is_same_v<DataType, float>, "cublasSgemm requires DataType to be float");
+
+    using Dim = alpaka::DimInt<2>;
+
+    // Matrix dimensions: A is MxK, B is KxN, C = A * B is MxN.
+    Idx const M = 3; // Rows in A and C
+    Idx const N = 2; // Columns in B and C
+    Idx const K = 1; // Columns in A and rows in B
+
+    // Accelerator, queue and extent types.
+    using Acc = alpaka::AccGpuCudaRt<Dim, Idx>;
+    using Queue = alpaka::Queue<Acc, alpaka::Blocking>;
+    using Vec = alpaka::Vec<Dim, Idx>;
+
+    auto const platformHost = alpaka::PlatformCpu{};
+    auto const devHost = alpaka::getDevByIdx(platformHost, 0);
+    auto const platformAcc = alpaka::Platform<Acc>{};
+    auto const devAcc = alpaka::getDevByIdx(platformAcc, 0);
+
+    Queue queue(devAcc);
+
+    // 2D extents as (rows, columns).
+    Vec const extentA(M, K);
+    Vec const extentB(K, N);
+    Vec const extentC(M, N);
+
+    // Host buffers and mdspan views onto them.
+    auto bufHostA = alpaka::allocBuf<DataType, Idx>(devHost, extentA);
+    auto bufHostB = alpaka::allocBuf<DataType, Idx>(devHost, extentB);
+    auto bufHostC = alpaka::allocBuf<DataType, Idx>(devHost, extentC);
+
+    auto mdHostA = alpaka::experimental::getMdSpan(bufHostA);
+    auto mdHostB = alpaka::experimental::getMdSpan(bufHostB);
+    auto mdHostC = alpaka::experimental::getMdSpan(bufHostC);
+
+    initializeMatrix(mdHostA, 1); // All elements in A are 1
+    initializeMatrix(mdHostB, 2); // All elements in B are 2
+
+    // Prints a matrix row by row, elements separated by a blank.
+    auto const printMatrix = [](char const* title, auto const& matrix, Idx rows, Idx cols) {
+        std::cout << title << std::endl;
+        for (Idx i = 0; i < rows; ++i) {
+            for (Idx j = 0; j < cols; ++j) {
+                std::cout << matrix(i, j) << " ";
+            }
+            std::cout << std::endl;
+        }
+    };
+
+    printMatrix("Matrix A (Host):", mdHostA, M, K);
+    printMatrix("Matrix B (Host):", mdHostB, K, N);
+
+    // Device buffers.
+    auto bufDevA = alpaka::allocBuf<DataType, Idx>(devAcc, extentA);
+    auto bufDevB = alpaka::allocBuf<DataType, Idx>(devAcc, extentB);
+    auto bufDevC = alpaka::allocBuf<DataType, Idx>(devAcc, extentC);
+
+    // Copy the inputs to the device.
+    alpaka::memcpy(queue, bufDevA, bufHostA);
+    alpaka::memcpy(queue, bufDevB, bufHostB);
+    alpaka::wait(queue);
+
+    std::cout << "Copied matrices A and B to the device." << std::endl;
+
+    // Reports a failed cuBLAS call on stderr; returns true on success.
+    auto const cublasOk = [](cublasStatus_t status, char const* call) {
+        if (status == CUBLAS_STATUS_SUCCESS) {
+            return true;
+        }
+        std::cerr << call << " failed with cuBLAS status " << static_cast<int>(status) << std::endl;
+        return false;
+    };
+
+    // cuBLAS setup. The guard is declared after the queue, so the handle is
+    // destroyed before the stream it is bound to.
+    cublasHandle_t cublasHandle = nullptr;
+    if (!cublasOk(cublasCreate(&cublasHandle), "cublasCreate")) {
+        return EXIT_FAILURE;
+    }
+    std::unique_ptr<std::remove_pointer_t<cublasHandle_t>, CublasHandleDeleter> const handleGuard(cublasHandle);
+
+    // Run cuBLAS on the native CUDA stream of the alpaka queue.
+    auto alpakaStream = alpaka::getNativeHandle(queue);
+    if (!cublasOk(cublasSetStream(cublasHandle, alpakaStream), "cublasSetStream")) {
+        return EXIT_FAILURE;
+    }
+
+    auto pitchA = alpaka::getPitchesInBytes(bufDevA);
+    auto pitchB = alpaka::getPitchesInBytes(bufDevB);
+    auto pitchC = alpaka::getPitchesInBytes(bufDevC);
+
+    std::cout << "pitchA" << pitchA  << std::endl;
+    std::cout << "pitchB" << pitchB  << std::endl;
+    std::cout << "pitchC" << pitchC  << std::endl;
+
+    // alpaka stores 2D buffers row by row and the device rows may be padded, so
+    // the distance between two rows (in elements) is the row pitch divided by
+    // the element size, not the number of columns.
+    auto const rowStride = [](auto const& pitches) {
+        return static_cast<int>(pitches[0] / sizeof(DataType));
+    };
+    int const ldA = rowStride(pitchA);
+    int const ldB = rowStride(pitchB);
+    int const ldC = rowStride(pitchC);
+
+    // Perform matrix multiplication: C = A * B.
+    // cuBLAS expects column-major storage. A row-major matrix with row stride
+    // ld is, read as column-major with leading dimension ld, its own transpose.
+    // Therefore compute C^T = B^T * A^T: swap the operands and swap M and N.
+    DataType const alpha = 1.0f;
+    DataType const beta = 0.0f; // beta == 0 overwrites C
+    auto const gemmStatus = cublasSgemm(
+        cublasHandle,
+        CUBLAS_OP_N, CUBLAS_OP_N, // No transpose
+        static_cast<int>(N), static_cast<int>(M), static_cast<int>(K), // Dimensions of C^T = B^T * A^T
+        &alpha,
+        alpaka::getPtrNative(bufDevB), ldB, // B^T is NxK
+        alpaka::getPtrNative(bufDevA), ldA, // A^T is KxM
+        &beta,
+        alpaka::getPtrNative(bufDevC), ldC // C^T is NxM
+        );
+    if (!cublasOk(gemmStatus, "cublasSgemm")) {
+        return EXIT_FAILURE;
+    }
+
+    alpaka::wait(queue); // Wait for multiplication to complete
+    std::cout << "Matrix multiplication completed." << std::endl;
+
+    // Copy result back to host
+    alpaka::memcpy(queue, bufHostC, bufDevC);
+    alpaka::wait(queue);
+    std::cout << "Copied result matrix C back to the host." << std::endl;
+
+    printMatrix("Matrix C (Host):", mdHostC, M, N);
+
+    // Verify the result: every element of C is the sum of K products 1 * 2.
+    bool success = true;
+    DataType const expectedValue = static_cast<DataType>(2 * K);
+    for (Idx i = 0; i < M; ++i) {
+        for (Idx j = 0; j < N; ++j) {
+            if (std::fabs(mdHostC(i, j) - expectedValue) > 1e-5f) { // Allow small floating-point errors
+                std::cout << "Mismatch at (" << i << ", " << j << "): "
+                          << mdHostC(i, j) << " != " << expectedValue << std::endl;
+                success = false;
+            }
+        }
+    }
+
+    std::cout << "Multiplication of matrices of size " << M << "x" << K << " and " << K << "x" << N
+              << " using mdspan " << (success ? "succeeded" : "failed") << "!" << std::endl;
+
+    // The cuBLAS handle is released by handleGuard on every path.
+    return success ? EXIT_SUCCESS : EXIT_FAILURE;
+}