From bfa53b37f9c2d2fca4d1e20134a4c39d639cf020 Mon Sep 17 00:00:00 2001 From: Thilina Ratnayaka Date: Tue, 24 Sep 2024 11:59:29 -0400 Subject: [PATCH] Add a test which calculate vectorDot product --- .../cpp/31_oklt_v3_moving_avg/CMakeLists.txt | 9 +- examples/cpp/31_oklt_v3_moving_avg/main.cpp | 98 +++++++++++++------ .../cpp/31_oklt_v3_moving_avg/vectorDot.okl | 28 ++++++ 3 files changed, 103 insertions(+), 32 deletions(-) create mode 100644 examples/cpp/31_oklt_v3_moving_avg/vectorDot.okl diff --git a/examples/cpp/31_oklt_v3_moving_avg/CMakeLists.txt b/examples/cpp/31_oklt_v3_moving_avg/CMakeLists.txt index 6c319e147..f8425f02b 100644 --- a/examples/cpp/31_oklt_v3_moving_avg/CMakeLists.txt +++ b/examples/cpp/31_oklt_v3_moving_avg/CMakeLists.txt @@ -2,9 +2,10 @@ compile_cpp_example_with_modes(oklt_v3_moving_avg main.cpp) add_custom_target(cpp_example_oklt_v3_moving_avg_cpy ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/constants.h constants.h - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/movingAverage.okl movingAverage.okl) + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/movingAverage.okl movingAverage.okl + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/vectorDot.okl vectorDot.okl) add_dependencies(examples_cpp_oklt_v3_moving_avg cpp_example_oklt_v3_moving_avg_cpy) target_sources(examples_cpp_oklt_v3_moving_avg - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/movingAverage.okl - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/constants.h - ) + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/movingAverage.okl + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/vectorDot.okl + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/constants.h) diff --git a/examples/cpp/31_oklt_v3_moving_avg/main.cpp b/examples/cpp/31_oklt_v3_moving_avg/main.cpp index 462024f63..538f85c6b 100644 --- a/examples/cpp/31_oklt_v3_moving_avg/main.cpp +++ b/examples/cpp/31_oklt_v3_moving_avg/main.cpp @@ -3,9 +3,25 @@ #include #include "constants.h" -std::vector buildData(std::size_t size, 
float initialValue, - float fluctuation) +bool starts_with(const std::string &str, const std::string &substring) { /* True when str begins with substring: rfind with start position 0 can only report a match at index 0. */ + return str.rfind(substring, 0) == 0; +} + /* Returns the device properties parsed from the argument that follows the first argv entry beginning with -d or --device; when no such pair exists it returns the Serial-mode default. Note the prefix match: any argument starting with -d qualifies. */ +occa::json getDeviceOptions(int argc, const char **argv) { + for(int i = 0; i < argc; ++i) { + std::string argument(argv[i]); + if((starts_with(argument,"-d") || starts_with(argument, "--device")) && i + 1 < argc) + { + std::string value(argv[i + 1]); + return occa::json::parse(value); + } + } + return occa::json::parse("{mode: 'Serial'}"); +} + /* Renamed from buildData by this patch; the parameter list is unchanged. */ +std::vector buildMovingAverageData(std::size_t size, + float initialValue, + float fluctuation) { std::vector buffer(size); float currentValue = initialValue; @@ -34,36 +50,13 @@ std::vector goldMovingAverage(const std::vector &hostVector) { return result; } -bool starts_with(const std::string &str, const std::string &substring) { - return str.rfind(substring, 0) == 0; -} - -occa::json getDeviceOptions(int argc, const char **argv) { - for(int i = 0; i < argc; ++i) { - std::string argument(argv[i]); - if((starts_with(argument,"-d") || starts_with(argument, "--device")) && i + 1 < argc) - { - std::string value(argv[i + 1]); - return occa::json::parse(value); - } - } - return occa::json::parse("{mode: 'Serial'}"); -} - -int main(int argc, const char **argv) { - - occa::json deviceOpts = getDeviceOptions(argc, argv); - auto inputHostBuffer = buildData(THREADS_PER_BLOCK * WINDOW_SIZE + WINDOW_SIZE, 10.0f, 4.0f); +int runMovingAverageTest(occa::device &device, occa::json &buildProps) { /* Former body of main, now run on the device and build properties supplied by the caller. The return statements visible in this patch yield 1 early and 0 at the end (presumably failure and success; the comparison itself lies outside the hunks). */ + auto inputHostBuffer = buildMovingAverageData(THREADS_PER_BLOCK * WINDOW_SIZE + WINDOW_SIZE, 10.0f, 4.0f); std::vector outputHostBuffer(inputHostBuffer.size() - WINDOW_SIZE); - occa::device device(deviceOpts); occa::memory deviceInput = device.malloc(inputHostBuffer.size()); occa::memory deviceOutput = device.malloc(outputHostBuffer.size()); - occa::json buildProps({ - {"transpiler-version", 3} - }); - occa::kernel movingAverageKernel = device.buildKernel("movingAverage.okl", 
"movingAverage32f", buildProps); deviceInput.copyFrom(inputHostBuffer.data(), inputHostBuffer.size()); @@ -86,8 +79,57 @@ int main(int argc, const char **argv) { return 1; } } + std::cout << "Comparison with gold has passed" << std::endl; std::cout << "Moving average finished" << std::endl; return 0; } + /* Dot-product check: fills vecA[i] = i + 1 and vecB[i] = size - (i + 1), runs the vectorDot kernel to obtain one partial sum per group of THREADS_PER_BLOCK elements, adds the partial sums on the host and compares the total with the closed form. Returns 1 when the relative error exceeds 1e-8, otherwise 0. */ +int runVectorDotTest(occa::device &device, occa::json &buildProps) { + const std::size_t size = 1e7; + auto vecA = std::vector(size); + auto vecB = std::vector(size); + auto vecT = std::vector((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK); + + for (std::size_t i = 0; i < vecA.size(); i++) + vecA[i] = i + 1, vecB[i] = size - (i + 1); + + occa::memory deviceVecA = device.malloc(vecA.size()); + deviceVecA.copyFrom(vecA.data(), vecA.size()); + + occa::memory deviceVecB = device.malloc(vecB.size()); + deviceVecB.copyFrom(vecB.data(), vecB.size()); + /* NOTE(review): the second argument 0 is presumably a null source pointer, i.e. no initial host copy; confirm against the occa malloc overloads. */ + occa::memory deviceVecT = device.malloc(vecT.size(), 0); + + occa::kernel vectorDotKernel = device.buildKernel("vectorDot.okl", "vectorDot", + buildProps); + /* NOTE(review): deviceVecA.size() is passed as the kernel element count n; confirm it reports elements and not bytes. */ + vectorDotKernel(deviceVecT, deviceVecA.size(), deviceVecA, deviceVecB); + + deviceVecT.copyTo(vecT.data(), deviceVecT.size()); + /* The final reduction of the per-block partial sums happens here on the host. */ + double dot = 0; + for (std::size_t i = 0; i < vecT.size(); i++) + dot += vecT[i]; + /* Closed form: the sum over i = 1..size of i * (size - i) equals size * (size + 1) * (size - 1) / 6. NOTE(review): std::fabs needs <cmath> and this patch adds no include; confirm main.cpp already has it. */ + const double exact = (size * (size + 1.0) * (size - 1.0)) / 6; + return (std::fabs(dot - exact)/exact > 1e-8); +} + /* Entry point: one device (built from the command-line options) and one build-property object (transpiler-version 3) are shared by both tests. The exit status is the bitwise OR of the two results, so it is non-zero if either test fails. */ +int main(int argc, const char **argv) { + + occa::json deviceOpts = getDeviceOptions(argc, argv); + occa::device device(deviceOpts); + + occa::json buildProps({ + {"transpiler-version", 3} + }); + + int failure = 0; + failure |= runMovingAverageTest(device, buildProps); + failure |= runVectorDotTest(device, buildProps); + + return failure; +} diff --git a/examples/cpp/31_oklt_v3_moving_avg/vectorDot.okl b/examples/cpp/31_oklt_v3_moving_avg/vectorDot.okl new file mode 100644 index 000000000..becf10a7d --- /dev/null +++ b/examples/cpp/31_oklt_v3_moving_avg/vectorDot.okl @@ -0,0 +1,28 @@ +#include "constants.h" + +@kernel void 
vectorDot(double *temp, const unsigned int n, const double *a, + const double *b) { /* Block-wise dot product: outer iteration i multiplies up to THREADS_PER_BLOCK consecutive pairs a[t] * b[t], reduces them in the shared buffer s and stores the partial sum in temp[i]. temp needs (n + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK entries; the caller adds them up. */ + for (unsigned int i = 0; i < (n + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; i++; @outer) { + @shared double s[THREADS_PER_BLOCK]; + /* s is double like a, b and temp, so products and partial sums are not rounded to single precision. Slots at or beyond n are zero-filled so the last, partial block still reduces correctly. */ + for (int j = 0; j < THREADS_PER_BLOCK; j++; @inner) { + const unsigned int t = i * THREADS_PER_BLOCK + j; /* unsigned, matching n */ + if (t < n) + s[j] = a[t] * b[t]; + else + s[j] = 0.0; + } + /* Pairwise reduction: each pass adds s[k .. 2k-1] onto s[0 .. k-1]. NOTE(review): this is complete and in bounds only when THREADS_PER_BLOCK is a power of two; confirm against constants.h. */ + for (int k = (THREADS_PER_BLOCK + 1) / 2; k > 0; k /= 2) { + for (int j = 0; j < THREADS_PER_BLOCK; j++; @inner) { + if (j < k) + s[j] += s[j + k]; + } + } + /* Only the j == 0 iteration writes, so temp[i] is stored once per outer iteration. */ + for (int j = 0; j < THREADS_PER_BLOCK; j++; @inner) { + if (j == 0) + temp[i] = s[0]; + } + } +}