Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ rocm_enable_cppcheck(
# Disable because of too many FPs
arithOperationsOnVoidPointer
definePrefix:*test/include/test.hpp
definePrefix:*src/targets/gpu/kernels/include/migraphx/kernels/test.hpp
ctuOneDefinitionRuleViolation:*test/*
useSmartPointer:*src/api/api.cpp
useSmartPointer:*make_shared_array.hpp
Expand Down
23 changes: 13 additions & 10 deletions src/targets/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,26 @@ target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CUR
target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes)
migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device)

add_library(kernel_file_check EXCLUDE_FROM_ALL)
add_library(compile_migraphx_gpu_kernels INTERFACE)
target_compile_definitions(compile_migraphx_gpu_kernels INTERFACE -DMIGRAPHX_NLOCAL=256)
target_compile_definitions(compile_migraphx_gpu_kernels INTERFACE -DMIGRAPHX_WAVEFRONTSIZE=64)
target_include_directories(compile_migraphx_gpu_kernels INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(compile_migraphx_gpu_kernels INTERFACE compile_for_gpu)
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
target_link_libraries(compile_migraphx_gpu_kernels composable_kernel::jit_library)
endif()

add_library(migraphx_gpu_kernel_file_check EXCLUDE_FROM_ALL)

foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
target_sources(migraphx_gpu_kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()

target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256)
target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_WAVEFRONTSIZE=64)
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu)
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
target_link_libraries(kernel_file_check composable_kernel::jit_library)
endif()
target_link_libraries(migraphx_gpu_kernel_file_check compile_migraphx_gpu_kernels)

rocm_clang_tidy_check(kernel_file_check)
rocm_clang_tidy_check(migraphx_gpu_kernel_file_check)

file(GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)

Expand Down
27 changes: 17 additions & 10 deletions src/targets/gpu/compile_hip_code_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,11 @@ std::size_t compute_block_size(const context& ctx, std::size_t n, std::size_t ma
return std::min(std::max(min_block_size, block_size), max_block_size);
}

operation
compile_hip_code_object(context& ctx, const std::string& content, hip_compile_options options)
std::vector<char>
compile_hip_raw(context& ctx, const std::string& content, hip_compile_options options)
{
assert(options.global > 0);
assert(options.local > 0);
assert(not options.inputs.empty());
assert(options.inputs.size() == options.virtual_inputs.size() or
options.virtual_inputs.empty());
std::vector<src_file> srcs = options.additional_src_files;
static auto kernels{::migraphx_kernels()};
std::transform(
Expand All @@ -185,9 +182,6 @@ compile_hip_code_object(context& ctx, const std::string& content, hip_compile_op
std::back_inserter(srcs),
[](const std::pair<std::string_view, std::string_view>& elem) { return src_file{elem}; });
srcs.emplace_back("main.cpp", content);
auto args_hpp =
generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
srcs.emplace_back("args.hpp", args_hpp);

if(options.global % options.local != 0 and hip_accept_non_uniform_wg())
options.emplace_param("-fno-offload-uniform-block");
Expand All @@ -202,10 +196,23 @@ compile_hip_code_object(context& ctx, const std::string& content, hip_compile_op
options.params.insert(options.params.end(), warnings.begin(), warnings.end());
options.emplace_param("-ftemplate-backtrace-limit=0");
options.emplace_param("-Werror");
auto cos = compile_hip_src(srcs, options.params, get_device_name());
auto cos = compile_hip_src(srcs, options.params, ctx.get_current_device().get_device_name());
if(cos.size() != 1)
MIGRAPHX_THROW("No code object");
return code_object_op{value::binary{cos.front()},
return cos.front();
}

operation
compile_hip_code_object(context& ctx, const std::string& content, hip_compile_options options)
{
assert(not options.inputs.empty());
assert(options.inputs.size() == options.virtual_inputs.size() or
options.virtual_inputs.empty());
auto args_hpp =
generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
options.additional_src_files.emplace_back("args.hpp", args_hpp);

return code_object_op{value::binary{compile_hip_raw(ctx, content, std::move(options))},
options.kernel_name,
options.global,
options.local,
Expand Down
2 changes: 1 addition & 1 deletion src/targets/gpu/hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ static std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return result;
}

static std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host = false)
std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host)
{
gpu_sync();
auto result = allocate_gpu(sz, host);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ struct hip_compile_options
MIGRAPHX_GPU_EXPORT std::function<std::size_t(std::size_t local)>
compute_global_for(const context& ctx, std::size_t n, std::size_t over = 1);

MIGRAPHX_GPU_EXPORT std::vector<char>
compile_hip_raw(context& ctx, const std::string& content, hip_compile_options options);

MIGRAPHX_GPU_EXPORT operation compile_hip_code_object(context& ctx,
const std::string& content,
hip_compile_options options);
Expand Down
9 changes: 9 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string&

MIGRAPHX_GPU_EXPORT void gpu_fill(context& ctx, const argument& dst, int value = 0);

MIGRAPHX_GPU_EXPORT std::shared_ptr<void>
write_to_gpu(const void* x, std::size_t sz, bool host = false);

template <class T>
std::shared_ptr<T> write_to_gpu(const T& x, bool host = false)
{
return std::static_pointer_cast<T>(write_to_gpu(&x, sizeof(T), host));
}

struct hip_allocate
{
shape s;
Expand Down
2 changes: 2 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ struct MIGRAPHX_GPU_EXPORT kernel
{
}

bool empty() const;

void launch(hipStream_t stream,
std::size_t global,
std::size_t local,
Expand Down
2 changes: 2 additions & 0 deletions src/targets/gpu/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ kernel::kernel(const char* image, const std::string& name) : impl(std::make_shar
MIGRAPHX_THROW("Failed to get function: " + name + ": " + hip_error(status));
}

bool kernel::empty() const { return impl == nullptr; }

static void launch_kernel(hipFunction_t fun,
hipStream_t stream,
std::size_t global,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value, Co

while(count > 0)
{
auto it = first;
auto* it = first;
auto step = count / 2;
it += step;

Expand Down
11 changes: 5 additions & 6 deletions src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/type_traits.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/debug.hpp>

Expand Down Expand Up @@ -122,21 +123,20 @@ template <class T, index_int N>
struct array
{
using value_type = T;
T d[N];
T d[N] = {{0}};

constexpr array() = default;

template <class... Ts,
MIGRAPHX_REQUIRES(sizeof...(Ts) == N and (is_convertible<Ts, T>{} and ...))>
constexpr array(Ts... xs) : d{xs...}
constexpr array(Ts... xs) : d{static_cast<value_type>(xs)...}
Copy link
Preview

Copilot AI Sep 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit cast to value_type in the variadic constructor could cause silent truncation or precision loss. Consider using a concept or SFINAE to ensure safe conversions, or document the potential for data loss in conversions.

Copilot uses AI. Check for mistakes.

{
}

// Broadcast constructor: initialize every element of the array from the
// single value x. Enabled only when U converts to T and N > 1, and marked
// explicit so a scalar is never implicitly treated as an array.
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{} and (N > 1))>
constexpr explicit array(U x)
{
    // Uses fill (presumably from kernels/algorithm.hpp) rather than a
    // hand-rolled index loop.
    fill(this->begin(), this->end(), x);
}

constexpr T& operator[](index_int i)
Expand Down Expand Up @@ -195,8 +195,7 @@ struct array
constexpr auto apply(F f) const
{
array<decltype(f(d[0])), N> result;
for(index_int i = 0; i < N; i++)
result[i] = f(d[i]);
transform(this->begin(), this->end(), result.begin(), f);
return result;
}

Expand Down
62 changes: 53 additions & 9 deletions src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,60 @@ struct shape : equality_comparable<shape<Lens, Strides>>
}
// Compile-time predicate: true when no dimension of this shape has a
// stride of 1 -- i.e. linear iteration presumably "skips" over memory
// instead of touching contiguous elements. Returns an integral-constant
// type (false_type or the result of return_c), not a runtime bool.
constexpr auto skips() const
{
    // elements() is a compile-time constant; a one-element shape can
    // never skip, so short-circuit to an unconditional compile-time false.
    if constexpr(decltype(this->elements()){} == 1)
    {
        return false_type{};
    }
    else
    {
        // return_c lifts the lambda's result into a compile-time constant;
        // the check is: none of the strides equals 1.
        return return_c([] {
            auto lstrides = Strides{};
            return none_of(lstrides.begin(), lstrides.end(), [](auto x) { return x == 1; });
        });
    }
}

constexpr auto standard() const { return packed() and not transposed(); }
// Compile-time predicate: true when this shape is "standard", i.e. its
// strides are exactly the packed row-major strides implied by its lengths
// (ignoring dimensions of length 1). Returns an integral-constant type.
constexpr auto standard() const
{
    if constexpr(decltype(this->elements()){} == 1)
    {
        // A single-element shape is standard regardless of its strides.
        return true_type{};
    }
    else
    {
        return return_c([] {
            // NOTE(review): `this` is referenced inside a capture-less
            // lambda (decltype(this->elements())) -- confirm all supported
            // compilers accept this in an unevaluated operand.
            constexpr auto n = decltype(this->elements()){};
            // Fold state: `expected` is the stride the next dimension must
            // have; it starts at the total element count and is divided
            // down as each length is consumed.
            struct state
            {
                bool ok = true;
                index_int expected = decltype(n){};
            };
            // For each (len, stride) pair in outer-to-inner order:
            // length-1 dims are ignored; otherwise len must evenly divide
            // the remaining element count, and the stride must equal
            // expected / len -- the packed row-major stride. Any failure
            // latches ok = false for the rest of the fold.
            auto reduce = [](state acc, array<index_int, 2> x) -> state {
                index_int len = x[0];
                index_int stride = x[1];
                if(not acc.ok)
                    return acc;
                if(len == 1)
                    return acc;
                if(acc.expected % len != 0)
                    return {false};
                acc.expected /= len;
                if(stride != acc.expected)
                    return {false};
                return acc;
            };
            auto ldims = Lens{};
            auto lstrides = Strides{};
            // inner_product zips lens with strides (pairing each via
            // make_array) and folds the pairs through `reduce`.
            return inner_product(ldims.begin(),
                                 ldims.end(),
                                 lstrides.begin(),
                                 state{},
                                 reduce,
                                 MIGRAPHX_LIFT(make_array))
                .ok;
        });
    }
}

constexpr index_int index(index_array x) const { return x.dot(strides); }

Expand All @@ -85,10 +132,7 @@ struct shape : equality_comparable<shape<Lens, Strides>>
MIGRAPHX_ASSERT(i >= elements() or i == compute_index(i));
return i;
}
else
{
return compute_index(i);
}
return compute_index(i);
}

constexpr index_int compute_index(index_int i) const
Expand Down
Loading
Loading