Skip to content

Commit

Permalink
Merge pull request #172 from hvdijk/atomic64
Browse files Browse the repository at this point in the history
[compiler] Implement 64-bit atomics.
  • Loading branch information
hvdijk authored Oct 23, 2023
2 parents 5f1030f + 8cc21d7 commit e8b065c
Show file tree
Hide file tree
Showing 830 changed files with 42,030 additions and 6,924 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Upgrade guidance:
* 0.78.0: to introduce mux builtins for sub-group, work-group, and
vector-group operations.
* 0.79.0: to introduce mux builtins for sub-group shuffle operations.
* 0.80.0: to introduce support for 64-bit atomic operations.
* The `compiler::ImageArgumentSubstitutionPass` now replaces sampler typed
parameters in kernel functions with i32 parameters via a wrapper function.
The `host` target as a consequence now passes samplers to kernels as 32-bit
Expand Down
5 changes: 5 additions & 0 deletions doc/modules/mux/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ version increases mean backward compatible bug fixes have been applied.
Versions prior to 1.0.0 may contain breaking changes in minor
versions as the API is still under development.

0.80.0
------

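* Added ``atomic_capabilities`` to the device information, a bitfield of
  ``mux_atomic_capabilities_e`` reporting support for atomic operations such
  as 64-bit atomics.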

0.79.0
------

Expand Down
2 changes: 1 addition & 1 deletion doc/specifications/mux-compiler-spec.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ComputeMux Compiler Specification
=================================

This is version 0.79.0 of the specification.
This is version 0.80.0 of the specification.

ComputeMux is Codeplay’s proprietary API for executing compute workloads across
heterogeneous devices. ComputeMux is an extremely lightweight,
Expand Down
5 changes: 4 additions & 1 deletion doc/specifications/mux-runtime-spec.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ComputeMux Runtime Specification
================================

This is version 0.79.0 of the specification.
This is version 0.80.0 of the specification.

ComputeMux is Codeplay’s proprietary API for executing compute workloads across
heterogeneous devices. ComputeMux is an extremely lightweight,
Expand Down Expand Up @@ -186,6 +186,7 @@ in the system, but it can still be useful as a target for compilation.
uint32_t allocation_capabilities;
uint32_t source_capabilities;
uint32_t address_capabilities;
uint32_t atomic_capabilities;
uint32_t cache_capabilities;
uint32_t half_capabilities;
uint32_t float_capabilities;
Expand Down Expand Up @@ -243,6 +244,8 @@ in the system, but it can still be useful as a target for compilation.
``mux_source_capabilities_e``.
- ``address_capabilities`` - a bitfield of
``mux_address_capabilities_e``.
- ``atomic_capabilities`` - a bitfield of
``mux_atomic_capabilities_e``.
- ``cache_capabilities`` - a bitfield of ``mux_cache_capabilities_e``.
- ``half_capabilities`` - half floating point support, a bitfield of
``mux_floating_point_capabilities_e``.
Expand Down
721 changes: 721 additions & 0 deletions modules/compiler/builtins/include/builtins/builtins-3.0.h

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@

type_pairs = {
'atomic_int': 'int',
'atomic_long': 'long',
'atomic_uint': 'uint',
'atomic_ulong': 'ulong',
'atomic_float': 'float',
'atomic_double': 'double',
}

address_spaces = ['__local', '__global']
Expand Down
22 changes: 16 additions & 6 deletions modules/compiler/builtins/scripts/generate_header_30.sh
Original file line number Diff line number Diff line change
Expand Up @@ -377,53 +377,63 @@ function all_atomic()
echo ""
for k in __local __global ""
do
for i in int uint float
for i in int long uint ulong float double
do
double_support_begin $i
echo "void __CL_BUILTIN_ATTRIBUTES atomic_init(volatile $k atomic_$i *obj, $i value);"
double_support_end $i
done
done
echo "void __CL_BUILTIN_ATTRIBUTES atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);"

for k in __local __global ""
do
for i in int uint float
for i in int long uint ulong float double
do
double_support_begin $i
echo "void __CL_BUILTIN_ATTRIBUTES atomic_store_explicit(volatile $k atomic_$i *object, $i desired, memory_order order, memory_scope scope);"
double_support_end $i
done
done

for k in __local __global ""
do
for i in int uint float
for i in int long uint ulong float double
do
double_support_begin $i
echo "$i __CL_BUILTIN_ATTRIBUTES atomic_load_explicit(volatile $k atomic_$i *object, memory_order order, memory_scope scope);"
double_support_end $i
done
done

for k in __local __global ""
do
for i in int uint float
for i in int long uint ulong float double
do
double_support_begin $i
echo "$i __CL_BUILTIN_ATTRIBUTES atomic_exchange_explicit(volatile $k atomic_$i *object, $i desired, memory_order order, memory_scope scope);"
double_support_end $i
done
done

for name in atomic_compare_exchange_strong_explicit atomic_compare_exchange_weak_explicit
do
for k1 in __local __global ""
do
for i in int uint float
for i in int long uint ulong float double
do
double_support_begin $i
for k2 in __local __global __private ""
do
echo "bool __CL_BUILTIN_ATTRIBUTES $name(volatile $k1 atomic_$i *object, $k2 $i *expected, $i desired, memory_order success, memory_order failure);"
echo "bool __CL_BUILTIN_ATTRIBUTES $name(volatile $k1 atomic_$i *object, $k2 $i *expected, $i desired, memory_order success, memory_order failure, memory_scope scope);"
done
double_support_end $i
done
done
done

for t in int uint
for t in int long uint ulong
do
for op in add sub or xor and min max
do
Expand Down
12 changes: 7 additions & 5 deletions modules/compiler/source/base/source/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -975,11 +975,13 @@ void BaseModule::addDefaultOpenCLPreprocessorOpts(
}

// Disable `cl_khr_int64_base_atomics` and `cl_khr_int64_extended_atomics`
// until we support them. (CA-518)
addOpenCLOpt("-cl_khr_int64_base_atomics", opencl_opts);
addMacroUndef("cl_khr_int64_base_atomics", macro_defs);
addOpenCLOpt("-cl_khr_int64_extended_atomics", opencl_opts);
addMacroUndef("cl_khr_int64_extended_atomics", macro_defs);
// unless supported by the device.
if (!(device_info->atomic_capabilities & mux_atomic_capabilities_64bit)) {
addOpenCLOpt("-cl_khr_int64_base_atomics", opencl_opts);
addMacroUndef("cl_khr_int64_base_atomics", macro_defs);
addOpenCLOpt("-cl_khr_int64_extended_atomics", opencl_opts);
addMacroUndef("cl_khr_int64_extended_atomics", macro_defs);
}

if (options.standard == Standard::OpenCLC30) {
// work-group collective functions are an optional feature in OpenCL 3.0.
Expand Down
Loading

0 comments on commit e8b065c

Please sign in to comment.