From cf88f7cc6a977ea452e7e32f8ca1df9662cecde0 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 19 Feb 2020 16:25:59 +0000 Subject: [PATCH] Update the Specialization Constant proposal using the kernel handler and NTTP. Signed-off-by: Victor Lomuller --- spec-constant/index.md | 293 +++++++++++++-------------- spec-constant/sycl_kernel_handler.md | 3 +- 2 files changed, 143 insertions(+), 153 deletions(-) diff --git a/spec-constant/index.md b/spec-constant/index.md index 748de7f..f604a51 100644 --- a/spec-constant/index.md +++ b/spec-constant/index.md @@ -2,7 +2,8 @@ |-------------|--------| | Name | SYCL Specialization Constant | | Date of Creation | 18 April 2018 | -| Target | SYCL 1.2.1 extension / SYCL 2.2 | +| Target | SYCL 1.2.1 extension | +| Depends on | `kernel_handler` | | Current Status | _Work in progress_ | | Reply-to | Victor Lomüller | | Original author | Victor Lomüller , Toomas Remmelg | @@ -13,8 +14,8 @@ ## Motivation Many applications use runtime known constants to adapt their behaviors to their runtime environment. -Such constants are unknown when the developer compiles the application but will remain invariant through-out the application execution. -This is especially true for highly tuned software that requires information about the hardware on which the the application is running. +Such constants are unknown when the developer compiles the application but will remain invariant throughout the application execution. +This is especially true for highly tuned software that requires information about the hardware on which the application is running. Since OpenCL C kernels are being fully compiled at runtime, those constants are usually expressed as macro and the value is passed to online compiler when the kernel is being compiled. However, SYCL being statically compiled, it is not possible to use this approach. Template based techniques might not be possible or come at the price of code size explosion. @@ -28,60 +29,102 @@ Even if the motivation is derived from a SPIR-V concept, its usage is not limite ## Specialization Constant Overview -The following SYCL program present a specialization constant are expressed. +The following present a minimal SYCL 1.2.1 program using specialization constant written with C++17: ```cpp #include #include class specialized_kernel; -class runtime_const; // Fetch a value at runtime. float get_value(); // Declare a specialization constant id. // The variable `runtime_const` will be used as the id. -cl::sycl::experimental::spec_id runtime_const(42.f); +cl::sycl::specialization_id runtime_const(42.f); int main() { - cl::sycl::queue queue; - cl::sycl::program program(queue.get_context()); - - // Set the value of the specialization constant. - program.set_spec_constant(get_value()); - // Build the program, the value set by set_spec_constant - // will be used as a constant by the underlying JIT - // if it has native support for specialization constant. - program.build_with_kernel_type(); - - std::vector vec(1); - { - cl::sycl::buffer buffer(vec.data(), vec.size()); - - queue.submit([&](cl::sycl::handler& cgh) { - auto acc = cgh.get_access(buffer); - // Retrieve a placeholder object representing the spec constant. - auto my_constant = cgh.get_spec_constant(); - - cgh.single_task( - program.get_kernel(), - [=]() { - acc[0] = my_constant.get(); // This should become a constant. - }); - }); - } + cl::sycl::queue queue; + std::vector vec(1); + + { + cl::sycl::buffer buffer(vec.data(), vec.size()); + + queue.submit([&](cl::sycl::handler &cgh) { + auto acc = cgh.get_access(buffer); + + // Set a runtime as a JIT constant. + // This may force the runtime to compile `specialized_kernel` + // using the value returned by get_value. + cgh.set_specialization_constant(get_value()); + + cgh.single_task([=](cl::sycl::kernel_handler h) { + // The value returned by get_specialization_constant is the value + // returned by get_value(). + acc[0] = h.get_specialization_constant(); + }); + }); + } + return 0; } ``` +The global variable `runtime_const` declare a specialization constant id with the default value `42.0`. +This variable acts as an identifier for SYCL to refer to a constant whose value may only be set at runtime. +In the main function, a SYCL kernel is enqueued. Before it is enqueued, the call `cgh.set_specialization_constant(get_value())` is binding the value returned by `get_value()` to the current module and the specialization id `runtime_const`. +In the kernel, the value return by `h.get_specialization_constant()` will be the value returned by `get_value()` before the enqueue. +If the target natively supports specialization constant, this value will be known by the underlying consumer when it builds the kernel. +Without the call to `set_specialization_constant`, the call `get_specialization_constant` would still be valid and would have return the default value `42.0`. + +The following present an equivalent program as above but in C++11 and with a prebuild kernel: + +```cpp +#include +#include -In this example, the construction of `runtime_const` creates an specialization constant id, the initializer is taken as default value for the spec constant. The call to `set_spec_constant` binds the value returned by the call to `get_value` to the SYCL `program`. -At static compilation time, the value is unknown to the SYCL device compiler, thus cannot be used by the optimizations. -At runtime, `get_value` is evaluated and bond to the SYCL `program`, giving the opportunity for the underlying OpenCL runtime to use it during the kernel build. +class specialized_kernel; +class runtime_const; -Upon submission of the kernel `specialized_kernel`, the call to `get_spec_constant` return a spec_constant object. This object is a placeholder that represent the specialization constant inside the kernel. -The specialization constant `my_constant` is later used inside `specialized_kernel` and the expression `my_constant.get()` returns the value returned by the call to `get_value()`. -If the target natively supports specialization constant, this value will be known by the underlying OpenCL consumer when it builds the kernel. +// Fetch a value at runtime. +float get_value(); + +// Declare a specialization constant id. +// The variable `runtime_const` will be used as the id. +cl::sycl::specialization_id runtime_const(42.f); + +int main() { + cl::sycl::queue queue; + cl::sycl::program program(queue.get_context()); + + // Set the value of the specialization constant. + program.set_specialization_constant(get_value()); + + // Build the program, the value set by set_spec_constant + // will be used as a constant by the underlying JIT + // if it has native support for specialization constant. + program.build_with_kernel_type(); + + std::vector vec(1); + { + cl::sycl::buffer buffer(vec.data(), vec.size()); + + queue.submit([&](cl::sycl::handler &cgh) { + auto acc = cgh.get_access(buffer); + + cgh.single_task( + program.get_kernel(), + [=](cl::sycl::kernel_handler h) { + // This should become a constant. + acc[0] = h.get_specialization_constant(); + }); + }); + } + return 0; +} +``` + +This code is doing the same as previously expect that the kernel is precompiled and compatible with C++11. A more concrete example would be a blocked matrix-matrix multiply. In this version of the algorithm, threads in a work-group collectively load elements from global to local memory and then perform part of the operation on the block. @@ -162,21 +205,25 @@ void mat_multiply(cl::sycl::queue& q, T* MA, T* MB, T* MC, int matSize) { } } ``` -In this example, `blockSize` depends on a runtime feature that is a hardware constant for a given device, thus never changes once known. + +In this example, `blockSize` depends on a runtime feature that is a hardware constant depending on the device, thus never changes once known. The main issue is that this value is treated as a constant variable, and the compiler is unable to use it to perform optimizations like constant propagation or loop unrolling. It can even have an adverse effect as the value will use a register and the compiler may be forced to spill or reload the value multiple times. Using specialization constants, the routine can be rewritten as: + ```cpp -cl::sycl::experimental::spec_id block_size(1); +using namespace cl::sycl; + +specialization_id block_size{1}; template -void mat_multiply(cl::sycl::queue& q, T* MA, T* MB, T* MC, int matSize) { +void mat_multiply(queue& q, T* MA, T* MB, T* MC, int matSize) { auto device = q.get_device(); // Choose a block size based on some information about the device. auto maxBlockSize = - device.get_info(); + device.get_info(); auto blockSizeCst = prevPowerOfTwo(std::sqrt(maxBlockSize)); blockSizeCst = std::min(matSize, blockSize); @@ -196,13 +243,13 @@ void mat_multiply(cl::sycl::queue& q, T* MA, T* MB, T* MC, int matSize) { localRange, cgh); accessor pBB( localRange, cgh); - auto blockSize = cgh.set_spec_constant(blockSizeCst); + cgh.set_spec_constant(blockSizeCst); cgh.parallel_for>( program.get_kernel>(), nd_range<2>{range<2>(matSize, matSize), range<2>(blockSizeCst, blockSizeCst)}, - [=](nd_item<2> it) { + [=](nd_item<2> it, kernel_handler handler) { // Current block int blockX = it.get_group(0); int blockY = it.get_group(1); @@ -211,6 +258,7 @@ void mat_multiply(cl::sycl::queue& q, T* MA, T* MB, T* MC, int matSize) { int localX = it.get_local(0); int localY = it.get_local(1); + std::size_t blockSize = h.get_specialization_constant(); // Start in the A matrix int a_start = matSize * blockSize * blockY; // End in the b matrix @@ -248,80 +296,53 @@ void mat_multiply(cl::sycl::queue& q, T* MA, T* MB, T* MC, int matSize) { } } ``` -In this example, `blockSize` is now a specialization constant holding the value same value as before, meaning that the value is now injected inside the module, allow the OpenCL consumer to use the value in the optimizations (loop unrolling for instance). -Note that the specialization constant ID is independent from the template parameter `T` from which the kernel depends on. This means that all kernel instances will share the same value. +In this example, `blockSize` is now depending on a specialization constant, meaning that the value can be now injected inside the module. The consumer can freely use the value to perform optimizations (loop unrolling for instance). +Note that the specialization constant ID is independent of the template parameter `T` from which the kernel depends on. This means that all kernel instances will share the same value. ## Specialization Constant Representation -Specialization constants are encapsulated into a `cl::sycl::experimental::spec_constant` immutable object which can be passed to a SYCL kernel as a parameter. -This object can only be constructed by the SYCL runtime. -Accessing the value is done either explicitly via a `get` function or an implicit conversion. +Specialization constants are represented by instances of `cl::sycl::specialization_id` in the namespace scope. +Each instance of `cl::sycl::specialization_id` holds a default value that can be set using the constexpr constructor. +Note that the device compiler must be able to evaluate the default value at compile time to establish a valid module. -The `cl::sycl::experimental::spec_constant` interface is defined as follows: +The `cl::sycl::specialization_id` interface is defined as follows: ```cpp namespace cl { namespace sycl { -namespace experimental { template -class spec_id { +class specialization_id { private: // Implementation defined constructor. - spec_id(const spec_id&) = delete; - spec_id(spec_id&&) = delete; -public: - using type = T; + specialization_id(const specialization_id&) = delete; + specialization_id(specialization_id&&) = delete; - // Argument `Args` are forwarded to the underlying T Ctor. - // This allow the user to setup a default value for the spec_id instance. - // The initialization of T must be evaluated at compile time to be valid. - template - explicit constexpr spec_id(Args&&...); -}; - -template & s> -class spec_constant { -private: - // Implementation defined constructor. - spec_constant(/* Implementation defined */); - spec_constant(spec_constant&&) = delete; public: - using type = T; + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); - spec_constant(const spec_constant&) = default; + using type = T; - T get() const; // explicit access. - operator T() const; // implicit conversion. + // Argument `Args` are forwarded to an underlying T Ctor. + // This allow the user to setup a default value for the specialization_id instance. + // The initialization of T must be evaluated at compile time to be valid. + template + explicit constexpr specialization_id(Args&&...); }; -} // namespace experimental } // namespace sycl } // namespace cl ``` -Where `T` is the type of the constant. To be valid, the type `T` must be standard layout and trivially copyable. -The template parameter `ID` is a unique name to designate the specialization constant. -The name follows the same requirement and restrictions as the SYCL kernel names. -It is valid for a program to reuse a kernel name for a specialization constant name and vice versa. - -There is no guarantees about the size of the object, whether or not the constant is stored in memory is left as an implementation detail. - -A `cl::sycl::experimental::spec_constant` object is considered initialized once the result of a `cl::sycl::program::set_spec_constant` is assigned to it. - -Once initialized, `cl::sycl::experimental::spec_constant` objects are immutable, attempts to circumvent this property produces undefined behavior. - -`cl::sycl::experimental::spec_constant` is default constructible, although the object is not considered initialized until the result of the call to `cl::sycl::program::set_spec_constant` is assigned to it. - -Attempts to use an uninitialized `cl::sycl::experimental::spec_constant` produces undefined behavior. +Where `T` is the type of the constant. To be valid, the type `T` must be trivially copyable. +Instances of `cl::sycl::specialization_id` must be forward declarable. ## Building Programs with Specialization Constants -SYCL program requiring a specialization constant value to be built must first set them before building. - -The program interface is extended to include a mechanism to set the constant. +SYCL program using specialization constants can be set before being built. +The `program` interface is extended to include a mechanism to set the constant. ```cpp namespace cl { @@ -331,14 +352,24 @@ class program { // ... public: - /** - * Returns true if the current program can support specialization constants natively. - * - */ - bool native_spec_constant() const noexcept; + /** + * Returns true if the current program can support specialization constants natively. + * + */ + bool has_native_spec_constant() const noexcept; + + /** + * Set a new the value for the specialization constant represented identified by the specialization_id instance. + */ + template &> + void set_specialization_constant(T cst); + + /** + * Set a new the value for the specialization constant represented identified by the specialization_id instance `s`. + */ + template + void set_specialization_constant(typename std::remove_reference_t::type); - template &> - void set_spec_constant(T cst); // ... }; @@ -346,21 +377,11 @@ public: } // namespace cl ``` -The templated member function `set_spec_constant` takes a runtime value of type `T` that will be used to set the specialization constant named `ID`. -Multiple specialization constants can be set for the same program by calling `set_spec_constant` multiple times. -Previously created `cl::sycl::experimental::spec_constant` objects becomes invalids and any usage of invalided objects produce undefined behavior. - -A specialization constant value can be overwritten if the program was not built before by recalling `set_spec_constant` with the same `ID` and the new value. -Although the type `T` of the specialization constant must remain the same. +The templated member function `set_specialization_constant` binds a runtime value of type `T` to `specialization_id` id and the program. +If a value were already set for a given `specialization_id` id then the value is overwritten by the new value. Once all specialization constants are set, the program can be compile/built using program's function `compile_with_kernel_type`/`build_with_kernel_type`. - -If a required specialization constant is not set before calling `compile_with_kernel_type` / `build_with_kernel_type`, a `cl::sycl::experimental::spec_const_error` is thrown and the build of the kernel fails. - -For a same kernel, it is valid to set different specialization constants to different `cl::sycl::program` that builds it. - -After the kernel is built, it is no longer possible to set new specialization constants. -A `cl::sycl::experimental::spec_const_error` exception will be thrown if the user attempt change it after the kernel has been built. +Once the program is in a build state, the specialization constant can no longer be changed for the program and call to `set_specialization_constant` will throw a `cl::sycl::spec_const_error` exception. ## Getting Specialization Constants via the command group handler @@ -376,12 +397,15 @@ public: // Set a value for the specialization constant represented by `s` // and return the associated spec_constant. // Note, this call may require the underlying program to be rebuilt. - template & s> - spec_constant set_spec_constant(T cst); + template & s> + void set_spec_constant(T cst); - // Retrive a spec_constant object representing `s` - template & s> - spec_constant get_spec_constant(); + // Set a value for the specialization constant represented by `s` + // and return the associated spec_constant. + // Note, this call may require the underlying program to be rebuilt. + // Only if compiling with C++17 or above. + template + void set_specialization_constant(typename std::remove_reference_t::type); // ... }; @@ -390,11 +414,6 @@ public: } // namespace cl ``` -The templated member function `get_spec_constant` takes a runtime value of type `T` that will be used to set the specialization constant named `ID`. -Multiple specialization constants can be generated by calling `get_spec_constant` multiple times. - -It is invalid to query multiple times a specialization constant with a common `ID` for the same kernel. - Upon invocation of a `single_task`/`parallel_for`/`parallel_for_work_group` construct, the runtime will build the appropriate kernel if it has never been built for the set of specialization constant passed to the kernel. The SYCL device compiler and runtime are responsible to make sure that it is valid to build the module in which the invoked kernel is defined using only the provided specialization constants. @@ -408,43 +427,15 @@ The following error class is added: ```cpp namespace cl { namespace sycl { -namespace experimental { class spec_const_error : public compile_program_error; -} // namespace experimental } // namespace sycl } // namespace cl ``` This error can be thrown if a specialization constant compilation error occurs. -## OpenCL Interoperability - -In SYCL, specialization constants use typenames to identify them rather than using the SPIR-V/OpenCL numerical identifiers. +## In kernel access to specialization constant -To allow interoperability with OpenCL, uses can use this a special templated type as the SYCL specialization constant identifier to specify the numerical identifier of a specialization constant inside the module: -```cpp -namespace cl { -namespace sycl { -namespace experimental { - -template -struct spec_constant_id { - static constexpr unsigned id = NID; -}; - -} // namespace experimental -} // namespace sycl -} // namespace cl -``` - -The runtime will use the value `NID` provided by the template parameter to set the specialization constant. -If the specified identifier does not exist in the module, a `cl::sycl::experimental::spec_const_error` error is thrown. - -For example: -```cpp - // Create a specialization constant. - auto my_constant = program.set_spec_constant>(get_value()); -``` -In this call, the runtime will bind the value with the specialization constant with the identifier `42`. +Specialization constants are accessed in kernel using the API provided by the `kernel_handler`. diff --git a/spec-constant/sycl_kernel_handler.md b/spec-constant/sycl_kernel_handler.md index 2cb21d4..7b9ea90 100644 --- a/spec-constant/sycl_kernel_handler.md +++ b/spec-constant/sycl_kernel_handler.md @@ -49,8 +49,7 @@ public: T get_specialization_constant(); template - typename std::remove_reference_t::type - get_specialization_constant(const specialization_constant::type, s>&); + typename std::remove_reference_t::type get_specialization_constant(); }; }