@@ -157,70 +157,12 @@ class __SYCL_EXPORT SubmissionInfo {
157157
158158} // namespace v1
159159
160- template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
160+ template <detail::WrapAs WrapAs, typename LambdaArgType,
161+ typename KernelName = detail::auto_name, bool EventNeeded = false ,
161162 typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
162- auto submit_kernel_direct_parallel_for (
163- const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
164- KernelTypeUniversalRef &&KernelFunc,
165- const detail::code_location &CodeLoc = detail::code_location::current()) {
166- // TODO Properties not supported yet
167- (void )Props;
168- static_assert (
169- std::is_same_v<PropertiesT,
170- ext::oneapi::experimental::empty_properties_t >,
171- " Setting properties not supported yet for no-CGH kernel submit." );
172- detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
173-
174- using KernelType =
175- std::remove_const_t <std::remove_reference_t <KernelTypeUniversalRef>>;
176-
177- using NameT =
178- typename detail::get_kernel_name_t <KernelName, KernelType>::name;
179- using LambdaArgType =
180- sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
181- static_assert (
182- std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
183- " Kernel argument of a sycl::parallel_for with sycl::nd_range "
184- " must be either sycl::nd_item or be convertible from sycl::nd_item" );
185- using TransformedArgType = sycl::nd_item<Dims>;
186-
187- #ifndef __SYCL_DEVICE_ONLY__
188- detail::checkValueRange<Dims>(Range);
189- #endif
190-
191- detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
192- TransformedArgType, PropertiesT>::wrap (KernelFunc);
193-
194- HostKernelRef<KernelType, KernelTypeUniversalRef, TransformedArgType, Dims>
195- HostKernel (std::forward<KernelTypeUniversalRef>(KernelFunc));
196-
197- // Instantiating the kernel on the host improves debugging.
198- // Passing this pointer to another translation unit prevents optimization.
199- #ifndef NDEBUG
200- // TODO: call library to prevent dropping call due to optimization
201- (void )
202- detail::GetInstantiateKernelOnHostPtr<KernelType, LambdaArgType, Dims>();
203- #endif
204-
205- detail::DeviceKernelInfo *DeviceKernelInfoPtr =
206- &detail::getDeviceKernelInfo<NameT>();
207-
208- if constexpr (EventNeeded) {
209- return submit_kernel_direct_with_event_impl (
210- Queue, Range, HostKernel, DeviceKernelInfoPtr,
211- TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
212- } else {
213- submit_kernel_direct_without_event_impl (
214- Queue, Range, HostKernel, DeviceKernelInfoPtr,
215- TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
216- }
217- }
218-
219- template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
220- typename PropertiesT, typename KernelTypeUniversalRef>
221- auto submit_kernel_direct_single_task (
163+ auto submit_kernel_direct (
222164 const queue &Queue, [[maybe_unused]] PropertiesT Props,
223- KernelTypeUniversalRef &&KernelFunc,
165+ const nd_range<Dims> &Range, KernelTypeUniversalRef &&KernelFunc,
224166 const detail::code_location &CodeLoc = detail::code_location::current()) {
225167 // TODO Properties not supported yet
226168 static_assert (
@@ -235,17 +177,18 @@ auto submit_kernel_direct_single_task(
235177 using NameT =
236178 typename detail::get_kernel_name_t <KernelName, KernelType>::name;
237179
238- detail::KernelWrapper<detail:: WrapAs::single_task , NameT, KernelType, void ,
180+ detail::KernelWrapper<WrapAs, NameT, KernelType, LambdaArgType ,
239181 PropertiesT>::wrap (KernelFunc);
240182
241- HostKernelRef<KernelType, KernelTypeUniversalRef, void , 1 > HostKernel (
242- std::forward<KernelTypeUniversalRef>(KernelFunc));
183+ HostKernelRef<KernelType, KernelTypeUniversalRef, LambdaArgType, Dims>
184+ HostKernel ( std::forward<KernelTypeUniversalRef>(KernelFunc));
243185
244186 // Instantiating the kernel on the host improves debugging.
245187 // Passing this pointer to another translation unit prevents optimization.
246188#ifndef NDEBUG
247189 // TODO: call library to prevent dropping call due to optimization.
248- (void )detail::GetInstantiateKernelOnHostPtr<KernelType, void , 1 >();
190+ (void )
191+ detail::GetInstantiateKernelOnHostPtr<KernelType, LambdaArgType, Dims>();
249192#endif
250193
251194 detail::DeviceKernelInfo *DeviceKernelInfoPtr =
@@ -269,15 +212,57 @@ auto submit_kernel_direct_single_task(
269212
270213 if constexpr (EventNeeded) {
271214 return submit_kernel_direct_with_event_impl (
272- Queue, nd_range< 1 >{ 1 , 1 } , HostKernel, DeviceKernelInfoPtr,
215+ Queue, Range , HostKernel, DeviceKernelInfoPtr,
273216 TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
274217 } else {
275218 submit_kernel_direct_without_event_impl (
276- Queue, nd_range< 1 >{ 1 , 1 } , HostKernel, DeviceKernelInfoPtr,
219+ Queue, Range , HostKernel, DeviceKernelInfoPtr,
277220 TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
278221 }
279222}
280223
// Entry point for submitting an nd_range parallel_for kernel directly to a
// queue. It performs the parallel_for-specific checks and then delegates the
// actual submission to the shared submit_kernel_direct helper.
//
// Template parameters:
//   KernelName             - kernel name type (defaults to detail::auto_name).
//   EventNeeded            - forwarded to submit_kernel_direct unchanged.
//   PropertiesT            - type of the Props argument.
//   KernelTypeUniversalRef - deduced type of the forwarded kernel callable.
//   Dims                   - dimensionality of the nd_range.
// Returns whatever submit_kernel_direct returns for this instantiation.
224+ template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
225+ typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
226+ auto submit_kernel_direct_parallel_for (
227+ const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
228+ KernelTypeUniversalRef &&KernelFunc,
229+ const detail::code_location &CodeLoc = detail::code_location::current()) {
230+
// Kernel callable type with the reference and const qualifiers stripped.
231+ using KernelType =
232+ std::remove_const_t <std::remove_reference_t <KernelTypeUniversalRef>>;
233+
// LambdaArgType is only used by the static_assert below; the type handed to
// submit_kernel_direct is TransformedArgType (always nd_item<Dims>).
234+ using LambdaArgType =
235+ sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
236+ static_assert (
237+ std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
238+ " Kernel argument of a sycl::parallel_for with sycl::nd_range "
239+ " must be either sycl::nd_item or be convertible from sycl::nd_item" );
240+ using TransformedArgType = sycl::nd_item<Dims>;
241+
// The range check is compiled out when __SYCL_DEVICE_ONLY__ is defined.
// Presumably checkValueRange validates the sizes in Range - confirm against
// its definition.
242+ #ifndef __SYCL_DEVICE_ONLY__
243+ detail::checkValueRange<Dims>(Range);
244+ #endif
245+
// All template arguments are spelled out explicitly, so KernelFunc is passed
// through std::forward to keep its original value category.
// NOTE(review): the pre-refactor body passed LambdaArgType to
// GetInstantiateKernelOnHostPtr; through this call the shared helper receives
// nd_item<Dims> there instead - confirm this change is intended.
246+ return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
247+ KernelName, EventNeeded, PropertiesT,
248+ KernelTypeUniversalRef, Dims>(
249+ Queue, Props, Range, std::forward<KernelTypeUniversalRef>(KernelFunc),
250+ CodeLoc);
251+ }
252+
// Entry point for submitting a single_task kernel directly to a queue.
// This is a thin wrapper: it delegates to the shared submit_kernel_direct
// helper with WrapAs::single_task, a void kernel argument type, Dims fixed
// to 1 and a one-element nd_range<1>{1, 1}.
//
// Template parameters:
//   KernelName             - kernel name type (defaults to detail::auto_name).
//   EventNeeded            - forwarded to submit_kernel_direct unchanged.
//   PropertiesT            - type of the Props argument.
//   KernelTypeUniversalRef - deduced type of the forwarded kernel callable.
// Returns whatever submit_kernel_direct returns for this instantiation.
253+ template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
254+ typename PropertiesT, typename KernelTypeUniversalRef>
255+ auto submit_kernel_direct_single_task (
256+ const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
257+ const detail::code_location &CodeLoc = detail::code_location::current()) {
258+
// Template arguments are given explicitly; KernelFunc goes through
// std::forward so its original value category is preserved.
259+ return submit_kernel_direct<detail::WrapAs::single_task, void , KernelName,
260+ EventNeeded, PropertiesT, KernelTypeUniversalRef,
261+ 1 >(
262+ Queue, Props, nd_range<1 >{1 , 1 },
263+ std::forward<KernelTypeUniversalRef>(KernelFunc), CodeLoc);
264+ }
265+
281266} // namespace detail
282267
283268namespace ext ::oneapi ::experimental {
0 commit comments