Skip to content

Commit 94f16cd

Browse files
authored
Merge pull request #180 from hvdijk/localbuffer
[test] More reviews, rework AddLocalBuffer.
2 parents 53aa9da + ecb615c commit 94f16cd

File tree

11 files changed

+79
-73
lines changed

11 files changed

+79
-73
lines changed

source/cl/test/UnitCL/include/kts/execution.h

+6 -1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,12 @@ struct BaseExecution : ::ucl::CommandQueueTest, SharedExecution {
105105
void AddInOutBuffer(size_t size, Reference1DPtr<T> refIn,
106106
Reference1D<T> refOut);
107107

108-
void AddLocalBuffer(size_t size);
108+
void AddLocalBuffer(size_t nelm, size_t elmsize);
109+
110+
template <typename T>
111+
void AddLocalBuffer(size_t size) {
112+
AddLocalBuffer(size, sizeof(T));
113+
}
109114

110115
template <typename T>
111116
void AddInputImage(const cl_image_format &format, const cl_image_desc &desc,

source/cl/test/UnitCL/source/C11Atomics.cpp

+20 -20
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ class InitTest : public C11AtomicTestBase {
5959
AddInputBuffer(kts::N, random_reference);
6060
AddOutputBuffer(kts::N, random_reference);
6161
if (local) {
62-
AddLocalBuffer(kts::localN * sizeof(T));
62+
AddLocalBuffer<T>(kts::localN);
6363
RunGeneric1D(kts::N, kts::localN);
6464
} else {
6565
RunGeneric1D(kts::N);
@@ -182,7 +182,7 @@ TEST_P(FenceTest, C11Atomics_08_Fence_Local) {
182182
// Set up the buffers.
183183
this->AddInputBuffer(kts::N, kts::Ref_Identity);
184184
this->AddOutputBuffer(kts::N, kts::Ref_Identity);
185-
this->AddLocalBuffer(kts::localN * sizeof(cl_int));
185+
this->AddLocalBuffer<cl_int>(kts::localN);
186186

187187
// Run the test.
188188
this->RunGeneric1D(kts::N, kts::localN);
@@ -206,7 +206,7 @@ class LoadStoreTest : public C11AtomicTestBase {
206206
AddInputBuffer(kts::N, random_reference);
207207
AddOutputBuffer(kts::N, random_reference);
208208
if (local) {
209-
AddLocalBuffer(kts::localN * sizeof(T));
209+
AddLocalBuffer<T>(kts::localN);
210210
RunGeneric1D(kts::N, kts::localN);
211211
} else {
212212
RunGeneric1D(kts::N);
@@ -345,7 +345,7 @@ class ExchangeTest : public C11AtomicTestBase {
345345
// output.
346346
AddOutputBuffer(kts::N, initializer_reference);
347347
if (local) {
348-
AddLocalBuffer(kts::localN * sizeof(T));
348+
AddLocalBuffer<T>(kts::localN);
349349
RunGeneric1D(kts::N, kts::localN);
350350
} else {
351351
RunGeneric1D(kts::N);
@@ -433,7 +433,7 @@ TEST_P(FlagTest, C11Atomics_17_Flag_Local_Clear_Set) {
433433
// The expected output is that the local atomic flags are all unset
434434
// by the kernel.
435435
this->AddOutputBuffer(kts::N, false_reference);
436-
this->AddLocalBuffer(kts::localN * sizeof(cl_bool));
436+
this->AddLocalBuffer<cl_bool>(kts::localN);
437437

438438
// Run the test.
439439
this->RunGeneric1D(kts::N, kts::localN);
@@ -452,7 +452,7 @@ TEST_P(FlagTest, C11Atomics_18_Flag_Local_Set_Twice) {
452452
// The expected output is that the local atomic flags are all set
453453
// by the kernel.
454454
this->AddOutputBuffer(kts::N, true_reference);
455-
this->AddLocalBuffer(kts::localN * sizeof(cl_bool));
455+
this->AddLocalBuffer<cl_bool>(kts::localN);
456456

457457
// Run the test.
458458
this->RunGeneric1D(kts::N, kts::localN);
@@ -515,7 +515,7 @@ class FetchTest : public C11AtomicTestBase {
515515
// The expected output values are the initial values loaded atomically.
516516
AddOutputBuffer(kts::N, random_reference);
517517
if (local) {
518-
AddLocalBuffer(kts::localN * sizeof(T));
518+
AddLocalBuffer<T>(kts::localN);
519519
RunGeneric1D(kts::N, kts::localN);
520520
} else {
521521
RunGeneric1D(kts::N);
@@ -609,7 +609,7 @@ class FetchTest : public C11AtomicTestBase {
609609
AddInputBuffer(kts::N / kts::localN, init_reference);
610610
}
611611

612-
AddLocalBuffer(kts::N / kts::localN);
612+
AddLocalBuffer<T>(kts::localN);
613613

614614
// Run the test.
615615
RunGeneric1D(kts::N, kts::localN);
@@ -1521,10 +1521,10 @@ class FetchTruthTableTest
15211521
this->AddInputBuffer(2, input_reference);
15221522
// Expected output is the result of the binary operation.
15231523
this->AddOutputBuffer(1, output_reference);
1524-
this->AddLocalBuffer(2);
1524+
this->AddLocalBuffer<T>(2);
15251525

15261526
// Run the test.
1527-
this->RunGeneric1D(2);
1527+
this->RunGeneric1D(2, 2);
15281528
}
15291529
};
15301530

@@ -1714,9 +1714,9 @@ class Strong : public C11AtomicTestBase {
17141714
if (!local) {
17151715
RunGeneric1D(kts::N);
17161716
} else {
1717-
AddLocalBuffer(kts::localN);
1717+
AddLocalBuffer<T>(kts::localN);
17181718
if (local_local) {
1719-
AddLocalBuffer(kts::localN);
1719+
AddLocalBuffer<T>(kts::localN);
17201720
}
17211721
RunGeneric1D(kts::N, kts::localN);
17221722
}
@@ -1900,7 +1900,7 @@ class StrongGlobalSingle : public C11AtomicTestBase {
19001900
if (!local) {
19011901
RunGeneric1D(kts::N);
19021902
} else {
1903-
AddLocalBuffer(kts::localN);
1903+
AddLocalBuffer<T>(kts::localN);
19041904

19051905
// Run the test.
19061906
RunGeneric1D(kts::N, kts::localN);
@@ -2059,9 +2059,9 @@ class StrongLocalSingle : public C11AtomicTestBase {
20592059
AddInOutBuffer(kts::N, expected_in_reference, expected_output_reference);
20602060
AddInputBuffer(kts::N, desired_reference);
20612061
AddOutputBuffer(kts::N, bool_output_reference);
2062-
AddLocalBuffer(1);
2062+
AddLocalBuffer<T>(1);
20632063
if (local_local) {
2064-
AddLocalBuffer(kts::localN);
2064+
AddLocalBuffer<T>(kts::localN);
20652065
}
20662066

20672067
// Run the test.
@@ -2208,9 +2208,9 @@ class Weak : public C11AtomicTestBase {
22082208
if (!local) {
22092209
RunGeneric1D(kts::N);
22102210
} else {
2211-
AddLocalBuffer(kts::localN);
2211+
AddLocalBuffer<T>(kts::localN);
22122212
if (local_local) {
2213-
AddLocalBuffer(kts::localN);
2213+
AddLocalBuffer<T>(kts::localN);
22142214
}
22152215
RunGeneric1D(kts::N, kts::localN);
22162216
}
@@ -2401,7 +2401,7 @@ class WeakGlobalSingle : public C11AtomicTestBase {
24012401
if (!local) {
24022402
RunGeneric1D(kts::N);
24032403
} else {
2404-
AddLocalBuffer(kts::localN * sizeof(T));
2404+
AddLocalBuffer<T>(kts::localN);
24052405
RunGeneric1D(kts::N, kts::localN);
24062406
}
24072407
}
@@ -2563,9 +2563,9 @@ class WeakLocalSingle : public C11AtomicTestBase {
25632563
AddInOutBuffer(kts::N, expected_in_reference, expected_output_reference);
25642564
AddInputBuffer(kts::N, desired_reference);
25652565
AddOutputBuffer(kts::N, bool_output_reference);
2566-
AddLocalBuffer(1);
2566+
AddLocalBuffer<T>(1);
25672567
if (local_local) {
2568-
AddLocalBuffer(kts::localN * sizeof(T));
2568+
AddLocalBuffer<T>(kts::localN);
25692569
}
25702570

25712571
// Run the test.

source/cl/test/UnitCL/source/cl_khr_extended_async_copies/extended_async.cpp

+6 -6
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,9 @@ TEST_P(Execution, Ext_Async_01_Simple_2D) {
4444
if (!isSourceTypeIn({OPENCL_C, OFFLINE})) {
4545
GTEST_SKIP();
4646
}
47-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
48-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
49-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
47+
AddLocalBuffer<cl_int>(local_wg_size);
48+
AddLocalBuffer<cl_int>(local_wg_size);
49+
AddLocalBuffer<cl_int>(local_wg_size);
5050
AddInputBuffer(kts::N, vaddInA);
5151
AddInputBuffer(kts::N, vaddInB);
5252
AddOutputBuffer(kts::N, vaddOutC);
@@ -58,9 +58,9 @@ TEST_P(Execution, Ext_Async_02_Simple_3D) {
5858
if (!isSourceTypeIn({OPENCL_C, OFFLINE})) {
5959
GTEST_SKIP();
6060
}
61-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
62-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
63-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
61+
AddLocalBuffer<cl_int>(local_wg_size);
62+
AddLocalBuffer<cl_int>(local_wg_size);
63+
AddLocalBuffer<cl_int>(local_wg_size);
6464
AddInputBuffer(kts::N, vaddInA);
6565
AddInputBuffer(kts::N, vaddInB);
6666
AddOutputBuffer(kts::N, vaddOutC);

source/cl/test/UnitCL/source/kts/execution.cpp

+5 -2
Original file line number | Diff line number | Diff line change
@@ -380,10 +380,13 @@ void kts::ucl::BaseExecution::AddInOutBuffer(BufferDesc &&desc) {
380380
args_->AddInOutBuffer(desc);
381381
}
382382

383-
void kts::ucl::BaseExecution::AddLocalBuffer(size_t size) {
383+
void kts::ucl::BaseExecution::AddLocalBuffer(size_t nelm, size_t elmsize) {
384+
assert(elmsize != 0 && "cannot allocate zero-sized elements");
385+
size_t bytesize = nelm * elmsize;
386+
assert(bytesize / elmsize == nelm && "overflow in size computation");
384387
// UnitCL AddLocalBuffer requires this to be allocated with cargo::alloc.
385388
void *raw = cargo::alloc(sizeof(PointerPrimitive), alignof(PointerPrimitive));
386-
PointerPrimitive *pointer_primitive = new (raw) PointerPrimitive(size);
389+
PointerPrimitive *pointer_primitive = new (raw) PointerPrimitive(bytesize);
387390
args_->AddLocalBuffer(pointer_primitive);
388391
}
389392

source/cl/test/UnitCL/source/ktst_barrier.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -364,7 +364,7 @@ TEST_P(Execution, Barrier_14_Barrier_In_Reduce) {
364364

365365
AddInputBuffer(kts::N, refIn);
366366
AddOutputBuffer(kts::N / kts::localN, refOut);
367-
AddLocalBuffer(kts::localN * sizeof(cl_int));
367+
AddLocalBuffer<cl_int>(kts::localN);
368368
RunGeneric1D(kts::N, kts::localN);
369369
}
370370

@@ -397,7 +397,7 @@ TEST_P(MemFenceTests, Barrier_16_Memory_Fence_Global) {
397397
TEST_P(MemFenceTests, Barrier_16_Memory_Fence_Local) {
398398
AddMacro("FENCE_OP", getParam());
399399
AddInputBuffer(kts::N, kts::Ref_Identity);
400-
AddLocalBuffer(kts::localN);
400+
AddLocalBuffer<cl_int>(kts::localN);
401401
AddOutputBuffer(kts::N, kts::Ref_Identity);
402402
RunGeneric1D(kts::N, kts::localN);
403403
}

source/cl/test/UnitCL/source/ktst_dma.cpp

+27 -29
Original file line number | Diff line number | Diff line change
@@ -124,27 +124,27 @@ TEST_P(Execution, Dma_01_Direct) {
124124
}
125125

126126
TEST_P(Execution, Dma_02_Explicit_Copy) {
127-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
128-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
127+
AddLocalBuffer<cl_int>(local_wg_size);
128+
AddLocalBuffer<cl_int>(local_wg_size);
129129
AddInputBuffer(kts::N, vaddInA);
130130
AddInputBuffer(kts::N, vaddInB);
131131
AddOutputBuffer(kts::N, vaddOutC);
132132
RunGeneric1D(kts::N, local_wg_size);
133133
}
134134

135135
TEST_P(Execution, Dma_03_Explicit_Copy_Rotate) {
136-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
137-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
136+
AddLocalBuffer<cl_int>(local_wg_size);
137+
AddLocalBuffer<cl_int>(local_wg_size);
138138
AddInputBuffer(kts::N, vaddInA);
139139
AddInputBuffer(kts::N, vaddInB);
140140
AddOutputBuffer(kts::N, vaddOutC);
141141
RunGeneric1D(kts::N, local_wg_size);
142142
}
143143

144144
TEST_P(Execution, Dma_04_async_copy) {
145-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
146-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
147-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
145+
AddLocalBuffer<cl_int>(local_wg_size);
146+
AddLocalBuffer<cl_int>(local_wg_size);
147+
AddLocalBuffer<cl_int>(local_wg_size);
148148
AddInputBuffer(kts::N, vaddInA);
149149
AddInputBuffer(kts::N, vaddInB);
150150
AddOutputBuffer(kts::N, vaddOutC);
@@ -156,12 +156,12 @@ TEST_P(Execution, Dma_04_async_copy) {
156156
// size of the local buffers.
157157
TEST_P(Execution, Dma_05_async_double_buffer) {
158158
const cl_int iterations = 16;
159-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
160-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
161-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
162-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
163-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
164-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
159+
AddLocalBuffer<cl_int>(local_wg_size);
160+
AddLocalBuffer<cl_int>(local_wg_size);
161+
AddLocalBuffer<cl_int>(local_wg_size);
162+
AddLocalBuffer<cl_int>(local_wg_size);
163+
AddLocalBuffer<cl_int>(local_wg_size);
164+
AddLocalBuffer<cl_int>(local_wg_size);
165165
AddInputBuffer(kts::N * iterations, vaddInA);
166166
AddInputBuffer(kts::N * iterations, vaddInB);
167167
AddOutputBuffer(kts::N * iterations, vaddOutC);
@@ -267,10 +267,9 @@ TEST_P(AsyncCopyTests, Dma_10_half_async_copy) {
267267
const auto param = getParam();
268268
AddMacro("TYPE", param.type_str);
269269

270-
const size_t local_buffer_len = local_wg_size * param.type_size;
271-
AddLocalBuffer(local_buffer_len);
272-
AddLocalBuffer(local_buffer_len);
273-
AddLocalBuffer(local_buffer_len);
270+
AddLocalBuffer(local_wg_size, param.type_size);
271+
AddLocalBuffer(local_wg_size, param.type_size);
272+
AddLocalBuffer(local_wg_size, param.type_size);
274273

275274
if (3 == param.vec_width) {
276275
AddInputBuffer(kts::N, makeHalf3Streamer(HalfTypeParam::InA));
@@ -299,10 +298,9 @@ TEST_P(AsyncCopyTests, Dma_11_half_async_strided_copy) {
299298
const auto param = getParam();
300299
AddMacro("TYPE", param.type_str);
301300

302-
const size_t local_buffer_len = local_wg_size * param.type_size;
303-
AddLocalBuffer(local_buffer_len);
304-
AddLocalBuffer(local_buffer_len);
305-
AddLocalBuffer(local_buffer_len);
301+
AddLocalBuffer(local_wg_size, param.type_size);
302+
AddLocalBuffer(local_wg_size, param.type_size);
303+
AddLocalBuffer(local_wg_size, param.type_size);
306304

307305
if (3 == param.vec_width) {
308306
AddInputBuffer(kts::N * 2, makeHalf3Streamer(HalfTypeParam::InA));
@@ -376,8 +374,8 @@ TEST_P(Execution, Dma_13_wait_event_is_barrier) {
376374
(((x % local_wg_size) + 1) % local_wg_size));
377375
};
378376

379-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
380-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
377+
AddLocalBuffer<cl_int>(local_wg_size);
378+
AddLocalBuffer<cl_int>(local_wg_size);
381379
AddInputBuffer(kts::N, vaddInA);
382380
AddInputBuffer(kts::N, vaddInB);
383381
AddOutputBuffer(kts::N, rotateB);
@@ -389,7 +387,7 @@ TEST_P(Execution, Dma_14_wait_event_is_barrier_overwrite) {
389387
return vaddInA(x) + 1;
390388
};
391389

392-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
390+
AddLocalBuffer<cl_int>(local_wg_size);
393391
AddInputBuffer(kts::N, vaddInA);
394392
AddOutputBuffer(kts::N, vaddInAPlusOne);
395393
RunGeneric1D(kts::N, local_wg_size);
@@ -408,10 +406,10 @@ TEST_P(Execution, DISABLED_Dma_15_wait_event_is_execution_barrier) {
408406
(((x % local_wg_size) + 1) % local_wg_size));
409407
};
410408

411-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
412-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
413-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
414-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
409+
AddLocalBuffer<cl_int>(local_wg_size);
410+
AddLocalBuffer<cl_int>(local_wg_size);
411+
AddLocalBuffer<cl_int>(local_wg_size);
412+
AddLocalBuffer<cl_int>(local_wg_size);
415413
AddInputBuffer(kts::N, vaddInA);
416414
AddInputBuffer(kts::N, vaddInB);
417415
AddOutputBuffer(kts::N, rotateA);
@@ -424,7 +422,7 @@ TEST_P(Execution, Dma_16_wait_event_is_barrier_strided) {
424422
return vaddInA(x) + 1;
425423
};
426424

427-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
425+
AddLocalBuffer<cl_int>(local_wg_size);
428426
AddInputBuffer(kts::N, vaddInA);
429427
AddOutputBuffer(kts::N, vaddInAPlusOne);
430428
RunGeneric1D(kts::N, local_wg_size);

source/cl/test/UnitCL/source/ktst_regression_03.cpp

+8 -8
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ TEST_P(Execution, Regression_56_Local_Vec_Mem) {
130130

131131
// Only want one thread
132132
AddOutputBuffer(1, refOut);
133-
AddLocalBuffer(sizeof(cl_float4));
133+
AddLocalBuffer<cl_float4>(1);
134134
AddInputBuffer(1, refIn);
135135
RunGeneric1D(1, 1);
136136
}
@@ -322,7 +322,7 @@ TEST_P(Execution, Regression_61_Sycl_Barrier) {
322322
};
323323

324324
AddInOutBuffer(kts::N, kts::Ref_Identity, refOut);
325-
AddLocalBuffer(sizeof(cl_int) * 2);
325+
AddLocalBuffer<cl_int>(2);
326326
RunGeneric1D(kts::N, 2);
327327
}
328328

@@ -334,13 +334,13 @@ TEST_P(Execution, Regression_62_Sycl_Barrier) {
334334
};
335335

336336
AddInOutBuffer(kts::N, kts::Ref_Identity, refOut);
337-
AddLocalBuffer(sizeof(cl_int) * 2);
337+
AddLocalBuffer<cl_int>(2);
338338
AddInOutBuffer(kts::N, kts::Ref_Identity, refOut);
339-
AddLocalBuffer(sizeof(cl_int) * 2);
339+
AddLocalBuffer<cl_int>(2);
340340
AddInOutBuffer(kts::N, kts::Ref_Identity, refOut);
341-
AddLocalBuffer(sizeof(cl_int) * 2);
341+
AddLocalBuffer<cl_int>(2);
342342
AddInOutBuffer(kts::N, kts::Ref_Identity, refOut);
343-
AddLocalBuffer(sizeof(cl_int) * 2);
343+
AddLocalBuffer<cl_int>(2);
344344
RunGeneric1D(kts::N, 2);
345345
}
346346

@@ -373,8 +373,8 @@ TEST_P(MultipleLocalDimensionsTests,
373373

374374
kts::Reference1D<cl_int> refOut = [](size_t) { return 1; };
375375

376-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
377-
AddLocalBuffer(local_wg_size * sizeof(cl_int));
376+
AddLocalBuffer<cl_int>(local_wg_size);
377+
AddLocalBuffer<cl_int>(local_wg_size);
378378
AddInputBuffer(kts::N, refIn);
379379
AddInputBuffer(kts::N, refIn);
380380
AddOutputBuffer(kts::N, refOut);

0 commit comments

Comments
 (0)