@@ -60,7 +60,7 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
60
60
61
61
int m = 1 ;
62
62
63
- start_t = getcputime ();;
63
+ start_t = getcputime ();
64
64
for (int s = 0 ; s < lgN; ++s)
65
65
{
66
66
m <<= 1 ;
@@ -78,11 +78,11 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
78
78
current_omega *= omega[s];
79
79
}
80
80
}
81
- // for(int i = 0; i < n; i++)
82
- // {
83
- // cout << "Index " << i << ": (after) (s:" << s << ") " << real(result[i]) << " " << imag(result[i]) << endl;
84
- // }
85
- // cout << endl;
81
+ for (int i = 0 ; i < n; i++)
82
+ {
83
+ cout << " Index " << i << " : (after) (s:" << s << " ) " << real (result[i]) << " " << imag (result[i]) << endl;
84
+ }
85
+ cout << endl;
86
86
87
87
}
88
88
@@ -105,47 +105,52 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
105
105
return result;
106
106
}
107
107
108
- void FFT::transformGPU (const vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev, cl_mem cmDebug, cl_mem cmInv, cl_kernel ckKernel, size_t szGlobalWorkSize,
108
+ void FFT::transformGPU (const vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev,
109
+ cl_mem cmPointsPerGroup, cl_mem cmDebug, cl_mem cmDir, cl_kernel ckKernel, size_t szGlobalWorkSize, size_t szLocalWorkSize, unsigned int points_per_group,
109
110
cl_command_queue cqCommandQueue, cl_int ciErr, int argc, const char **argv)
110
111
{
111
- size_t szLocalWorkSize ;
112
- int inv_i = (inverse) ? 1 : 0 ;
113
- void * inv = (void *)&inv_i ;
112
+ int dir_i = (inverse) ? - 1 : 1 ;
113
+ void * dir = (void *)&dir_i ;
114
+ void * pts_per_grp_p = (void *)&points_per_group ;
114
115
bitReverseCopy (buf, result);
115
- cl_double2 * cl_double2_buf = (cl_double2 *)cl_buf;
116
- cl_double2 * cl_int_debug_buf = (cl_double2 *)cl_debug_buf;
116
+ cl_float2 * cl_float2_buf = (cl_float2 *)cl_buf;
117
+ cl_float2 * cl_float2_debug_buf = (cl_float2 *)cl_debug_buf;
117
118
for (int i = 0 ; i < n; i++)
118
119
{
119
- cl_double2_buf [i].x = (float )real (result[i]);
120
- cl_double2_buf [i].y = (float )imag (result[i]);
121
- cl_int_debug_buf [i].x = -1.0 ;
122
- cl_int_debug_buf [i].y = -1.0 ;
120
+ cl_float2_buf [i].x = (float )real (result[i]);
121
+ cl_float2_buf [i].y = (float )imag (result[i]);
122
+ cl_float2_debug_buf [i].x = -1.0 ;
123
+ cl_float2_debug_buf [i].y = -1.0 ;
123
124
}
124
125
125
126
for (int i = 0 ; i < n; i++)
126
127
{
127
- cout << " Index " << i << " : (before) " << cl_double2_buf [i].x << " " << cl_double2_buf [i].y << endl;
128
+ cout << " Index " << i << " : (before) " << cl_float2_buf [i].x << " " << cl_float2_buf [i].y << endl;
128
129
}
129
130
cout << endl;
130
131
131
- int m = 1 ;
132
+ ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmDev, CL_FALSE, 0 , sizeof (cl_float2) * n, cl_buf, 0 , NULL , NULL );
133
+ if (ciErr != CL_SUCCESS)
134
+ {
135
+ shrLog (" Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
136
+ Cleanup (argc, (char **)argv, EXIT_FAILURE);
137
+ }
132
138
133
- ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmDev , CL_FALSE, 0 , sizeof (cl_double2) * n, cl_buf , 0 , NULL , NULL );
139
+ ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmPointsPerGroup , CL_FALSE, 0 , sizeof (cl_uint), pts_per_grp_p , 0 , NULL , NULL );
134
140
if (ciErr != CL_SUCCESS)
135
141
{
136
142
shrLog (" Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
137
143
Cleanup (argc, (char **)argv, EXIT_FAILURE);
138
144
}
139
145
140
-
141
- ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmDebug, CL_FALSE, 0 , sizeof (cl_double2) * n, cl_debug_buf, 0 , NULL , NULL );
146
+ ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmDebug, CL_FALSE, 0 , sizeof (cl_float2) * n, cl_debug_buf, 0 , NULL , NULL );
142
147
if (ciErr != CL_SUCCESS)
143
148
{
144
149
shrLog (" Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
145
150
Cleanup (argc, (char **)argv, EXIT_FAILURE);
146
151
}
147
152
148
- ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmInv , CL_FALSE, 0 , sizeof (cl_int), inv , 0 , NULL , NULL );
153
+ ciErr = clEnqueueWriteBuffer (cqCommandQueue, cmDir , CL_FALSE, 0 , sizeof (cl_int), dir , 0 , NULL , NULL );
149
154
if (ciErr != CL_SUCCESS)
150
155
{
151
156
shrLog (" Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
@@ -154,54 +159,63 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
154
159
155
160
156
161
157
- for (int s = 0 ; s < lgN; ++s)
158
- {
159
- m <<= 1 ;
160
- szLocalWorkSize = m >> 1 ;
162
+ // for(int s = 0; s < lgN; ++s)
163
+ // {
164
+ // m <<= 1;
165
+ // szLocalWorkSize = m >> 1;
161
166
162
- // cout << "Enqueue with Global Work Size " << szGlobalWorkSize << " and Local Work Size " << szLocalWorkSize << endl;
163
- if (s == 0 )
164
- {
167
+ cout << " Enqueue with Global Work Size " << szGlobalWorkSize << " and Local Work Size " << szLocalWorkSize << endl;
168
+ // if(s == 0)
169
+ // {
165
170
// Launch kernel
166
- ciErr = clEnqueueNDRangeKernel (cqCommandQueue, ckKernel, 1 , NULL , &szGlobalWorkSize, &szLocalWorkSize, 0 , NULL , &start_event);
171
+ // ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &start_event);
172
+ start_t = getcputime ();
173
+
174
+ ciErr = clEnqueueNDRangeKernel (cqCommandQueue, ckKernel, 1 , NULL , &szGlobalWorkSize, &szLocalWorkSize, 0 , NULL , NULL );
167
175
if (ciErr != CL_SUCCESS)
168
176
{
169
177
shrLog (" Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
170
178
shrLog (" Error is %s\n " , oclErrorString (ciErr));
171
179
Cleanup (argc, (char **)argv, EXIT_FAILURE);
172
180
}
173
- }
174
- else if (s == (lgN-1 ))
175
- {
181
+ // }
182
+ // else if(s == (lgN-1))
183
+ // {
176
184
// Launch kernel
177
- ciErr = clEnqueueNDRangeKernel (cqCommandQueue, ckKernel, 1 , NULL , &szGlobalWorkSize, &szLocalWorkSize, 0 , NULL , &end_event);
178
- if (ciErr != CL_SUCCESS)
179
- {
180
- shrLog (" Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
181
- shrLog (" Error is %s\n " , oclErrorString (ciErr));
182
- Cleanup (argc, (char **)argv, EXIT_FAILURE);
183
- }
184
- }
185
- else
186
- {
185
+ // ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &end_event);
186
+ // if (ciErr != CL_SUCCESS)
187
+ // {
188
+ // shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
189
+ // shrLog("Error is %s\n", oclErrorString(ciErr));
190
+ // Cleanup(argc, (char **)argv, EXIT_FAILURE);
191
+ // }
192
+ // }
193
+ // else
194
+ // {
187
195
// Launch kernel
188
- ciErr = clEnqueueNDRangeKernel (cqCommandQueue, ckKernel, 1 , NULL , &szGlobalWorkSize, &szLocalWorkSize, 0 , NULL , NULL );
189
- if (ciErr != CL_SUCCESS)
190
- {
191
- shrLog (" Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
192
- shrLog (" Error is %s\n " , oclErrorString (ciErr));
193
- Cleanup (argc, (char **)argv, EXIT_FAILURE);
194
- }
195
- }
196
+ // ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
197
+ // if (ciErr != CL_SUCCESS)
198
+ // {
199
+ // shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
200
+ // shrLog("Error is %s\n", oclErrorString(ciErr));
201
+ // Cleanup(argc, (char **)argv, EXIT_FAILURE);
202
+ // }
203
+ // }
196
204
197
205
clFinish (cqCommandQueue);
198
206
199
- // ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDebug, CL_TRUE, 0, sizeof(cl_float2) * n, cl_debug_buf, 0, NULL, NULL);
200
- // if (ciErr != CL_SUCCESS)
201
- // {
202
- // shrLog("Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
203
- // Cleanup(argc, (char **)argv, EXIT_FAILURE);
204
- // }
207
+ end_t = getcputime ();
208
+ clock_diff = end_t - start_t ;
209
+ shrLog (" CPU transform start microseconds\t %5.2f \n " , start_t );
210
+ shrLog (" CPU transform end microseconds\t %5.2f \n " , end_t );
211
+ shrLog (" CPU transform diff microseconds\t %5.2f \n " , clock_diff);
212
+
213
+ ciErr = clEnqueueReadBuffer (cqCommandQueue, cmDebug, CL_TRUE, 0 , sizeof (cl_float2) * n, cl_debug_buf, 0 , NULL , NULL );
214
+ if (ciErr != CL_SUCCESS)
215
+ {
216
+ shrLog (" Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
217
+ Cleanup (argc, (char **)argv, EXIT_FAILURE);
218
+ }
205
219
206
220
// ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDev, CL_TRUE, 0, sizeof(cl_float2) * n, cl_buf, 0, NULL, NULL);
207
221
// if (ciErr != CL_SUCCESS)
@@ -210,25 +224,25 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
210
224
// Cleanup(argc, (char **)argv, EXIT_FAILURE);
211
225
// }
212
226
213
- // for(int i = 0; i < n; i++)
214
- // {
215
- // cout << "Index " << i << ": (after) (s: " << s << ") " << cl_float2_buf[i].x << " " << cl_float2_buf[i].y << " with omega = (" << cl_int_debug_buf [i].x << "," <<
216
- // cl_int_debug_buf [i].y << ")" << endl;
217
- // cl_int_debug_buf [i].x = -1;
218
- // cl_int_debug_buf [i].y = -1;
219
- // }
220
- // cout << endl;
227
+ for (int i = 0 ; i < n; i++)
228
+ {
229
+ cout << " Index " << i << " ( " << cl_float2_debug_buf [i].x << " ," <<
230
+ cl_float2_debug_buf [i].y << " )" << endl;
231
+ cl_float2_debug_buf [i].x = -1 ;
232
+ cl_float2_debug_buf [i].y = -1 ;
233
+ }
234
+ cout << endl;
221
235
222
- }
236
+ // }
223
237
224
- clGetEventProfilingInfo (start_event, CL_PROFILING_COMMAND_START,
225
- sizeof (start_time), &start_time, NULL );
226
- clGetEventProfilingInfo (end_event, CL_PROFILING_COMMAND_END,
227
- sizeof (end_time), &end_time, NULL );
228
- total_time = (double )(end_time - start_time) / 1e3 ; // convert from nanoseconds to microseconds
229
- shrLog (" GPU transform time\t %5.2f microseconds \n " , total_time);
238
+ // clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
239
+ // sizeof(start_time), &start_time, NULL);
240
+ // clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
241
+ // sizeof(end_time), &end_time, NULL);
242
+ // total_time = (double)(end_time - start_time) / 1e3; // convert from nanoseconds to microseconds
243
+ // shrLog("GPU transform time\t %5.2f microseconds \n", total_time);
230
244
231
- ciErr = clEnqueueReadBuffer (cqCommandQueue, cmDev, CL_TRUE, 0 , sizeof (cl_double2 ) * n, cl_buf, 0 , NULL , NULL );
245
+ ciErr = clEnqueueReadBuffer (cqCommandQueue, cmDev, CL_TRUE, 0 , sizeof (cl_float2 ) * n, cl_buf, 0 , NULL , NULL );
232
246
if (ciErr != CL_SUCCESS)
233
247
{
234
248
shrLog (" Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n " , __LINE__, __FILE__);
@@ -239,27 +253,27 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
239
253
{
240
254
for (int i = 0 ; i < n; ++i)
241
255
{
242
- cl_double2_buf [i].x = cl_double2_buf [i].x / n;
243
- cl_double2_buf [i].y = cl_double2_buf [i].y / n;
256
+ cl_float2_buf [i].x = cl_float2_buf [i].x / n;
257
+ cl_float2_buf [i].y = cl_float2_buf [i].y / n;
244
258
}
245
259
}
246
260
247
261
for (int i = 0 ; i < n; i++)
248
262
{
249
- cout << " Index " << i << " : (after) " << cl_double2_buf [i].x << " " << cl_double2_buf [i].y << endl;
263
+ cout << " Index " << i << " : (after) " << cl_float2_buf [i].x << " " << cl_float2_buf [i].y << endl;
250
264
}
251
265
cout << endl;
252
266
253
- clReleaseEvent (start_event);
254
- clReleaseEvent (end_event);
267
+ // clReleaseEvent(start_event);
268
+ // clReleaseEvent(end_event);
255
269
}
256
270
257
// FFT::getIntensity: returns abs(c) for the given Complex value.
// The removed (-) signature returned double; the added (+) signature returns float.
// NOTE(review): presumably Complex is a std::complex type, making abs(c) its magnitude — confirm against the class header.
- double FFT::getIntensity (Complex c)
271
+ float FFT::getIntensity (Complex c)
258
272
{
259
273
// The result of abs(c) is converted to the declared return type on return.
return abs (c);
260
274
}
261
275
262
// FFT::getPhase: returns arg(c) for the given Complex value.
// The removed (-) signature returned double; the added (+) signature returns float.
// NOTE(review): presumably Complex is a std::complex type, making arg(c) the phase angle in radians — confirm against the class header.
- double FFT::getPhase (Complex c)
276
+ float FFT::getPhase (Complex c)
263
277
{
264
278
// The result of arg(c) is converted to the declared return type on return.
return arg (c);
265
279
}
0 commit comments