Skip to content
This repository was archived by the owner on Jan 16, 2019. It is now read-only.

Commit 61ecfca

Browse files
committed
got FFTs working up until 1024, can execute the rest on the CPU
1 parent 611e1ab commit 61ecfca

File tree

6 files changed

+293
-114
lines changed

6 files changed

+293
-114
lines changed

oclFFT/FFT.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,6 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
116116
}
117117
cout << endl;
118118

119-
int m = 1;
120-
121119
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDev, CL_FALSE, 0, sizeof(cl_float2) * n, cl_buf, 0, NULL, NULL);
122120
if (ciErr != CL_SUCCESS)
123121
{

oclFFT/FFT.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class FFT
1616
/* Computes Discrete Fourier Transform of given buffer. */
1717
std::vector<Complex> transform(const std::vector<Complex>& buf);
1818
void transformGPU(const std::vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev,
19-
cl_mem cmPointsPerGroup, cl_mem cmDir, cl_mem cmDebug, cl_kernel ckKernel, size_t szGlobalWorkSize, size_t szLocalWorkSize, unsigned int points_per_group,
19+
cl_mem cmPointsPerGroup, cl_mem cmDebug, cl_mem cmDir, cl_kernel ckKernel, size_t szGlobalWorkSize, size_t szLocalWorkSize, unsigned int points_per_group,
2020
cl_command_queue cqCommandQueue, cl_int ciErr, int argc, const char **argv);
2121
static double getIntensity(Complex c);
2222
static double getPhase(Complex c);

oclSoundFreq/FFT.cpp

+94-80
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
6060

6161
int m = 1;
6262

63-
start_t = getcputime();;
63+
start_t = getcputime();
6464
for (int s = 0; s < lgN; ++s)
6565
{
6666
m <<= 1;
@@ -78,11 +78,11 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
7878
current_omega *= omega[s];
7979
}
8080
}
81-
// for(int i = 0; i < n; i++)
82-
// {
83-
// cout << "Index " << i << ": (after) (s:" << s << ") " << real(result[i]) << " " << imag(result[i]) << endl;
84-
// }
85-
// cout << endl;
81+
for(int i = 0; i < n; i++)
82+
{
83+
cout << "Index " << i << ": (after) (s:" << s << ") " << real(result[i]) << " " << imag(result[i]) << endl;
84+
}
85+
cout << endl;
8686

8787
}
8888

@@ -105,47 +105,52 @@ std::vector<FFT::Complex> FFT::transform(const vector<Complex>& buf)
105105
return result;
106106
}
107107

108-
void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev, cl_mem cmDebug, cl_mem cmInv, cl_kernel ckKernel, size_t szGlobalWorkSize,
108+
void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev,
109+
cl_mem cmPointsPerGroup, cl_mem cmDebug, cl_mem cmDir, cl_kernel ckKernel, size_t szGlobalWorkSize, size_t szLocalWorkSize, unsigned int points_per_group,
109110
cl_command_queue cqCommandQueue, cl_int ciErr, int argc, const char **argv)
110111
{
111-
size_t szLocalWorkSize;
112-
int inv_i = (inverse) ? 1 : 0;
113-
void * inv = (void *)&inv_i;
112+
int dir_i = (inverse) ? -1 : 1;
113+
void * dir = (void *)&dir_i;
114+
void * pts_per_grp_p = (void *)&points_per_group;
114115
bitReverseCopy(buf, result);
115-
cl_double2 * cl_double2_buf = (cl_double2 *)cl_buf;
116-
cl_double2 * cl_int_debug_buf = (cl_double2 *)cl_debug_buf;
116+
cl_float2 * cl_float2_buf = (cl_float2 *)cl_buf;
117+
cl_float2 * cl_float2_debug_buf = (cl_float2 *)cl_debug_buf;
117118
for(int i = 0; i < n; i++)
118119
{
119-
cl_double2_buf[i].x = (float)real(result[i]);
120-
cl_double2_buf[i].y = (float)imag(result[i]);
121-
cl_int_debug_buf[i].x = -1.0;
122-
cl_int_debug_buf[i].y = -1.0;
120+
cl_float2_buf[i].x = (float)real(result[i]);
121+
cl_float2_buf[i].y = (float)imag(result[i]);
122+
cl_float2_debug_buf[i].x = -1.0;
123+
cl_float2_debug_buf[i].y = -1.0;
123124
}
124125

125126
for(int i = 0; i < n; i++)
126127
{
127-
cout << "Index " << i << ": (before) " << cl_double2_buf[i].x << " " << cl_double2_buf[i].y << endl;
128+
cout << "Index " << i << ": (before) " << cl_float2_buf[i].x << " " << cl_float2_buf[i].y << endl;
128129
}
129130
cout << endl;
130131

131-
int m = 1;
132+
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDev, CL_FALSE, 0, sizeof(cl_float2) * n, cl_buf, 0, NULL, NULL);
133+
if (ciErr != CL_SUCCESS)
134+
{
135+
shrLog("Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
136+
Cleanup(argc, (char **)argv, EXIT_FAILURE);
137+
}
132138

133-
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDev, CL_FALSE, 0, sizeof(cl_double2) * n, cl_buf, 0, NULL, NULL);
139+
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmPointsPerGroup, CL_FALSE, 0, sizeof(cl_uint), pts_per_grp_p, 0, NULL, NULL);
134140
if (ciErr != CL_SUCCESS)
135141
{
136142
shrLog("Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
137143
Cleanup(argc, (char **)argv, EXIT_FAILURE);
138144
}
139145

140-
141-
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDebug, CL_FALSE, 0, sizeof(cl_double2) * n, cl_debug_buf, 0, NULL, NULL);
146+
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDebug, CL_FALSE, 0, sizeof(cl_float2) * n, cl_debug_buf, 0, NULL, NULL);
142147
if (ciErr != CL_SUCCESS)
143148
{
144149
shrLog("Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
145150
Cleanup(argc, (char **)argv, EXIT_FAILURE);
146151
}
147152

148-
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmInv, CL_FALSE, 0, sizeof(cl_int), inv, 0, NULL, NULL);
153+
ciErr = clEnqueueWriteBuffer(cqCommandQueue, cmDir, CL_FALSE, 0, sizeof(cl_int), dir, 0, NULL, NULL);
149154
if (ciErr != CL_SUCCESS)
150155
{
151156
shrLog("Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
@@ -154,54 +159,63 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
154159

155160

156161

157-
for(int s = 0; s < lgN; ++s)
158-
{
159-
m <<= 1;
160-
szLocalWorkSize = m >> 1;
162+
// for(int s = 0; s < lgN; ++s)
163+
// {
164+
// m <<= 1;
165+
// szLocalWorkSize = m >> 1;
161166

162-
// cout << "Enqueue with Global Work Size " << szGlobalWorkSize << " and Local Work Size " << szLocalWorkSize << endl;
163-
if(s == 0)
164-
{
167+
cout << "Enqueue with Global Work Size " << szGlobalWorkSize << " and Local Work Size " << szLocalWorkSize << endl;
168+
// if(s == 0)
169+
// {
165170
// Launch kernel
166-
ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &start_event);
171+
// ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &start_event);
172+
start_t = getcputime();
173+
174+
ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
167175
if (ciErr != CL_SUCCESS)
168176
{
169177
shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
170178
shrLog("Error is %s\n", oclErrorString(ciErr));
171179
Cleanup(argc, (char **)argv, EXIT_FAILURE);
172180
}
173-
}
174-
else if(s == (lgN-1))
175-
{
181+
// }
182+
// else if(s == (lgN-1))
183+
// {
176184
// Launch kernel
177-
ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &end_event);
178-
if (ciErr != CL_SUCCESS)
179-
{
180-
shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
181-
shrLog("Error is %s\n", oclErrorString(ciErr));
182-
Cleanup(argc, (char **)argv, EXIT_FAILURE);
183-
}
184-
}
185-
else
186-
{
185+
// ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, &end_event);
186+
// if (ciErr != CL_SUCCESS)
187+
// {
188+
// shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
189+
// shrLog("Error is %s\n", oclErrorString(ciErr));
190+
// Cleanup(argc, (char **)argv, EXIT_FAILURE);
191+
// }
192+
// }
193+
// else
194+
// {
187195
// Launch kernel
188-
ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
189-
if (ciErr != CL_SUCCESS)
190-
{
191-
shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
192-
shrLog("Error is %s\n", oclErrorString(ciErr));
193-
Cleanup(argc, (char **)argv, EXIT_FAILURE);
194-
}
195-
}
196+
// ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
197+
// if (ciErr != CL_SUCCESS)
198+
// {
199+
// shrLog("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
200+
// shrLog("Error is %s\n", oclErrorString(ciErr));
201+
// Cleanup(argc, (char **)argv, EXIT_FAILURE);
202+
// }
203+
// }
196204

197205
clFinish(cqCommandQueue);
198206

199-
// ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDebug, CL_TRUE, 0, sizeof(cl_float2) * n, cl_debug_buf, 0, NULL, NULL);
200-
// if (ciErr != CL_SUCCESS)
201-
// {
202-
// shrLog("Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
203-
// Cleanup(argc, (char **)argv, EXIT_FAILURE);
204-
// }
207+
end_t = getcputime();
208+
clock_diff = end_t - start_t;
209+
shrLog("CPU transform start microseconds\t %5.2f \n", start_t);
210+
shrLog("CPU transform end microseconds\t %5.2f \n", end_t);
211+
shrLog("CPU transform diff microseconds\t %5.2f \n", clock_diff);
212+
213+
ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDebug, CL_TRUE, 0, sizeof(cl_float2) * n, cl_debug_buf, 0, NULL, NULL);
214+
if (ciErr != CL_SUCCESS)
215+
{
216+
shrLog("Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
217+
Cleanup(argc, (char **)argv, EXIT_FAILURE);
218+
}
205219

206220
// ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDev, CL_TRUE, 0, sizeof(cl_float2) * n, cl_buf, 0, NULL, NULL);
207221
// if (ciErr != CL_SUCCESS)
@@ -210,25 +224,25 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
210224
// Cleanup(argc, (char **)argv, EXIT_FAILURE);
211225
// }
212226

213-
// for(int i = 0; i < n; i++)
214-
// {
215-
// cout << "Index " << i << ": (after) (s:" << s << ") " << cl_float2_buf[i].x << " " << cl_float2_buf[i].y << " with omega = (" << cl_int_debug_buf[i].x << "," <<
216-
// cl_int_debug_buf[i].y << ")" << endl;
217-
// cl_int_debug_buf[i].x = -1;
218-
// cl_int_debug_buf[i].y = -1;
219-
// }
220-
// cout << endl;
227+
for(int i = 0; i < n; i++)
228+
{
229+
cout << "Index " << i << " (" << cl_float2_debug_buf[i].x << "," <<
230+
cl_float2_debug_buf[i].y << ")" << endl;
231+
cl_float2_debug_buf[i].x = -1;
232+
cl_float2_debug_buf[i].y = -1;
233+
}
234+
cout << endl;
221235

222-
}
236+
// }
223237

224-
clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
225-
sizeof(start_time), &start_time, NULL);
226-
clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
227-
sizeof(end_time), &end_time, NULL);
228-
total_time = (double)(end_time - start_time) / 1e3; // convert from nanoseconds to microseconds
229-
shrLog("GPU transform time\t %5.2f microseconds \n", total_time);
238+
// clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
239+
// sizeof(start_time), &start_time, NULL);
240+
// clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
241+
// sizeof(end_time), &end_time, NULL);
242+
// total_time = (double)(end_time - start_time) / 1e3; // convert from nanoseconds to microseconds
243+
// shrLog("GPU transform time\t %5.2f microseconds \n", total_time);
230244

231-
ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDev, CL_TRUE, 0, sizeof(cl_double2) * n, cl_buf, 0, NULL, NULL);
245+
ciErr = clEnqueueReadBuffer(cqCommandQueue, cmDev, CL_TRUE, 0, sizeof(cl_float2) * n, cl_buf, 0, NULL, NULL);
232246
if (ciErr != CL_SUCCESS)
233247
{
234248
shrLog("Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
@@ -239,27 +253,27 @@ void FFT::transformGPU(const vector<Complex>& buf, void * cl_buf, void * cl_debu
239253
{
240254
for(int i = 0; i < n; ++i)
241255
{
242-
cl_double2_buf[i].x = cl_double2_buf[i].x / n;
243-
cl_double2_buf[i].y = cl_double2_buf[i].y / n;
256+
cl_float2_buf[i].x = cl_float2_buf[i].x / n;
257+
cl_float2_buf[i].y = cl_float2_buf[i].y / n;
244258
}
245259
}
246260

247261
for(int i = 0; i < n; i++)
248262
{
249-
cout << "Index " << i << ": (after) " << cl_double2_buf[i].x << " " << cl_double2_buf[i].y << endl;
263+
cout << "Index " << i << ": (after) " << cl_float2_buf[i].x << " " << cl_float2_buf[i].y << endl;
250264
}
251265
cout << endl;
252266

253-
clReleaseEvent(start_event);
254-
clReleaseEvent(end_event);
267+
// clReleaseEvent(start_event);
268+
// clReleaseEvent(end_event);
255269
}
256270

257-
double FFT::getIntensity(Complex c)
271+
float FFT::getIntensity(Complex c)
258272
{
259273
return abs(c);
260274
}
261275

262-
double FFT::getPhase(Complex c)
276+
float FFT::getPhase(Complex c)
263277
{
264278
return arg(c);
265279
}

oclSoundFreq/FFT.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,17 @@
99
class FFT
1010
{
1111
public:
12-
typedef std::complex<double> Complex;
12+
typedef std::complex<float> Complex;
1313

1414
/* Initializes FFT. n must be a power of 2. */
1515
FFT(int n, bool inverse = false);
1616
/* Computes Discrete Fourier Transform of given buffer. */
1717
std::vector<Complex> transform(const std::vector<Complex>& buf);
18-
void transformGPU(const std::vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev, cl_mem cmInv, cl_mem cmDebug, cl_kernel ckKernel, size_t szGlobalWorkSize,
18+
void transformGPU(const std::vector<Complex>& buf, void * cl_buf, void * cl_debug_buf, cl_mem cmDev,
19+
cl_mem cmPointsPerGroup, cl_mem cmDebug, cl_mem cmDir, cl_kernel ckKernel, size_t szGlobalWorkSize, size_t szLocalWorkSize, unsigned int points_per_group,
1920
cl_command_queue cqCommandQueue, cl_int ciErr, int argc, const char **argv);
20-
static double getIntensity(Complex c);
21-
static double getPhase(Complex c);
21+
static float getIntensity(Complex c);
22+
static float getPhase(Complex c);
2223

2324
private:
2425
int n, lgN;

0 commit comments

Comments
 (0)