ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
fft.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
11 namespace viennacl
12 {
13 namespace linalg
14 {
15 namespace opencl
16 {
17 namespace kernels
18 {
19 
21 
22 
23 // Postprocessing phase of Bluestein algorithm
24 template<typename StringT>
25 void generate_fft_bluestein_post(StringT & source, std::string const & numeric_string)
26 {
27  source.append("__kernel void bluestein_post(__global "); source.append(numeric_string); source.append("2 *Z, \n");
28  source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n");
29  source.append(" unsigned int size) \n");
30  source.append("{ \n");
31  source.append(" unsigned int glb_id = get_global_id(0); \n");
32  source.append(" unsigned int glb_sz = get_global_size(0); \n");
33 
34  source.append(" unsigned int double_size = size << 1; \n");
35  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
36  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");
37 
38  source.append(" for (unsigned int i = glb_id; i < size; i += glb_sz) { \n");
39  source.append(" unsigned int rm = i * i % (double_size); \n");
40  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * (-NUM_PI); \n");
41 
42  source.append(" sn_a = sincos(angle, &cs_a); \n");
43 
44  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
45  source.append(" out[i] = ("); source.append(numeric_string); source.append("2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x); \n");
46  source.append(" } \n");
47  source.append("} \n");
48 }
49 
50 // Preprocessing phase of Bluestein algorithm
51 template<typename StringT>
52 void generate_fft_bluestein_pre(StringT & source, std::string const & numeric_string)
53 {
54  source.append("__kernel void bluestein_pre(__global "); source.append(numeric_string); source.append("2 *input, \n");
55  source.append(" __global "); source.append(numeric_string); source.append("2 *A, \n");
56  source.append(" __global "); source.append(numeric_string); source.append("2 *B, \n");
57  source.append(" unsigned int size, \n");
58  source.append(" unsigned int ext_size \n");
59  source.append(" ) { \n");
60  source.append(" unsigned int glb_id = get_global_id(0); \n");
61  source.append(" unsigned int glb_sz = get_global_size(0); \n");
62 
63  source.append(" unsigned int double_size = size << 1; \n");
64 
65  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
66  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");
67 
68  source.append(" for (unsigned int i = glb_id; i < size; i += glb_sz) { \n");
69  source.append(" unsigned int rm = i * i % (double_size); \n");
70  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * NUM_PI; \n");
71 
72  source.append(" sn_a = sincos(-angle, &cs_a); \n");
73 
74  source.append(" "); source.append(numeric_string); source.append("2 a_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
75  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, -sn_a); \n");
76 
77  source.append(" A[i] = ("); source.append(numeric_string); source.append("2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x); \n");
78  source.append(" B[i] = b_i; \n");
79 
80  // very bad instruction, to be fixed
81  source.append(" if (i) \n");
82  source.append(" B[ext_size - i] = b_i; \n");
83  source.append(" } \n");
84  source.append("} \n");
85 }
86 
/** @brief Appends the OpenCL source of the 'complex_to_real' kernel, which extracts the real part (.x) of each entry of a complex array.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex input type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_complex_to_real(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void complex_to_real(__global " + cplx + " *in, \n";
  code += " __global " + numeric_string + " *out, \n";
  code += " unsigned int size) { \n"
          " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n"
          " out[i] = in[i].x; \n"
          "} \n";

  source.append(code);
}
98 
/** @brief Appends the OpenCL source of the 'fft_div_vec_scalar' kernel, which divides each complex entry of a buffer in place by a real factor.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_div_vec_scalar(StringT & source, std::string const & numeric_string)
{
  std::string code;
  code += "__kernel void fft_div_vec_scalar(__global " + numeric_string + "2 *input1, \n";
  code += " unsigned int size, \n";
  code += " " + numeric_string + " factor) { \n";
  code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n"
          " input1[i] /= factor; \n"
          "} \n";

  source.append(code);
}
110 
/** @brief Appends the OpenCL source of the 'fft_mult_vec' kernel, which computes the elementwise product of two complex vectors.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_mult_vec(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void fft_mult_vec(__global const " + cplx + " *input1, \n";
  code += " __global const " + cplx + " *input2, \n";
  code += " __global " + cplx + " *output, \n";
  code += " unsigned int size) { \n"
          " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  code += " " + cplx + " in1 = input1[i]; \n";
  code += " " + cplx + " in2 = input2[i]; \n";
  // complex product in1 * in2
  code += " output[i] = (" + cplx + ")(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x); \n";
  code += " } \n"
          "} \n";

  source.append(code);
}
127 
/** @brief Appends the OpenCL source of the 'real_to_complex' kernel, which embeds a real-valued vector into a complex one (imaginary parts are zero).
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex output type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_real_to_complex(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void real_to_complex(__global " + numeric_string + " *in, \n";
  code += " __global " + cplx + " *out, \n";
  code += " unsigned int size) { \n"
          " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  code += " " + cplx + " val = 0; \n";
  code += " val.x = in[i]; \n"
          " out[i] = val; \n"
          " } \n"
          "} \n";

  source.append(code);
}
142 
/** @brief Appends the OpenCL source of the 'reverse_inplace' kernel, which reverses the entries of a vector by swapping vec[i] and vec[size - i - 1] for i < size/2.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL element type of the vector (e.g. "float").
*/
template<typename StringT>
void generate_fft_reverse_inplace(StringT & source, std::string const & numeric_string)
{
  std::string code;
  code += "__kernel void reverse_inplace(__global " + numeric_string + " *vec, uint size) { \n";
  code += " for (uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) { \n";
  code += " " + numeric_string + " val1 = vec[i]; \n";
  code += " " + numeric_string + " val2 = vec[size - i - 1]; \n";
  code += " vec[i] = val2; \n"
          " vec[size - i - 1] = val1; \n"
          " } \n"
          "} \n";

  source.append(code);
}
157 
/** @brief Appends the OpenCL source of the 'transpose' kernel, a simplistic out-of-place matrix transpose.
*
* The generated kernel copies input[row * col_num + col] to output[col * row_num + row].
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex element type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_transpose(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void transpose(__global " + cplx + " *input, \n";
  code += " __global " + cplx + " *output, \n";
  code += " unsigned int row_num, \n"
          " unsigned int col_num) { \n"
          " unsigned int size = row_num * col_num; \n"
          " for (unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"
          " unsigned int row = i / col_num; \n"
          " unsigned int col = i - row*col_num; \n"
          " unsigned int new_pos = col * row_num + row; \n"
          " output[new_pos] = input[i]; \n"
          " } \n"
          "} \n";

  source.append(code);
}
177 
/** @brief Appends the OpenCL source of the 'transpose_inplace' kernel, a simplistic in-place matrix transpose.
*
* The generated kernel swaps input[i] with input[col * row_num + row] whenever i is the smaller of the two indices.
* NOTE(review): this pairwise swap looks correct only for square matrices (row_num == col_num) - confirm against callers.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex element type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_transpose_inplace(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void transpose_inplace(__global " + cplx + "* input, \n";
  code += " unsigned int row_num, \n"
          " unsigned int col_num) { \n"
          " unsigned int size = row_num * col_num; \n"
          " for (unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"
          " unsigned int row = i / col_num; \n"
          " unsigned int col = i - row*col_num; \n"
          " unsigned int new_pos = col * row_num + row; \n"
          " if (i < new_pos) { \n";
  code += " " + cplx + " val = input[i]; \n";
  code += " input[i] = input[new_pos]; \n"
          " input[new_pos] = val; \n"
          " } \n"
          " } \n"
          "} \n";

  source.append(code);
}
200 
/** @brief Appends the OpenCL source of the 'vandermonde_prod' kernel, which computes the matrix-vector product with a Vandermonde matrix.
*
* The generated kernel evaluates result[i] = sum_j vander[i]^j * vector[j] by accumulating successive powers of vander[i].
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL element type (e.g. "float").
*/
template<typename StringT>
void generate_fft_vandermonde_prod(StringT & source, std::string const & numeric_string)
{
  std::string const & scalar = numeric_string;

  std::string code;
  code += "__kernel void vandermonde_prod(__global " + scalar + " *vander, \n";
  code += " __global " + scalar + " *vector, \n";
  code += " __global " + scalar + " *result, \n";
  code += " uint size) { \n"
          " for (uint i = get_global_id(0); i < size; i+= get_global_size(0)) { \n";
  code += " " + scalar + " mul = vander[i]; \n";
  code += " " + scalar + " pwr = 1; \n";
  code += " " + scalar + " val = 0; \n";
  code += " for (uint j = 0; j < size; j++) { \n"
          " val = val + pwr * vector[j]; \n"
          " pwr *= mul; \n"
          " } \n"
          " result[i] = val; \n"
          " } \n"
          "} \n";

  source.append(code);
}
223 
/** @brief Appends the OpenCL source of the 'zero2' kernel, which zeroes two complex vectors in a single kernel.
*
* @param source          String-like object providing append(); the kernel text is appended to it.
* @param numeric_string  Name of the OpenCL scalar type (e.g. "float"); the complex type is formed by appending "2".
*/
template<typename StringT>
void generate_fft_zero2(StringT & source, std::string const & numeric_string)
{
  std::string const cplx = numeric_string + "2";

  std::string code;
  code += "__kernel void zero2(__global " + cplx + " *input1, \n";
  code += " __global " + cplx + " *input2, \n";
  code += " unsigned int size) { \n"
          " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n"
          " input1[i] = 0; \n"
          " input2[i] = 0; \n"
          " } \n"
          "} \n";

  source.append(code);
}
237 
239 
240 // main kernel class
242 template<typename NumericT>
243 struct fft
244 {
245  static std::string program_name()
246  {
248  }
249 
250  static void init(viennacl::ocl::context & ctx)
251  {
252  static std::map<cl_context, bool> init_done;
253  if (!init_done[ctx.handle().get()])
254  {
256  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
257 
258  std::string source;
259  source.reserve(8192);
260 
261  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
262 
263  // unary operations
264  if (numeric_string == "float" || numeric_string == "double")
265  {
266  generate_fft_bluestein_post(source, numeric_string);
267  generate_fft_bluestein_pre(source, numeric_string);
268  generate_fft_complex_to_real(source, numeric_string);
269  generate_fft_div_vec_scalar(source, numeric_string);
270  generate_fft_mult_vec(source, numeric_string);
271  generate_fft_real_to_complex(source, numeric_string);
272  generate_fft_reverse_inplace(source, numeric_string);
273  generate_fft_transpose(source, numeric_string);
274  generate_fft_transpose_inplace(source, numeric_string);
275  generate_fft_vandermonde_prod(source, numeric_string);
276  generate_fft_zero2(source, numeric_string);
277  }
278 
279  std::string prog_name = program_name();
280  #ifdef VIENNACL_BUILD_INFO
281  std::cout << "Creating program " << prog_name << std::endl;
282  #endif
283  ctx.add_program(source, prog_name);
284  init_done[ctx.handle().get()] = true;
285  } //if
286  } //init
287 };
288 
289 } // namespace kernels
290 } // namespace opencl
291 } // namespace linalg
292 } // namespace viennacl
293 #endif
294 
Implements an OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
void generate_fft_div_vec_scalar(StringT &source, std::string const &numeric_string)
OpenCL kernel generation code for dividing a complex number by a real number.
Definition: fft.hpp:101
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:243
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
static std::string program_name()
Definition: fft.hpp:245
void generate_fft_bluestein_post(StringT &source, std::string const &numeric_string)
Definition: fft.hpp:25
Provides OpenCL-related utilities.
void generate_fft_reverse_inplace(StringT &source, std::string const &numeric_string)
Reverses the entries in a vector.
Definition: fft.hpp:145
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:613
void generate_fft_complex_to_real(StringT &source, std::string const &numeric_string)
Extract real part of a complex number array.
Definition: fft.hpp:89
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_fft_zero2(StringT &source, std::string const &numeric_string)
Zero two complex vectors (to avoid kernel launch overhead)
Definition: fft.hpp:226
void generate_fft_mult_vec(StringT &source, std::string const &numeric_string)
Elementwise product of two complex vectors.
Definition: fft.hpp:113
void generate_fft_transpose_inplace(StringT &source, std::string const &numeric_string)
Simplistic inplace matrix transpose function.
Definition: fft.hpp:180
void generate_fft_bluestein_pre(StringT &source, std::string const &numeric_string)
Definition: fft.hpp:52
Representation of an OpenCL kernel in ViennaCL.
void generate_fft_vandermonde_prod(StringT &source, std::string const &numeric_string)
Computes the matrix vector product with a Vandermonde matrix.
Definition: fft.hpp:203
void generate_fft_real_to_complex(StringT &source, std::string const &numeric_string)
Embeds a real-valued vector into a complex one.
Definition: fft.hpp:130
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:250
void generate_fft_transpose(StringT &source, std::string const &numeric_string)
Simplistic matrix transpose function.
Definition: fft.hpp:160