ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fft_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 //TODO OpenMP conditions
26 #include <viennacl/vector.hpp>
27 #include <viennacl/matrix.hpp>
28 
30 
31 #include <stdexcept>
32 #include <cmath>
33 #include <complex>
34 
35 namespace viennacl
36 {
37 namespace linalg
38 {
39 namespace host_based
40 {
41 namespace detail
42 {
43  namespace fft
44  {
46 
47  namespace FFT_DATA_ORDER
48  {
50  {
52  };
53  }
54 
56  {
57  vcl_size_t bits_datasize = 0;
58  vcl_size_t ds = 1;
59 
60  while (ds < size)
61  {
62  ds = ds << 1;
63  bits_datasize++;
64  }
65 
66  return bits_datasize;
67  }
68 
70  {
71  n = n - 1;
72 
73  vcl_size_t power = 1;
74 
75  while (power < sizeof(vcl_size_t) * 8)
76  {
77  n = n | (n >> power);
78  power *= 2;
79  }
80 
81  return n + 1;
82  }
83 
85  {
86  v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
87  v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
88  v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
89  v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
90  v = (v >> 16) | (v << 16);
91  v = v >> (32 - bit_size);
92  return v;
93  }
94 
95  template<typename NumericT, unsigned int AlignmentV>
96  void copy_to_complex_array(std::complex<NumericT> * input_complex,
98  {
99 #ifdef VIENNACL_WITH_OPENMP
100  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
101 #endif
102  for (vcl_size_t i = 0; i < size * 2; i += 2)
103  { //change array to complex array
104  input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
105  }
106  }
107 
108  template<typename NumericT>
109  void copy_to_complex_array(std::complex<NumericT> * input_complex,
111  {
112 #ifdef VIENNACL_WITH_OPENMP
113  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
114 #endif
115  for (vcl_size_t i = 0; i < size * 2; i += 2)
116  { //change array to complex array
117  input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
118  }
119  }
120 
121  template<typename NumericT, unsigned int AlignmentV>
122  void copy_to_vector(std::complex<NumericT> * input_complex,
124  {
125 #ifdef VIENNACL_WITH_OPENMP
126  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
127 #endif
128  for (vcl_size_t i = 0; i < size; i += 1)
129  {
130  in(i * 2) = static_cast<NumericT>(std::real(input_complex[i]));
131  in(i * 2 + 1) = static_cast<NumericT>(std::imag(input_complex[i]));
132  }
133  }
134 
/** @brief Packs an interleaved scalar buffer (re, im, re, im, ...) into an array of std::complex values.
 *
 * @param input_complex  Destination array; entries [0, size) are overwritten
 * @param in             Source buffer; entries [0, 2*size) are read
 * @param size           Number of complex values to assemble
 */
template<typename NumericT>
void copy_to_complex_array(std::complex<NumericT> * input_complex,
                           NumericT const * in, vcl_size_t size)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (vcl_size_t k = 0; k < size; ++k)
  {
    // Scalars 2k and 2k+1 hold the real and imaginary part of complex entry k.
    NumericT const * pair = in + 2 * k;
    input_complex[k] = std::complex<NumericT>(pair[0], pair[1]);
  }
}
147 
/** @brief Unpacks an array of std::complex values into an interleaved scalar buffer (re, im, re, im, ...).
 *
 * @param input_complex  Source array; entries [0, size) are read
 * @param in             Destination buffer; entries [0, 2*size) are overwritten
 * @param size           Number of complex values to unpack
 */
template<typename NumericT>
void copy_to_vector(std::complex<NumericT> * input_complex, NumericT * in, vcl_size_t size)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (vcl_size_t k = 0; k < size; ++k)
  {
    std::complex<NumericT> const & value = input_complex[k];
    NumericT * slot = in + 2 * k;

    // Real part goes to the even position, imaginary part to the odd position.
    slot[0] = static_cast<NumericT>(value.real());
    slot[1] = static_cast<NumericT>(value.imag());
  }
}
160 
161  template<typename NumericT>
162  void copy_to_vector(std::complex<NumericT> * input_complex,
164  {
165  std::vector<NumericT> temp(2 * size);
166 #ifdef VIENNACL_WITH_OPENMP
167  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
168 #endif
169  for (vcl_size_t i = 0; i < size; i += 1)
170  {
171  temp[i * 2] = static_cast<NumericT>(std::real(input_complex[i]));
172  temp[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i]));
173  }
174  viennacl::copy(temp, in);
175  }
176 
/** @brief Sets the first `size` entries of two arrays to zero.
 *
 * @param input1  First array; entries [0, size) are overwritten with 0
 * @param input2  Second array; entries [0, size) are overwritten with 0
 * @param size    Number of leading entries to clear in each array
 */
template<typename NumericT>
void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
{
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (vcl_size_t idx = 0; idx < size; ++idx)
  {
    NumericT & first  = input1[idx];
    NumericT & second = input2[idx];
    first  = 0;
    second = 0;
  }
}
189 
190  } //namespace fft
191 
192 } //namespace detail
193 
197 template<typename NumericT>
198 void fft_direct(std::complex<NumericT> * input_complex, std::complex<NumericT> * output,
199  vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign,
201 {
202  NumericT const NUM_PI = NumericT(3.14159265358979323846);
203 #ifdef VIENNACL_WITH_OPENMP
204  #pragma omp parallel
205 #endif
206  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
207  {
208  for (vcl_size_t k = 0; k < size; k += 1)
209  {
210  std::complex<NumericT> f = 0;
211  for (vcl_size_t n = 0; n < size; n++)
212  {
213  std::complex<NumericT> input;
214  if (!data_order)
215  input = input_complex[batch_id * stride + n]; //input index here
216  else
217  input = input_complex[n * stride + batch_id];
218  NumericT arg = sign * 2 * NUM_PI * NumericT(k) / NumericT(size * n);
219  NumericT sn = std::sin(arg);
220  NumericT cs = std::cos(arg);
221 
222  std::complex<NumericT> ex(cs, sn);
223  std::complex<NumericT> tmp(input.real() * ex.real() - input.imag() * ex.imag(),
224  input.real() * ex.imag() + input.imag() * ex.real());
225  f = f + tmp;
226  }
227  if (!data_order)
228  output[batch_id * stride + k] = f; // output index here
229  else
230  output[k * stride + batch_id] = f;
231  }
232  }
233 
234 }
235 
242 template<typename NumericT, unsigned int AlignmentV>
246  vcl_size_t batch_num, NumericT sign = NumericT(-1),
248 {
249  std::vector<std::complex<NumericT> > input_complex(size * batch_num);
250  std::vector<std::complex<NumericT> > output(size * batch_num);
251 
252  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
253 
254  fft_direct(&input_complex[0], &output[0], size, stride, batch_num, sign, data_order);
255 
256  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output[0], out, size * batch_num);
257 }
258 
265 template<typename NumericT, unsigned int AlignmentV>
268  vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1),
270 {
271  vcl_size_t row_num = in.internal_size1();
272  vcl_size_t col_num = in.internal_size2() >> 1;
273 
274  vcl_size_t size_mat = row_num * col_num;
275 
276  std::vector<std::complex<NumericT> > input_complex(size_mat);
277  std::vector<std::complex<NumericT> > output(size_mat);
278 
279  NumericT const * data_A = detail::extract_raw_pointer<NumericT>(in);
280  NumericT * data_B = detail::extract_raw_pointer<NumericT>(out);
281 
282  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data_A, size_mat);
283 
284  fft_direct(&input_complex[0], &output[0], size, stride, batch_num, sign, data_order);
285 
286  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output[0], data_B, size_mat);
287 }
288 
289 /*
290  * This function reorders 1D input data: the elements are permuted into bit-reversed index order.
291  * This reordering must be done before an in-place FFT.
292  */
293 template<typename NumericT, unsigned int AlignmentV>
295  vcl_size_t bits_datasize, vcl_size_t batch_num,
297 {
298  std::vector<std::complex<NumericT> > input(size * batch_num);
300 #ifdef VIENNACL_WITH_OPENMP
301  #pragma omp parallel for
302 #endif
303  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
304  {
305  for (vcl_size_t i = 0; i < size; i++)
306  {
308  if (i < v)
309  {
310  if (!data_order)
311  {
312  std::complex<NumericT> tmp = input[batch_id * stride + i]; // index
313  input[batch_id * stride + i] = input[batch_id * stride + v]; //index
314  input[batch_id * stride + v] = tmp; //index
315  }
316  else
317  {
318  std::complex<NumericT> tmp = input[i * stride + batch_id]; // index
319  input[i * stride + batch_id] = input[v * stride + batch_id]; //index
320  input[v * stride + batch_id] = tmp; //index
321  }
322  }
323  }
324  }
325  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input[0], in, size * batch_num);
326 }
327 
328 /*
329  * This function reorders 2D input data: the elements are permuted into bit-reversed index order.
330  * This reordering must be done before an in-place FFT.
331  */
332 template<typename NumericT, unsigned int AlignmentV>
334  vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num,
336 {
337 
338  NumericT * data = detail::extract_raw_pointer<NumericT>(in);
339  vcl_size_t row_num = in.internal_size1();
340  vcl_size_t col_num = in.internal_size2() >> 1;
341  vcl_size_t size_mat = row_num * col_num;
342 
343  std::vector<std::complex<NumericT> > input(size_mat);
344 
346 
347 #ifdef VIENNACL_WITH_OPENMP
348  #pragma omp parallel for
349 #endif
350  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
351  {
352  for (vcl_size_t i = 0; i < size; i++)
353  {
355  if (i < v)
356  {
357  if (!data_order)
358  {
359  std::complex<NumericT> tmp = input[batch_id * stride + i]; // index
360  input[batch_id * stride + i] = input[batch_id * stride + v]; //index
361  input[batch_id * stride + v] = tmp; //index
362  } else
363  {
364  std::complex<NumericT> tmp = input[i * stride + batch_id]; // index
365  input[i * stride + batch_id] = input[v * stride + batch_id]; //index
366  input[v * stride + batch_id] = tmp; //index
367  }
368  }
369  }
370  }
372 }
373 
378 template<typename NumericT>
379 void fft_radix2(std::complex<NumericT> * input_complex, vcl_size_t batch_num,
380  vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign,
382 {
383  NumericT const NUM_PI = NumericT(3.14159265358979323846);
384 
385  for (vcl_size_t step = 0; step < bit_size; step++)
386  {
387  vcl_size_t ss = 1 << step;
388  vcl_size_t half_size = size >> 1;
389  NumericT cs, sn;
390 #ifdef VIENNACL_WITH_OPENMP
391  #pragma omp parallel for private(cs,sn) shared(ss,half_size,step)
392 #endif
393  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
394  {
395  for (vcl_size_t tid = 0; tid < half_size; tid++)
396  {
397  vcl_size_t group = (tid & (ss - 1));
398  vcl_size_t pos = ((tid >> step) << (step + 1)) + group;
399  std::complex<NumericT> in1;
400  std::complex<NumericT> in2;
401  vcl_size_t offset;
402  if (!data_order)
403  {
404  offset = batch_id * stride + pos;
405  in1 = input_complex[offset];
406  in2 = input_complex[offset + ss];
407  }
408  else
409  {
410  offset = pos * stride + batch_id;
411  in1 = input_complex[offset];
412  in2 = input_complex[offset + ss * stride];
413  }
414  NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
415  sn = std::sin(arg);
416  cs = std::cos(arg);
417  std::complex<NumericT> ex(cs, sn);
418  std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
419  in2.real() * ex.imag() + in2.imag() * ex.real());
420  if (!data_order)
421  input_complex[offset + ss] = in1 - tmp;
422  else
423  input_complex[offset + ss * stride] = in1 - tmp;
424  input_complex[offset] = in1 + tmp;
425  }
426  }
427  }
428 
429 }
430 
435 template<typename NumericT>
436 void fft_radix2_local(std::complex<NumericT> * input_complex,
437  std::complex<NumericT> * lcl_input, vcl_size_t batch_num, vcl_size_t bit_size,
438  vcl_size_t size, vcl_size_t stride, NumericT sign,
440 {
441  NumericT const NUM_PI = NumericT(3.14159265358979323846);
442 
443  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
444  {
445 #ifdef VIENNACL_WITH_OPENMP
446  #pragma omp parallel for
447 #endif
448  for (vcl_size_t p = 0; p < size; p += 1)
449  {
451  bit_size);
452 
453  if (!data_order)
454  lcl_input[v] = input_complex[batch_id * stride + p]; //index
455  else
456  lcl_input[v] = input_complex[p * stride + batch_id];
457  }
458 
459  for (vcl_size_t s = 0; s < bit_size; s++)
460  {
461  vcl_size_t ss = 1 << s;
462 #ifdef VIENNACL_WITH_OPENMP
463  #pragma omp parallel for
464 #endif
465  for (vcl_size_t tid = 0; tid < size; tid++)
466  {
467  vcl_size_t group = (tid & (ss - 1));
468  vcl_size_t pos = ((tid >> s) << (s + 1)) + group;
469 
470  std::complex<NumericT> in1 = lcl_input[pos];
471  std::complex<NumericT> in2 = lcl_input[pos + ss];
472 
473  NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
474 
475  NumericT sn = std::sin(arg);
476  NumericT cs = std::cos(arg);
477  std::complex<NumericT> ex(cs, sn);
478 
479  std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
480  in2.real() * ex.imag() + in2.imag() * ex.real());
481 
482  lcl_input[pos + ss] = in1 - tmp;
483  lcl_input[pos] = in1 + tmp;
484  }
485 
486  }
487 #ifdef VIENNACL_WITH_OPENMP
488  #pragma omp parallel for
489 #endif
490  //copy local array back to global memory
491  for (vcl_size_t p = 0; p < size; p += 1)
492  {
493  if (!data_order)
494  input_complex[batch_id * stride + p] = lcl_input[p];
495  else
496  input_complex[p * stride + batch_id] = lcl_input[p];
497 
498  }
499 
500  }
501 
502 }
503 
511 template<typename NumericT, unsigned int AlignmentV>
513  vcl_size_t batch_num, NumericT sign = NumericT(-1),
515 {
516 
518 
519  std::vector<std::complex<NumericT> > input_complex(size * batch_num);
520  std::vector<std::complex<NumericT> > lcl_input(size * batch_num);
521  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
522 
524  {
525  viennacl::linalg::host_based::fft_radix2_local(&input_complex[0], &lcl_input[0], batch_num, bit_size, size, stride, sign, data_order);
526  }
527  else
528  {
529  viennacl::linalg::host_based::reorder<NumericT>(in, size, stride, bit_size, batch_num, data_order);
530  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
531  viennacl::linalg::host_based::fft_radix2(&input_complex[0], batch_num, bit_size, size, stride, sign, data_order);
532  }
533 
534  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], in, size * batch_num);
535 }
536 
544 template<typename NumericT, unsigned int AlignmentV>
546  vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1),
548 {
549 
551 
552  NumericT * data = detail::extract_raw_pointer<NumericT>(in);
553 
554  vcl_size_t row_num = in.internal_size1();
555  vcl_size_t col_num = in.internal_size2() >> 1;
556  vcl_size_t size_mat = row_num * col_num;
557 
558  std::vector<std::complex<NumericT> > input_complex(size_mat);
559 
560  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data, size_mat);
562  {
563  //std::cout<<bit_size<<","<<size<<","<<stride<<","<<batch_num<<","<<size<<","<<sign<<","<<data_order<<std::endl;
564  std::vector<std::complex<NumericT> > lcl_input(size_mat);
565  viennacl::linalg::host_based::fft_radix2_local(&input_complex[0], &lcl_input[0], batch_num, bit_size, size, stride, sign, data_order);
566  }
567  else
568  {
569  viennacl::linalg::host_based::reorder<NumericT>(in, size, stride, bit_size, batch_num, data_order);
570  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data, size_mat);
571  viennacl::linalg::host_based::fft_radix2(&input_complex[0], batch_num, bit_size, size, stride, sign, data_order);
572  }
573 
574  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], data, size_mat);
575 
576 }
577 
585 template<typename NumericT, unsigned int AlignmentV>
587 {
588 
589  vcl_size_t size = in.size() >> 1;
591 
595 
596  std::vector<std::complex<NumericT> > input_complex(size);
597  std::vector<std::complex<NumericT> > output_complex(size);
598 
599  std::vector<std::complex<NumericT> > A_complex(ext_size);
600  std::vector<std::complex<NumericT> > B_complex(ext_size);
601  std::vector<std::complex<NumericT> > Z_complex(ext_size);
602 
604 #ifdef VIENNACL_WITH_OPENMP
605  #pragma omp parallel for
606 #endif
607  for (vcl_size_t i = 0; i < ext_size; i++)
608  {
609  A_complex[i] = 0;
610  B_complex[i] = 0;
611  }
612 
613  vcl_size_t double_size = size << 1;
614 
615  NumericT const NUM_PI = NumericT(3.14159265358979323846);
616 #ifdef VIENNACL_WITH_OPENMP
617  #pragma omp parallel for
618 #endif
619  for (vcl_size_t i = 0; i < size; i++)
620  {
621  vcl_size_t rm = i * i % (double_size);
622  NumericT angle = NumericT(rm) / NumericT(size) * NumericT(NUM_PI);
623 
624  NumericT sn_a = std::sin(-angle);
625  NumericT cs_a = std::cos(-angle);
626 
627  std::complex<NumericT> a_i(cs_a, sn_a);
628  std::complex<NumericT> b_i(cs_a, -sn_a);
629 
630  A_complex[i] = std::complex<NumericT>(input_complex[i].real() * a_i.real() - input_complex[i].imag() * a_i.imag(),
631  input_complex[i].real() * a_i.imag() + input_complex[i].imag() * a_i.real());
632  B_complex[i] = b_i;
633 
634  // very bad instruction, to be fixed
635  if (i)
636  B_complex[ext_size - i] = b_i;
637  }
638 
642 
644 
646 
647  NumericT sn_a, cs_a;
648 #ifdef VIENNACL_WITH_OPENMP
649  #pragma omp parallel for private(sn_a,cs_a)
650 #endif
651  for (vcl_size_t i = 0; i < size; i++)
652  {
653  vcl_size_t rm = i * i % (double_size);
654  NumericT angle = NumericT(rm) / NumericT(size) * NumericT(-NUM_PI);
655  sn_a = std::sin(angle);
656  cs_a = std::cos(angle);
657  std::complex<NumericT> b_i(cs_a, sn_a);
658  output_complex[i] = std::complex<NumericT>(Z_complex[i].real() * b_i.real() - Z_complex[i].imag() * b_i.imag(),
659  Z_complex[i].real() * b_i.imag() + Z_complex[i].imag() * b_i.real());
660  }
661  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], out, size);
662 
663 }
664 
668 template<typename NumericT, unsigned int AlignmentV>
670 {
671  vcl_size_t size = input.size() >> 1;
672  NumericT norm_factor = static_cast<NumericT>(size);
673  for (vcl_size_t i = 0; i < size * 2; i++)
674  input[i] /= norm_factor;
675 
676 }
677 
681 template<typename NumericT, unsigned int AlignmentV>
685 {
686  vcl_size_t size = input1.size() >> 1;
687 
688  std::vector<std::complex<NumericT> > input1_complex(size);
689  std::vector<std::complex<NumericT> > input2_complex(size);
690  std::vector<std::complex<NumericT> > output_complex(size);
693 
694 #ifdef VIENNACL_WITH_OPENMP
695  #pragma omp parallel for
696 #endif
697  for (vcl_size_t i = 0; i < size; i++)
698  {
699  std::complex<NumericT> in1 = input1_complex[i];
700  std::complex<NumericT> in2 = input2_complex[i];
701  output_complex[i] = std::complex<NumericT>(in1.real() * in2.real() - in1.imag() * in2.imag(),
702  in1.real() * in2.imag() + in1.imag() * in2.real());
703  }
704  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], output, size);
705 
706 }
710 template<typename NumericT, unsigned int AlignmentV>
712 {
713  vcl_size_t row_num = input.internal_size1() / 2;
714  vcl_size_t col_num = input.internal_size2() / 2;
715 
716  vcl_size_t size = row_num * col_num;
717 
718  NumericT * data = detail::extract_raw_pointer<NumericT>(input);
719 
720  std::vector<std::complex<NumericT> > input_complex(size);
721 
723 #ifdef VIENNACL_WITH_OPENMP
724  #pragma omp parallel for shared(row_num,col_num)
725 #endif
726  for (vcl_size_t i = 0; i < size; i++)
727  {
728  vcl_size_t row = i / col_num;
729  vcl_size_t col = i - row * col_num;
730  vcl_size_t new_pos = col * row_num + row;
731 
732  if (i < new_pos)
733  {
734  std::complex<NumericT> val = input_complex[i];
735  input_complex[i] = input_complex[new_pos];
736  input_complex[new_pos] = val;
737  }
738  }
739  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], data, size);
740 
741 }
742 
746 template<typename NumericT, unsigned int AlignmentV>
749 {
750 
751  vcl_size_t row_num = input.internal_size1() / 2;
752  vcl_size_t col_num = input.internal_size2() / 2;
753  vcl_size_t size = row_num * col_num;
754 
755  NumericT const * data_A = detail::extract_raw_pointer<NumericT>(input);
756  NumericT * data_B = detail::extract_raw_pointer<NumericT>(output);
757 
758  std::vector<std::complex<NumericT> > input_complex(size);
760 
761  std::vector<std::complex<NumericT> > output_complex(size);
762 #ifdef VIENNACL_WITH_OPENMP
763  #pragma omp parallel for
764 #endif
765  for (vcl_size_t i = 0; i < size; i++)
766  {
767  vcl_size_t row = i / col_num;
768  vcl_size_t col = i % col_num;
769  vcl_size_t new_pos = col * row_num + row;
770  output_complex[new_pos] = input_complex[i];
771  }
772  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], data_B, size);
773 }
774 
778 template<typename NumericT>
781 {
782  NumericT const * data_in = detail::extract_raw_pointer<NumericT>(in);
783  NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
784 
785 #ifdef VIENNACL_WITH_OPENMP
786  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
787 #endif
788  for (long i2 = 0; i2 < long(size); i2++)
789  {
790  vcl_size_t i = static_cast<vcl_size_t>(i2);
791  data_out[2*i ] = data_in[i];
792  data_out[2*i+1] = NumericT(0);
793  }
794 }
795 
799 template<typename NumericT>
802 {
803  NumericT const * data_in = detail::extract_raw_pointer<NumericT>(in);
804  NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
805 
806 #ifdef VIENNACL_WITH_OPENMP
807 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
808 #endif
809  for (long i = 0; i < long(size); i++)
810  data_out[i] = data_in[2*i];
811 }
812 
816 template<typename NumericT>
818 {
819  vcl_size_t size = in.size();
820 
821 #ifdef VIENNACL_WITH_OPENMP
822  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
823 #endif
824  for (vcl_size_t i = 0; i < size; i++)
825  {
826  NumericT val1 = in[i];
827  NumericT val2 = in[size - i - 1];
828  in[i] = val2;
829  in[size - i - 1] = val1;
830  }
831 }
832 
833 } //namespace host_based
834 } //namespace linalg
835 } //namespace viennacl
836 
837 #endif /* FFT_OPERATIONS_HPP_ */
void fft_radix2_local(std::complex< NumericT > *input_complex, std::complex< NumericT > *lcl_input, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing bigger amount of data...
Implementation of the dense matrix class.
void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
void reverse(viennacl::vector_base< NumericT > &in)
Reverse vector to opposite order and save it in input vector.
void radix2(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 1D algorithm for computing Fourier transformation.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
endcode *Final step
A dense matrix class.
Definition: forwards.h:374
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void real_to_complex(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
void multiply_complex(viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
Complex multiplication of two vectors.
void fft_direct(std::complex< NumericT > *input_complex, std::complex< NumericT > *output, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct algorithm kernel.
vcl_size_t get_reorder_num(vcl_size_t v, vcl_size_t bit_size)
void copy_to_complex_array(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > const &in, vcl_size_t size)
std::size_t vcl_size_t
Definition: forwards.h:74
void bluestein(viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
Bluestein's algorithm for computing Fourier transformation.
void direct(viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct 1D algorithm for computing Fourier transformation.
void copy_to_vector(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Inplace transpose of matrix.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
vcl_size_t num_bits(vcl_size_t size)
void fft_radix2(std::complex< NumericT > *input_complex, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing smaller amount of data...
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:229
void normalize(viennacl::vector< NumericT, AlignmentV > &input)
Normalize vector by its own size.
void complex_to_real(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
void reorder(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)
Definition: fft_1d.cpp:719
SCALARTYPE sign(SCALARTYPE val)
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...