1 #ifndef VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
47 namespace FFT_DATA_ORDER
86 v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
87 v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
88 v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
89 v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
90 v = (v >> 16) | (v << 16);
91 v = v >> (32 - bit_size);
95 template<
typename NumericT,
unsigned int AlignmentV>
99 #ifdef VIENNACL_WITH_OPENMP
100 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
104 input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
108 template<
typename NumericT>
112 #ifdef VIENNACL_WITH_OPENMP
113 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
117 input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
121 template<
typename NumericT,
unsigned int AlignmentV>
125 #ifdef VIENNACL_WITH_OPENMP
126 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
130 in(i * 2) =
static_cast<NumericT
>(std::real(input_complex[i]));
131 in(i * 2 + 1) =
static_cast<NumericT
>(std::imag(input_complex[i]));
135 template<
typename NumericT>
139 #ifdef VIENNACL_WITH_OPENMP
140 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
144 input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
148 template<
typename NumericT>
151 #ifdef VIENNACL_WITH_OPENMP
152 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
156 in[i * 2] =
static_cast<NumericT
>(std::real(input_complex[i]));
157 in[i * 2 + 1] =
static_cast<NumericT
>(std::imag(input_complex[i]));
161 template<
typename NumericT>
165 std::vector<NumericT> temp(2 * size);
166 #ifdef VIENNACL_WITH_OPENMP
167 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
171 temp[i * 2] =
static_cast<NumericT
>(std::real(input_complex[i]));
172 temp[i * 2 + 1] =
static_cast<NumericT
>(std::imag(input_complex[i]));
177 template<
typename NumericT>
180 #ifdef VIENNACL_WITH_OPENMP
181 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
197 template<
typename NumericT>
198 void fft_direct(std::complex<NumericT> * input_complex, std::complex<NumericT> * output,
202 NumericT
const NUM_PI = NumericT(3.14159265358979323846);
203 #ifdef VIENNACL_WITH_OPENMP
206 for (
vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
210 std::complex<NumericT> f = 0;
213 std::complex<NumericT> input;
215 input = input_complex[batch_id * stride + n];
217 input = input_complex[n * stride + batch_id];
218 NumericT arg = sign * 2 * NUM_PI * NumericT(k) / NumericT(size * n);
219 NumericT sn = std::sin(arg);
220 NumericT cs = std::cos(arg);
222 std::complex<NumericT> ex(cs, sn);
223 std::complex<NumericT> tmp(input.real() * ex.real() - input.imag() * ex.imag(),
224 input.real() * ex.imag() + input.imag() * ex.real());
228 output[batch_id * stride + k] = f;
230 output[k * stride + batch_id] = f;
242 template<
typename NumericT,
unsigned int AlignmentV>
249 std::vector<std::complex<NumericT> > input_complex(size * batch_num);
250 std::vector<std::complex<NumericT> > output(size * batch_num);
254 fft_direct(&input_complex[0], &output[0], size, stride, batch_num,
sign, data_order);
265 template<
typename NumericT,
unsigned int AlignmentV>
276 std::vector<std::complex<NumericT> > input_complex(size_mat);
277 std::vector<std::complex<NumericT> > output(size_mat);
279 NumericT
const * data_A = detail::extract_raw_pointer<NumericT>(in);
280 NumericT * data_B = detail::extract_raw_pointer<NumericT>(out);
284 fft_direct(&input_complex[0], &output[0], size, stride, batch_num,
sign, data_order);
293 template<
typename NumericT,
unsigned int AlignmentV>
298 std::vector<std::complex<NumericT> > input(size * batch_num);
300 #ifdef VIENNACL_WITH_OPENMP
301 #pragma omp parallel for
303 for (
vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
312 std::complex<NumericT> tmp = input[batch_id * stride + i];
313 input[batch_id * stride + i] = input[batch_id * stride + v];
314 input[batch_id * stride + v] = tmp;
318 std::complex<NumericT> tmp = input[i * stride + batch_id];
319 input[i * stride + batch_id] = input[v * stride + batch_id];
320 input[v * stride + batch_id] = tmp;
332 template<
typename NumericT,
unsigned int AlignmentV>
338 NumericT * data = detail::extract_raw_pointer<NumericT>(in);
343 std::vector<std::complex<NumericT> > input(size_mat);
347 #ifdef VIENNACL_WITH_OPENMP
348 #pragma omp parallel for
350 for (
vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
359 std::complex<NumericT> tmp = input[batch_id * stride + i];
360 input[batch_id * stride + i] = input[batch_id * stride + v];
361 input[batch_id * stride + v] = tmp;
364 std::complex<NumericT> tmp = input[i * stride + batch_id];
365 input[i * stride + batch_id] = input[v * stride + batch_id];
366 input[v * stride + batch_id] = tmp;
378 template<
typename NumericT>
383 NumericT
const NUM_PI = NumericT(3.14159265358979323846);
390 #ifdef VIENNACL_WITH_OPENMP
391 #pragma omp parallel for private(cs,sn) shared(ss,half_size,step)
393 for (
vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
395 for (
vcl_size_t tid = 0; tid < half_size; tid++)
399 std::complex<NumericT> in1;
400 std::complex<NumericT> in2;
404 offset = batch_id * stride + pos;
405 in1 = input_complex[offset];
406 in2 = input_complex[offset + ss];
410 offset = pos * stride + batch_id;
411 in1 = input_complex[offset];
412 in2 = input_complex[offset + ss *
stride];
414 NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
417 std::complex<NumericT> ex(cs, sn);
418 std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
419 in2.real() * ex.imag() + in2.imag() * ex.real());
421 input_complex[offset + ss] = in1 - tmp;
423 input_complex[offset + ss *
stride] = in1 - tmp;
424 input_complex[offset] = in1 + tmp;
435 template<
typename NumericT>
441 NumericT
const NUM_PI = NumericT(3.14159265358979323846);
443 for (
vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
445 #ifdef VIENNACL_WITH_OPENMP
446 #pragma omp parallel for
454 lcl_input[v] = input_complex[batch_id * stride + p];
456 lcl_input[v] = input_complex[p * stride + batch_id];
462 #ifdef VIENNACL_WITH_OPENMP
463 #pragma omp parallel for
468 vcl_size_t pos = ((tid >> s) << (s + 1)) + group;
470 std::complex<NumericT> in1 = lcl_input[pos];
471 std::complex<NumericT> in2 = lcl_input[pos + ss];
473 NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
475 NumericT sn = std::sin(arg);
476 NumericT cs = std::cos(arg);
477 std::complex<NumericT> ex(cs, sn);
479 std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
480 in2.real() * ex.imag() + in2.imag() * ex.real());
482 lcl_input[pos + ss] = in1 - tmp;
483 lcl_input[pos] = in1 + tmp;
487 #ifdef VIENNACL_WITH_OPENMP
488 #pragma omp parallel for
494 input_complex[batch_id * stride + p] = lcl_input[p];
496 input_complex[p * stride + batch_id] = lcl_input[p];
511 template<
typename NumericT,
unsigned int AlignmentV>
519 std::vector<std::complex<NumericT> > input_complex(size * batch_num);
520 std::vector<std::complex<NumericT> > lcl_input(size * batch_num);
529 viennacl::linalg::host_based::reorder<NumericT>(in,
size,
stride, bit_size, batch_num, data_order);
544 template<
typename NumericT,
unsigned int AlignmentV>
552 NumericT * data = detail::extract_raw_pointer<NumericT>(in);
558 std::vector<std::complex<NumericT> > input_complex(size_mat);
564 std::vector<std::complex<NumericT> > lcl_input(size_mat);
569 viennacl::linalg::host_based::reorder<NumericT>(in,
size,
stride, bit_size, batch_num, data_order);
585 template<
typename NumericT,
unsigned int AlignmentV>
596 std::vector<std::complex<NumericT> > input_complex(size);
597 std::vector<std::complex<NumericT> > output_complex(size);
599 std::vector<std::complex<NumericT> > A_complex(ext_size);
600 std::vector<std::complex<NumericT> > B_complex(ext_size);
601 std::vector<std::complex<NumericT> > Z_complex(ext_size);
604 #ifdef VIENNACL_WITH_OPENMP
605 #pragma omp parallel for
615 NumericT
const NUM_PI = NumericT(3.14159265358979323846);
616 #ifdef VIENNACL_WITH_OPENMP
617 #pragma omp parallel for
622 NumericT angle = NumericT(rm) / NumericT(size) * NumericT(NUM_PI);
624 NumericT sn_a = std::sin(-angle);
625 NumericT cs_a = std::cos(-angle);
627 std::complex<NumericT> a_i(cs_a, sn_a);
628 std::complex<NumericT> b_i(cs_a, -sn_a);
630 A_complex[i] = std::complex<NumericT>(input_complex[i].real() * a_i.real() - input_complex[i].imag() * a_i.imag(),
631 input_complex[i].real() * a_i.imag() + input_complex[i].imag() * a_i.real());
636 B_complex[ext_size - i] = b_i;
648 #ifdef VIENNACL_WITH_OPENMP
649 #pragma omp parallel for private(sn_a,cs_a)
654 NumericT angle = NumericT(rm) / NumericT(size) * NumericT(-NUM_PI);
655 sn_a = std::sin(angle);
656 cs_a = std::cos(angle);
657 std::complex<NumericT> b_i(cs_a, sn_a);
658 output_complex[i] = std::complex<NumericT>(Z_complex[i].real() * b_i.real() - Z_complex[i].imag() * b_i.imag(),
659 Z_complex[i].real() * b_i.imag() + Z_complex[i].imag() * b_i.real());
668 template<
typename NumericT,
unsigned int AlignmentV>
672 NumericT norm_factor =
static_cast<NumericT
>(
size);
674 input[i] /= norm_factor;
681 template<
typename NumericT,
unsigned int AlignmentV>
688 std::vector<std::complex<NumericT> > input1_complex(size);
689 std::vector<std::complex<NumericT> > input2_complex(size);
690 std::vector<std::complex<NumericT> > output_complex(size);
694 #ifdef VIENNACL_WITH_OPENMP
695 #pragma omp parallel for
699 std::complex<NumericT> in1 = input1_complex[i];
700 std::complex<NumericT> in2 = input2_complex[i];
701 output_complex[i] = std::complex<NumericT>(in1.real() * in2.real() - in1.imag() * in2.imag(),
702 in1.real() * in2.imag() + in1.imag() * in2.real());
710 template<
typename NumericT,
unsigned int AlignmentV>
718 NumericT * data = detail::extract_raw_pointer<NumericT>(input);
720 std::vector<std::complex<NumericT> > input_complex(size);
723 #ifdef VIENNACL_WITH_OPENMP
724 #pragma omp parallel for shared(row_num,col_num)
734 std::complex<NumericT> val = input_complex[i];
735 input_complex[i] = input_complex[new_pos];
736 input_complex[new_pos] = val;
746 template<
typename NumericT,
unsigned int AlignmentV>
755 NumericT
const * data_A = detail::extract_raw_pointer<NumericT>(input);
756 NumericT * data_B = detail::extract_raw_pointer<NumericT>(output);
758 std::vector<std::complex<NumericT> > input_complex(size);
761 std::vector<std::complex<NumericT> > output_complex(size);
762 #ifdef VIENNACL_WITH_OPENMP
763 #pragma omp parallel for
770 output_complex[new_pos] = input_complex[i];
778 template<
typename NumericT>
782 NumericT
const * data_in = detail::extract_raw_pointer<NumericT>(in);
783 NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
785 #ifdef VIENNACL_WITH_OPENMP
786 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
788 for (
long i2 = 0; i2 < long(size); i2++)
791 data_out[2*i ] = data_in[i];
792 data_out[2*i+1] = NumericT(0);
799 template<
typename NumericT>
803 NumericT
const * data_in = detail::extract_raw_pointer<NumericT>(in);
804 NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
806 #ifdef VIENNACL_WITH_OPENMP
807 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
809 for (
long i = 0; i < long(size); i++)
810 data_out[i] = data_in[2*i];
816 template<
typename NumericT>
821 #ifdef VIENNACL_WITH_OPENMP
822 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
826 NumericT val1 = in[i];
827 NumericT val2 = in[size - i - 1];
829 in[size - i - 1] = val1;
void fft_radix2_local(std::complex< NumericT > *input_complex, std::complex< NumericT > *lcl_input, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing bigger amount of data...
Implementation of the dense matrix class.
void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
void reverse(viennacl::vector_base< NumericT > &in)
Reverses the order of the vector's elements in place (the result is stored in the input vector).
void radix2(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 1D algorithm for computing Fourier transformation.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
void real_to_complex(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create a complex vector from a real vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part).
void multiply_complex(viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
Complex multiplication of two vectors.
void fft_direct(std::complex< NumericT > *input_complex, std::complex< NumericT > *output, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct algorithm kernel.
vcl_size_t get_reorder_num(vcl_size_t v, vcl_size_t bit_size)
void copy_to_complex_array(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > const &in, vcl_size_t size)
void bluestein(viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
Bluestein's algorithm for computing Fourier transformation.
void direct(viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct 1D algorithm for computing Fourier transformation.
void copy_to_vector(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
vcl_size_t next_power_2(vcl_size_t n)
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Inplace transpose of matrix.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
vcl_size_t num_bits(vcl_size_t size)
void fft_radix2(std::complex< NumericT > *input_complex, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing smaller amount of data...
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
void normalize(viennacl::vector< NumericT, AlignmentV > &input)
Normalize the vector by its own size.
const vcl_size_t MAX_LOCAL_POINTS_NUM
void complex_to_real(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create a real vector from a complex vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part).
void reorder(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)
SCALARTYPE sign(SCALARTYPE val)
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...