1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
41 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
42 #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
53 template<
typename NumericT>
// flip_sign overload for unsigned long: returns val unchanged, i.e. no
// negation is applied for this unsigned type.
55 inline unsigned long flip_sign(
unsigned long val) {
return val; }
// flip_sign overload for unsigned int: returns val unchanged, i.e. no
// negation is applied for this unsigned type.
56 inline unsigned int flip_sign(
unsigned int val) {
return val; }
// flip_sign overload for unsigned short: returns val unchanged, i.e. no
// negation is applied for this unsigned type.
57 inline unsigned short flip_sign(
unsigned short val) {
return val; }
// flip_sign overload for unsigned char: returns val unchanged, i.e. no
// negation is applied for this unsigned type.
58 inline unsigned char flip_sign(
unsigned char val) {
return val; }
// Element-wise scaled copy between two strided host buffers: every addressed
// entry of vec1 is overwritten with the matching entry of vec2, either divided
// by alpha (first loop) or multiplied by alpha (second loop).
// NOTE(review): the signature, the start/stride/size setup and the condition
// selecting between the two loops are not visible in this excerpt; presumably
// this is the body of av() - confirm against the full file.
65 template<
typename NumericT,
typename ScalarT1>
69 typedef NumericT value_type;
// Raw pointers: vec1 is the writable destination, vec2 the read-only source.
71 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
72 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
// The scalar is converted to the element type once, before the loops.
74 value_type data_alpha = alpha;
// Division variant. The OpenMP clause requests a parallel loop only when
// size1 exceeds VIENNACL_OPENMP_VECTOR_MIN_SIZE.
87 #ifdef VIENNACL_WITH_OPENMP
88 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
// Signed long counter, cast to vcl_size_t for the index i*inc + start.
90 for (
long i = 0; i < static_cast<long>(
size1); ++i)
91 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
// Multiplication variant with the same indexing and OpenMP condition.
95 #ifdef VIENNACL_WITH_OPENMP
96 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
98 for (
long i = 0; i < static_cast<long>(
size1); ++i)
99 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
// Two-term linear combination written into vec1: each addressed entry of vec1
// is ASSIGNED (vec2 entry scaled by alpha) + (vec3 entry scaled by beta).
// Four loops cover the combinations of dividing or multiplying by alpha and
// by beta.
// NOTE(review): the signature, the start/stride/size setup and most of the
// branch structure are missing from this excerpt; presumably this is the body
// of avbv() - confirm against the full file.
104 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
109 typedef NumericT value_type;
// Raw pointers: vec1 is the writable destination, vec2 and vec3 are read-only.
111 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
112 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
113 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
// Both scalars are converted to the element type once, before the loops.
115 value_type data_alpha = alpha;
119 value_type data_beta = beta;
// Only this test on reciprocal_alpha is visible; the nested conditions that
// pick one of the four loops below are not shown.
133 if (reciprocal_alpha)
// Variant: vec2 / alpha + vec3 / beta.
137 #ifdef VIENNACL_WITH_OPENMP
138 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
140 for (
long i = 0; i < static_cast<long>(
size1); ++i)
141 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
// Variant: vec2 / alpha + vec3 * beta.
145 #ifdef VIENNACL_WITH_OPENMP
146 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
148 for (
long i = 0; i < static_cast<long>(
size1); ++i)
149 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
// Variant: vec2 * alpha + vec3 / beta.
156 #ifdef VIENNACL_WITH_OPENMP
157 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
159 for (
long i = 0; i < static_cast<long>(
size1); ++i)
160 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
// Variant: vec2 * alpha + vec3 * beta.
164 #ifdef VIENNACL_WITH_OPENMP
165 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
167 for (
long i = 0; i < static_cast<long>(
size1); ++i)
168 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
// Two-term linear combination accumulated into vec1: each addressed entry of
// vec1 is INCREMENTED (+=) by (vec2 entry scaled by alpha) + (vec3 entry
// scaled by beta). Four loops cover the combinations of dividing or
// multiplying by alpha and by beta.
// NOTE(review): the signature, the start/stride/size setup and most of the
// branch structure are missing from this excerpt; presumably this is the body
// of avbv_v() - confirm against the full file.
174 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
179 typedef NumericT value_type;
// Raw pointers: vec1 is read and written, vec2 and vec3 are read-only.
181 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
182 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
183 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
// Both scalars are converted to the element type once, before the loops.
185 value_type data_alpha = alpha;
189 value_type data_beta = beta;
// Only this test on reciprocal_alpha is visible; the nested conditions that
// pick one of the four loops below are not shown.
203 if (reciprocal_alpha)
// Variant: vec1 += vec2 / alpha + vec3 / beta.
207 #ifdef VIENNACL_WITH_OPENMP
208 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
210 for (
long i = 0; i < static_cast<long>(
size1); ++i)
211 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
// Variant: vec1 += vec2 / alpha + vec3 * beta.
215 #ifdef VIENNACL_WITH_OPENMP
216 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
218 for (
long i = 0; i < static_cast<long>(
size1); ++i)
219 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
// Variant: vec1 += vec2 * alpha + vec3 / beta.
226 #ifdef VIENNACL_WITH_OPENMP
227 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
229 for (
long i = 0; i < static_cast<long>(
size1); ++i)
230 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
// Variant: vec1 += vec2 * alpha + vec3 * beta.
234 #ifdef VIENNACL_WITH_OPENMP
235 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
237 for (
long i = 0; i < static_cast<long>(
size1); ++i)
238 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
// Constant fill: writes the value alpha into every addressed entry
// (index i*inc1 + start1) of vec1 for i in [0, loop_bound).
// NOTE(review): the signature and the computation of loop_bound, start1 and
// inc1 are not visible in this excerpt; presumably this is the body of
// vector_assign() - confirm against the full file.
252 template<
typename NumericT>
255 typedef NumericT value_type;
257 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// Fill value, explicitly converted to the element type.
264 value_type data_alpha =
static_cast<value_type
>(alpha);
// The OpenMP clause requests a parallel loop only when loop_bound exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
266 #ifdef VIENNACL_WITH_OPENMP
267 #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
269 for (
long i = 0; i < static_cast<long>(loop_bound); ++i)
270 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_alpha;
// Element-wise exchange of two strided host buffers: for each i the addressed
// entries of vec1 and vec2 trade places through a temporary.
// NOTE(review): the signature, the start/stride/size setup and the braces
// around the loop body are not visible in this excerpt; presumably this is
// the body of vector_swap() - confirm against the full file.
279 template<
typename NumericT>
282 typedef NumericT value_type;
// Both buffers are written, so both pointers are non-const.
284 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
285 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
// The OpenMP clause requests a parallel loop only when size1 exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
294 #ifdef VIENNACL_WITH_OPENMP
295 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
297 for (
long i = 0; i < static_cast<long>(
size1); ++i)
// Three-step swap: save the vec2 entry, copy vec1 into vec2, then store the
// saved value into vec1.
299 value_type temp = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
300 data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1];
301 data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = temp;
// Binary element-wise operation: for each i, OpFunctor::apply is called with
// the addressed vec1 entry and the matching entries of proxy.lhs() and
// proxy.rhs().
// NOTE(review): the signature, the definition of OpFunctor and the
// start/stride/size setup are not visible in this excerpt; presumably this is
// the body of the binary element_op() overload - confirm against the full file.
313 template<
typename NumericT,
typename OpT>
317 typedef NumericT value_type;
// vec1 is the writable result buffer; both operands of the expression proxy
// are accessed through read-only pointers.
320 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
321 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
322 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
// The OpenMP clause requests a parallel loop only when size1 exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
334 #ifdef VIENNACL_WITH_OPENMP
335 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
337 for (
long i = 0; i < static_cast<long>(
size1); ++i)
338 OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
// Unary element-wise operation: for each i, OpFunctor::apply is called with
// the addressed vec1 entry and the matching entry of proxy.lhs().
// NOTE(review): the signature, the definition of OpFunctor and the
// start/stride/size setup are not visible in this excerpt; presumably this is
// the body of the unary element_op() overload - confirm against the full file.
346 template<
typename NumericT,
typename OpT>
350 typedef NumericT value_type;
// vec1 is the writable result buffer; the proxy operand is read-only.
353 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
354 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
// The OpenMP clause requests a parallel loop only when size1 exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
363 #ifdef VIENNACL_WITH_OPENMP
364 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
366 for (
long i = 0; i < static_cast<long>(
size1); ++i)
367 OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2]);
// Inner product of two strided host buffers: the products of matching entries
// of vec1 and vec2 are summed into temp.
// NOTE(review): the signature, the declaration of temp, the start/stride/size
// setup and the final store of the result are not visible in this excerpt;
// presumably this is the body of inner_prod_impl() - confirm against the full
// file.
382 template<
typename NumericT,
typename ScalarT>
387 typedef NumericT value_type;
// Both inputs are only read.
389 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
390 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
// OpenMP sum-reduction on temp, requested only when size1 exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
401 #ifdef VIENNACL_WITH_OPENMP
402 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
404 for (
long i = 0; i < static_cast<long>(
size1); ++i)
405 temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] * data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
// Inner products of one vector x against every vector held in vec_tuple:
// temp[j] accumulates x[i] * y_j[i] for the j-th tuple member.
// NOTE(review): the signature, the loops over i and j, the filling of
// start_y/stride_y and the final store of the results are not visible in this
// excerpt; presumably this is the tuple overload of inner_prod_impl() -
// confirm against the full file.
410 template<
typename NumericT>
415 typedef NumericT value_type;
417 value_type
const * data_x = detail::extract_raw_pointer<value_type>(x);
// Per-tuple-member bookkeeping, each sized to the number of vectors in the
// tuple: partial sums, raw data pointers, start offsets and strides.
423 std::vector<value_type> temp(vec_tuple.
const_size());
424 std::vector<value_type const *> data_y(vec_tuple.
const_size());
425 std::vector<vcl_size_t> start_y(vec_tuple.
const_size());
426 std::vector<vcl_size_t> stride_y(vec_tuple.
const_size());
// Raw pointer of the j-th tuple member.
430 data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.
const_at(j));
// The x entry is read once into entry_x and then used in the accumulation
// for tuple member j.
438 value_type entry_x = data_x[i*inc_x+start_x];
440 temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
// Sum of absolute values of the addressed entries of vec1, accumulated in temp.
// NOTE(review): the signature, the declaration of temp, the start/stride/size
// setup and the final store of the result are not visible in this excerpt;
// presumably this is the body of norm_1_impl() - confirm against the full file.
453 template<
typename NumericT,
typename ScalarT>
457 typedef NumericT value_type;
459 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// OpenMP sum-reduction on temp, requested only when size1 exceeds
// VIENNACL_OPENMP_VECTOR_MIN_SIZE.
467 #ifdef VIENNACL_WITH_OPENMP
468 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
470 for (
long i = 0; i < static_cast<long>(
size1); ++i)
// Each entry is cast to double for std::fabs and the result is cast back to
// the element type before being added.
471 temp += static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1])));
// Reduction over the addressed entries of vec1 followed by a square root:
// result is set to std::sqrt(temp).
// NOTE(review): the signature, the declarations of temp and data, the
// statement that accumulates into temp and the start/stride/size setup are
// not visible in this excerpt; presumably this is the body of norm_2_impl()
// - confirm against the full file.
481 template<
typename NumericT,
typename ScalarT>
485 typedef NumericT value_type;
487 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// OpenMP sum-reduction on temp with a thread-private scratch variable data,
// requested only when size1 exceeds VIENNACL_OPENMP_VECTOR_MIN_SIZE.
496 #ifdef VIENNACL_WITH_OPENMP
497 #pragma omp parallel for reduction(+: temp) private(data) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
499 for (
long i = 0; i < static_cast<long>(
size1); ++i)
// Current entry is loaded into the scratch variable.
501 data = data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1];
505 result = std::sqrt(temp);
// Running maximum of absolute values: temp is replaced by the larger of temp
// and |entry| for the addressed entry of vec1.
// NOTE(review): the signature, the declaration of temp, the loop header and
// the final store of the result are not visible in this excerpt; presumably
// this is the body of norm_inf_impl() - confirm against the full file.
513 template<
typename NumericT,
typename ScalarT>
517 typedef NumericT value_type;
519 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// The entry is cast to double for std::fabs and back to the element type
// before the comparison.
529 temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));
// Loads the absolute value of the addressed entry of vec1 into data.
// NOTE(review): the signature, the loop, the comparison that tracks the
// largest value and its index, and the return statement are not visible in
// this excerpt; presumably this is the body of index_norm_inf() - confirm
// against the full file.
542 template<
typename NumericT>
545 typedef NumericT value_type;
547 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// The entry is cast to double for std::fabs and back to the element type.
560 data =
static_cast<value_type
>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));
// Running maximum over the addressed entries of vec1.
// NOTE(review): the signature, the loop header and the final store of the
// result are not visible in this excerpt; presumably this is the body of
// max_impl() - confirm against the full file.
576 template<
typename NumericT,
typename ScalarT>
580 typedef NumericT value_type;
582 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// Seeded with the entry at offset start1.
588 value_type temp = data_vec1[
start1];
// Keeps the larger of the current maximum and the entry at i*inc1 + start1.
592 temp = std::max<value_type>(temp, data_vec1[i*inc1+start1]);
// Running minimum over the addressed entries of vec1.
// NOTE(review): the signature, the loop header and the final store of the
// result are not visible in this excerpt; presumably this is the body of
// min_impl() - confirm against the full file.
602 template<
typename NumericT,
typename ScalarT>
606 typedef NumericT value_type;
608 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
// Seeded with the entry at offset start1.
614 value_type temp = data_vec1[
start1];
// Keeps the smaller of the current minimum and the entry at i*inc1 + start1.
618 temp = std::min<value_type>(temp, data_vec1[i*inc1+start1]);
// Plane rotation applied in place to two strided host buffers. For each i,
// with x the vec1 entry and y the vec2 entry:
//   vec1 entry <- alpha * x + beta * y
//   vec2 entry <- alpha * y - beta * x
// NOTE(review): the start of the signature, the start/stride/size setup and
// the braces around the loop body are not visible in this excerpt.
634 template<
typename NumericT>
637 NumericT alpha, NumericT beta)
639 typedef NumericT value_type;
// Both buffers are read and written, so both pointers are non-const.
641 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
642 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
// Scratch variables for the two old entries, plus local copies of the
// rotation coefficients.
651 value_type temp1 = 0;
652 value_type temp2 = 0;
653 value_type data_alpha = alpha;
654 value_type data_beta = beta;
// temp1/temp2 are declared private in the OpenMP clause; a parallel loop is
// requested only when size1 exceeds VIENNACL_OPENMP_VECTOR_MIN_SIZE.
656 #ifdef VIENNACL_WITH_OPENMP
657 #pragma omp parallel for private(temp1, temp2) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
659 for (
long i = 0; i < static_cast<long>(
size1); ++i)
// Both old values are read before either entry is overwritten.
661 temp1 = data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1];
662 temp2 = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
664 data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
665 data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
vcl_size_t const_size() const
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector.
Generic size and resize functionality for different vector and matrix types.
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Worker class for decomposing expression templates.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Determines row and column increments for matrices and matrix proxies.
An expression template class that represents a binary operation that yields a vector.
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation.
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector.
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
NumericT flip_sign(NumericT val)
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Common routines for single-threaded or OpenMP-enabled execution on CPU.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
VectorType const & const_at(vcl_size_t i) const
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Defines the action of certain unary and binary operators and their arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
Implementation of the ViennaCL scalar class.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Simple enable-if variant that uses the SFINAE pattern.