ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 
27 #include "viennacl/ocl/device.hpp"
28 #include "viennacl/ocl/handle.hpp"
29 #include "viennacl/ocl/kernel.hpp"
30 #include "viennacl/scalar.hpp"
31 #include "viennacl/vector.hpp"
33 #include "viennacl/tools/tools.hpp"
37 
39 
40 #include "viennacl/traits/size.hpp"
44 
47 
49 
50 namespace viennacl
51 {
52 namespace linalg
53 {
54 namespace opencl
55 {
56 
57 //
58 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
59 //
60 
// Kernel-name string constants; each quoted value is the identifier of an OpenCL kernel.
// SVD_COPY_COL_KERNEL / SVD_COPY_ROW_KERNEL are selected via the copy_col flag further down in this listing.
// NOTE(review): the other names are presumably consumed by kernel lookups on lines that are not part of this extract -- confirm against the full header.
61 const std::string SVD_BIDIAG_PACK_KERNEL = "bidiag_pack";
62 const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL = "house_update_A_left";
63 const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL = "house_update_A_right";
64 const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL = "house_update_QL";
65 const std::string SVD_GIVENS_NEXT_KERNEL = "givens_next";
66 const std::string SVD_COPY_COL_KERNEL = "copy_col";
67 const std::string SVD_COPY_ROW_KERNEL = "copy_row";
68 const std::string SVD_INCLUSIVE_SCAN_KERNEL_1 = "inclusive_scan_1";
69 const std::string SVD_EXCLUSIVE_SCAN_KERNEL_1 = "exclusive_scan_1";
70 const std::string SVD_SCAN_KERNEL_2 = "scan_kernel_2";
71 const std::string SVD_SCAN_KERNEL_3 = "scan_kernel_3";
72 const std::string SVD_SCAN_KERNEL_4 = "scan_kernel_4";
73 
// am: builds and executes the single-scalar "assign" kernel statement for the matrices A and B.
// NOTE(review): the line carrying the return type, function name and first parameter (listing line 76)
// is missing from this extract; the symbol index at the end of this page gives the signature as
// `void am(matrix_base<NumericT> &A, matrix_base<NumericT> const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)`.
// The vcl_size_t parameter (len_alpha) is unnamed and therefore not used by this implementation.
74 template<typename NumericT,
75  typename ScalarT1>
77  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha)
78 {
    // A and B must use the same storage layout; this is only verified when assertions are enabled.
79  assert(A.row_major() == B.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
80 
    // Kernel-name template: the '*' placeholders at indices 7, 10 and 11 are overwritten below, the trailing "00" stays as written.
81  std::string kernel_name("assign_*m_**00");
82  bool is_scalar_cpu = is_cpu_scalar<ScalarT1>::value;
    // index 7: 'h' if ScalarT1 is a host (CPU) scalar type, 'd' otherwise
83  kernel_name[7] = is_scalar_cpu ? 'h' : 'd';
    // index 10: sign-flip flag for alpha; index 11: reciprocal flag for alpha
84  kernel_name[10] = flip_sign_alpha ? '1' : '0';
85  kernel_name[11] = reciprocal_alpha ? '1' : '0';
86 
    // Build the scheduler statement (assign operation on A, B, alpha) and execute it under the composed kernel name,
    // using the execution handler selected by A's layout and A's OpenCL context.
87  scheduler::statement statement = scheduler::preset::av(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha);
88  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
89 }
90 
91 
// ambm: builds and executes the two-scalar "assign" kernel statement for the matrices A, B and C.
// NOTE(review): the line carrying the return type, function name and first parameter (listing line 94)
// is missing from this extract; the symbol index at the end of this page gives the signature as
// `void ambm(matrix_base<NumericT> &A, matrix_base<NumericT> const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base<NumericT> const &C, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)`.
// Both vcl_size_t parameters (len_alpha, len_beta) are unnamed and therefore not used by this implementation.
92 template<typename NumericT,
93  typename ScalarT1, typename ScalarT2>
95  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
96  matrix_base<NumericT> const & C, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
97 {
    // All three matrices must use the same storage layout; this is only verified when assertions are enabled.
98  assert(A.row_major() == B.row_major() && A.row_major() == C.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
99 
    // Kernel-name template: every '*' placeholder (indices 7, 9 and 12..15) is overwritten below.
100  std::string kernel_name("assign_*m*m_****");
101  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
102  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
    // indices 7 / 9: 'h' if the respective scalar type is a host (CPU) scalar type, 'd' otherwise
103  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
104  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
    // indices 12..15: sign-flip and reciprocal flags, first for alpha, then for beta
105  kernel_name[12] = flip_sign_alpha ? '1' : '0';
106  kernel_name[13] = reciprocal_alpha ? '1' : '0';
107  kernel_name[14] = flip_sign_beta ? '1' : '0';
108  kernel_name[15] = reciprocal_beta ? '1' : '0';
109 
    // Build the scheduler statement (assign operation) and execute it under the composed kernel name,
    // using the execution handler selected by A's layout and A's OpenCL context.
110  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha, &C, &beta, flip_sign_beta, reciprocal_beta);
111  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
112 }
113 
114 
// In-place-add counterpart of the two-scalar assign routine: same parameters and kernel-name encoding,
// but the statement is built with OPERATION_BINARY_INPLACE_ADD_TYPE.
// NOTE(review): the line carrying the return type, function name and first parameter (listing line 117)
// is missing from this extract and the symbol index of this page has no entry for it; presumably `void ambm_m(matrix_base<NumericT> & A, ...` -- confirm.
// Both vcl_size_t parameters (len_alpha, len_beta) are unnamed and therefore not used by this implementation.
115 template<typename NumericT,
116  typename ScalarT1, typename ScalarT2>
118  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
119  matrix_base<NumericT> const & C, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
120 {
    // All three matrices must use the same storage layout; this is only verified when assertions are enabled.
121  assert(A.row_major() == B.row_major() && A.row_major() == C.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
122 
    // Kernel-name template: every '*' placeholder (indices 7, 9 and 12..15) is overwritten below.
    // NOTE(review): the template spells "*v*v" whereas the assign variant in this file spells "*m*m";
    // verify that this is the name under which the matrix kernel is actually registered.
123  std::string kernel_name("ip_add_*v*v_****");
124  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
125  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
    // indices 7 / 9: 'h' if the respective scalar type is a host (CPU) scalar type, 'd' otherwise
126  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
127  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
    // indices 12..15: sign-flip and reciprocal flags, first for alpha, then for beta
128  kernel_name[12] = flip_sign_alpha ? '1' : '0';
129  kernel_name[13] = reciprocal_alpha ? '1' : '0';
130  kernel_name[14] = flip_sign_beta ? '1' : '0';
131  kernel_name[15] = reciprocal_beta ? '1' : '0';
132 
133 
    // Build the scheduler statement (in-place add operation) and execute it under the composed kernel name,
    // using the execution handler selected by A's layout and A's OpenCL context.
134  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha, &C, &beta, flip_sign_beta, reciprocal_beta);
135  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
136 }
137 
// trans: enqueues the "trans_kernel" OpenCL kernel with proxy.lhs() as first buffer and temp_trans as second buffer.
// NOTE(review): the line carrying the return type, function name and first parameter (listing line 140)
// is missing from this extract; the symbol index at the end of this page gives the signature as
// `void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>, const matrix_base<NumericT, SizeT, DistanceT>, op_trans> &proxy, matrix_base<NumericT> &temp_trans)`.
138 template<typename NumericT,
139  typename SizeT, typename DistanceT>
141  matrix_base<NumericT> & temp_trans)
142 {
143  std::string kernel_name("trans_kernel");
    // The kernel object is looked up for proxy.lhs() via the legacy (non-scheduler) helper.
144  viennacl::ocl::kernel& kernel = detail::legacy_kernel_for_matrix(proxy.lhs(),kernel_name);
    // Arguments for proxy.lhs(): buffer, start offsets, internal sizes, sizes, strides (each as a row/column pair).
145  viennacl::ocl::enqueue(kernel(proxy.lhs(),
146  static_cast<cl_uint>(proxy.lhs().start1()), static_cast<cl_uint>(proxy.lhs().start2()),
147  static_cast<cl_uint>(proxy.lhs().internal_size1()), static_cast<cl_uint>(proxy.lhs().internal_size2()),
148  static_cast<cl_uint>(proxy.lhs().size1()), static_cast<cl_uint>(proxy.lhs().size2()),
149  static_cast<cl_uint>(proxy.lhs().stride1()), static_cast<cl_uint>(proxy.lhs().stride2()),
150 
    // Arguments for temp_trans: buffer, start offsets, internal sizes, strides (its size1/size2 are not passed).
151  temp_trans,
152  static_cast<cl_uint>(temp_trans.start1()), static_cast<cl_uint>(temp_trans.start2()),
153  static_cast<cl_uint>(temp_trans.internal_size1()), static_cast<cl_uint>(temp_trans.internal_size2()),
154  static_cast<cl_uint>(temp_trans.stride1()), static_cast<cl_uint>(temp_trans.stride2())));
155 }
156 
157 template<typename NumericT>
158 void matrix_assign(matrix_base<NumericT> & A, NumericT s, bool up_to_internal_size = false)
159 {
162 
163  dynamic_cast<device_specific::matrix_axpy_template*>(kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).template_of("assign_cpu"))->up_to_internal_size(up_to_internal_size);
164  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("assign_cpu", statement);
165 }
166 
167 template<typename NumericT>
169 {
172  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("diagonal_assign_cpu", statement);
173 }
174 
175 template<typename NumericT>
177 {
179  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_diag_from_vector", statement);
180 }
181 
182 template<typename NumericT>
184 {
186  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_diag_to_vector", statement);
187 }
188 
// matrix_row: builds the `matrix_row` preset statement from (vec, A, i) and executes the kernel named "matrix_row".
// The execution handler is chosen by A's storage layout and A's OpenCL context.
// NOTE(review): presumably this copies row i of A into vec; the semantics live in scheduler::preset::matrix_row -- confirm there.
189 template<typename NumericT>
190 void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & vec)
191 {
192  scheduler::statement statement = scheduler::preset::matrix_row(&vec, &A, i);
193  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_row", statement);
194 }
195 
196 template<typename NumericT>
197 void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & vec)
198 {
200  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_column", statement);
201 }
202 
203 
204 //
206 //
207 
208 // Binary operations A = B .* C and A = B ./ C
214 template<typename NumericT, typename OpT>
217 {
218  assert(A.row_major() == proxy.lhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
219  assert(A.row_major() == proxy.rhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
220  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
221  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
222 
224  scheduler::statement statement = scheduler::preset::binary_element_op(&A, &proxy.lhs(), &proxy.rhs(),TYPE);
226 }
227 
228 
229 // Unary operations
230 
236 template<typename NumericT, typename OpT>
239 {
240  assert(A.row_major() == proxy.lhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
241  assert(A.row_major() == proxy.rhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
242 
243  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
244  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
245 
247  scheduler::statement statement = scheduler::preset::unary_element_op(&A, &proxy.lhs(),TYPE);
249 }
250 
251 
252 
// prod_impl (matrix-vector): builds the `mat_vec_prod` preset statement from (A, trans_A, vec, result)
// and executes it with the row_wise_reduction execution handler of A's OpenCL context.
// vec and result must not share the same memory handle (verified only when assertions are enabled).
262 template<typename NumericT>
263 void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
264  const vector_base<NumericT> & vec,
265  vector_base<NumericT> & result)
266 {
267  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
268  assert(viennacl::traits::handle(vec) != viennacl::traits::handle(result) && bool("No direct inplace matrix-vector product possible. Introduce a temporary!"));
269 
    // Kernel name is "mat_vec_T" when exactly one of (trans_A, A.row_major()) is true, otherwise "mat_vec_N".
270  std::string kernel_name = std::string("mat_vec_") + (trans_A ^ A.row_major()?"T":"N");
    // Note: the statement receives the caller's trans_A flag, not the XOR-ed value used for the kernel name.
271  scheduler::statement statement = scheduler::preset::mat_vec_prod(&A, trans_A, &vec, &result);
272  kernels::row_wise_reduction<NumericT>::execution_handler(viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
273 }
274 
275 //
276 
277 
// prod_impl (matrix-matrix): builds the `mat_mat_prod` preset statement from (alpha, A, B, beta, C)
// and executes it with the matrix_prod execution handler selected by C's layout and C's OpenCL context.
// NOTE(review): the parameter line between B and alpha (listing line 286) is missing from this extract;
// the body uses a matrix named C, so presumably that line declares `matrix_base<NumericT> & C,` -- confirm.
283 template<typename NumericT, typename ScalarType >
284 void prod_impl(matrix_base<NumericT> const & A, bool A_trans,
285  matrix_base<NumericT> const & B, bool B_trans,
287  ScalarType alpha,
288  ScalarType beta)
289 {
    // Effective transposition flag: true when exactly one of (requested transposition, row-major layout) holds.
290  bool effective_A_trans = A_trans ^ A.row_major();
291  bool effective_B_trans = B_trans ^ B.row_major();
292 
293  char cAt = effective_A_trans ? 'T' : 'N';
294  char cBt = effective_B_trans ? 'T' : 'N';
295 
    // Kernel name is "prod_" followed by one letter per operand ('T' or 'N'), e.g. "prod_NT".
296  std::string kernel_prefix("prod_");
297  kernel_prefix+=cAt;
298  kernel_prefix+=cBt;
299 
    // Note: the statement is built from the effective flags computed above, not from the raw A_trans / B_trans arguments.
300  scheduler::statement statement = scheduler::preset::mat_mat_prod(alpha, &A, effective_A_trans, &B, effective_B_trans, beta, &C);
301  kernels::matrix_prod<NumericT>::execution_handler(C.row_major(), viennacl::traits::opencl_context(C)).execute(kernel_prefix, statement);
302 }
303 
304 //
306 //
307 
308 
321 template<typename NumericT, typename ScalarT1>
323  ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
324  const vector_base<NumericT> & vec1,
325  const vector_base<NumericT> & vec2)
326 {
327  assert( (viennacl::traits::size1(A) == viennacl::traits::size(vec1)) && bool("Size mismatch in scaled_rank_1_update: size1(A) != size(v1)"));
328  assert( (viennacl::traits::size2(A) == viennacl::traits::size(vec2)) && bool("Size mismatch in scaled_rank_1_update: size2(A) != size(v2)"));
329 
330  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
332  viennacl::ocl::kernel& kernel= detail::legacy_kernel_for_matrix(A, is_cpu ? "scaled_rank1_update_cpu" : "scaled_rank1_update_gpu");
333 
334  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(A),
335  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
336  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
337  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
339 
340  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
341  options_alpha,
342 
343  viennacl::traits::opencl_handle(vec1),
344  cl_uint(viennacl::traits::start(vec1)),
345  cl_uint(viennacl::traits::stride(vec1)),
346  cl_uint(viennacl::traits::size(vec1)),
347 
348  viennacl::traits::opencl_handle(vec2),
349  cl_uint(viennacl::traits::start(vec2)),
350  cl_uint(viennacl::traits::stride(vec2)),
351  cl_uint(viennacl::traits::size(vec2))
352  )
353  );
354 }
355 
356 //
357 template <typename SCALARTYPE, typename VectorType>
359  VectorType & dh,
360  VectorType & sh
361  )
362 {
363  viennacl::vector<SCALARTYPE> D(dh.size());
364  viennacl::vector<SCALARTYPE> S(sh.size());
365 
366  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
368 
369  viennacl::ocl::enqueue(kernel(
370  A,
371  D,
372  S,
373  static_cast<cl_uint>(A.size1()),
374  static_cast<cl_uint>(A.size2()),
375  static_cast<cl_uint>(A.internal_size2())
376  ));
377 
378  fast_copy(D, dh);
379  fast_copy(S, sh);
380 }
381 
382 
383 template <typename NumericT>
387  )
388 {
389  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
390 
391  if(A.row_major())
392  {
395 
396  viennacl::ocl::enqueue(kernel(
397  A,
398  dh,
399  sh,
400  cl_uint(viennacl::traits::size1(A)),
401  cl_uint(viennacl::traits::size2(A)),
403  ));
404  }
405  else
406  {
409 
410  viennacl::ocl::enqueue(kernel(
411  A,
412  dh,
413  sh,
414  cl_uint(viennacl::traits::size1(A)),
415  cl_uint(viennacl::traits::size2(A)),
417  ));
418  }
419 }
420 
421 
// house_update_A_left: enqueues an OpenCL kernel on A and D, choosing between two kernel objects by A's storage layout.
// NOTE(review): the signature lines (listing lines 423-425) and the kernel lookup lines in each branch
// (431-432, 446-447) as well as one kernel argument per branch (440, 455) are missing from this extract;
// the symbol index at the end of this page gives the signature as
// `void house_update_A_left(matrix_base<NumericT> &A, vector_base<NumericT> &D, vcl_size_t start)`.
// The local-memory argument is sized as 128 elements of NumericT (not a fixed 4 bytes per element), so that
// element types wider than 4 bytes get a buffer of the same element count; this matches the sizing used by
// the other Householder update routines in this file.
422 template <typename NumericT>
426 {
427 
428  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
429  if(A.row_major())
430  {
    // Kernel arguments: A, D, start + 1, start, size1(A), size2(A), [argument on missing line 440], local scratch buffer.
433  viennacl::ocl::enqueue(kernel(
434  A,
435  D,
436  static_cast<cl_uint>(start + 1),
437  static_cast<cl_uint>(start),
438  cl_uint(viennacl::traits::size1(A)),
439  cl_uint(viennacl::traits::size2(A)),
441  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
442  ));
443  }
444  else
445  {
    // Same argument list as the row-major branch; only the kernel object (looked up on the missing lines) differs.
448  viennacl::ocl::enqueue(kernel(
449  A,
450  D,
451  static_cast<cl_uint>(start + 1),
452  static_cast<cl_uint>(start),
453  cl_uint(viennacl::traits::size1(A)),
454  cl_uint(viennacl::traits::size2(A)),
456  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
457  ));
458  }
459 
460 
461 
462 
463 }
464 
465 template <typename NumericT>
468 {
469  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
470 
471  if(A.row_major())
472  {
475 
476  viennacl::ocl::enqueue(kernel(
477  A,
478  D,
479  static_cast<cl_uint>(0),
480  static_cast<cl_uint>(0),
481  cl_uint(viennacl::traits::size1(A)),
482  cl_uint(viennacl::traits::size2(A)),
484  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
485  ));
486  }
487  else
488  {
491 
492  viennacl::ocl::enqueue(kernel(
493  A,
494  D,
495  static_cast<cl_uint>(0),
496  static_cast<cl_uint>(0),
497  cl_uint(viennacl::traits::size1(A)),
498  cl_uint(viennacl::traits::size2(A)),
500  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
501  ));
502  }
503 
504 
505 }
506 
507 
508 
509 template <typename NumericT>
512  vcl_size_t A_size1)
513 
514 {
515  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(Q).context());
516 
517  if(Q.row_major())
518  {
521 
522  viennacl::ocl::enqueue(kernel(
523  Q,
524  D,
525  cl_uint(A_size1),
527  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
528  ));
529  }
530  else
531  {
534 
535  viennacl::ocl::enqueue(kernel(
536  Q,
537  D,
538  cl_uint(A_size1),
540  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
541  ));
542  }
543 
544 }
545 
546 
547 template<typename NumericT>
549  vector_base<NumericT>& tmp1,
550  vector_base<NumericT>& tmp2,
551  int l,
552  int m
553  )
554  {
555  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
556 
557  if(matrix.row_major())
558  {
561  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
562  kernel.local_work_size(0, 256);
563 
564  viennacl::ocl::enqueue(kernel(
565  matrix,
566  tmp1,
567  tmp2,
568  cl_uint(viennacl::traits::size1(matrix)),
569  cl_uint(viennacl::traits::internal_size2(matrix)),
570  static_cast<cl_uint>(l),
571  static_cast<cl_uint>(m - 1)
572  ));
573  }
574  else
575  {
578  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
579  kernel.local_work_size(0, 256);
580 
581  viennacl::ocl::enqueue(kernel(
582  matrix,
583  tmp1,
584  tmp2,
585  cl_uint(viennacl::traits::size1(matrix)),
586  cl_uint(viennacl::traits::internal_size2(matrix)),
587  static_cast<cl_uint>(l),
588  static_cast<cl_uint>(m - 1)
589  ));
590  }
591 
592 
593  }
594 
595  template <typename NumericT>
598  vcl_size_t row_start,
599  vcl_size_t col_start,
600  bool copy_col
601  )
602  {
603  std::string kernel_name = copy_col ? SVD_COPY_COL_KERNEL : SVD_COPY_ROW_KERNEL;
604  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
605 
606  if(A.row_major())
607  {
610 
611  viennacl::ocl::enqueue(kernel(
612  A,
613  V,
614  static_cast<cl_uint>(row_start),
615  static_cast<cl_uint>(col_start),
616  copy_col ? cl_uint(viennacl::traits::size1(A))
617  : cl_uint(viennacl::traits::size2(A)),
618  static_cast<cl_uint>(A.internal_size2())
619  ));
620  }
621  else
622  {
625 
626  viennacl::ocl::enqueue(kernel(
627  A,
628  V,
629  static_cast<cl_uint>(row_start),
630  static_cast<cl_uint>(col_start),
631  copy_col ? cl_uint(viennacl::traits::size1(A))
632  : cl_uint(viennacl::traits::size2(A)),
633  static_cast<cl_uint>(A.internal_size2())
634  ));
635  }
636 
637 
638  }
639 
640 #define VIENNACL_SECTION_SIZE 256
641  template<typename NumericT>
643  vector_base<NumericT>& vec2)
644  {
645  vcl_size_t N = static_cast<vcl_size_t>(std::ceil(static_cast<double>(vec1.size()) / static_cast<double>(VIENNACL_SECTION_SIZE)));
648 
649  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context());
655 
656  kernel1.global_work_size(0, VIENNACL_SECTION_SIZE * S.size());
658  viennacl::ocl::enqueue(kernel1(
659  viennacl::traits::opencl_handle(vec1),
660  static_cast<unsigned int>(viennacl::traits::start(vec1)),
661  static_cast<unsigned int>(viennacl::traits::stride(vec1)),
662  static_cast<unsigned int>(viennacl::traits::size(vec1)),
663 
664  viennacl::traits::opencl_handle(vec2),
665  static_cast<unsigned int>(viennacl::traits::start(vec2)),
666  static_cast<unsigned int>(viennacl::traits::stride(vec2)),
667 
668  viennacl::traits::opencl_handle(S),
669  static_cast<unsigned int>(viennacl::traits::start(S)),
670  static_cast<unsigned int>(viennacl::traits::stride(S))));
671 
672 
673  kernel2.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size(S)), 256));
675  viennacl::ocl::enqueue(kernel2(
676  viennacl::traits::opencl_handle(S_ref),
677  static_cast<unsigned int>(viennacl::traits::start(S_ref)),
678  static_cast<unsigned int>(viennacl::traits::stride(S_ref)),
679 
680  viennacl::traits::opencl_handle(S),
681  static_cast<unsigned int>(viennacl::traits::start(S)),
682  static_cast<unsigned int>(viennacl::traits::stride(S)),
683  static_cast<unsigned int>(viennacl::traits::size(S))
684  ));
685 
686  kernel3.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size(S)), 256));
688  viennacl::ocl::enqueue(kernel3(
689  viennacl::traits::opencl_handle(S_ref),
690  static_cast<unsigned int>(viennacl::traits::start(S_ref)),
691  static_cast<unsigned int>(viennacl::traits::stride(S_ref)),
692 
693  viennacl::traits::opencl_handle(S),
694  static_cast<unsigned int>(viennacl::traits::start(S)),
695  static_cast<unsigned int>(viennacl::traits::stride(S))
696  ));
697 
698 
699  kernel4.global_work_size(0, VIENNACL_SECTION_SIZE * S.size());
701  viennacl::ocl::enqueue(kernel4(
702  viennacl::traits::opencl_handle(S),
703  static_cast<unsigned int>(viennacl::traits::start(S)),
704  static_cast<unsigned int>(viennacl::traits::stride(S)),
705 
706  viennacl::traits::opencl_handle(vec2),
707  static_cast<unsigned int>(viennacl::traits::start(vec2)),
708  static_cast<unsigned int>(viennacl::traits::stride(vec2)),
709  static_cast<unsigned int>(viennacl::traits::size(vec2))
710  ));
711 
712 
713  }
714 
715  template<typename NumericT>
717  vector_base<NumericT>& vec2)
718  {
719  vcl_size_t N = static_cast<vcl_size_t>(std::ceil(static_cast<double>(vec1.size()) / static_cast<double>(VIENNACL_SECTION_SIZE)));
722 
723  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context());
729 
730  kernel1.global_work_size(0, VIENNACL_SECTION_SIZE * S.size());
732  viennacl::ocl::enqueue(kernel1(
733  viennacl::traits::opencl_handle(vec1),
734  static_cast<unsigned int>(viennacl::traits::start(vec1)),
735  static_cast<unsigned int>(viennacl::traits::stride(vec1)),
736  static_cast<unsigned int>(viennacl::traits::size(vec1)),
737 
738  viennacl::traits::opencl_handle(vec2),
739  static_cast<unsigned int>(viennacl::traits::start(vec2)),
740  static_cast<unsigned int>(viennacl::traits::stride(vec2)),
741 
742  viennacl::traits::opencl_handle(S),
743  static_cast<unsigned int>(viennacl::traits::start(S)),
744  static_cast<unsigned int>(viennacl::traits::stride(S))));
745 
746  kernel2.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size(S)), 256));
748  viennacl::ocl::enqueue(kernel2(
749  viennacl::traits::opencl_handle(S_ref),
750  static_cast<unsigned int>(viennacl::traits::start(S_ref)),
751  static_cast<unsigned int>(viennacl::traits::stride(S_ref)),
752 
753  viennacl::traits::opencl_handle(S),
754  static_cast<unsigned int>(viennacl::traits::start(S)),
755  static_cast<unsigned int>(viennacl::traits::stride(S)),
756  static_cast<unsigned int>(viennacl::traits::size(S))
757  ));
758 
759  kernel3.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size(S)), 256));
761  viennacl::ocl::enqueue(kernel3(
762  viennacl::traits::opencl_handle(S_ref),
763  static_cast<unsigned int>(viennacl::traits::start(S_ref)),
764  static_cast<unsigned int>(viennacl::traits::stride(S_ref)),
765 
766  viennacl::traits::opencl_handle(S),
767  static_cast<unsigned int>(viennacl::traits::start(S)),
768  static_cast<unsigned int>(viennacl::traits::stride(S))
769  ));
770 
771 
772  kernel4.global_work_size(0, VIENNACL_SECTION_SIZE * S.size());
774  viennacl::ocl::enqueue(kernel4(
775  viennacl::traits::opencl_handle(S),
776  static_cast<unsigned int>(viennacl::traits::start(S)),
777  static_cast<unsigned int>(viennacl::traits::stride(S)),
778 
779  viennacl::traits::opencl_handle(vec2),
780  static_cast<unsigned int>(viennacl::traits::start(vec2)),
781  static_cast<unsigned int>(viennacl::traits::stride(vec2)),
782  static_cast<unsigned int>(viennacl::traits::size(vec2))
783  ));
784 }
785 #undef VIENNACL_SECTION_SIZE
786 
787 } // namespace opencl
788 } //namespace linalg
789 } //namespace viennacl
790 
791 
792 #endif
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &A)
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
const std::string SVD_SCAN_KERNEL_4
void am(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &vec)
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
statement matrix_diag_from_vector(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, int id)
Definition: preset.hpp:346
Generic size and resize functionality for different vector and matrix types.
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
const std::string SVD_BIDIAG_PACK_KERNEL
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
size_type stride2() const
Returns the stride (element increment) along the second dimension (columns).
Definition: matrix_def.hpp:225
const std::string SVD_GIVENS_NEXT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: matrix.hpp:692
ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
Definition: common.hpp:43
A dense matrix class.
Definition: forwards.h:374
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:855
Determines row and column increments for matrices and matrix proxies.
void bidiag_pack(matrix_base< NumericT > &A, viennacl::vector< NumericT > &dh, viennacl::vector< NumericT > &sh)
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:16
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
const std::string SVD_EXCLUSIVE_SCAN_KERNEL_1
void scaled_rank_1_update(matrix_base< NumericT > &A, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
OpenCL kernel file for singular value decomposition.
const std::string SVD_COPY_ROW_KERNEL
Common implementations shared by OpenCL-based operations.
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
const std::string SVD_INCLUSIVE_SCAN_KERNEL_1
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
void ambm(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &C, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:722
statement binary_element_op(NumericT const *x, NumericT const *y, NumericT const *z, scheduler::operation_node_type TYPE)
Definition: preset.hpp:267
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:447
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:847
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
statement mat_vec_prod(viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::vector_base< NumericT > const *x, viennacl::vector_base< NumericT > const *y)
Definition: preset.hpp:393
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
const std::string SVD_SCAN_KERNEL_3
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:63
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
size_type stride1() const
Returns the stride (element increment) along the first dimension (rows).
Definition: matrix_def.hpp:223
statement unary_element_op(NumericT const *x, NumericT const *y, scheduler::operation_node_type TYPE)
Definition: preset.hpp:288
scheduler::statement av(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a)
Definition: preset.hpp:71
std::size_t vcl_size_t
Definition: forwards.h:74
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:217
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
void execute(template_base const &T, statements_container const &statements, viennacl::ocl::context &ctx=viennacl::ocl::current_context(), bool force_compilation=false)
Definition: execute.hpp:44
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
void inclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:215
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
statement mat_mat_prod(NumericT alpha, viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::matrix_base< NumericT > const *B, bool B_trans, NumericT beta, viennacl::matrix_base< NumericT > const *C)
Definition: preset.hpp:399
statement matrix_diag_to_vector(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, int id)
Definition: preset.hpp:340
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &vec)
viennacl::context context(T const &t)
Returns the context (i.e. the active memory domain) associated with an object.
Definition: context.hpp:40
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:537
#define VIENNACL_SECTION_SIZE
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:239
const std::string SVD_SCAN_KERNEL_2
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
void matrix_assign(matrix_base< NumericT > &A, NumericT s, bool up_to_internal_size=false)
scheduler::statement diagonal_assign_cpu(matrix_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:130
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
float ScalarType
Definition: fft_1d.cpp:42
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
statement matrix_row(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, unsigned int id)
Definition: preset.hpp:327
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:605
void givens_next(matrix_base< NumericT > &matrix, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
A tag class representing transposed matrices.
Definition: forwards.h:219
statement matrix_column(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, unsigned int id)
Definition: preset.hpp:333
size_type start2() const
Returns the start index in the second (column) dimension.
Definition: matrix_def.hpp:221
void execute(container_type::key_type const &key, statements_container const &statements)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:129
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:504
const std::string SVD_COPY_COL_KERNEL
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:229
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &vec)
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:106
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
const char * operator_string(scheduler::operation_node_type type)
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:133
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void ambm_m(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &C, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void matrix_diagonal_assign(matrix_base< NumericT > &A, NumericT s)
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the start index in the first (row) dimension.
Definition: matrix_def.hpp:219
Runtime generation of OpenCL kernels for matrix operations.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)