ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sparse_matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/ocl/device.hpp"
27 #include "viennacl/ocl/handle.hpp"
28 #include "viennacl/ocl/kernel.hpp"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/vector.hpp"
31 #include "viennacl/tools/tools.hpp"
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 namespace opencl
45 {
46 
47 //
48 // Compressed matrix
49 //
50 
51 namespace detail
52 {
53  template<typename NumericT, unsigned int AlignmentV>
57  {
58  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
61 
62  viennacl::ocl::enqueue(row_info_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
63  viennacl::traits::opencl_handle(x),
64  cl_uint(A.size1()),
65  cl_uint(info_selector)
66  )
67  );
68  }
69 }
70 
79 template<typename NumericT, unsigned int AlignmentV>
83 {
84  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
86  std::stringstream ss;
87  ss << "vec_mul";
88  unsigned int alignment = AlignmentV; //prevent unreachable code warnings below
89  if (alignment == 4)
90  ss << "4";
91  if (alignment == 8)
92  ss << "8";
93 
95 
97  layout_x.start = cl_uint(viennacl::traits::start(x));
98  layout_x.stride = cl_uint(viennacl::traits::stride(x));
99  layout_x.size = cl_uint(viennacl::traits::size(x));
100  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
101 
103  layout_y.start = cl_uint(viennacl::traits::start(y));
104  layout_y.stride = cl_uint(viennacl::traits::stride(y));
105  layout_y.size = cl_uint(viennacl::traits::size(y));
106  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
107 
108  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
109  x, layout_x,
110  y, layout_y
111  ));
112 }
113 
114 
123 template< typename NumericT, unsigned int AlignmentV>
127 
128  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
132 
133  viennacl::ocl::enqueue(k(sp_A.handle1().opencl_handle(), sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
134  viennacl::traits::opencl_handle(d_A),
135  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
136  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
137  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
139  viennacl::traits::opencl_handle(y),
140  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
141  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
142  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
144 }
145 
155 template<typename NumericT, unsigned int AlignmentV>
159  viennacl::op_trans > const & d_A,
161 
162  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
165  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
166 
167  viennacl::ocl::enqueue(k(sp_A.handle1().opencl_handle(), sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
168  viennacl::traits::opencl_handle(d_A.lhs()),
169  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
170  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
171  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
172  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
173  viennacl::traits::opencl_handle(y),
174  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
175  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
176  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
178 }
179 
180 
181 
182 // triangular solvers
183 
189 template<typename NumericT, unsigned int MAT_AlignmentV>
193 {
194  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
197 
198  k.local_work_size(0, 128);
200  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
201  viennacl::traits::opencl_handle(x),
202  cl_uint(L.size1())
203  )
204  );
205 }
206 
212 template<typename NumericT, unsigned int AlignmentV>
216 {
217  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
219 
221 
222  k.local_work_size(0, 128);
224  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
225  viennacl::traits::opencl_handle(x),
226  cl_uint(L.size1())
227  )
228  );
229 }
230 
231 
237 template<typename NumericT, unsigned int AlignmentV>
241 {
242  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());
245 
246  k.local_work_size(0, 128);
248  viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
249  viennacl::traits::opencl_handle(x),
250  cl_uint(U.size1())
251  )
252  );
253 }
254 
260 template<typename NumericT, unsigned int AlignmentV>
264 {
265  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());
267 
269 
270  k.local_work_size(0, 128);
272  viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
273  viennacl::traits::opencl_handle(x),
274  cl_uint(U.size1())
275  )
276  );
277 }
278 
279 
280 
281 
282 
283 // transposed triangular solvers
284 
285 namespace detail
286 {
287  //
288  // block solves
289  //
290  template<typename NumericT, unsigned int AlignmentV>
293  op_trans> & L,
294  viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
295  vector_base<NumericT> const & /* L_diagonal */, //ignored
298  {
299  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L.lhs()).context());
302  block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));
303 
304  viennacl::ocl::enqueue(block_solve_kernel(L.lhs().handle1().opencl_handle(),
305  L.lhs().handle2().opencl_handle(),
306  L.lhs().handle().opencl_handle(),
307  block_indices.opencl_handle(),
308  x,
309  static_cast<cl_uint>(x.size())));
310  }
311 
312 
313  template<typename NumericT, unsigned int AlignmentV>
316  op_trans> const & U,
317  viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
318  vector_base<NumericT> const & U_diagonal,
321  {
322  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U.lhs()).context());
325  block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));
326 
327  viennacl::ocl::enqueue(block_solve_kernel(U.lhs().handle1().opencl_handle(),
328  U.lhs().handle2().opencl_handle(),
329  U.lhs().handle().opencl_handle(),
330  U_diagonal,
331  block_indices.opencl_handle(),
332  x,
333  static_cast<cl_uint>(x.size())));
334  }
335 
336 
337 }
338 
339 
345 template<typename NumericT, unsigned int AlignmentV>
348  op_trans> const & proxy_L,
351 {
352  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());
355 
356  k.local_work_size(0, 128);
358  viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
359  viennacl::traits::opencl_handle(x),
360  cl_uint(proxy_L.lhs().size1())
361  )
362  );
363 }
364 
365 
371 template<typename NumericT, unsigned int AlignmentV>
374  op_trans> const & proxy_L,
377 {
378  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());
380 
381  viennacl::vector<NumericT> diagonal(x.size());
383 
385 
386  k.local_work_size(0, 128);
387  k.global_work_size(0, k.local_work_size());
388  viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
389  viennacl::traits::opencl_handle(diagonal),
390  viennacl::traits::opencl_handle(x),
391  cl_uint(proxy_L.lhs().size1())
392  )
393  );
394 }
395 
401 template<typename NumericT, unsigned int AlignmentV>
404  op_trans> const & proxy_U,
407 {
408  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());
411 
412  k.local_work_size(0, 128);
414  viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
415  viennacl::traits::opencl_handle(x),
416  cl_uint(proxy_U.lhs().size1())
417  )
418  );
419 }
420 
421 
427 template<typename NumericT, unsigned int AlignmentV>
430  op_trans> const & proxy_U,
433 {
434  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());
436 
437  viennacl::vector<NumericT> diagonal(x.size());
439 
441 
442  k.local_work_size(0, 128);
443  k.global_work_size(0, k.local_work_size());
444  viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
445  viennacl::traits::opencl_handle(diagonal),
446  viennacl::traits::opencl_handle(x),
447  cl_uint(proxy_U.lhs().size1())
448  )
449  );
450 }
451 
452 
453 //
454 // Compressed Compressed matrix
455 //
456 
465 template<typename NumericT>
469 {
470  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
473 
474  y.clear();
475 
477  layout_x.start = cl_uint(viennacl::traits::start(x));
478  layout_x.stride = cl_uint(viennacl::traits::stride(x));
479  layout_x.size = cl_uint(viennacl::traits::size(x));
480  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
481 
483  layout_y.start = cl_uint(viennacl::traits::start(y));
484  layout_y.stride = cl_uint(viennacl::traits::stride(y));
485  layout_y.size = cl_uint(viennacl::traits::size(y));
486  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
487 
488  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle3().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.nnz1()),
489  x, layout_x,
490  y, layout_y
491  ));
492 }
493 
494 
495 //
496 // Coordinate matrix
497 //
498 
499 namespace detail
500 {
501  template<typename NumericT, unsigned int AlignmentV>
505  {
506  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
509  unsigned int thread_num = 256; //k.local_work_size(0);
510 
511  row_info_kernel.local_work_size(0, thread_num);
512 
513  row_info_kernel.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
514  viennacl::ocl::enqueue(row_info_kernel(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
515  viennacl::traits::opencl_handle(x),
516  cl_uint(info_selector),
517  viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
518  viennacl::ocl::local_mem(sizeof(NumericT)*thread_num)) );
519  }
520 }
521 
530 template<typename NumericT, unsigned int AlignmentV>
534 {
535  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
537 
538  y.clear();
539 
541  layout_x.start = cl_uint(viennacl::traits::start(x));
542  layout_x.stride = cl_uint(viennacl::traits::stride(x));
543  layout_x.size = cl_uint(viennacl::traits::size(x));
544  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
545 
547  layout_y.start = cl_uint(viennacl::traits::start(y));
548  layout_y.stride = cl_uint(viennacl::traits::stride(y));
549  layout_y.size = cl_uint(viennacl::traits::size(y));
550  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
551 
552  //std::cout << "prod(coordinate_matrix" << AlignmentV << ", vector) called with internal_nnz=" << A.internal_nnz() << std::endl;
553 
555  unsigned int thread_num = 256; //k.local_work_size(0);
556 
557  k.local_work_size(0, thread_num);
558 
559  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
560  //k.global_work_size(0, thread_num); //Only one work group
561  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
562  viennacl::traits::opencl_handle(x),
563  layout_x,
564  viennacl::traits::opencl_handle(y),
565  layout_y,
566  viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
567  viennacl::ocl::local_mem(sizeof(NumericT)*thread_num)) );
568 
569 }
570 
571 
580 template<typename NumericT, unsigned int AlignmentV>
584 {
585  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
587 
590 
591  y.clear();
592 
593  unsigned int thread_num = 256; //k.local_work_size(0);
594  k.local_work_size(0, thread_num);
595  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
596 
597  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
598  viennacl::traits::opencl_handle(d_A),
599  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
600  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
601  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
603  viennacl::traits::opencl_handle(y),
604  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
605  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
606  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
608  viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
609  viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );
610 
611 }
612 
621 template<typename NumericT, unsigned int AlignmentV>
625  viennacl::op_trans > const & d_A,
627 {
628  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
630 
632  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
633 
634  y.clear();
635 
636  unsigned int thread_num = 256; //k.local_work_size(0);
637  k.local_work_size(0, thread_num);
638  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
639 
640  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
641  viennacl::traits::opencl_handle(d_A),
642  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
643  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
644  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
645  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
646  viennacl::traits::opencl_handle(y),
647  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
648  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
649  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
651  viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
652  viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );
653 
654 }
655 
656 
657 //
658 // ELL Matrix
659 //
660 
661 template<typename NumericT, unsigned int AlignmentV>
665 {
666  assert(A.size1() == y.size());
667  assert(A.size2() == x.size());
668 
669  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
671  y.clear();
672 
674  layout_x.start = cl_uint(viennacl::traits::start(x));
675  layout_x.stride = cl_uint(viennacl::traits::stride(x));
676  layout_x.size = cl_uint(viennacl::traits::size(x));
677  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
678 
680  layout_y.start = cl_uint(viennacl::traits::start(y));
681  layout_y.stride = cl_uint(viennacl::traits::stride(y));
682  layout_y.size = cl_uint(viennacl::traits::size(y));
683  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
684 
685  std::stringstream ss;
686  ss << "vec_mul_" << 1;//(AlignmentV != 1?4:1);
688 
689  unsigned int thread_num = 128;
690  unsigned int group_num = 256;
691 
692  k.local_work_size(0, thread_num);
693  k.global_work_size(0, thread_num * group_num);
694 
695  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
696  A.handle().opencl_handle(),
697  viennacl::traits::opencl_handle(x),
698  layout_x,
699  viennacl::traits::opencl_handle(y),
700  layout_y,
701  cl_uint(A.size1()),
702  cl_uint(A.size2()),
703  cl_uint(A.internal_size1()),
704  cl_uint(A.maxnnz()),
705  cl_uint(A.internal_maxnnz())
706  )
707  );
708 
709 
710 }
711 
721 template<typename NumericT, unsigned int AlignmentV>
725 
726  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
730 
731  //unsigned int thread_num = 128;
732  //unsigned int group_num = 256;
733  //
734  //k.local_work_size(0, thread_num);
735  //k.global_work_size(0, thread_num * group_num);
736 
737  viennacl::ocl::enqueue(k(sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
738  cl_uint(sp_A.size1()),
739  cl_uint(sp_A.size2()),
740  cl_uint(sp_A.internal_size1()),
741  cl_uint(sp_A.maxnnz()),
742  cl_uint(sp_A.internal_maxnnz()),
743  viennacl::traits::opencl_handle(d_A),
744  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
745  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
746  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
748  viennacl::traits::opencl_handle(y),
749  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
750  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
751  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
753  )
754  );
755 }
756 
766 template<typename NumericT, unsigned int AlignmentV>
770  viennacl::op_trans > const & d_A,
772 
773  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
776  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
777 
778  //unsigned int thread_num = 128;
779  //unsigned int group_num = 256;
780  //
781  //k.local_work_size(0, thread_num);
782  //k.global_work_size(0, thread_num * group_num);
783 
784  viennacl::ocl::enqueue(k(sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
785  cl_uint(sp_A.size1()),
786  cl_uint(sp_A.size2()),
787  cl_uint(sp_A.internal_size1()),
788  cl_uint(sp_A.maxnnz()),
789  cl_uint(sp_A.internal_maxnnz()),
790  viennacl::traits::opencl_handle(d_A.lhs()),
791  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
792  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
793  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
794  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
795  viennacl::traits::opencl_handle(y),
796  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
797  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
798  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
800  )
801  );
802 }
803 
804 //
805 // SELL-C-\sigma Matrix
806 //
807 
808 template<typename ScalarT, typename IndexT>
812 {
813  assert(A.size1() == y.size());
814  assert(A.size2() == x.size());
815 
816  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
818  y.clear();
819 
821  layout_x.start = cl_uint(viennacl::traits::start(x));
822  layout_x.stride = cl_uint(viennacl::traits::stride(x));
823  layout_x.size = cl_uint(viennacl::traits::size(x));
824  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
825 
827  layout_y.start = cl_uint(viennacl::traits::start(y));
828  layout_y.stride = cl_uint(viennacl::traits::stride(y));
829  layout_y.size = cl_uint(viennacl::traits::size(y));
830  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
831 
832  std::stringstream ss;
833  ss << "vec_mul_" << 1;//(AlignmentV != 1?4:1);
835 
836  vcl_size_t thread_num = A.rows_per_block();
837  unsigned int group_num = 256;
838 
839  k.local_work_size(0, thread_num);
840  k.global_work_size(0, thread_num * group_num);
841 
842  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(),
843  A.handle2().opencl_handle(),
844  A.handle3().opencl_handle(),
845  A.handle().opencl_handle(),
846  viennacl::traits::opencl_handle(x),
847  layout_x,
848  viennacl::traits::opencl_handle(y),
849  layout_y)
850  );
851 }
852 
853 
854 //
855 // Hybrid Matrix
856 //
857 
858 template<typename NumericT, unsigned int AlignmentV>
862 {
863  assert(A.size1() == y.size());
864  assert(A.size2() == x.size());
865 
866  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
868 
870  layout_x.start = cl_uint(viennacl::traits::start(x));
871  layout_x.stride = cl_uint(viennacl::traits::stride(x));
872  layout_x.size = cl_uint(viennacl::traits::size(x));
873  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
874 
876  layout_y.start = cl_uint(viennacl::traits::start(y));
877  layout_y.stride = cl_uint(viennacl::traits::stride(y));
878  layout_y.size = cl_uint(viennacl::traits::size(y));
879  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
880 
882 
883  unsigned int thread_num = 256;
884  unsigned int group_num = 32;
885 
886  k.local_work_size(0, thread_num);
887  k.global_work_size(0, thread_num * group_num);
888 
889  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
890  A.handle().opencl_handle(),
891  A.handle3().opencl_handle(),
892  A.handle4().opencl_handle(),
893  A.handle5().opencl_handle(),
894  viennacl::traits::opencl_handle(x),
895  layout_x,
896  viennacl::traits::opencl_handle(y),
897  layout_y,
898  cl_uint(A.size1()),
899  cl_uint(A.internal_size1()),
900  cl_uint(A.ell_nnz()),
901  cl_uint(A.internal_ellnnz())
902  )
903  );
904 }
905 
906 template<typename NumericT, unsigned int AlignmentV>
910 {
911  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
915 
916  unsigned int thread_num = 256;
917  unsigned int group_num = 32;
918 
919  k.local_work_size(0, thread_num);
920  k.global_work_size(0, thread_num * group_num);
921 
922  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
923  A.handle().opencl_handle(),
924  A.handle3().opencl_handle(),
925  A.handle4().opencl_handle(),
926  A.handle5().opencl_handle(),
927  cl_uint(A.size1()),
928  cl_uint(A.internal_size1()),
929  cl_uint(A.ell_nnz()),
930  cl_uint(A.internal_ellnnz()),
931  viennacl::traits::opencl_handle(d_A),
932  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
933  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
934  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
936  viennacl::traits::opencl_handle(y),
937  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
938  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
939  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
941  )
942  );
943 }
944 
945 template<typename NumericT, unsigned int AlignmentV>
949  viennacl::op_trans > const & d_A,
951 {
952  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
955  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
956 
957  unsigned int thread_num = 256;
958  unsigned int group_num = 32;
959 
960  k.local_work_size(0, thread_num);
961  k.global_work_size(0, thread_num * group_num);
962 
963  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
964  A.handle().opencl_handle(),
965  A.handle3().opencl_handle(),
966  A.handle4().opencl_handle(),
967  A.handle5().opencl_handle(),
968  cl_uint(A.size1()),
969  cl_uint(A.internal_size1()),
970  cl_uint(A.ell_nnz()),
971  cl_uint(A.internal_ellnnz()),
972  viennacl::traits::opencl_handle(d_A.lhs()),
973  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
974  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
975  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
976  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
977  viennacl::traits::opencl_handle(y),
978  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
979  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
980  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
982  )
983  );
984 }
985 
986 
987 } // namespace opencl
988 } //namespace linalg
989 } //namespace viennacl
990 
991 
992 #endif
vcl_size_t internal_ellnnz() const
Definition: hyb_matrix.hpp:101
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
cl_uint stride
Increment between integers.
Definition: kernel.hpp:50
static void init(viennacl::ocl::context &ctx)
vcl_size_t size1() const
Definition: ell_matrix.hpp:91
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:45
handle_type & handle2()
Definition: ell_matrix.hpp:103
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
const handle_type & handle3() const
Definition: hyb_matrix.hpp:107
const vcl_size_t & size1() const
Returns the number of rows.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:48
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Various little tools used here and there in ViennaCL.
const handle_type & handle() const
Definition: hyb_matrix.hpp:105
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
Definition: common.hpp:70
A tag class representing a lower triangular matrix.
Definition: forwards.h:809
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
Main kernel class for generating OpenCL kernels for coordinate_matrix.
vcl_size_t internal_size1() const
Definition: hyb_matrix.hpp:95
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
vcl_size_t size2() const
Definition: ell_matrix.hpp:92
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
const handle_type & handle4() const
Definition: hyb_matrix.hpp:108
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
vcl_size_t rows_per_block() const
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:245
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:54
vcl_size_t internal_size1() const
Definition: ell_matrix.hpp:88
Common implementations shared by OpenCL-based operations.
Main kernel class for generating OpenCL kernels for ell_matrix.
Definition: ell_matrix.hpp:156
const handle_type & handle2() const
Definition: hyb_matrix.hpp:106
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Main kernel class for generating OpenCL kernels for compressed_matrix.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
OpenCL kernel file for compressed_matrix operations.
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
void inplace_solve(const matrix_base< NumericT > &A, bool A_trans, matrix_base< NumericT > &B, bool B_trans, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
OpenCL kernel file for ell_matrix operations.
Sparse matrix class using the sliced ELLPACK with parameters C, $\sigma$.
Definition: forwards.h:402
void clear()
Resets all entries to zero.
Definition: matrix.hpp:597
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows carry nonzero entries.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t size2() const
Definition: hyb_matrix.hpp:99
std::size_t vcl_size_t
Definition: forwards.h:74
Main kernel class for generating OpenCL kernels for ell_matrix.
OpenCL kernel file for sliced_ell_matrix operations.
vcl_size_t maxnnz() const
Definition: ell_matrix.hpp:95
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
OpenCL kernel file for hyb_matrix operations.
handle_type & handle()
Definition: ell_matrix.hpp:100
void clear()
Resets all entries to zero. Does not change the size of the vector.
Definition: vector.hpp:861
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:239
static void init(viennacl::ocl::context &ctx)
void init()
Definition: Random.hpp:25
OpenCL kernel file for vector operations.
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
vcl_size_t ell_nnz() const
Definition: hyb_matrix.hpp:102
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
OpenCL kernel file for coordinate_matrix operations.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device.
Definition: mem_handle.hpp:89
A tag class representing transposed matrices.
Definition: forwards.h:219
A sparse square matrix in compressed sparse rows format.
const handle_type & handle5() const
Definition: hyb_matrix.hpp:109
vcl_size_t size1() const
Definition: hyb_matrix.hpp:98
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
Definition: hyb_matrix.hpp:183
vcl_size_t internal_maxnnz() const
Definition: ell_matrix.hpp:94
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
Definition: ell_matrix.hpp:163
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:824
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:52
Main kernel class for generating OpenCL kernels for hyb_matrix.
Definition: hyb_matrix.hpp:176
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row and column indices and val denotes the entry.
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)