sparse_matrix_operations.hpp
#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/host_based/sparse_matrix_operations.hpp
    @brief Implementations of operations using sparse matrices on the CPU using a single thread or OpenMP.
*/

#include <list>

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/host_based/common.hpp"
#include "viennacl/linalg/host_based/vector_operations.hpp"

namespace viennacl
{
namespace linalg
{
namespace host_based
{
//
// Compressed matrix
//

namespace detail
{
  template<typename NumericT, unsigned int AlignmentV>
  void row_info(compressed_matrix<NumericT, AlignmentV> const & mat,
                vector_base<NumericT> & vec,
                viennacl::linalg::detail::row_info_types info_selector)
  {
    NumericT * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
    NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
    unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
    unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

    for (vcl_size_t row = 0; row < mat.size1(); ++row)
    {
      NumericT value = 0;
      unsigned int row_end = row_buffer[row+1];

      switch (info_selector)
      {
      case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value = std::max<NumericT>(value, std::fabs(elements[i]));
        break;

      case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += std::fabs(elements[i]);
        break;

      case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += elements[i] * elements[i];
        value = std::sqrt(value);
        break;

      case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
        {
          if (col_buffer[i] == row)
          {
            value = elements[i];
            break;
          }
        }
        break;
      }
      result_buf[row] = value;
    }
  }
}

/** @brief Carries out matrix-vector multiplication with a compressed_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::compressed_matrix<NumericT, AlignmentV> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long row = 0; row < static_cast<long>(mat.size1()); ++row)
  {
    NumericT dot_prod = 0;
    vcl_size_t row_end = row_buffer[row+1];
    for (vcl_size_t i = row_buffer[row]; i < row_end; ++i)
      dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];
    result_buf[static_cast<vcl_size_t>(row) * result.stride() + result.start()] = dot_prod;
  }

}

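// Usage sketch (illustrative, not part of this header): this kernel is normally
// reached through the generic interface in viennacl/linalg/prod.hpp rather than
// called directly. Assuming a system of size n:
//
//   viennacl::compressed_matrix<double> A(n, n);
//   viennacl::vector<double> x(n), y(n);
//   // ... fill A and x, e.g. via viennacl::copy() from host data ...
//   y = viennacl::linalg::prod(A, x);   // dispatches to prod_impl() above
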
/** @brief Carries out sparse-matrix-dense-matrix multiplication, the sparse matrix being a compressed_matrix
*
* Implementation of the convenience expression result = prod(sp_mat, d_mat);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::compressed_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_base<NumericT> & d_mat,
                     viennacl::matrix_base<NumericT> & result) {

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() ) {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
      vcl_size_t row_start = sp_mat_row_buffer[row];
      vcl_size_t row_end = sp_mat_row_buffer[row+1];
      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
        NumericT temp = 0;
        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), col);
        }
        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }
  else {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
        vcl_size_t row_start = sp_mat_row_buffer[row];
        vcl_size_t row_end = sp_mat_row_buffer[row+1];
        NumericT temp = 0;
        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_col(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), static_cast<vcl_size_t>(col));
        }
        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }

}

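// Usage sketch (illustrative): the sparse-times-dense kernel above corresponds
// to the high-level expression
//
//   viennacl::compressed_matrix<double> A(m, k);
//   viennacl::matrix<double> B(k, n), C(m, n);
//   C = viennacl::linalg::prod(A, B);
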
/** @brief Carries out matrix-matrix multiplication, the first matrix being a compressed_matrix and the second a transposed dense matrix
*
* Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::compressed_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_expression< const viennacl::matrix_base<NumericT>,
                                                  const viennacl::matrix_base<NumericT>,
                                                  viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT> & result) {

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() ) {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {
      vcl_size_t row_start = sp_mat_row_buffer[row];
      vcl_size_t row_end = sp_mat_row_buffer[row+1];
      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
        NumericT temp = 0;
        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));
        }
        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }
  else {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {
        vcl_size_t row_start = sp_mat_row_buffer[row];
        vcl_size_t row_end = sp_mat_row_buffer[row+1];
        NumericT temp = 0;
        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_col(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));
        }
        if (result.row_major())
          result_wrapper_row(row, col) = temp;
        else
          result_wrapper_col(row, col) = temp;
      }
    }
  }

}

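// Usage sketch (illustrative): the transposed variant above is selected by
// wrapping the dense factor in viennacl::trans():
//
//   C = viennacl::linalg::prod(A, viennacl::trans(B));
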

//
// Triangular solve for compressed_matrix, A \ b
//
namespace detail
{
  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_inplace_solve(IndexArrayT const & row_buffer,
                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,
                         vcl_size_t num_cols,
                         viennacl::linalg::unit_lower_tag)
  {
    vcl_size_t row_begin = row_buffer[1];
    for (vcl_size_t row = 1; row < num_cols; ++row)
    {
      NumericT vec_entry = vec_buffer[row];
      vcl_size_t row_end = row_buffer[row+1];
      for (vcl_size_t i = row_begin; i < row_end; ++i)
      {
        vcl_size_t col_index = col_buffer[i];
        if (col_index < row)
          vec_entry -= vec_buffer[col_index] * element_buffer[i];
      }
      vec_buffer[row] = vec_entry;
      row_begin = row_end;
    }
  }

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_inplace_solve(IndexArrayT const & row_buffer,
                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,
                         vcl_size_t num_cols,
                         viennacl::linalg::lower_tag)
  {
    vcl_size_t row_begin = row_buffer[0];
    for (vcl_size_t row = 0; row < num_cols; ++row)
    {
      NumericT vec_entry = vec_buffer[row];

      // substitute and remember diagonal entry
      vcl_size_t row_end = row_buffer[row+1];
      NumericT diagonal_entry = 0;
      for (vcl_size_t i = row_begin; i < row_end; ++i)
      {
        vcl_size_t col_index = col_buffer[i];
        if (col_index < row)
          vec_entry -= vec_buffer[col_index] * element_buffer[i];
        else if (col_index == row)
          diagonal_entry = element_buffer[i];
      }

      vec_buffer[row] = vec_entry / diagonal_entry;
      row_begin = row_end;
    }
  }


  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_inplace_solve(IndexArrayT const & row_buffer,
                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,
                         vcl_size_t num_cols,
                         viennacl::linalg::unit_upper_tag)
  {
    for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)
    {
      vcl_size_t row = (num_cols - row2) - 1;
      NumericT vec_entry = vec_buffer[row];
      vcl_size_t row_begin = row_buffer[row];
      vcl_size_t row_end = row_buffer[row+1];
      for (vcl_size_t i = row_begin; i < row_end; ++i)
      {
        vcl_size_t col_index = col_buffer[i];
        if (col_index > row)
          vec_entry -= vec_buffer[col_index] * element_buffer[i];
      }
      vec_buffer[row] = vec_entry;
    }
  }

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_inplace_solve(IndexArrayT const & row_buffer,
                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,
                         vcl_size_t num_cols,
                         viennacl::linalg::upper_tag)
  {
    for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)
    {
      vcl_size_t row = (num_cols - row2) - 1;
      NumericT vec_entry = vec_buffer[row];

      // substitute and remember diagonal entry
      vcl_size_t row_begin = row_buffer[row];
      vcl_size_t row_end = row_buffer[row+1];
      NumericT diagonal_entry = 0;
      for (vcl_size_t i = row_begin; i < row_end; ++i)
      {
        vcl_size_t col_index = col_buffer[i];
        if (col_index > row)
          vec_entry -= vec_buffer[col_index] * element_buffer[i];
        else if (col_index == row)
          diagonal_entry = element_buffer[i];
      }

      vec_buffer[row] = vec_entry / diagonal_entry;
    }
  }

} //namespace detail
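
// Worked example (illustrative) for the unit_lower solver above: with the
// strictly lower part of
//
//       [ 1  0  0 ]        row_buffer     = { 0, 0, 1, 3 }
//   L = [ 2  1  0 ],       col_buffer     = { 0, 0, 1 }
//       [ 3  4  1 ]        element_buffer = { 2, 3, 4 }
//
// (entries on or above the diagonal are skipped by the col_index < row test)
// and b = (1, 1, 1), forward substitution gives
// x0 = 1,  x1 = 1 - 2*1 = -1,  x2 = 1 - 3*1 - 4*(-1) = 2.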


/** @brief Inplace solution of a lower triangular compressed_matrix with unit diagonal. Typically used for LU substitutions.
*
* @param L    The lower triangular sparse matrix
* @param vec  The right hand side vector; is overwritten with the solution
* @param tag  The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(compressed_matrix<NumericT, AlignmentV> const & L,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::unit_lower_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
}

/** @brief Inplace solution of a lower triangular compressed_matrix. Typically used for LU substitutions.
*
* @param L    The lower triangular sparse matrix
* @param vec  The right hand side vector; is overwritten with the solution
* @param tag  The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(compressed_matrix<NumericT, AlignmentV> const & L,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::lower_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
}


/** @brief Inplace solution of an upper triangular compressed_matrix with unit diagonal. Typically used for LU substitutions.
*
* @param U    The upper triangular sparse matrix
* @param vec  The right hand side vector; is overwritten with the solution
* @param tag  The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(compressed_matrix<NumericT, AlignmentV> const & U,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::unit_upper_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
}

/** @brief Inplace solution of an upper triangular compressed_matrix. Typically used for LU substitutions.
*
* @param U    The upper triangular sparse matrix
* @param vec  The right hand side vector; is overwritten with the solution
* @param tag  The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(compressed_matrix<NumericT, AlignmentV> const & U,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::upper_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
}

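// Usage sketch (illustrative): these overloads back the generic triangular
// solver interface, e.g.
//
//   viennacl::compressed_matrix<double> L(n, n);
//   viennacl::vector<double> b(n);
//   viennacl::linalg::inplace_solve(L, b, viennacl::linalg::unit_lower_tag());
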


//
// Triangular solve for compressed_matrix, A^T \ b
//

namespace detail
{
  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_lower_tag)
  {
    vcl_size_t col_begin = row_buffer[0];
    for (vcl_size_t col = 0; col < num_cols; ++col)
    {
      NumericT vec_entry = vec_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        unsigned int row_index = col_buffer[i];
        if (row_index > col)
          vec_buffer[row_index] -= vec_entry * element_buffer[i];
      }
      col_begin = col_end;
    }
  }

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::lower_tag)
  {
    vcl_size_t col_begin = row_buffer[0];
    for (vcl_size_t col = 0; col < num_cols; ++col)
    {
      vcl_size_t col_end = row_buffer[col+1];

      // Stage 1: Find diagonal entry:
      NumericT diagonal_entry = 0;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index == col)
        {
          diagonal_entry = element_buffer[i];
          break;
        }
      }

      // Stage 2: Substitute
      NumericT vec_entry = vec_buffer[col] / diagonal_entry;
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index > col)
          vec_buffer[row_index] -= vec_entry * element_buffer[i];
      }
      col_begin = col_end;
    }
  }

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::unit_upper_tag)
  {
    for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
    {
      vcl_size_t col = (num_cols - col2) - 1;

      NumericT vec_entry = vec_buffer[col];
      vcl_size_t col_begin = row_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index < col)
          vec_buffer[row_index] -= vec_entry * element_buffer[i];
      }

    }
  }

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
  void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,
                               vcl_size_t num_cols,
                               viennacl::linalg::upper_tag)
  {
    for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)
    {
      vcl_size_t col = (num_cols - col2) - 1;
      vcl_size_t col_begin = row_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];

      // Stage 1: Find diagonal entry:
      NumericT diagonal_entry = 0;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index == col)
        {
          diagonal_entry = element_buffer[i];
          break;
        }
      }

      // Stage 2: Substitute
      NumericT vec_entry = vec_buffer[col] / diagonal_entry;
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index < col)
          vec_buffer[row_index] -= vec_entry * element_buffer[i];
      }
    }
  }


  //
  // block solves
  //
  template<typename NumericT, unsigned int AlignmentV>
  void block_inplace_solve(const matrix_expression<const compressed_matrix<NumericT, AlignmentV>,
                                                   const compressed_matrix<NumericT, AlignmentV>,
                                                   op_trans> & L,
                           viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                           vector_base<NumericT> const & /* L_diagonal */, //ignored
                           vector_base<NumericT> & vec,
                           viennacl::linalg::unit_lower_tag)
  {
    // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
    NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
    NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    vcl_size_t col_begin = row_buffer[0];
    for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
    {
      NumericT vec_entry = vec_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        unsigned int row_index = col_buffer[i];
        if (row_index > col)
          vec_buffer[row_index] -= vec_entry * elements[i];
      }
      col_begin = col_end;
    }
  }

  template<typename NumericT, unsigned int AlignmentV>
  void block_inplace_solve(const matrix_expression<const compressed_matrix<NumericT, AlignmentV>,
                                                   const compressed_matrix<NumericT, AlignmentV>,
                                                   op_trans> & L,
                           viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                           vector_base<NumericT> const & L_diagonal,
                           vector_base<NumericT> & vec,
                           viennacl::linalg::lower_tag)
  {
    // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
    NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
    NumericT const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(L_diagonal.handle());
    NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    vcl_size_t col_begin = row_buffer[0];
    for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)
    {
      vcl_size_t col_end = row_buffer[col+1];

      NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index > col)
          vec_buffer[row_index] -= vec_entry * elements[i];
      }
      col_begin = col_end;
    }
  }



  template<typename NumericT, unsigned int AlignmentV>
  void block_inplace_solve(const matrix_expression<const compressed_matrix<NumericT, AlignmentV>,
                                                   const compressed_matrix<NumericT, AlignmentV>,
                                                   op_trans> & U,
                           viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */,
                           vector_base<NumericT> const & /* U_diagonal */, //ignored
                           vector_base<NumericT> & vec,
                           viennacl::linalg::unit_upper_tag)
  {
    // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
    NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
    NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
    {
      vcl_size_t col = (U.lhs().size1() - col2) - 1;

      NumericT vec_entry = vec_buffer[col];
      vcl_size_t col_begin = row_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index < col)
          vec_buffer[row_index] -= vec_entry * elements[i];
      }

    }
  }

  template<typename NumericT, unsigned int AlignmentV>
  void block_inplace_solve(const matrix_expression<const compressed_matrix<NumericT, AlignmentV>,
                                                   const compressed_matrix<NumericT, AlignmentV>,
                                                   op_trans> & U,
                           viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */,
                           vector_base<NumericT> const & U_diagonal,
                           vector_base<NumericT> & vec,
                           viennacl::linalg::upper_tag)
  {
    // Note: The following could be implemented more efficiently using the block structure and possibly OpenMP.

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
    NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
    NumericT const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(U_diagonal.handle());
    NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
    {
      vcl_size_t col = (U.lhs().size1() - col2) - 1;
      vcl_size_t col_begin = row_buffer[col];
      vcl_size_t col_end = row_buffer[col+1];

      // Stage 2: Substitute
      NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)
      {
        vcl_size_t row_index = col_buffer[i];
        if (row_index < col)
          vec_buffer[row_index] -= vec_entry * elements[i];
      }
    }
  }


} //namespace detail

/** @brief Inplace solution of a unit lower triangular system, where the system matrix is given as the transpose of a compressed_matrix. Typically used for LU substitutions.
*
* @param proxy  Proxy object for the transposed CSR matrix
* @param vec    The right hand side vector; is overwritten with the solution
* @param tag    The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(matrix_expression< const compressed_matrix<NumericT, AlignmentV>,
                                      const compressed_matrix<NumericT, AlignmentV>,
                                      op_trans> const & proxy,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::unit_lower_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}

/** @brief Inplace solution of a lower triangular system, where the system matrix is given as the transpose of a compressed_matrix. Typically used for LU substitutions.
*
* @param proxy  Proxy object for the transposed CSR matrix
* @param vec    The right hand side vector; is overwritten with the solution
* @param tag    The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(matrix_expression< const compressed_matrix<NumericT, AlignmentV>,
                                      const compressed_matrix<NumericT, AlignmentV>,
                                      op_trans> const & proxy,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::lower_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}


/** @brief Inplace solution of a unit upper triangular system, where the system matrix is given as the transpose of a compressed_matrix. Typically used for LU substitutions.
*
* @param proxy  Proxy object for the transposed CSR matrix
* @param vec    The right hand side vector; is overwritten with the solution
* @param tag    The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(matrix_expression< const compressed_matrix<NumericT, AlignmentV>,
                                      const compressed_matrix<NumericT, AlignmentV>,
                                      op_trans> const & proxy,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::unit_upper_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}


/** @brief Inplace solution of an upper triangular system, where the system matrix is given as the transpose of a compressed_matrix. Typically used for LU substitutions.
*
* @param proxy  Proxy object for the transposed CSR matrix
* @param vec    The right hand side vector; is overwritten with the solution
* @param tag    The solver tag identifying the triangular structure
*/
template<typename NumericT, unsigned int AlignmentV>
void inplace_solve(matrix_expression< const compressed_matrix<NumericT, AlignmentV>,
                                      const compressed_matrix<NumericT, AlignmentV>,
                                      op_trans> const & proxy,
                   vector_base<NumericT> & vec,
                   viennacl::linalg::upper_tag tag)
{
  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
}

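// Usage sketch (illustrative): the transposed-solve overloads above are reached
// by wrapping the matrix in viennacl::trans(), e.g.
//
//   viennacl::linalg::inplace_solve(viennacl::trans(L), b,
//                                   viennacl::linalg::unit_lower_tag());
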


//
// Compressed Compressed Matrix
//

/** @brief Carries out matrix-vector multiplication with a compressed_compressed_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT>
void prod_impl(const viennacl::compressed_compressed_matrix<NumericT> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  vector_assign(result, NumericT(0));

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)
  {
    NumericT dot_prod = 0;
    vcl_size_t row_end = row_buffer[i+1];
    for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
      dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];
    result_buf[row_indices[i] * result.stride() + result.start()] = dot_prod;
  }

}

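// Note: compressed_compressed_matrix stores only the rows that contain
// nonzeros. row_buffer (handle1()) holds CSR-style row pointers for the packed
// rows and row_indices (handle3()) maps each packed row back to its actual row
// index, which is why the loop above runs over nnz1() rows and the result
// vector is zero-initialized first.
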


//
// Coordinate Matrix
//

namespace detail
{
  template<typename NumericT, unsigned int AlignmentV>
  void row_info(coordinate_matrix<NumericT, AlignmentV> const & mat,
                vector_base<NumericT> & vec,
                viennacl::linalg::detail::row_info_types info_selector)
  {
    NumericT * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
    NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
    unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

    NumericT value = 0;
    unsigned int last_row = 0;

    for (vcl_size_t i = 0; i < mat.nnz(); ++i)
    {
      unsigned int current_row = coord_buffer[2*i];

      if (current_row != last_row)
      {
        if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
          value = std::sqrt(value);

        result_buf[last_row] = value;
        value = 0;
        last_row = current_row;
      }

      switch (info_selector)
      {
      case viennacl::linalg::detail::SPARSE_ROW_NORM_INF: //inf-norm
        value = std::max<NumericT>(value, std::fabs(elements[i]));
        break;

      case viennacl::linalg::detail::SPARSE_ROW_NORM_1: //1-norm
        value += std::fabs(elements[i]);
        break;

      case viennacl::linalg::detail::SPARSE_ROW_NORM_2: //2-norm
        value += elements[i] * elements[i];
        break;

      case viennacl::linalg::detail::SPARSE_ROW_DIAGONAL: //diagonal entry
        if (coord_buffer[2*i+1] == current_row)
          value = elements[i];
        break;

      //default:
      //  break;
      }
    }

    if (info_selector == viennacl::linalg::detail::SPARSE_ROW_NORM_2)
      value = std::sqrt(value);

    result_buf[last_row] = value;
  }
}

/** @brief Carries out matrix-vector multiplication with a coordinate_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::coordinate_matrix<NumericT, AlignmentV> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

  for (vcl_size_t i = 0; i < result.size(); ++i)
    result_buf[i * result.stride() + result.start()] = 0;

  for (vcl_size_t i = 0; i < mat.nnz(); ++i)
    result_buf[coord_buffer[2*i] * result.stride() + result.start()]
      += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
}

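// Layout note (illustrative): coordinate_matrix stores one interleaved
// (row, column) index pair per nonzero, i.e. coord_buffer[2*i] and
// coord_buffer[2*i+1] alongside elements[i]. A 2x2 matrix with entries
// (0,0) = 4 and (1,1) = 5 would be stored as
// coord_buffer = { 0, 0,  1, 1 },  elements = { 4, 5 }.
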
/** @brief Carries out sparse-matrix-dense-matrix multiplication, the sparse matrix being a coordinate_matrix
*
* Implementation of the convenience expression result = prod(sp_mat, d_mat);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::coordinate_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_base<NumericT> & d_mat,
                     viennacl::matrix_base<NumericT> & result) {

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() ) {

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col(row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
      NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
      vcl_size_t r = static_cast<vcl_size_t>(sp_mat_coords[2*i]);
      vcl_size_t c = static_cast<vcl_size_t>(sp_mat_coords[2*i+1]);
      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
        NumericT y = d_mat_wrapper_row( c, col);
        if (result.row_major())
          result_wrapper_row(r, col) += x * y;
        else
          result_wrapper_col(r, col) += x * y;
      }
    }
  }

  else {

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      if (result.row_major())
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

      for (vcl_size_t i = 0; i < sp_mat.nnz(); ++i) {

        NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
        vcl_size_t r = static_cast<vcl_size_t>(sp_mat_coords[2*i]);
        vcl_size_t c = static_cast<vcl_size_t>(sp_mat_coords[2*i+1]);
        NumericT y = d_mat_wrapper_col( c, col);

        if (result.row_major())
          result_wrapper_row( r, col) += x*y;
        else
          result_wrapper_col( r, col) += x*y;
      }

    }
  }

}


/** @brief Carries out matrix-matrix multiplication, the first matrix being a coordinate_matrix and the second a transposed dense matrix
*
* Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::coordinate_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_expression< const viennacl::matrix_base<NumericT>,
                                                  const viennacl::matrix_base<NumericT>,
                                                  viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT> & result) {

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
      NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
      vcl_size_t r = static_cast<vcl_size_t>(sp_mat_coords[2*i]);
      vcl_size_t c = static_cast<vcl_size_t>(sp_mat_coords[2*i+1]);
      if (result.row_major())
      {
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_row(r, col) += x * y;
        }
      }
      else
      {
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_col(r, col) += x * y;
        }
      }
    }


  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      if (result.row_major())
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {
      NumericT x = static_cast<NumericT>(sp_mat_elements[i]);
      vcl_size_t r = static_cast<vcl_size_t>(sp_mat_coords[2*i]);
      vcl_size_t c = static_cast<vcl_size_t>(sp_mat_coords[2*i+1]);
      if (result.row_major())
      {
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_row(r, col) += x * y;
        }
      }
      else
      {
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_col(r, col) += x * y;
        }
      }
    }
  }

}
//
// ELL Matrix
//

/** @brief Carries out matrix-vector multiplication with an ell_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::ell_matrix<NumericT, AlignmentV> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  for (vcl_size_t row = 0; row < mat.size1(); ++row)
  {
    NumericT sum = 0;

    for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)
    {
      vcl_size_t offset = row + item_id * mat.internal_size1();
      NumericT val = elements[offset];

      if (val > 0 || val < 0) // skip zero-valued padding entries
      {
        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);
      }
    }

    result_buf[row * result.stride() + result.start()] = sum;
  }
}

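// Layout note (illustrative): ell_matrix pads all rows to the same width and
// stores the entries column-major over rows, so the item_id-th entry of a row
// sits at offset row + item_id * internal_size1(). Padding slots hold the
// value 0, which is why the loop above skips zero entries.
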
/** @brief Carries out sparse-matrix-dense-matrix multiplication, the sparse matrix being an ell_matrix
*
* Implementation of the convenience expression result = prod(sp_mat, d_mat);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::ell_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_base<NumericT> & d_mat,
                     viennacl::matrix_base<NumericT> & result)
{
  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.row_major() ) {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)
      {
        vcl_size_t offset = static_cast<vcl_size_t>(row) + static_cast<vcl_size_t>(item_id) * sp_mat.internal_size1();
        NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
        vcl_size_t sp_mat_col = static_cast<vcl_size_t>(sp_mat_coords[offset]);

        if (bool(sp_mat_val))
        {
          if (result.row_major())
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_row(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);
          else
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_col(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);
        }
      }
    }
  }
  else {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      if (result.row_major())
        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

          vcl_size_t offset = row + item_id * sp_mat.internal_size1();
          NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
          vcl_size_t sp_mat_col = static_cast<vcl_size_t>(sp_mat_coords[offset]);

          if (bool(sp_mat_val)) {
            if (result.row_major())
              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
            else
              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
          }
        }
      }
    }
  }

}

/** @brief Carries out matrix-matrix multiplication, the first matrix being an ell_matrix and the second a transposed dense matrix
*
* Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::ell_matrix<NumericT, AlignmentV> & sp_mat,
               const viennacl::matrix_expression< const viennacl::matrix_base<NumericT>,
                                                  const viennacl::matrix_base<NumericT>,
                                                  viennacl::op_trans > & d_mat,
                     viennacl::matrix_base<NumericT> & result) {

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());

  vcl_size_t result_start1 = viennacl::traits::start1(result);
  vcl_size_t result_start2 = viennacl::traits::start2(result);
  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);

  detail::matrix_array_wrapper<NumericT const, row_major, false>
      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
  detail::matrix_array_wrapper<NumericT const, column_major, false>
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

  detail::matrix_array_wrapper<NumericT, row_major, false>
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  detail::matrix_array_wrapper<NumericT, column_major, false>
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)
    {
      if (result.row_major())
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

    for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

          vcl_size_t offset = row + item_id * sp_mat.internal_size1();
          NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
          vcl_size_t sp_mat_col = static_cast<vcl_size_t>(sp_mat_coords[offset]);

          if (bool(sp_mat_val))
          {
            if (result.row_major())
              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);
            else
              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);
          }
        }
      }
    }
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
    {
      if (result.row_major())
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_row( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
      else
        for (vcl_size_t row = 0; row < sp_mat.size1(); ++row)
          result_wrapper_col( row, col) = (NumericT)0; /* filling result with zeros, as the product loops are reordered */
    }

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {

      for (vcl_size_t row = 0; row < sp_mat.size1(); ++row) {

        vcl_size_t offset = row + static_cast<vcl_size_t>(item_id) * sp_mat.internal_size1();
        NumericT sp_mat_val = static_cast<NumericT>(sp_mat_elements[offset]);
        vcl_size_t sp_mat_col = static_cast<vcl_size_t>(sp_mat_coords[offset]);

        if (bool(sp_mat_val))
        {
          if (result.row_major())
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
          else
            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
        }
      }
    }
  }

}


//
// SELL-C-\sigma Matrix
//

/** @brief Carries out matrix-vector multiplication with a sliced_ell_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT, typename IndexT>
void prod_impl(const viennacl::sliced_ell_matrix<NumericT, IndexT> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  IndexT const * columns_per_block = detail::extract_raw_pointer<IndexT>(mat.handle1());
  IndexT const * column_indices = detail::extract_raw_pointer<IndexT>(mat.handle2());
  IndexT const * block_start = detail::extract_raw_pointer<IndexT>(mat.handle3());

  vcl_size_t num_blocks = mat.size1() / mat.rows_per_block() + 1;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long block_idx2 = 0; block_idx2 < static_cast<long>(num_blocks); ++block_idx2)
  {
    vcl_size_t block_idx = static_cast<vcl_size_t>(block_idx2);
    vcl_size_t current_columns_per_block = columns_per_block[block_idx];

    std::vector<NumericT> result_values(mat.rows_per_block());

    for (IndexT column_entry_index = 0;
                column_entry_index < current_columns_per_block;
              ++column_entry_index)
    {
      vcl_size_t stride_start = block_start[block_idx] + column_entry_index * mat.rows_per_block();
      // Note: This for-loop may be unrolled by hand for exploiting vectorization
      //       Careful benchmarking recommended first, memory channels may be saturated already!
      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)
      {
        NumericT val = elements[stride_start + row_in_block];

        result_values[row_in_block] += (val > 0 || val < 0) ? vec_buf[column_indices[stride_start + row_in_block] * vec.stride() + vec.start()] * val : 0;
      }
    }

    vcl_size_t first_row_in_matrix = block_idx * mat.rows_per_block();
    for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)
    {
      if (first_row_in_matrix + row_in_block < result.size())
        result_buf[(first_row_in_matrix + row_in_block) * result.stride() + result.start()] = result_values[row_in_block];
    }
  }
}

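// Layout note (illustrative): sliced_ell_matrix (SELL-C-sigma) applies the ELL
// padding per block of rows_per_block() rows instead of globally: each block
// stores columns_per_block[block_idx] entries per row, starting at
// block_start[block_idx], so short rows only pay padding within their block.
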

//
// Hybrid Matrix
//

/** @brief Carries out matrix-vector multiplication with a hyb_matrix
*
* Implementation of the convenience expression result = prod(mat, vec);
*/
template<typename NumericT, unsigned int AlignmentV>
void prod_impl(const viennacl::hyb_matrix<NumericT, AlignmentV> & mat,
               const viennacl::vector_base<NumericT> & vec,
                     viennacl::vector_base<NumericT> & result)
{
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());


  for (vcl_size_t row = 0; row < mat.size1(); ++row)
  {
    NumericT sum = 0;

    //
    // Part 1: Process ELL part
    //
    for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
    {
      vcl_size_t offset = row + item_id * mat.internal_size1();
      NumericT val = elements[offset];

      if (val > 0 || val < 0) // skip zero-valued padding entries
      {
        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);
      }
    }

    //
    // Part 2: Process CSR part
    //
    vcl_size_t col_begin = csr_row_buffer[row];
    vcl_size_t col_end = csr_row_buffer[row + 1];

    for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
    {
      sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);
    }

    result_buf[row * result.stride() + result.start()] = sum;
  }

}

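// Layout note (illustrative): hyb_matrix keeps a fixed-width ELL part
// (handle()/handle2()) for the first internal_ellnnz() entries of each row and
// spills longer rows into a CSR remainder (handle3()/handle4()/handle5()), so
// a matrix-vector product accumulates both parts per row, as above.
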
1639 //
1640 // Hybrid Matrix
1641 //
1650 template<typename NumericT, unsigned int AlignmentV>
1652  const viennacl::matrix_base<NumericT> & d_mat,
1654 {
1655  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
1656  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1657 
1658  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat);
1659  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat);
1660  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat);
1661  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat);
1662  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat);
1663  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat);
1664 
1665  vcl_size_t result_start1 = viennacl::traits::start1(result);
1666  vcl_size_t result_start2 = viennacl::traits::start2(result);
1667  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
1668  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
1669  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
1670  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);
1671 
1672  detail::matrix_array_wrapper<NumericT const, row_major, false>
1673  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1674  detail::matrix_array_wrapper<NumericT const, column_major, false>
1675  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1676 
1677  detail::matrix_array_wrapper<NumericT, row_major, false>
1678  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1679  detail::matrix_array_wrapper<NumericT, column_major, false>
1680  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1681 
1682  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
1683  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
1684  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
1685  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
1686  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());
1687 
1688 
1689  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
1690  {
1691  for (vcl_size_t row = 0; row < mat.size1(); ++row)
1692  {
1693  NumericT sum = 0;
1694 
1695  //
1696  // Part 1: Process ELL part
1697  //
1698  for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
1699  {
1700  vcl_size_t offset = row + item_id * mat.internal_size1();
1701  NumericT val = elements[offset];
1702 
1703  if (bool(val)) // process only stored nonzeros; ELL padding entries are zero
1704  {
1705  vcl_size_t col = static_cast<vcl_size_t>(coords[offset]);
1706  if (d_mat.row_major())
1707  sum += d_mat_wrapper_row(col, result_col) * val;
1708  else
1709  sum += d_mat_wrapper_col(col, result_col) * val;
1710  }
1711  }
1712 
1713  //
1714  // Part 2: Process HYB/CSR part
1715  //
1716  vcl_size_t col_begin = csr_row_buffer[row];
1717  vcl_size_t col_end = csr_row_buffer[row + 1];
1718 
1719  if (d_mat.row_major())
1720  for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1721  sum += d_mat_wrapper_row(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];
1722  else
1723  for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1724  sum += d_mat_wrapper_col(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];
1725 
1726  if (result.row_major())
1727  result_wrapper_row(row, result_col) = sum;
1728  else
1729  result_wrapper_col(row, result_col) = sum;
1730  }
1731  } // for result_col
1732 }
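
At the API level the sparse-times-dense kernel above is again reached through viennacl::linalg::prod(). A minimal sketch, assuming the scalar_matrix initializer and prod() dispatch for hyb_matrix times matrix_base:

  #include <map>
  #include <vector>

  #include "viennacl/hyb_matrix.hpp"
  #include "viennacl/matrix.hpp"
  #include "viennacl/linalg/prod.hpp"

  int main()
  {
    std::vector<std::map<unsigned int, double> > host_A(4);
    host_A[0][0] = 2.0; host_A[1][1] = 3.0; host_A[2][2] = 4.0; host_A[3][0] = 1.0;

    viennacl::hyb_matrix<double> A;
    viennacl::copy(host_A, A);                                                // 4x4 sparse factor

    viennacl::matrix<double> B = viennacl::scalar_matrix<double>(4, 2, 1.0);  // 4x2 dense factor
    viennacl::matrix<double> C = viennacl::linalg::prod(A, B);                // 4x2 result, filled column by column as above
  }
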
1733 
1734 
1735 /** @brief Carries out sparse-matrix-transposed(dense-matrix) multiplication, where the sparse matrix is a hyb_matrix
1736 *
1737 * Implementation of the convenience expression result = prod(mat, trans(d_mat));
1738 *
1739 * @param mat    The sparse matrix
1740 * @param d_mat  The transposed dense matrix
1741 * @param result The result matrix
1742 */
1743 template<typename NumericT, unsigned int AlignmentV>
1744 void prod_impl(const viennacl::hyb_matrix<NumericT, AlignmentV> & mat,
1745  const viennacl::matrix_expression< const viennacl::matrix_base<NumericT>,
1746  const viennacl::matrix_base<NumericT>,
1747  viennacl::op_trans > & d_mat,
1748  viennacl::matrix_base<NumericT> & result)
1749 {
1750  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
1751  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1752 
1753  vcl_size_t d_mat_start1 = viennacl::traits::start1(d_mat.lhs());
1754  vcl_size_t d_mat_start2 = viennacl::traits::start2(d_mat.lhs());
1755  vcl_size_t d_mat_inc1 = viennacl::traits::stride1(d_mat.lhs());
1756  vcl_size_t d_mat_inc2 = viennacl::traits::stride2(d_mat.lhs());
1757  vcl_size_t d_mat_internal_size1 = viennacl::traits::internal_size1(d_mat.lhs());
1758  vcl_size_t d_mat_internal_size2 = viennacl::traits::internal_size2(d_mat.lhs());
1759 
1760  vcl_size_t result_start1 = viennacl::traits::start1(result);
1761  vcl_size_t result_start2 = viennacl::traits::start2(result);
1762  vcl_size_t result_inc1 = viennacl::traits::stride1(result);
1763  vcl_size_t result_inc2 = viennacl::traits::stride2(result);
1764  vcl_size_t result_internal_size1 = viennacl::traits::internal_size1(result);
1765  vcl_size_t result_internal_size2 = viennacl::traits::internal_size2(result);
1766 
1767  detail::matrix_array_wrapper<NumericT const, row_major, false>
1768  d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1769  detail::matrix_array_wrapper<NumericT const, column_major, false>
1770  d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1771 
1772  detail::matrix_array_wrapper<NumericT, row_major, false>
1773  result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1774  detail::matrix_array_wrapper<NumericT, column_major, false>
1775  result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1776 
1777  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
1778  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
1779  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
1780  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
1781  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());
1782 
1783 
1784  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)
1785  {
1786  for (vcl_size_t row = 0; row < mat.size1(); ++row)
1787  {
1788  NumericT sum = 0;
1789 
1790  //
1791  // Part 1: Process ELL part
1792  //
1793  for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)
1794  {
1795  vcl_size_t offset = row + item_id * mat.internal_size1();
1796  NumericT val = elements[offset];
1797 
1798  if (bool(val)) // process only stored nonzeros; ELL padding entries are zero
1799  {
1800  vcl_size_t col = static_cast<vcl_size_t>(coords[offset]);
1801  if (d_mat.lhs().row_major())
1802  sum += d_mat_wrapper_row(result_col, col) * val;
1803  else
1804  sum += d_mat_wrapper_col(result_col, col) * val;
1805  }
1806  }
1807 
1808  //
1809  // Part 2: Process HYB/CSR part
1810  //
1811  vcl_size_t col_begin = csr_row_buffer[row];
1812  vcl_size_t col_end = csr_row_buffer[row + 1];
1813 
1814  if (d_mat.lhs().row_major())
1815  for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1816  sum += d_mat_wrapper_row(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];
1817  else
1818  for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1819  sum += d_mat_wrapper_col(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];
1820 
1821  if (result.row_major())
1822  result_wrapper_row(row, result_col) = sum;
1823  else
1824  result_wrapper_col(row, result_col) = sum;
1825  }
1826  } // for result_col
1827 }
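
The transposed variant above differs from the previous kernel only in how the dense operand is indexed (result_col and col swap roles in the d_mat_wrapper_* accesses). A minimal sketch, assuming viennacl::trans() on the dense factor routes to this kernel:

  #include <map>
  #include <vector>

  #include "viennacl/hyb_matrix.hpp"
  #include "viennacl/matrix.hpp"
  #include "viennacl/linalg/prod.hpp"

  int main()
  {
    std::vector<std::map<unsigned int, double> > host_A(4);
    host_A[0][0] = 2.0; host_A[1][1] = 3.0; host_A[2][2] = 4.0; host_A[3][0] = 1.0;

    viennacl::hyb_matrix<double> A;
    viennacl::copy(host_A, A);

    viennacl::matrix<double> B = viennacl::scalar_matrix<double>(2, 4, 1.0);  // trans(B) is 4x2
    viennacl::matrix<double> C = viennacl::linalg::prod(A, viennacl::trans(B));
  }
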
1828 
1829 
1830 } // namespace host_based
1831 } //namespace linalg
1832 } //namespace viennacl
1833 
1834 
1835 #endif