ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_HPP_
2 #define VIENNACL_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
27 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
36 
37 namespace viennacl
38 {
39 
40 //#ifdef VIENNACL_WITH_OPENCL
41 // template<class NumericT, class DISTRIBUTION>
42 // rand::random_matrix_t<NumericT, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
43 // return rand::random_matrix_t<NumericT,DISTRIBUTION>(size1,size2,distribution);
44 // }
45 //#endif
46 
53 template<typename LHS, typename RHS, typename OP>
54 class matrix_expression
55 {
56  typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type;
57  typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type;
58 
59 public:
61 
62  matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
63 
66  LHS & lhs() const { return lhs_; }
69  RHS & rhs() const { return rhs_; }
70 
74 
75 private:
77  lhs_reference_type lhs_;
79  rhs_reference_type rhs_;
80 };
81 
82 
/** @brief Empty tag type denoting iteration along rows (presumably passed as the ROWCOL argument of matrix_iterator — confirm against its users). */
84 struct row_iteration {};
85 
/** @brief Empty tag type denoting iteration along columns (presumably passed as the ROWCOL argument of matrix_iterator — confirm against its users). */
87 struct col_iteration {};
88 
89 //STL-like iterator. TODO: STL-compliance...
91 template<typename ROWCOL, typename MatrixT>
92 class matrix_iterator
93 {
94  typedef matrix_iterator<ROWCOL, MatrixT> self_type;
95 public:
96  typedef typename MatrixT::value_type value_type;
97 
98  matrix_iterator(MatrixT & mat,
99  vcl_size_t start_row,
100  vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
101 
102  value_type operator*(void) { return mat_(row_, col_); }
103  self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MatrixT>::apply(mat_, row_, col_); return *this; }
104  self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
105 
106  bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
107  bool operator!=(self_type const & other) { return !(*this == other); }
108 
109  vcl_size_t index1() { return row_; }
110  vcl_size_t index2() { return col_; }
111 
112  MatrixT & operator()(void) const { return mat_; }
113 
114 private:
115  MatrixT & mat_;
116  vcl_size_t row_;
117  vcl_size_t col_;
118 };
119 
126 template<class NumericT, typename SizeT, typename DistanceT>
128  : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
129  internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, dense_padding_size)),
130  internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, dense_padding_size)),
131  row_major_fixed_(true), row_major_(is_row_major)
132 {
133  if (rows > 0 && columns > 0)
134  {
135  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx);
136  clear();
137  }
138 }
139 
142 template<class NumericT, typename SizeT, typename DistanceT>
143 template<typename LHS, typename RHS, typename OP>
145  size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
146  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
147  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
148  row_major_fixed_(true), row_major_(viennacl::traits::row_major(proxy))
149 {
151  if (internal_size() > 0)
152  {
153  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
154  clear();
155  self_type::operator=(proxy);
156  }
157 }
158 
159 // CUDA or host memory:
160 template<class NumericT, typename SizeT, typename DistanceT>
162  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
163  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
164  bool is_row_major)
165  : size1_(mat_size1), size2_(mat_size2),
166  start1_(mat_start1), start2_(mat_start2),
167  stride1_(mat_stride1), stride2_(mat_stride2),
168  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
169  row_major_fixed_(true), row_major_(is_row_major)
170 {
171  if (mem_type == viennacl::CUDA_MEMORY)
172  {
173 #ifdef VIENNACL_WITH_CUDA
175  elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
176  elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
177 #else
179 #endif
180  }
181  else if (mem_type == viennacl::MAIN_MEMORY)
182  {
184  elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
185  elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
186  }
187 
188  elements_.raw_size(sizeof(NumericT) * internal_size());
189 }
190 
191 #ifdef VIENNACL_WITH_OPENCL
/** @brief Wraps a user-provided OpenCL buffer in a matrix_base without padding.
 *
 * Start offsets are zero, strides are one, and the internal (padded) sizes are
 * set equal to the logical sizes. No memory is allocated in the visible body:
 * the handle is taken over directly from 'mem'.
 *
 * @param mem           The OpenCL buffer to wrap
 * @param rows          Number of rows
 * @param columns       Number of columns
 * @param is_row_major  Storage layout flag stored in row_major_
 * @param ctx           Context whose OpenCL context is attached to the handle
 */
192 template<class NumericT, typename SizeT, typename DistanceT>
193 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
194  : size1_(rows), size2_(columns),
195  start1_(0), start2_(0),
196  stride1_(1), stride2_(1),
197  internal_size1_(rows), internal_size2_(columns),
198  row_major_fixed_(true), row_major_(is_row_major)
199 {
// NOTE(review): the listing jumps from 199 to 201, so a statement may be missing from this excerpt — confirm against the upstream header.
201  elements_.opencl_handle() = mem;
202  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
203  elements_.opencl_handle().context(ctx.opencl_context());
// Record the buffer size in bytes for the (unpadded) rows x columns layout.
204  elements_.raw_size(sizeof(NumericT)*internal_size());
205 }
206 
207 template<class NumericT, typename SizeT, typename DistanceT>
209  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
210  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
211  bool is_row_major)
212  : size1_(mat_size1), size2_(mat_size2),
213  start1_(mat_start1), start2_(mat_start2),
214  stride1_(mat_stride1), stride2_(mat_stride2),
215  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
216  row_major_fixed_(true), row_major_(is_row_major)
217 {
219  elements_.opencl_handle() = mem;
220  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
221  elements_.opencl_handle().context(ctx.opencl_context());
222  elements_.raw_size(sizeof(NumericT)*internal_size());
223 }
224 #endif
225 
226 template<class NumericT, typename SizeT, typename DistanceT>
228  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
229  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
230  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
231  row_major_fixed_(true), row_major_(other.row_major())
232 {
234  if (internal_size() > 0)
235  {
236  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other));
237  clear();
238  self_type::operator=(other);
239  }
240 }
241 
242 template<class NumericT, typename SizeT, typename DistanceT>
244 {
245  if (&other==this)
246  return *this;
247 
248  if (internal_size() == 0)
249  {
250  if (other.internal_size() == 0)
251  return *this;
252  if (!row_major_fixed_)
253  row_major_ = other.row_major();
254  resize(other.size1(), other.size2(), false);
255  }
256 
257  viennacl::linalg::am(*this,
258  other, cpu_value_type(1.0), 1, false, false);
259  return *this;
260 }
261 
266 template<class NumericT, typename SizeT, typename DistanceT>
267 template<typename LHS, typename RHS, typename OP>
269 {
270  assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0)
271  && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
272  && bool("Incompatible matrix sizes!"));
273  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
274  {
275  size1_ = viennacl::traits::size1(proxy);
276  size2_ = viennacl::traits::size2(proxy);
277  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
278  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
279  if (!row_major_fixed_)
280  row_major_ = viennacl::traits::row_major(proxy);
281  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
282  if (size1_ != internal_size1_ || size2_ != internal_size2_)
283  clear();
284  }
285 
286  if (internal_size() > 0)
288 
289  return *this;
290 }
291 
292 
293 // A = trans(B)
294 template<class NumericT, typename SizeT, typename DistanceT>
296 {
297  if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 )
298  {
299  size1_ = viennacl::traits::size1(proxy);
300  size2_ = viennacl::traits::size2(proxy);
301  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
302  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
303  if (!row_major_fixed_)
304  row_major_ = viennacl::traits::row_major(proxy);
305  }
306 
307  if ( handle() == proxy.lhs().handle() )
308  {
309  viennacl::matrix_base<NumericT> temp(proxy.lhs().size2(), proxy.lhs().size1(),proxy.lhs().row_major());
310  viennacl::linalg::trans(proxy, temp);
311  if ( proxy.lhs().size1() != proxy.lhs().size2() )
312  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
313  elements_ = temp.handle();
314  }
315  else
316  {
317  if ( proxy.lhs().size1() != proxy.lhs().size2() )
318  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
319  viennacl::linalg::trans(proxy, *this);
320  }
321  return *this;
322 }
323 
324 template<class NumericT, typename SizeT, typename DistanceT>
325 template<typename LHS, typename RHS, typename OP>
327 {
328  assert( (viennacl::traits::size1(proxy) == size1())
329  && (viennacl::traits::size2(proxy) == size2())
330  && bool("Incompatible matrix sizes!"));
331  assert( (size1() > 0) && bool("Vector not yet initialized!") );
332  assert( (size2() > 0) && bool("Vector not yet initialized!") );
333 
335 
336  return *this;
337 }
338 
339 template<class NumericT, typename SizeT, typename DistanceT>
340 template<typename LHS, typename RHS, typename OP>
342 {
343  assert( (viennacl::traits::size1(proxy) == size1())
344  && (viennacl::traits::size2(proxy) == size2())
345  && bool("Incompatible matrix sizes!"));
346  assert( (size1() > 0) && bool("Vector not yet initialized!") );
347  assert( (size2() > 0) && bool("Vector not yet initialized!") );
348 
350 
351  return *this;
352 }
353 
355 template<class NumericT, typename SizeT, typename DistanceT>
357 {
358  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
359  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
360 
361  if (internal_size() == 0)
362  {
363  size1_ = m.size1();
364  size2_ = m.size2();
365  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
366  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
367  if (internal_size() > 0)
368  {
369  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
370  clear();
371  }
372  }
373  else
374  viennacl::linalg::matrix_assign(*this, NumericT(0));
375 
376  if (internal_size() > 0)
378 
379  return *this;
380 }
381 
383 template<class NumericT, typename SizeT, typename DistanceT>
385 {
386  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
387  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
388 
389  if (internal_size() == 0)
390  {
391  size1_ = m.size1();
392  size2_ = m.size2();
393  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
394  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
395  if (internal_size() > 0)
396  {
397  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
398  clear();
399  }
400  }
401  else
402  viennacl::linalg::matrix_assign(*this, NumericT(0));
403 
404  return *this;
405 }
406 
408 template<class NumericT, typename SizeT, typename DistanceT>
410 {
411  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
412  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
413 
414  if (internal_size() == 0)
415  {
416  size1_ = m.size1();
417  size2_ = m.size2();
418  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
419  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
420  if (internal_size() > 0)
421  {
422  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
423  clear();
424  }
425  }
426 
427  if (internal_size() > 0)
428  {
429  viennacl::linalg::matrix_assign(*this, m(0,0));
430  }
431 
432  return *this;
433 }
434 
435 
436 //read-write access to an element of the matrix/matrix_range/matrix_slice
439 template<class NumericT, typename SizeT, typename DistanceT>
441 {
442  if (row_major_)
443  return entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
444  return entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
445 }
446 
449 template<class NumericT, typename SizeT, typename DistanceT>
451 {
452  if (row_major_)
453  return const_entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
454  return const_entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
455 }
456 
457 //
458 // Operator overloads for enabling implicit conversions:
459 //
460 template<class NumericT, typename SizeT, typename DistanceT>
462 {
464  *this, NumericT(1.0), 1, false, false,
465  other, NumericT(1.0), 1, false, false);
466  return *this;
467 }
468 
469 template<class NumericT, typename SizeT, typename DistanceT>
471 {
473  *this, NumericT(1.0), 1, false, false,
474  other, NumericT(1.0), 1, false, true);
475  return *this;
476 }
477 
479 template<class NumericT, typename SizeT, typename DistanceT>
481 {
482  viennacl::linalg::am(*this,
483  *this, NumericT(val), 1, false, false);
484  return *this;
485 }
486 
488 template<class NumericT, typename SizeT, typename DistanceT>
490 {
491  viennacl::linalg::am(*this,
492  *this, NumericT(val), 1, false, false);
493  return *this;
494 }
495 
497 template<class NumericT, typename SizeT, typename DistanceT>
499 {
500  viennacl::linalg::am(*this,
501  *this, NumericT(val), 1, false, false);
502  return *this;
503 }
504 
506 template<class NumericT, typename SizeT, typename DistanceT>
508 {
509  viennacl::linalg::am(*this,
510  *this, NumericT(val), 1, false, false);
511  return *this;
512 }
513 
515 template<class NumericT, typename SizeT, typename DistanceT>
517 {
518  viennacl::linalg::am(*this,
519  *this, NumericT(val), 1, false, false);
520  return *this;
521 }
522 
524 template<class NumericT, typename SizeT, typename DistanceT>
526 {
527  viennacl::linalg::am(*this,
528  *this, NumericT(val), 1, false, false);
529  return *this;
530 }
531 
532 
533 
535 template<class NumericT, typename SizeT, typename DistanceT>
537 {
538  viennacl::linalg::am(*this,
539  *this, NumericT(val), 1, true, false);
540  return *this;
541 }
542 
544 template<class NumericT, typename SizeT, typename DistanceT>
546 {
547  viennacl::linalg::am(*this,
548  *this, NumericT(val), 1, true, false);
549  return *this;
550 }
551 
553 template<class NumericT, typename SizeT, typename DistanceT>
555 {
556  viennacl::linalg::am(*this,
557  *this, NumericT(val), 1, true, false);
558  return *this;
559 }
560 
562 template<class NumericT, typename SizeT, typename DistanceT>
564 {
565  viennacl::linalg::am(*this,
566  *this, NumericT(val), 1, true, false);
567  return *this;
568 }
569 
571 template<class NumericT, typename SizeT, typename DistanceT>
573 {
574  viennacl::linalg::am(*this,
575  *this, NumericT(val), 1, true, false);
576  return *this;
577 }
578 
580 template<class NumericT, typename SizeT, typename DistanceT>
582 {
583  viennacl::linalg::am(*this,
584  *this, NumericT(val), 1, true, false);
585  return *this;
586 }
587 
588 
590 template<class NumericT, typename SizeT, typename DistanceT>
592 {
594 }
595 
596 template<class NumericT, typename SizeT, typename DistanceT>
598 
599 
600 template<class NumericT, typename SizeT, typename DistanceT>
602 {
603  assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
604 
605  if (preserve && internal_size() > 0)
606  {
607  //get old entries:
608  std::vector< NumericT > old_entries(internal_size());
609  viennacl::backend::memory_read(elements_, 0, sizeof(NumericT)*internal_size(), &(old_entries[0]));
610 
611  //set up entries of new matrix:
612  std::vector< NumericT > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size)
613  * viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size));
614  for (size_type i=0; i<rows; ++i)
615  {
616  if (i >= size1_)
617  continue;
618 
619  for (size_type j=0; j<columns; ++j)
620  {
621  if (j >= size2_)
622  continue;
623  if (row_major_)
624  new_entries[row_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
625  = old_entries[row_major::mem_index(i, j, internal_size1(), internal_size2())];
626  else
627  new_entries[column_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
628  = old_entries[column_major::mem_index(i, j, internal_size1(), internal_size2())];
629  }
630  }
631 
632  //copy new entries to GPU:
633  size1_ = rows;
634  size2_ = columns;
635  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
636  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
637  viennacl::backend::memory_create(elements_, sizeof(NumericT)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
638  }
639  else //discard old entries:
640  {
641  size1_ = rows;
642  size2_ = columns;
643  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
644  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
645 
646  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(elements_));
647  clear();
648  }
649 }
650 
651 
658 template<class NumericT, typename F, unsigned int AlignmentV>
659 class matrix : public matrix_base<NumericT>
660 {
661  typedef matrix<NumericT, F, AlignmentV> self_type;
662  typedef matrix_base<NumericT> base_type;
663 public:
664  typedef typename base_type::size_type size_type;
665 
667  explicit matrix() : base_type(static_cast<bool>(viennacl::is_row_major<F>::value)) {}
668 
675  explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, viennacl::is_row_major<F>::value, ctx) {}
676 
684  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
685  : base_type(ptr_to_mem, mem_type,
686  rows, 0, 1, rows,
687  cols, 0, 1, cols,
688  viennacl::is_row_major<F>::value) {}
689 
690 
700  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
701  size_type rows, size_type internal_row_count,
702  size_type cols, size_type internal_col_count)
703  : base_type(ptr_to_mem, mem_type,
704  rows, 0, 1, internal_row_count,
705  cols, 0, 1, internal_col_count,
706  true, viennacl::is_row_major<F>::value) {}
707 
708 #ifdef VIENNACL_WITH_OPENCL
709  explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns, viennacl::is_row_major<F>::value) {}
710 #endif
711 
712  template<typename LHS, typename RHS, typename OP>
713  matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {}
714 
716  matrix(identity_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
717  {
718  if (base_type::internal_size() > 0)
720  }
721 
723  matrix(zero_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
724  {
725  if (base_type::internal_size() > 0)
727  }
728 
730  matrix(scalar_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
731  {
732  if (base_type::internal_size() > 0)
734  }
735 
736  matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
737  {
738  base_type::operator=(other);
739  }
740 
741 
742  //copy constructor:
743  matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
744  {
745  base_type::operator=(other);
746  }
747 
748 
749  /*template<typename M1>
750  self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
751  {
752  self_type temp(proxy.lhs());
753  *this = trans(temp);
754  return *this;
755  }*/
756 
757  using base_type::operator=;
758 
766  void resize(size_type rows, size_type columns, bool preserve = true)
767  {
768  base_type::resize(rows, columns, preserve);
769  }
770 
771 }; //matrix
772 
773 
774 
780 template<class NumericT>
781 std::ostream & operator<<(std::ostream & s, const matrix_base<NumericT> & gpu_matrix)
782 {
783  typedef typename matrix_base<NumericT>::size_type size_type;
784 
785  std::vector<NumericT> tmp(gpu_matrix.internal_size());
786  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0]));
787 
788  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
789 
790  s << "(";
791  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
792  {
793  s << "(";
794  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
795  {
796  if (gpu_matrix.row_major())
797  s << tmp[row_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
798  else
799  s << tmp[column_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
800 
801  if (j < gpu_matrix.size2() - 1)
802  s << ",";
803  }
804  s << ")";
805  if (i < gpu_matrix.size1() - 1)
806  s << ",";
807  }
808  s << ")";
809  return s;
810 }
811 
817 template<typename LHS, typename RHS, typename OP>
818 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
819 {
821 
822  matrix<ScalarType> temp = expr;
823  s << temp;
824  return s;
825 }
826 
828 template<typename NumericT>
829 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>
830 trans(const matrix_base<NumericT> & mat)
831 {
832  return matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(mat, mat);
833 }
834 
835 //diag():
836 template<typename NumericT>
837 vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>
838 diag(const matrix_base<NumericT> & A, int k = 0)
839 {
841 }
842 
843 template<typename NumericT>
844 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
845 diag(const vector_base<NumericT> & v, int k = 0)
846 {
848 }
849 
850 // row():
851 template<typename NumericT, typename F>
852 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
853 row(const matrix_base<NumericT, F> & A, unsigned int i)
854 {
855  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
856 }
857 
858 // column():
859 template<typename NumericT, typename F>
860 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
861 column(const matrix_base<NumericT, F> & A, unsigned int j)
862 {
863  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
864 }
865 
867 
868 //
869 //cpu to gpu, generic type:
870 //
876 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
877 void copy(const CPUMatrixT & cpu_matrix,
878  matrix<NumericT, F, AlignmentV> & gpu_matrix )
879 {
880  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
881 
882  //std::cout << "Copying CPUMatrixT!" << std::endl;
883  //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
884  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
885  {
886  gpu_matrix.resize(cpu_matrix.size1(),
887  cpu_matrix.size2(), false);
888  }
889 
890  assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
891 
892  std::vector<NumericT> data(gpu_matrix.internal_size());
893  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
894  {
895  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
896  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
897  }
898 
899  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
900  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
901  //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
902 }
903 
904 //
905 //cpu to gpu, STL type:
906 //
912 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
913 void copy(const std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix,
914  matrix<NumericT, F, AlignmentV> & gpu_matrix )
915 {
916  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
917 
918  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
919  {
920  gpu_matrix.resize(cpu_matrix.size(),
921  cpu_matrix[0].size(),
922  false);
923  }
924 
925  assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
926 
927  std::vector<NumericT> data(gpu_matrix.internal_size());
928  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
929  {
930  assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
931 
932  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
933  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
934  }
935 
936  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
937  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
938 }
939 
940 
941 //
942 //cpu to gpu, raw pointer range (fast_copy, no temporary host buffer):
943 //
952 template<typename NumericT, typename F, unsigned int AlignmentV>
953 void fast_copy(NumericT * cpu_matrix_begin,
954  NumericT * cpu_matrix_end,
955  matrix<NumericT, F, AlignmentV> & gpu_matrix)
956 {
957  if (gpu_matrix.internal_size() == 0)
958  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
959  else
960  {
961  assert( (gpu_matrix.internal_size() >= static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin)) && bool("fast_copy(): Matrix not large enough to fit data!"));
962  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);
963  }
964 }
965 
966 
967 #ifdef VIENNACL_WITH_EIGEN
968 
973 template<typename F, unsigned int AlignmentV>
974 void copy(const Eigen::MatrixXf & cpu_matrix,
975  matrix<float, F, AlignmentV> & gpu_matrix)
976 {
977  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
978 
979  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
980  {
981  gpu_matrix.resize(cpu_matrix.rows(),
982  cpu_matrix.cols(),
983  false);
984  }
985  else
986  {
987  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
988  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
989  && bool("matrix size mismatch")
990  );
991  }
992 
993  std::vector<float> data(gpu_matrix.internal_size());
994  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
995  {
996  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
997  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
998  }
999 
1000  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(float) * data.size(), &(data[0]));
1001  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1002 }
1003 
1009 template<typename F, unsigned int AlignmentV>
1010 void copy(const Eigen::MatrixXd & cpu_matrix,
1011  matrix<double, F, AlignmentV> & gpu_matrix)
1012 {
1013  typedef typename matrix<double, F, AlignmentV>::size_type size_type;
1014 
1015  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1016  {
1017  gpu_matrix.resize(cpu_matrix.rows(),
1018  cpu_matrix.cols(),
1019  false);
1020  }
1021  else
1022  {
1023  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1024  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1025  && bool("matrix size mismatch")
1026  );
1027  }
1028 
1029  std::vector<double> data(gpu_matrix.internal_size());
1030  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1031  {
1032  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1033  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1034  }
1035 
1036  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(double) * data.size(), &(data[0]));
1037  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1038 }
1039 #endif
1040 
1041 #ifdef VIENNACL_WITH_MTL4
1042 
1047 template<typename NumericT, typename T, typename F, unsigned int AlignmentV>
1048 void copy(const mtl::dense2D<NumericT, T>& cpu_matrix,
1049  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1050 {
1051  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
1052 
1053  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1054  {
1055  gpu_matrix.resize(cpu_matrix.num_rows(),
1056  cpu_matrix.num_cols(),
1057  false);
1058  }
1059  else
1060  {
1061  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
1062  && (gpu_matrix.size2() == cpu_matrix.num_cols())
1063  && bool("matrix size mismatch")
1064  );
1065  }
1066 
1067  std::vector<NumericT> data(gpu_matrix.internal_size());
1068  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1069  {
1070  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1071  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
1072  }
1073 
1074  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1075  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1076 }
1077 #endif
1078 
1079 
1080 
1081 
1082 //
1083 //gpu to cpu, generic type
1084 //
1090 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
1091 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1092  CPUMatrixT & cpu_matrix )
1093 {
1094  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1095 
1096  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1097  {
1098  assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
1099 
1100  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1101  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1102 
1103  //now copy entries to cpu_matrix:
1104  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1105  {
1106  assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
1107  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1108  cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1109  }
1110  }
1111 }
1112 
1113 //gpu to cpu, STL type
1119 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
1120 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1121  std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix)
1122 {
1123  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1124 
1125  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1126  {
1127  assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
1128 
1129  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1130  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1131 
1132  //now copy entries to cpu_matrix:
1133  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1134  {
1135  assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
1136 
1137  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1138  cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1139  }
1140  }
1141 }
1142 
1143 //gpu to cpu, STL type
1151 template<typename NumericT, typename F, unsigned int AlignmentV>
1153  NumericT * cpu_matrix_begin)
1154 {
1155  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin);
1156 }
1157 
1158 
1159 
1161 
1162 
1163 // operator +
1165 template<typename LHS1, typename RHS1, typename OP1,
1166  typename LHS2, typename RHS2, typename OP2>
1167 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1168 const matrix_expression<const LHS2, const RHS2, OP2>,
1169 op_add>
1171  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1172 {
1173  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1174  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1175  && bool("Incompatible matrix sizes!"));
1176  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1177  const matrix_expression<const LHS2, const RHS2, OP2>,
1178  op_add>(proxy1, proxy2);
1179 }
1180 
1181 template<typename LHS1, typename RHS1, typename OP1,
1182  typename NumericT>
1183 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1184 const matrix_base<NumericT>,
1185 op_add>
1187  matrix_base<NumericT> const & proxy2)
1188 {
1189  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1190  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1191  && bool("Incompatible matrix sizes!"));
1192  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1193  const matrix_base<NumericT>,
1194  op_add>(proxy1, proxy2);
1195 }
1196 
1197 template<typename NumericT,
1198  typename LHS2, typename RHS2, typename OP2>
1199 matrix_expression< const matrix_base<NumericT>,
1200 const matrix_expression<const LHS2, const RHS2, OP2>,
1201 op_add>
1202 operator + (matrix_base<NumericT> const & proxy1,
1203  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1204 {
1205  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1206  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1207  && bool("Incompatible matrix sizes!"));
1208  return matrix_expression< const matrix_base<NumericT>,
1209  const matrix_expression<const LHS2, const RHS2, OP2>,
1210  op_add>(proxy1, proxy2);
1211 }
1212 
1214 template<typename NumericT>
1215 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_add >
1216 operator + (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
1217 {
1218  return matrix_expression< const matrix_base<NumericT>,
1219  const matrix_base<NumericT>,
1220  op_add > (m1, m2);
1221 }
1222 
1223 
1224 // operator -
1225 template<typename LHS1, typename RHS1, typename OP1,
1226  typename LHS2, typename RHS2, typename OP2>
1227 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1228 const matrix_expression<const LHS2, const RHS2, OP2>,
1229 op_sub>
1231  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1232 {
1233  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1234  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1235  && bool("Incompatible matrix sizes!"));
1236  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1237  const matrix_expression<const LHS2, const RHS2, OP2>,
1238  op_sub>(proxy1, proxy2);
1239 }
1240 
1241 template<typename LHS1, typename RHS1, typename OP1,
1242  typename NumericT>
1243 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1244 const matrix_base<NumericT>,
1245 op_sub>
1247  matrix_base<NumericT> const & proxy2)
1248 {
1249  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1250  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1251  && bool("Incompatible matrix sizes!"));
1252  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1253  const matrix_base<NumericT>,
1254  op_sub>(proxy1, proxy2);
1255 }
1256 
1257 template<typename NumericT,
1258  typename LHS2, typename RHS2, typename OP2>
1259 matrix_expression< const matrix_base<NumericT>,
1260 const matrix_expression<const LHS2, const RHS2, OP2>,
1261 op_sub>
1262 operator - (matrix_base<NumericT> const & proxy1,
1263  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1264 {
1265  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1266  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1267  && bool("Incompatible matrix sizes!"));
1268  return matrix_expression< const matrix_base<NumericT>,
1269  const matrix_expression<const LHS2, const RHS2, OP2>,
1270  op_sub>(proxy1, proxy2);
1271 }
1272 
1274 template<typename NumericT>
1275 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_sub >
1276 operator - (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
1277 {
1278  return matrix_expression< const matrix_base<NumericT>,
1279  const matrix_base<NumericT>,
1280  op_sub > (m1, m2);
1281 }
1282 
1283 
1284 
1285 // operator *
1291 template<typename S1, typename NumericT>
1293 matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
1294 >::type
1295 operator * (S1 const & value, matrix_base<NumericT> const & m1)
1296 {
1297  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, value);
1298 }
1299 
1301 template<typename NumericT>
1302 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1303 operator * (char value, matrix_base<NumericT> const & m1)
1304 {
1305  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1306 }
1307 
1309 template<typename NumericT>
1310 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1311 operator * (short value, matrix_base<NumericT> const & m1)
1312 {
1313  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1314 }
1315 
1317 template<typename NumericT>
1318 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1319 operator * (int value, matrix_base<NumericT> const & m1)
1320 {
1321  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1322 }
1323 
1325 template<typename NumericT>
1326 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1327 operator * (long value, matrix_base<NumericT> const & m1)
1328 {
1329  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1330 }
1331 
1333 template<typename NumericT>
1334 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1335 operator * (float value, matrix_base<NumericT> const & m1)
1336 {
1337  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1338 }
1339 
1341 template<typename NumericT>
1342 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1343 operator * (double value, matrix_base<NumericT> const & m1)
1344 {
1345  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1346 }
1347 
1348 
1349 
1355 template<typename LHS, typename RHS, typename OP, typename S1>
1357 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1359  S1 const & val)
1360 {
1361  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1362 }
1363 
1364 
1370 template<typename S1, typename LHS, typename RHS, typename OP>
1372 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1373 operator * (S1 const & val,
1374  matrix_expression< LHS, RHS, OP> const & proxy)
1375 {
1376  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1377 }
1378 
1381 template<typename NumericT, typename S1>
1383 matrix_expression< const matrix_base<NumericT>, const S1, op_mult> >::type
1384 operator * (matrix_base<NumericT> const & m1, S1 const & s1)
1385 {
1386  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, s1);
1387 }
1388 
1390 template<typename NumericT>
1391 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1392 operator * (matrix_base<NumericT> const & m1, char s1)
1393 {
1394  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1395 }
1396 
1398 template<typename NumericT>
1399 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1400 operator * (matrix_base<NumericT> const & m1, short s1)
1401 {
1402  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1403 }
1404 
1406 template<typename NumericT>
1407 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1408 operator * (matrix_base<NumericT> const & m1, int s1)
1409 {
1410  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1411 }
1412 
1414 template<typename NumericT>
1415 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1416 operator * (matrix_base<NumericT> const & m1, long s1)
1417 {
1418  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1419 }
1420 
1422 template<typename NumericT>
1423 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1424 operator * (matrix_base<NumericT> const & m1, float s1)
1425 {
1426  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1427 }
1428 
1430 template<typename NumericT>
1431 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1432 operator * (matrix_base<NumericT> const & m1, double s1)
1433 {
1434  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1435 }
1436 
1437 
1438 // operator *=
1439 
1441 template<typename NumericT, typename S1>
1442 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1443 operator *= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1444 {
1445  bool is_sign_flip = viennacl::is_flip_sign_scalar<S1>::value;
1447  m1, gpu_val, 1, false, is_sign_flip ? true : false);
1448  return m1;
1449 }
1450 
1452 template<typename NumericT>
1453 matrix_base<NumericT> &
1454 operator *= (matrix_base<NumericT> & m1, char gpu_val)
1455 {
1457  m1, NumericT(gpu_val), 1, false, false);
1458  return m1;
1459 }
1460 
1462 template<typename NumericT>
1463 matrix_base<NumericT> &
1464 operator *= (matrix_base<NumericT> & m1, short gpu_val)
1465 {
1467  m1, NumericT(gpu_val), 1, false, false);
1468  return m1;
1469 }
1470 
1472 template<typename NumericT>
1473 matrix_base<NumericT> &
1474 operator *= (matrix_base<NumericT> & m1, int gpu_val)
1475 {
1477  m1, NumericT(gpu_val), 1, false, false);
1478  return m1;
1479 }
1480 
1482 template<typename NumericT>
1483 matrix_base<NumericT> &
1484 operator *= (matrix_base<NumericT> & m1, long gpu_val)
1485 {
1487  m1, NumericT(gpu_val), 1, false, false);
1488  return m1;
1489 }
1490 
1492 template<typename NumericT>
1493 matrix_base<NumericT> &
1494 operator *= (matrix_base<NumericT> & m1, float gpu_val)
1495 {
1497  m1, NumericT(gpu_val), 1, false, false);
1498  return m1;
1499 }
1500 
1502 template<typename NumericT>
1503 matrix_base<NumericT> &
1504 operator *= (matrix_base<NumericT> & m1, double gpu_val)
1505 {
1507  m1, NumericT(gpu_val), 1, false, false);
1508  return m1;
1509 }
1510 
1511 
1512 
1513 // operator /
1514 
1515 
1521 template<typename LHS, typename RHS, typename OP, typename S1>
1523 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
1525  S1 const & val)
1526 {
1527  return matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>(proxy, val);
1528 }
1529 
1530 
1532 template<typename NumericT, typename S1>
1534 matrix_expression< const matrix_base<NumericT>, const S1, op_div> >::type
1535 operator / (matrix_base<NumericT> const & m1, S1 const & s1)
1536 {
1537  return matrix_expression< const matrix_base<NumericT>, const S1, op_div>(m1, s1);
1538 }
1539 
1541 template<typename NumericT>
1542 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1543 operator / (matrix_base<NumericT> const & m1, char s1)
1544 {
1545  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1546 }
1547 
1549 template<typename NumericT>
1550 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1551 operator / (matrix_base<NumericT> const & m1, short s1)
1552 {
1553  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1554 }
1555 
1557 template<typename NumericT>
1558 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1559 operator / (matrix_base<NumericT> const & m1, int s1)
1560 {
1561  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1562 }
1563 
1565 template<typename NumericT>
1566 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1567 operator / (matrix_base<NumericT> const & m1, long s1)
1568 {
1569  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1570 }
1571 
1573 template<typename NumericT>
1574 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1575 operator / (matrix_base<NumericT> const & m1, float s1)
1576 {
1577  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1578 }
1579 
1581 template<typename NumericT>
1582 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1583 operator / (matrix_base<NumericT> const & m1, double s1)
1584 {
1585  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1586 }
1587 
1588 
1589 
1590 // operator /=
1591 
1593 template<typename NumericT, typename S1>
1594 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1595 operator /= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1596 {
1598  m1, gpu_val, 1, true, false);
1599  return m1;
1600 }
1601 
1603 template<typename NumericT>
1604 matrix_base<NumericT> &
1605 operator /= (matrix_base<NumericT> & m1, char gpu_val)
1606 {
1608  m1, NumericT(gpu_val), 1, true, false);
1609  return m1;
1610 }
1611 
1613 template<typename NumericT>
1614 matrix_base<NumericT> &
1615 operator /= (matrix_base<NumericT> & m1, short gpu_val)
1616 {
1618  m1, gpu_val, 1, true, false);
1619  return m1;
1620 }
1621 
1623 template<typename NumericT>
1624 matrix_base<NumericT> &
1625 operator /= (matrix_base<NumericT> & m1, int gpu_val)
1626 {
1628  m1, gpu_val, 1, true, false);
1629  return m1;
1630 }
1631 
1633 template<typename NumericT>
1634 matrix_base<NumericT> &
1635 operator /= (matrix_base<NumericT> & m1, long gpu_val)
1636 {
1638  m1, gpu_val, 1, true, false);
1639  return m1;
1640 }
1641 
1643 template<typename NumericT>
1644 matrix_base<NumericT> &
1645 operator /= (matrix_base<NumericT> & m1, float gpu_val)
1646 {
1648  m1, gpu_val, 1, true, false);
1649  return m1;
1650 }
1651 
1653 template<typename NumericT>
1654 matrix_base<NumericT> &
1655 operator /= (matrix_base<NumericT> & m1, double gpu_val)
1656 {
1658  m1, gpu_val, 1, true, false);
1659  return m1;
1660 }
1661 
1662 
1663 
1664 
1665 
1666 // outer_prod(v1, v2) * val;
1667 template<typename NumericT, typename S1>
1670 const S1,
1671 op_mult>
1672 >::type
1673 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1674  const S1 & val)
1675 {
1676  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1677  const S1,
1678  op_mult>(proxy, val);
1679 }
1680 
1681 template<typename NumericT, typename S1>
1683 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1684 const NumericT,
1685 op_mult>
1686 >::type
1687 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1688  const S1 & val)
1689 {
1690  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1691  const NumericT,
1692  op_mult>(proxy, NumericT(val));
1693 }
1694 
1695 // val * outer_prod(v1, v2);
1696 template<typename NumericT, typename S1>
1698 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1699 const S1,
1700 op_mult>
1701 >::type
1702 operator*(const S1 & val,
1703  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1704 {
1705  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1706  const S1,
1707  op_mult>(proxy, val);
1708 }
1709 
1710 template<typename NumericT, typename S1>
1712 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1713 const NumericT,
1714 op_mult>
1715 >::type
1716 operator*(const S1 & val,
1717  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1718 {
1719  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1720  const NumericT,
1721  op_mult>(proxy, NumericT(val));
1722 }
1723 
1724 
1725 
1726 //
1727 // Specify available operations:
1728 //
1729 
1732 namespace linalg
1733 {
1734 namespace detail
1735 {
1736 
1737  // x = y
1738  template<typename T>
1739  struct op_executor<matrix_base<T>, op_assign, matrix_base<T> >
1740  {
1741  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1742  {
1743  viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
1744  }
1745  };
1746 
1747  // x = trans(y)
1748  template<typename T>
1749  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1750  {
1751  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1752  {
1753  matrix_base<T> temp(rhs);
1754  viennacl::linalg::am(lhs, temp, T(1), 1, false, false);
1755  }
1756  };
1757 
1758 
1759  // x += y
1760  template<typename T>
1761  struct op_executor<matrix_base<T>, op_inplace_add, matrix_base<T> >
1762  {
1763  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1764  {
1765  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
1766  }
1767  };
1768 
1769  // x += trans(y)
1770  template<typename T>
1771  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1772  {
1773  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1774  {
1775  matrix_base<T> temp(rhs);
1776  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, false);
1777  }
1778  };
1779 
1780  // x -= y
1781  template<typename T>
1782  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_base<T> >
1783  {
1784  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1785  {
1786  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
1787  }
1788  };
1789 
1790  // x -= trans(y)
1791  template<typename T>
1792  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1793  {
1794  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1795  {
1796  matrix_base<T> temp(rhs);
1797  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, true);
1798  }
1799  };
1800 
1802 
1803 
1804  // x = alpha * y
1805  template<typename T, typename ScalarType>
1806  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1807  {
1808  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1809  {
1810  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
1811  }
1812  };
1813 
1814  // x += alpha * y
1815  template<typename T, typename ScalarType>
1816  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1817  {
1818  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1819  {
1820  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
1821  }
1822  };
1823 
1824  // x -= alpha * y
1825  template<typename T, typename ScalarType>
1826  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1827  {
1828  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1829  {
1830  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
1831  }
1832  };
1833 
1834 
1836 
1837  // x = alpha * vec_expr
1838  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1839  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1840  {
1841  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1842  {
1843  if (lhs.row_major())
1844  {
1845  matrix<T> temp(proxy.lhs());
1846  lhs = temp * proxy.rhs();
1847  }
1848  else
1849  {
1850  matrix<T, column_major> temp(proxy.lhs());
1851  lhs = temp * proxy.rhs();
1852  }
1853  }
1854  };
1855 
1856  // x += alpha * vec_expr
1857  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1858  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1859  {
1860  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1861  {
1862  if (lhs.row_major())
1863  {
1864  matrix<T> temp(proxy.lhs());
1865  lhs += temp * proxy.rhs();
1866  }
1867  else
1868  {
1869  matrix<T, column_major> temp(proxy.lhs());
1870  lhs += temp * proxy.rhs();
1871  }
1872  }
1873  };
1874 
1875  // x -= alpha * vec_expr
1876  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1877  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1878  {
1879  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1880  {
1881  if (lhs.row_major())
1882  {
1883  matrix<T> temp(proxy.lhs());
1884  lhs -= temp * proxy.rhs();
1885  }
1886  else
1887  {
1888  matrix<T, column_major> temp(proxy.lhs());
1889  lhs -= temp * proxy.rhs();
1890  }
1891  }
1892  };
1893 
1894 
1896 
1897  // x = y / alpha
1898  template<typename T, typename ScalarType>
1899  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1900  {
1901  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1902  {
1903  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
1904  }
1905  };
1906 
1907  // x += y / alpha
1908  template<typename T, typename ScalarType>
1909  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1910  {
1911  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1912  {
1913  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
1914  }
1915  };
1916 
1917  // x -= y / alpha
1918  template<typename T, typename ScalarType>
1919  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1920  {
1921  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1922  {
1923  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
1924  }
1925  };
1926 
1927 
1929 
1930  // x = vec_expr / alpha
1931  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1932  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1933  {
1934  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1935  {
1936  if (lhs.row_major())
1937  {
1938  matrix<T> temp(proxy.lhs());
1939  lhs = temp / proxy.rhs();
1940  }
1941  else
1942  {
1943  matrix<T, column_major> temp(proxy.lhs());
1944  lhs = temp / proxy.rhs();
1945  }
1946  }
1947  };
1948 
1949  // x += vec_expr / alpha
1950  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1951  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1952  {
1953  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1954  {
1955  if (lhs.row_major())
1956  {
1957  matrix<T> temp(proxy.lhs());
1958  lhs += temp / proxy.rhs();
1959  }
1960  else
1961  {
1962  matrix<T, column_major> temp(proxy.lhs());
1963  lhs += temp / proxy.rhs();
1964  }
1965  }
1966  };
1967 
1968  // x -= vec_expr / alpha
1969  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1970  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1971  {
1972  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1973  {
1974  if (lhs.row_major())
1975  {
1976  matrix<T, row_major> temp(proxy.lhs());
1977  lhs -= temp / proxy.rhs();
1978  }
1979  else
1980  {
1981  matrix<T, column_major> temp(proxy.lhs());
1982  lhs -= temp / proxy.rhs();
1983  }
1984  }
1985  };
1986 
1987 
1988 
1989  // generic x = vec_expr1 + vec_expr2:
1990  template<typename T, typename LHS, typename RHS>
1991  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
1992  {
1993  // generic x = vec_expr1 + vec_expr2:
1994  template<typename LHS1, typename RHS1>
1995  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
1996  {
1997  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
1998  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
1999 
2000  if (op_aliasing_lhs || op_aliasing_rhs)
2001  {
2002  matrix_base<T> temp(proxy.lhs());
2003  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2004  lhs = temp;
2005  }
2006  else
2007  {
2008  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2009  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2010  }
2011  }
2012 
2013  // x = y + z
2014  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2015  {
2017  proxy.lhs(), T(1), 1, false, false,
2018  proxy.rhs(), T(1), 1, false, false);
2019  }
2020 
2021  // x = alpha * y + z
2022  template<typename ScalarType>
2023  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2024  const matrix_base<T>,
2025  op_add> const & proxy)
2026  {
2028  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2029  proxy.rhs(), T(1), 1, false, false);
2030  }
2031 
2032  // x = y / alpha + z
2033  template<typename ScalarType>
2034  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2035  const matrix_base<T>,
2036  op_add> const & proxy)
2037  {
2039  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2040  proxy.rhs(), T(1), 1, false, false);
2041  }
2042 
2043  // x = y + beta * z
2044  template<typename ScalarType>
2045  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2046  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2047  op_add> const & proxy)
2048  {
2050  proxy.lhs(), T(1), 1, false, false,
2051  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2052  }
2053 
2054  // x = y + z / beta
2055  template<typename ScalarType>
2056  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2057  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2058  op_add> const & proxy)
2059  {
2061  proxy.lhs(), T(1), 1, false, false,
2062  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2063  }
2064 
2065  // x = alpha * y + beta * z
2066  template<typename ScalarType1, typename ScalarType2>
2067  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2068  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2069  op_add> const & proxy)
2070  {
2072  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2073  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2074  }
2075 
2076  // x = alpha * y + z / beta
2077  template<typename ScalarType1, typename ScalarType2>
2078  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2079  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2080  op_add> const & proxy)
2081  {
2083  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2084  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2085  }
2086 
2087  // x = y / alpha + beta * z
2088  template<typename ScalarType1, typename ScalarType2>
2089  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2090  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2091  op_add> const & proxy)
2092  {
2094  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2095  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2096  }
2097 
2098  // x = y / alpha + z / beta
2099  template<typename ScalarType1, typename ScalarType2>
2100  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2101  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2102  op_add> const & proxy)
2103  {
2105  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2106  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2107  }
2108  };
2109 
2110  // dense = sparse * dense
2111  template<typename T, typename LHS, typename RHS>
2112  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
2113  {
2114  template< typename SparseMatrixType>
2115  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2117  viennacl::op_prod> const & proxy)
2118  {
2119  // check for x = A * x
2120  if (op_aliasing(lhs, proxy.rhs()))
2121  {
2122  matrix_base<T> temp(proxy);
2123  lhs = temp;
2124  }
2125  else
2126  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2127  }
2128 
2129  // dense = sparse * trans(dense)
2130  template< typename SparseMatrixType >
2131  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2135  viennacl::op_prod> const & proxy)
2136  {
2137  // check for x = A * x
2138  if (op_aliasing(lhs, proxy.rhs()))
2139  {
2140  matrix_base<T> temp(proxy);
2141  lhs = temp;
2142  }
2143  else
2144  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2145  }
2146 
2147  };
2148 
2149  // generic x += vec_expr1 + vec_expr2:
2150  template<typename T, typename LHS, typename RHS>
2151  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
2152  {
2153  // generic x += vec_expr1 + vec_expr2:
2154  template<typename LHS1, typename RHS1>
2155  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2156  {
2157  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2158  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2159 
2160  if (op_aliasing_lhs || op_aliasing_rhs)
2161  {
2162  matrix_base<T> temp(proxy.lhs());
2163  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2164  lhs += temp;
2165  }
2166  else
2167  {
2168  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2169  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2170  }
2171  }
2172 
2173  // x += y + z
2174  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2175  {
2177  proxy.lhs(), T(1), 1, false, false,
2178  proxy.rhs(), T(1), 1, false, false);
2179  }
2180 
2181  // x += alpha * y + z
2182  template<typename ScalarType>
2183  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2184  const matrix_base<T>,
2185  op_add> const & proxy)
2186  {
2188  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2189  proxy.rhs(), T(1), 1, false, false);
2190  }
2191 
2192  // x += y / alpha + z
2193  template<typename ScalarType>
2194  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2195  const matrix_base<T>,
2196  op_add> const & proxy)
2197  {
2199  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2200  proxy.rhs(), T(1), 1, false, false);
2201  }
2202 
2203  // x += y + beta * z
2204  template<typename ScalarType>
2205  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2206  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2207  op_add> const & proxy)
2208  {
2210  proxy.lhs(), T(1), 1, false, false,
2211  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2212  }
2213 
2214  // x += y + z / beta
2215  template<typename ScalarType>
2216  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2217  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2218  op_add> const & proxy)
2219  {
2221  proxy.lhs(), T(1), 1, false, false,
2222  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2223  }
2224 
2225  // x += alpha * y + beta * z
2226  template<typename ScalarType1, typename ScalarType2>
2227  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2228  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2229  op_add> const & proxy)
2230  {
2232  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2233  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2234  }
2235 
2236  // x += alpha * y + z / beta
2237  template<typename ScalarType1, typename ScalarType2>
2238  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2239  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2240  op_add> const & proxy)
2241  {
2243  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2244  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2245  }
2246 
2247  // x += y / alpha + beta * z
2248  template<typename ScalarType1, typename ScalarType2>
2249  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2250  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2251  op_add> const & proxy)
2252  {
2254  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2255  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2256  }
2257 
2258  // x += y / alpha + z / beta
2259  template<typename ScalarType1, typename ScalarType2>
2260  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2261  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2262  op_add> const & proxy)
2263  {
2265  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2266  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2267  }
2268  };
2269 
2270 
2271 
2272  // generic x -= vec_expr1 + vec_expr2:
2273  template<typename T, typename LHS, typename RHS>
2274  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
2275  {
2276  // generic x -= vec_expr1 + vec_expr2:
2277  template<typename LHS1, typename RHS1>
2278  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2279  {
2280  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2281  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2282 
2283  if (op_aliasing_lhs || op_aliasing_rhs)
2284  {
2285  matrix_base<T> temp(proxy.lhs());
2286  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2287  lhs -= temp;
2288  }
2289  else
2290  {
2291  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2292  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2293  }
2294  }
2295 
2296  // x -= y + z
2297  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2298  {
2300  proxy.lhs(), T(1), 1, false, true,
2301  proxy.rhs(), T(1), 1, false, true);
2302  }
2303 
2304  // x -= alpha * y + z
2305  template<typename ScalarType>
2306  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2307  const matrix_base<T>,
2308  op_add> const & proxy)
2309  {
2311  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2312  proxy.rhs(), T(1), 1, false, true);
2313  }
2314 
2315  // x -= y / alpha + z
2316  template<typename ScalarType>
2317  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2318  const matrix_base<T>,
2319  op_add> const & proxy)
2320  {
2322  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2323  proxy.rhs(), T(1), 1, false, true);
2324  }
2325 
2326  // x -= y + beta * z
2327  template<typename ScalarType>
2328  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2329  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2330  op_add> const & proxy)
2331  {
2333  proxy.lhs(), T(1), 1, false, true,
2334  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2335  }
2336 
2337  // x -= y + z / beta
2338  template<typename ScalarType>
2339  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2340  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2341  op_add> const & proxy)
2342  {
2344  proxy.lhs(), T(1), 1, false, true,
2345  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2346  }
2347 
2348  // x -= alpha * y + beta * z
2349  template<typename ScalarType1, typename ScalarType2>
2350  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2351  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2352  op_add> const & proxy)
2353  {
2355  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2356  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2357  }
2358 
2359  // x -= alpha * y + z / beta
2360  template<typename ScalarType1, typename ScalarType2>
2361  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2362  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2363  op_add> const & proxy)
2364  {
2366  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2367  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2368  }
2369 
2370  // x -= y / alpha + beta * z
2371  template<typename ScalarType1, typename ScalarType2>
2372  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2373  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2374  op_add> const & proxy)
2375  {
2377  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2378  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2379  }
2380 
2381  // x -= y / alpha + z / beta
2382  template<typename ScalarType1, typename ScalarType2>
2383  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2384  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2385  op_add> const & proxy)
2386  {
2388  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2389  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2390  }
2391  };
2392 
2393 
2394 
2396 
2397 
2398 
2399  // generic x = vec_expr1 - vec_expr2:
2400  template<typename T, typename LHS, typename RHS>
2401  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
2402  {
2403  // generic x = vec_expr1 - vec_expr2:
2404  template<typename LHS1, typename RHS1>
2405  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2406  {
2407  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2408  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2409 
2410  if (op_aliasing_lhs || op_aliasing_rhs)
2411  {
2412  matrix_base<T> temp(proxy.lhs());
2413  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2414  lhs = temp;
2415  }
2416  else
2417  {
2418  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2419  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2420  }
2421  }
2422 
2423  // x = y - z
2424  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2425  {
2427  proxy.lhs(), T(1), 1, false, false,
2428  proxy.rhs(), T(1), 1, false, true);
2429  }
2430 
2431  // x = alpha * y - z
2432  template<typename ScalarType>
2433  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2434  const matrix_base<T>,
2435  op_sub> const & proxy)
2436  {
2438  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2439  proxy.rhs(), T(1), 1, false, true);
2440  }
2441 
2442  // x = y / alpha - z
2443  template<typename ScalarType>
2444  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2445  const matrix_base<T>,
2446  op_sub> const & proxy)
2447  {
2449  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2450  proxy.rhs(), T(1), 1, false, true);
2451  }
2452 
2453  // x = y - beta * z
2454  template<typename ScalarType>
2455  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2456  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2457  op_sub> const & proxy)
2458  {
2460  proxy.lhs(), T(1), 1, false, false,
2461  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2462  }
2463 
2464  // x = y - z / beta
2465  template<typename ScalarType>
2466  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2467  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2468  op_sub> const & proxy)
2469  {
2471  proxy.lhs(), T(1), 1, false, false,
2472  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2473  }
2474 
2475  // x = alpha * y - beta * z
2476  template<typename ScalarType1, typename ScalarType2>
2477  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2478  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2479  op_sub> const & proxy)
2480  {
2482  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2483  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2484  }
2485 
2486  // x = alpha * y - z / beta
2487  template<typename ScalarType1, typename ScalarType2>
2488  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2489  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2490  op_sub> const & proxy)
2491  {
2493  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2494  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2495  }
2496 
2497  // x = y / alpha - beta * z
2498  template<typename ScalarType1, typename ScalarType2>
2499  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2500  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2501  op_sub> const & proxy)
2502  {
2504  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2505  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2506  }
2507 
2508  // x = y / alpha - z / beta
2509  template<typename ScalarType1, typename ScalarType2>
2510  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2511  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2512  op_sub> const & proxy)
2513  {
2515  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2516  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2517  }
2518  };
2519 
2520 
2521  // generic x += vec_expr1 - vec_expr2:
2522  template<typename T, typename LHS, typename RHS>
2523  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
2524  {
2525  // generic x += vec_expr1 - vec_expr2:
2526  template<typename LHS1, typename RHS1>
2527  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2528  {
2529  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2530  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2531 
2532  if (op_aliasing_lhs || op_aliasing_rhs)
2533  {
2534  matrix_base<T> temp(proxy.lhs());
2535  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2536  lhs += temp;
2537  }
2538  else
2539  {
2540  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2541  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2542  }
2543  }
2544 
2545  // x += y - z
2546  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2547  {
2549  proxy.lhs(), T(1), 1, false, false,
2550  proxy.rhs(), T(1), 1, false, true);
2551  }
2552 
2553  // x += alpha * y - z
2554  template<typename ScalarType>
2555  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2556  const matrix_base<T>,
2557  op_sub> const & proxy)
2558  {
2560  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2561  proxy.rhs(), T(1), 1, false, true);
2562  }
2563 
2564  // x += y / alpha - z
2565  template<typename ScalarType>
2566  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2567  const matrix_base<T>,
2568  op_sub> const & proxy)
2569  {
2571  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2572  proxy.rhs(), T(1), 1, false, true);
2573  }
2574 
2575  // x += y - beta * z
2576  template<typename ScalarType>
2577  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2578  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2579  op_sub> const & proxy)
2580  {
2582  proxy.lhs(), T(1), 1, false, false,
2583  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2584  }
2585 
2586  // x += y - z / beta
2587  template<typename ScalarType>
2588  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2589  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2590  op_sub> const & proxy)
2591  {
2593  proxy.lhs(), T(1), 1, false, false,
2594  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2595  }
2596 
2597  // x += alpha * y - beta * z
2598  template<typename ScalarType1, typename ScalarType2>
2599  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2600  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2601  op_sub> const & proxy)
2602  {
2604  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2605  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2606  }
2607 
2608  // x += alpha * y - z / beta
2609  template<typename ScalarType1, typename ScalarType2>
2610  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2611  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2612  op_sub> const & proxy)
2613  {
2615  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2616  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2617  }
2618 
2619  // x += y / alpha - beta * z
2620  template<typename ScalarType1, typename ScalarType2>
2621  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2622  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2623  op_sub> const & proxy)
2624  {
2626  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2627  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2628  }
2629 
2630  // x += y / alpha - z / beta
2631  template<typename ScalarType1, typename ScalarType2>
2632  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2633  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2634  op_sub> const & proxy)
2635  {
2637  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2638  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2639  }
2640  };
2641 
2642 
2643 
2644  // generic x -= vec_expr1 - vec_expr2:
2645  template<typename T, typename LHS, typename RHS>
2646  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
2647  {
2648  // generic x -= vec_expr1 - vec_expr2:
2649  template<typename LHS1, typename RHS1>
2650  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2651  {
2652  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2653  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2654 
2655  if (op_aliasing_lhs || op_aliasing_rhs)
2656  {
2657  matrix_base<T> temp(proxy.lhs());
2658  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2659  lhs -= temp;
2660  }
2661  else
2662  {
2663  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2664  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2665  }
2666  }
2667 
2668  // x -= y - z
2669  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2670  {
2672  proxy.lhs(), T(1), 1, false, true,
2673  proxy.rhs(), T(1), 1, false, false);
2674  }
2675 
2676  // x -= alpha * y - z
2677  template<typename ScalarType>
2678  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2679  const matrix_base<T>,
2680  op_sub> const & proxy)
2681  {
2683  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2684  proxy.rhs(), T(1), 1, false, false);
2685  }
2686 
2687  // x -= y / alpha - z
2688  template<typename ScalarType>
2689  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2690  const matrix_base<T>,
2691  op_sub> const & proxy)
2692  {
2694  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2695  proxy.rhs(), T(1), 1, false, false);
2696  }
2697 
2698  // x -= y - beta * z
2699  template<typename ScalarType>
2700  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2701  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2702  op_sub> const & proxy)
2703  {
2705  proxy.lhs(), T(1), 1, false, true,
2706  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2707  }
2708 
2709  // x -= y - z / beta
2710  template<typename ScalarType>
2711  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2712  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2713  op_sub> const & proxy)
2714  {
2716  proxy.lhs(), T(1), 1, false, true,
2717  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2718  }
2719 
2720  // x -= alpha * y - beta * z
2721  template<typename ScalarType1, typename ScalarType2>
2722  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2723  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2724  op_sub> const & proxy)
2725  {
2727  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2728  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2729  }
2730 
2731  // x -= alpha * y - z / beta
2732  template<typename ScalarType1, typename ScalarType2>
2733  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2734  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2735  op_sub> const & proxy)
2736  {
2738  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2739  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2740  }
2741 
2742  // x -= y / alpha - beta * z
2743  template<typename ScalarType1, typename ScalarType2>
2744  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2745  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2746  op_sub> const & proxy)
2747  {
2749  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2750  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2751  }
2752 
2753  // x -= y / alpha - z / beta
2754  template<typename ScalarType1, typename ScalarType2>
2755  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2756  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2757  op_sub> const & proxy)
2758  {
2760  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2761  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2762  }
2763  };
2764 
2765 
2767 
2768  template<typename T, typename LHS>
2769  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
2770  {
2771  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
2772  {
2773  viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
2774  }
2775  };
2776 
2777 
2778  template<typename T, typename LHS>
2779  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
2780  {
2781  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const int, op_matrix_diag> const & proxy)
2782  {
2783  viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
2784  }
2785  };
2786 
2787  template<typename T, typename LHS>
2788  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
2789  {
2790  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_row> const & proxy)
2791  {
2792  viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
2793  }
2794  };
2795 
2796 
2797  template<typename T, typename LHS>
2798  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
2799  {
2800  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_column> const & proxy)
2801  {
2802  viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
2803  }
2804  };
2805 
2806 
2808 
2809  // generic x = mat_expr1 .* mat_expr2:
2810  template<typename T, typename LHS, typename RHS, typename OP>
2811  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2812  {
2813  // x = y .* z
2814  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2815  {
2816  viennacl::linalg::element_op(lhs, proxy);
2817  }
2818 
2819  // x = y .* mat_expr
2820  template<typename LHS2, typename RHS2, typename OP2>
2821  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2822  {
2823  matrix_base<T> temp(proxy.rhs());
2824  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2825  }
2826 
2827  // x = mat_expr .* z
2828  template<typename LHS1, typename RHS1, typename OP1>
2829  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2830  {
2831  matrix_base<T> temp(proxy.lhs());
2832  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2833  }
2834 
2835  // x = mat_expr .* mat_expr
2836  template<typename LHS1, typename RHS1, typename OP1,
2837  typename LHS2, typename RHS2, typename OP2>
2838  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2839  const matrix_expression<const LHS2, const RHS2, OP2>,
2840  op_element_binary<OP> > const & proxy)
2841  {
2842  matrix_base<T> temp1(proxy.lhs());
2843  matrix_base<T> temp2(proxy.rhs());
2844  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2845  }
2846  };
2847 
2848  // generic x += mat_expr .* mat_expr:
2849  template<typename T, typename LHS, typename RHS, typename OP>
2850  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2851  {
2852  // x += y .* z
2853  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2854  {
2855  matrix_base<T> temp(proxy);
2856  lhs += temp;
2857  }
2858 
2859  // x += y .* mat_expr
2860  template<typename LHS2, typename RHS2, typename OP2>
2861  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2862  {
2863  matrix_base<T> temp(proxy.rhs());
2864  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2865  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2866  lhs += temp2;
2867  }
2868 
2869  // x += mat_expr .* z
2870  template<typename LHS1, typename RHS1, typename OP1>
2871  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2872  {
2873  matrix_base<T> temp(proxy.lhs());
2874  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2875  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2876  lhs += temp2;
2877  }
2878 
2879  // x += mat_expr .* mat_expr
2880  template<typename LHS1, typename RHS1, typename OP1,
2881  typename LHS2, typename RHS2, typename OP2>
2882  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2883  const matrix_expression<const LHS2, const RHS2, OP2>,
2884  op_element_binary<OP> > const & proxy)
2885  {
2886  matrix_base<T> temp1(proxy.lhs());
2887  matrix_base<T> temp2(proxy.rhs());
2888  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2889  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2890  lhs += temp3;
2891  }
2892  };
2893 
2894  // generic x -= mat_expr1 .* mat_expr2:
2895  template<typename T, typename LHS, typename RHS, typename OP>
2896  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2897  {
2898 
2899  // x -= y .* z
2900  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2901  {
2902  matrix_base<T> temp(proxy);
2903  lhs -= temp;
2904  }
2905 
2906  // x -= y .* mat_expr
2907  template<typename LHS2, typename RHS2, typename OP2>
2908  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2909  {
2910  matrix_base<T> temp(proxy.rhs());
2911  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2912  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2913  lhs -= temp2;
2914  }
2915 
2916  // x -= mat_expr .* z
2917  template<typename LHS1, typename RHS1, typename OP1>
2918  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2919  {
2920  matrix_base<T> temp(proxy.lhs());
2921  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2922  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2923  lhs -= temp2;
2924  }
2925 
2926  // x -= mat_expr .* mat_expr
2927  template<typename LHS1, typename RHS1, typename OP1,
2928  typename LHS2, typename RHS2, typename OP2>
2929  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2930  const matrix_expression<const LHS2, const RHS2, OP2>,
2931  op_element_binary<OP> > const & proxy)
2932  {
2933  matrix_base<T> temp1(proxy.lhs());
2934  matrix_base<T> temp2(proxy.rhs());
2935  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2936  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2937  lhs -= temp3;
2938  }
2939  };
2940 
2942 
2943  template<typename T, typename LHS, typename RHS, typename OP>
2944  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2945  {
2946  // x = OP(y)
2947  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2948  {
2949  viennacl::linalg::element_op(lhs, proxy);
2950  }
2951 
2952  // x = OP(vec_expr)
2953  template<typename LHS2, typename RHS2, typename OP2>
2954  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2955  const matrix_expression<const LHS2, const RHS2, OP2>,
2956  op_element_unary<OP> > const & proxy)
2957  {
2958  matrix_base<T> temp(proxy.rhs());
2959  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp));
2960  }
2961  };
2962 
2963  template<typename T, typename LHS, typename RHS, typename OP>
2964  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2965  {
2966  // x += OP(y)
2967  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2968  {
2969  matrix_base<T> temp(proxy);
2970  lhs += temp;
2971  }
2972 
2973  // x += OP(vec_expr)
2974  template<typename LHS2, typename RHS2, typename OP2>
2975  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2976  const matrix_expression<const LHS2, const RHS2, OP2>,
2977  op_element_unary<OP> > const & proxy)
2978  {
2979  matrix_base<T> temp(proxy.rhs());
2980  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
2981  lhs += temp;
2982  }
2983  };
2984 
2985  template<typename T, typename LHS, typename RHS, typename OP>
2986  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2987  {
2988  // x -= OP(y)
2989  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2990  {
2991  matrix_base<T> temp(proxy);
2992  lhs -= temp;
2993  }
2994 
2995  // x -= OP(vec_expr)
2996  template<typename LHS2, typename RHS2, typename OP2>
2997  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2998  const matrix_expression<const LHS2, const RHS2, OP2>,
2999  op_element_unary<OP> > const & proxy)
3000  {
3001  matrix_base<T> temp(proxy.rhs());
3002  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
3003  lhs -= temp;
3004  }
3005  };
3006 
3007 
3008 
3010 
3011  // C = A * B
3012  template<typename T>
3013  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3014  {
3015  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3016  {
3017  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3018  {
3019  matrix_base<T> temp(rhs);
3020  lhs = temp;
3021  }
3022  else
3023  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3024  }
3025  };
3026 
3027  // C = A * B^T
3028  template<typename T>
3029  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>,
3030  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3031  op_mat_mat_prod> >
3032  {
3033  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3034  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3035  op_mat_mat_prod> const & rhs)
3036  {
3037  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3038  {
3039  matrix_base<T> temp(rhs);
3040  lhs = temp;
3041  }
3042  else
3043  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3044  }
3045  };
3046 
3047  // C = A^T * B
3048  template<typename T>
3049  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3050  const matrix_base<T>,
3051  op_mat_mat_prod> >
3052  {
3053  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3054  const matrix_base<T>,
3055  op_mat_mat_prod> const & rhs)
3056  {
3057  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3058  {
3059  matrix_base<T> temp(rhs);
3060  lhs = temp;
3061  }
3062  else
3063  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3064  }
3065  };
3066 
3067  // C = A^T * B^T
3068  template<typename T>
3069  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3070  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3071  op_mat_mat_prod> >
3072  {
3073  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3074  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3075  op_mat_mat_prod> const & rhs)
3076  {
3077  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3078  {
3079  matrix_base<T> temp(rhs);
3080  lhs = temp;
3081  }
3082  else
3083  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3084  }
3085  };
3086 
3087 
3088  // C += A * B
3089  template<typename T>
3090  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3091  {
3092  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3093  {
3094  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3095  {
3096  matrix_base<T> temp(rhs);
3097  lhs += temp;
3098  }
3099  else
3100  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3101  }
3102  };
3103 
3104  // C += A * B^T
3105  template<typename T>
3106  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>,
3107  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3108  op_mat_mat_prod> >
3109  {
3110  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3111  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3112  op_mat_mat_prod> const & rhs)
3113  {
3114  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3115  {
3116  matrix_base<T> temp(rhs);
3117  lhs += temp;
3118  }
3119  else
3120  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3121  }
3122  };
3123 
3124  // C += A^T * B
3125  template<typename T>
3126  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3127  const matrix_base<T>,
3128  op_mat_mat_prod> >
3129  {
3130  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3131  const matrix_base<T>,
3132  op_mat_mat_prod> const & rhs)
3133  {
3134  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3135  {
3136  matrix_base<T> temp(rhs);
3137  lhs += temp;
3138  }
3139  else
3140  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3141  }
3142  };
3143 
3144  // C += A^T * B^T
3145  template<typename T>
3146  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3147  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3148  op_mat_mat_prod> >
3149  {
3150  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3151  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3152  op_mat_mat_prod> const & rhs)
3153  {
3154  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3155  {
3156  matrix_base<T> temp(rhs);
3157  lhs += temp;
3158  }
3159  else
3160  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3161  }
3162  };
3163 
3164 
3165  // C -= A * B
3166  template<typename T>
3167  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3168  {
3169  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3170  {
3171  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3172  {
3173  matrix_base<T> temp(rhs);
3174  lhs -= temp;
3175  }
3176  else
3177  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3178  }
3179  };
3180 
3181  // C -= A * B^T
3182  template<typename T>
3183  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>,
3184  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3185  op_mat_mat_prod> >
3186  {
3187  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3188  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3189  op_mat_mat_prod> const & rhs)
3190  {
3191  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3192  {
3193  matrix_base<T> temp(rhs);
3194  lhs -= temp;
3195  }
3196  else
3197  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3198  }
3199  };
3200 
3201  // C -= A^T * B
3202  template<typename T>
3203  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3204  const matrix_base<T>,
3205  op_mat_mat_prod> >
3206  {
3207  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3208  const matrix_base<T>,
3209  op_mat_mat_prod> const & rhs)
3210  {
3211  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3212  {
3213  matrix_base<T> temp(rhs);
3214  lhs -= temp;
3215  }
3216  else
3217  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3218  }
3219  };
3220 
3221  // C -= A^T * B^T
3222  template<typename T>
3223  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3224  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3225  op_mat_mat_prod> >
3226  {
3227  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3228  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3229  op_mat_mat_prod> const & rhs)
3230  {
3231  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3232  {
3233  matrix_base<T> temp(rhs);
3234  lhs -= temp;
3235  }
3236  else
3237  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3238  }
3239  };
3240 
3242 
3243  // y = A * x
3244  template<typename T>
3245  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3246  {
3247  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3248  {
3249  // check for x = A * x
3250  if (op_aliasing(lhs, rhs.rhs()))
3251  {
3252  vector_base<T> temp(rhs);
3253  lhs = temp;
3254  }
3255  else
3256  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3257  }
3258  };
3259 
3260  // y = A^T * x
3261  template<typename T>
3262  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3263  const vector_base<T>,
3264  op_prod> >
3265  {
3266  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3267  const vector_base<T>,
3268  op_prod> const & rhs)
3269  {
3270  // check for x = A^T * x
3271  if (op_aliasing(lhs, rhs.rhs()))
3272  {
3273  vector_base<T> temp(rhs);
3274  lhs = temp;
3275  }
3276  else
3277  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3278  }
3279  };
3280 
3281 
3282  // y += A * x
3283  template<typename T>
3284  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3285  {
3286  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3287  {
3288  vector_base<T> temp(rhs);
3289  lhs += temp;
3290  }
3291  };
3292 
3293  // y += A^T * x
3294  template<typename T>
3295  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3296  const vector_base<T>,
3297  op_prod> >
3298  {
3299  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3300  const vector_base<T>,
3301  op_prod> const & rhs)
3302  {
3303  vector_base<T> temp(rhs);
3304  lhs += temp;
3305  }
3306  };
3307 
3308 
3309  // y -= A * x
3310  template<typename T>
3311  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3312  {
3313  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3314  {
3315  vector_base<T> temp(rhs);
3316  lhs -= temp;
3317  }
3318  };
3319 
3320  // y -= A^T * x
3321  template<typename T>
3322  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3323  const vector_base<T>,
3324  op_prod> >
3325  {
3326  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3327  const vector_base<T>,
3328  op_prod> const & rhs)
3329  {
3330  vector_base<T> temp(rhs);
3331  lhs -= temp;
3332  }
3333  };
3334 
3335 
3336 
3338 
3339  // A = v1 * v2^T
3340  template<typename T>
3341  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3342  {
3343  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3344  {
3345  lhs.clear();
3346  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3347  }
3348  };
3349 
3350  // A = alpha * v1 * v2^T
3351  template<typename T, typename ScalarType>
3352  struct op_executor<matrix_base<T>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3353  const ScalarType,
3354  op_mult> >
3355  {
3356  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3357  const ScalarType,
3358  op_mult> const & rhs)
3359  {
3360  lhs.clear();
3361  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3362  }
3363  };
3364 
3365  // A += v1 * v2^T
3366  template<typename T>
3367  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3368  {
3369  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3370  {
3371  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3372  }
3373  };
3374 
3375  // A += alpha * v1 * v2^T
3376  template<typename T, typename ScalarType>
3377  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3378  const ScalarType,
3379  op_mult> >
3380  {
3381  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3382  const ScalarType,
3383  op_mult> const & rhs)
3384  {
3385  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3386  }
3387  };
3388 
3389  // A -= v1 * v2^T
3390  template<typename T>
3391  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3392  {
3393  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3394  {
3395  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
3396  }
3397  };
3398 
3399  // A -= alpha * v1 * v2^T
3400  template<typename T, typename ScalarType>
3401  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3402  const ScalarType,
3403  op_mult> >
3404  {
3405  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3406  const ScalarType,
3407  op_mult> const & rhs)
3408  {
3409  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
3410  }
3411  };
3412 
3413 
3414 } // namespace detail
3415 
3416 } // namespace linalg
3417 
3420 } //namespace viennacl
3421 
3422 #endif
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30
A tag class representing multiplication by a scalar.
Definition: forwards.h:91
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:313
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:235
Implementations of dense matrix related operations including matrix-vector products.
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:233
self_type & operator=(const self_type &other)
Definition: matrix.hpp:243
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:483
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:461
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Various little tools used here and there in ViennaCL.
matrix_expression< const self_type, const NumericT, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:591
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:194
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:188
A tag class representing subtraction.
Definition: forwards.h:89
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
viennacl::context context() const
Definition: matrix_def.hpp:46
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:84
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
A tag class representing division.
Definition: forwards.h:97
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
entry_proxy< NumericT > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:440
matrix_iterator(MatrixT &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:98
static vcl_size_t size1(LHS &lhs, RHS &)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator/=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1595
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT >, const S1, op_mult >>::type operator*(S1 const &value, matrix_base< NumericT > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1295
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:238
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Forward declaration of dense matrix classes.
bool op_aliasing(vector_base< NumericT > const &, B const &)
Definition: op_executor.hpp:36
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:675
vcl_size_t size1() const
Returns the size of the result vector.
Definition: matrix.hpp:72
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
self_type & operator++(void)
Definition: matrix.hpp:103
self_type operator++(int)
Definition: matrix.hpp:104
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:185
base_type::size_type size_type
Definition: matrix.hpp:664
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
value_type operator*(void)
Definition: matrix.hpp:102
size_type size2() const
Definition: matrix_def.hpp:45
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator*=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1443
matrix(zero_matrix< NumericT > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:723
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:225
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type internal_row_count, size_type cols, size_type internal_col_count)
Wraps a CUDA or host buffer provided by the user including padding of rows and columns.
Definition: matrix.hpp:700
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix_def.hpp:117
Definition: blas3.hpp:36
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &v)
size_type size1() const
Definition: matrix_def.hpp:44
vcl_size_t size2() const
Definition: matrix.hpp:73
void resize(MatrixType &matrix, vcl_size_t rows, vcl_size_t cols)
Generic resize routine for resizing a matrix (ViennaCL, uBLAS, etc.) to a new size/dimension.
Definition: size.hpp:59
void clear()
Resets all entries to zero.
Definition: matrix.hpp:597
Implementations of operations using sparse matrices.
A tag class representing addition.
Definition: forwards.h:87
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
matrix(const self_type &other)
Definition: matrix.hpp:743
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:62
void scaled_rank_1_update(matrix_base< NumericT > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1524
Determines whether a given expression has a row-major matrix layout.
std::size_t vcl_size_t
Definition: forwards.h:74
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:217
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:235
matrix(const base_type &other)
Definition: matrix.hpp:736
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:838
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:212
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slice only.
Definition: matrix_def.hpp:81
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &v)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:215
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:69
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:93
viennacl::context context(T const &t)
Returns the viennacl::context for the currently active memory domain of an object.
Definition: context.hpp:40
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:133
A dense matrix class.
Definition: forwards.h:368
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:713
bool row_major() const
Definition: matrix_def.hpp:239
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
bool row_major(T const &)
Definition: row_major.hpp:38
matrix(scalar_matrix< NumericT > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:730
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:766
float ScalarType
Definition: fft_1d.cpp:42
A tag class representing transposed matrices.
Definition: forwards.h:219
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
self_type & operator*=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:480
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
viennacl::matrix< float > m1
MatrixT::value_type value_type
Definition: matrix.hpp:96
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:229
bool operator!=(self_type const &other)
Definition: matrix.hpp:107
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:330
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:861
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
Wraps a CUDA or host buffer provided by the user.
Definition: matrix.hpp:684
self_type & operator/=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:536
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void resize(size_type rows, size_type columns, bool preserve=true)
Definition: matrix.hpp:601
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
memory_types
Definition: forwards.h:344
static vcl_size_t size2(LHS &, RHS &rhs)
matrix(identity_matrix< NumericT > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:716
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT > &A)
Dispatcher interface for A = diag(v, k)
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:66
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:191
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:232
MatrixT & operator()(void) const
Definition: matrix.hpp:112
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Implementation of the ViennaCL scalar class.
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:608
A collection of compile time type deductions.
A tag for row-major storage of a dense matrix.
Definition: forwards.h:303
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:667
bool operator==(self_type const &other)
Definition: matrix.hpp:106
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:99
A tag indicating iteration along increasing columns index of a matrix.
Definition: matrix.hpp:87
Simple enable-if variant that uses the SFINAE pattern.
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)