ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_HPP_
2 #define VIENNACL_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
27 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
36 
37 namespace viennacl
38 {
39 
40 //#ifdef VIENNACL_WITH_OPENCL
41 // template<class NumericT, class DISTRIBUTION>
42 // rand::random_matrix_t<NumericT, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
43 // return rand::random_matrix_t<NumericT,DISTRIBUTION>(size1,size2,distribution);
44 // }
45 //#endif
46 
53 template<typename LHS, typename RHS, typename OP>
54 class matrix_expression
55 {
56  typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type;
57  typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type;
58 
59 public:
61 
62  matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
63 
66  LHS & lhs() const { return lhs_; }
69  RHS & rhs() const { return rhs_; }
70 
74 
75 private:
77  lhs_reference_type lhs_;
79  rhs_reference_type rhs_;
80 };
81 
82 
/** @brief Empty tag type. Presumably passed as the ROWCOL argument of matrix_iterator to request row-wise traversal (confirm against the iterator's increment logic). */
struct row_iteration
{
};

/** @brief Empty tag type. Presumably passed as the ROWCOL argument of matrix_iterator to request column-wise traversal (confirm against the iterator's increment logic). */
struct col_iteration
{
};
88 
89 //STL-like iterator. TODO: STL-compliance...
91 template<typename ROWCOL, typename MatrixT>
92 class matrix_iterator
93 {
94  typedef matrix_iterator<ROWCOL, MatrixT> self_type;
95 public:
96  typedef typename MatrixT::value_type value_type;
97 
98  matrix_iterator(MatrixT & mat,
99  vcl_size_t start_row,
100  vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
101 
102  value_type operator*(void) { return mat_(row_, col_); }
104  self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
105 
106  bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
107  bool operator!=(self_type const & other) { return !(*this == other); }
108 
109  vcl_size_t index1() { return row_; }
110  vcl_size_t index2() { return col_; }
111 
112  MatrixT & operator()(void) const { return mat_; }
113 
114 private:
115  MatrixT & mat_;
116  vcl_size_t row_;
117  vcl_size_t col_;
118 };
119 
126 template<class NumericT, typename SizeT, typename DistanceT>
128  : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
129  internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, dense_padding_size)),
130  internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, dense_padding_size)),
131  row_major_fixed_(true), row_major_(is_row_major)
132 {
133  if (rows > 0 && columns > 0)
134  {
135  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx);
136  clear();
137  }
138 }
139 
142 template<class NumericT, typename SizeT, typename DistanceT>
143 template<typename LHS, typename RHS, typename OP>
145  size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
146  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
147  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
148  row_major_fixed_(true), row_major_(viennacl::traits::row_major(proxy))
149 {
151  if (internal_size() > 0)
152  {
153  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
154  clear();
155  self_type::operator=(proxy);
156  }
157 }
158 
159 // CUDA or host memory:
160 template<class NumericT, typename SizeT, typename DistanceT>
162  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
163  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
164  bool is_row_major)
165  : size1_(mat_size1), size2_(mat_size2),
166  start1_(mat_start1), start2_(mat_start2),
167  stride1_(mat_stride1), stride2_(mat_stride2),
168  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
169  row_major_fixed_(true), row_major_(is_row_major)
170 {
171  if (mem_type == viennacl::CUDA_MEMORY)
172  {
173 #ifdef VIENNACL_WITH_CUDA
175  elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
176  elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
177 #else
179 #endif
180  }
181  else if (mem_type == viennacl::MAIN_MEMORY)
182  {
184  elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
185  elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
186  }
187 
188  elements_.raw_size(sizeof(NumericT) * internal_size());
189 }
190 
191 #ifdef VIENNACL_WITH_OPENCL
192 template<class NumericT, typename SizeT, typename DistanceT>
193 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
194  : size1_(rows), size2_(columns),
195  start1_(0), start2_(0),
196  stride1_(1), stride2_(1),
197  internal_size1_(rows), internal_size2_(columns),
198  row_major_fixed_(true), row_major_(is_row_major)
199 {
201  elements_.opencl_handle() = mem;
202  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
203  elements_.opencl_handle().context(ctx.opencl_context());
204  elements_.raw_size(sizeof(NumericT)*internal_size());
205 }
206 
207 template<class NumericT, typename SizeT, typename DistanceT>
209  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
210  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
211  bool is_row_major)
212  : size1_(mat_size1), size2_(mat_size2),
213  start1_(mat_start1), start2_(mat_start2),
214  stride1_(mat_stride1), stride2_(mat_stride2),
215  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
216  row_major_fixed_(true), row_major_(is_row_major)
217 {
219  elements_.opencl_handle() = mem;
220  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
221  elements_.opencl_handle().context(ctx.opencl_context());
222  elements_.raw_size(sizeof(NumericT)*internal_size());
223 }
224 #endif
225 
226 template<class NumericT, typename SizeT, typename DistanceT>
228  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
229  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
230  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
231  row_major_fixed_(true), row_major_(other.row_major())
232 {
234  if (internal_size() > 0)
235  {
236  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other));
237  clear();
238  self_type::operator=(other);
239  }
240 }
241 
242 template<class NumericT, typename SizeT, typename DistanceT>
244 {
245  if (&other==this)
246  return *this;
247 
248  if (internal_size() == 0)
249  {
250  if (other.internal_size() == 0)
251  return *this;
252  if (!row_major_fixed_)
253  row_major_ = other.row_major();
254  resize(other.size1(), other.size2(), false);
255  }
256 
257  viennacl::linalg::am(*this,
258  other, cpu_value_type(1.0), 1, false, false);
259  return *this;
260 }
261 
266 template<class NumericT, typename SizeT, typename DistanceT>
267 template<typename LHS, typename RHS, typename OP>
269 {
270  assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0)
271  && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
272  && bool("Incompatible matrix sizes!"));
273  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
274  {
275  size1_ = viennacl::traits::size1(proxy);
276  size2_ = viennacl::traits::size2(proxy);
277  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
278  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
279  if (!row_major_fixed_)
280  row_major_ = viennacl::traits::row_major(proxy);
281  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
282  if (size1_ != internal_size1_ || size2_ != internal_size2_)
283  clear();
284  }
285 
286  if (internal_size() > 0)
288 
289  return *this;
290 }
291 
292 
293 // A = trans(B)
294 template<class NumericT, typename SizeT, typename DistanceT>
296 {
297  if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 )
298  {
299  size1_ = viennacl::traits::size1(proxy);
300  size2_ = viennacl::traits::size2(proxy);
301  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
302  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
303  if (!row_major_fixed_)
304  row_major_ = viennacl::traits::row_major(proxy);
305  }
306 
307  if ( handle() == proxy.lhs().handle() )
308  {
309  viennacl::matrix_base<NumericT> temp(proxy.lhs().size2(), proxy.lhs().size1(),proxy.lhs().row_major());
310  viennacl::linalg::trans(proxy, temp);
311  if ( proxy.lhs().size1() != proxy.lhs().size2() )
312  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
313  elements_ = temp.handle();
314  }
315  else
316  {
317  if ( proxy.lhs().size1() != proxy.lhs().size2() )
318  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
319  viennacl::linalg::trans(proxy, *this);
320  }
321  return *this;
322 }
323 
324 template<class NumericT, typename SizeT, typename DistanceT>
325 template<typename LHS, typename RHS, typename OP>
327 {
328  assert( (viennacl::traits::size1(proxy) == size1())
329  && (viennacl::traits::size2(proxy) == size2())
330  && bool("Incompatible matrix sizes!"));
331  assert( (size1() > 0) && bool("Vector not yet initialized!") );
332  assert( (size2() > 0) && bool("Vector not yet initialized!") );
333 
335 
336  return *this;
337 }
338 
339 template<class NumericT, typename SizeT, typename DistanceT>
340 template<typename LHS, typename RHS, typename OP>
342 {
343  assert( (viennacl::traits::size1(proxy) == size1())
344  && (viennacl::traits::size2(proxy) == size2())
345  && bool("Incompatible matrix sizes!"));
346  assert( (size1() > 0) && bool("Vector not yet initialized!") );
347  assert( (size2() > 0) && bool("Vector not yet initialized!") );
348 
350 
351  return *this;
352 }
353 
355 template<class NumericT, typename SizeT, typename DistanceT>
357 {
358  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
359  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
360 
361  if (internal_size() == 0)
362  {
363  size1_ = m.size1();
364  size2_ = m.size2();
365  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
366  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
367  if (internal_size() > 0)
368  {
369  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
370  clear();
371  }
372  }
373  else
374  viennacl::linalg::matrix_assign(*this, NumericT(0));
375 
376  if (internal_size() > 0)
378 
379  return *this;
380 }
381 
383 template<class NumericT, typename SizeT, typename DistanceT>
385 {
386  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
387  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
388 
389  if (internal_size() == 0)
390  {
391  size1_ = m.size1();
392  size2_ = m.size2();
393  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
394  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
395  if (internal_size() > 0)
396  {
397  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
398  clear();
399  }
400  }
401  else
402  viennacl::linalg::matrix_assign(*this, NumericT(0));
403 
404  return *this;
405 }
406 
408 template<class NumericT, typename SizeT, typename DistanceT>
410 {
411  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
412  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
413 
414  if (internal_size() == 0)
415  {
416  size1_ = m.size1();
417  size2_ = m.size2();
418  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
419  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
420  if (internal_size() > 0)
421  {
422  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
423  clear();
424  }
425  }
426 
427  if (internal_size() > 0)
428  {
429  viennacl::linalg::matrix_assign(*this, m(0,0));
430  }
431 
432  return *this;
433 }
434 
435 
436 //read-write access to an element of the matrix/matrix_range/matrix_slice
439 template<class NumericT, typename SizeT, typename DistanceT>
441 {
442  if (row_major_)
443  return entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
444  return entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
445 }
446 
449 template<class NumericT, typename SizeT, typename DistanceT>
451 {
452  if (row_major_)
453  return const_entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
454  return const_entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
455 }
456 
457 //
458 // Operator overloads for enabling implicit conversions:
459 //
460 template<class NumericT, typename SizeT, typename DistanceT>
462 {
464  *this, NumericT(1.0), 1, false, false,
465  other, NumericT(1.0), 1, false, false);
466  return *this;
467 }
468 
469 template<class NumericT, typename SizeT, typename DistanceT>
471 {
473  *this, NumericT(1.0), 1, false, false,
474  other, NumericT(1.0), 1, false, true);
475  return *this;
476 }
477 
479 template<class NumericT, typename SizeT, typename DistanceT>
481 {
482  viennacl::linalg::am(*this,
483  *this, NumericT(val), 1, false, false);
484  return *this;
485 }
486 
488 template<class NumericT, typename SizeT, typename DistanceT>
490 {
491  viennacl::linalg::am(*this,
492  *this, NumericT(val), 1, false, false);
493  return *this;
494 }
495 
497 template<class NumericT, typename SizeT, typename DistanceT>
499 {
500  viennacl::linalg::am(*this,
501  *this, NumericT(val), 1, false, false);
502  return *this;
503 }
504 
506 template<class NumericT, typename SizeT, typename DistanceT>
508 {
509  viennacl::linalg::am(*this,
510  *this, NumericT(val), 1, false, false);
511  return *this;
512 }
513 
515 template<class NumericT, typename SizeT, typename DistanceT>
517 {
518  viennacl::linalg::am(*this,
519  *this, NumericT(val), 1, false, false);
520  return *this;
521 }
522 
524 template<class NumericT, typename SizeT, typename DistanceT>
526 {
527  viennacl::linalg::am(*this,
528  *this, NumericT(val), 1, false, false);
529  return *this;
530 }
531 
532 
533 
535 template<class NumericT, typename SizeT, typename DistanceT>
537 {
538  viennacl::linalg::am(*this,
539  *this, NumericT(val), 1, true, false);
540  return *this;
541 }
542 
544 template<class NumericT, typename SizeT, typename DistanceT>
546 {
547  viennacl::linalg::am(*this,
548  *this, NumericT(val), 1, true, false);
549  return *this;
550 }
551 
553 template<class NumericT, typename SizeT, typename DistanceT>
555 {
556  viennacl::linalg::am(*this,
557  *this, NumericT(val), 1, true, false);
558  return *this;
559 }
560 
562 template<class NumericT, typename SizeT, typename DistanceT>
564 {
565  viennacl::linalg::am(*this,
566  *this, NumericT(val), 1, true, false);
567  return *this;
568 }
569 
571 template<class NumericT, typename SizeT, typename DistanceT>
573 {
574  viennacl::linalg::am(*this,
575  *this, NumericT(val), 1, true, false);
576  return *this;
577 }
578 
580 template<class NumericT, typename SizeT, typename DistanceT>
582 {
583  viennacl::linalg::am(*this,
584  *this, NumericT(val), 1, true, false);
585  return *this;
586 }
587 
588 
590 template<class NumericT, typename SizeT, typename DistanceT>
592 {
594 }
595 
596 template<class NumericT, typename SizeT, typename DistanceT>
598 
599 
600 template<class NumericT, typename SizeT, typename DistanceT>
602 {
603  assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
604 
605  if (preserve && internal_size() > 0)
606  {
607  //get old entries:
608  std::vector< NumericT > old_entries(internal_size());
609  viennacl::backend::memory_read(elements_, 0, sizeof(NumericT)*internal_size(), &(old_entries[0]));
610 
611  //set up entries of new matrix:
612  std::vector< NumericT > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size)
613  * viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size));
614  for (size_type i=0; i<rows; ++i)
615  {
616  if (i >= size1_)
617  continue;
618 
619  for (size_type j=0; j<columns; ++j)
620  {
621  if (j >= size2_)
622  continue;
623  if (row_major_)
624  new_entries[row_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
625  = old_entries[row_major::mem_index(i, j, internal_size1(), internal_size2())];
626  else
627  new_entries[column_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
628  = old_entries[column_major::mem_index(i, j, internal_size1(), internal_size2())];
629  }
630  }
631 
632  //copy new entries to GPU:
633  size1_ = rows;
634  size2_ = columns;
635  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
636  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
637  viennacl::backend::memory_create(elements_, sizeof(NumericT)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
638  }
639  else //discard old entries:
640  {
641  size1_ = rows;
642  size2_ = columns;
643  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
644  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
645 
646  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(elements_));
647  clear();
648  }
649 }
650 
651 
658 template<class NumericT, typename F, unsigned int AlignmentV>
659 class matrix : public matrix_base<NumericT>
660 {
661  typedef matrix<NumericT, F, AlignmentV> self_type;
662  typedef matrix_base<NumericT> base_type;
663 public:
664  typedef typename base_type::size_type size_type;
665 
667  explicit matrix() : base_type(static_cast<bool>(viennacl::is_row_major<F>::value)) {}
668 
675  explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, viennacl::is_row_major<F>::value, ctx) {}
676 
684  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
685  : base_type(ptr_to_mem, mem_type,
686  rows, 0, 1, rows,
687  cols, 0, 1, cols,
688  viennacl::is_row_major<F>::value) {}
689 
690 
700  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
701  size_type rows, size_type internal_row_count,
702  size_type cols, size_type internal_col_count)
703  : base_type(ptr_to_mem, mem_type,
704  rows, 0, 1, internal_row_count,
705  cols, 0, 1, internal_col_count,
706  true, viennacl::is_row_major<F>::value) {}
707 
708 #ifdef VIENNACL_WITH_OPENCL
709  explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns, viennacl::is_row_major<F>::value) {}
710 #endif
711 
712  template<typename LHS, typename RHS, typename OP>
714 
716  matrix(identity_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
717  {
718  if (base_type::internal_size() > 0)
720  }
721 
723  matrix(zero_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
724  {
725  if (base_type::internal_size() > 0)
727  }
728 
730  matrix(scalar_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
731  {
732  if (base_type::internal_size() > 0)
734  }
735 
736  matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
737  {
738  base_type::operator=(other);
739  }
740 
741 
742  //copy constructor:
743  matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
744  {
745  base_type::operator=(other);
746  }
747 
748 
749  /*template<typename M1>
750  self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
751  {
752  self_type temp(proxy.lhs());
753  *this = trans(temp);
754  return *this;
755  }*/
756 
757  using base_type::operator=;
758 
766  void resize(size_type rows, size_type columns, bool preserve = true)
767  {
768  base_type::resize(rows, columns, preserve);
769  }
770 
771 }; //matrix
772 
773 
774 
780 template<class NumericT>
781 std::ostream & operator<<(std::ostream & s, const matrix_base<NumericT> & gpu_matrix)
782 {
783  typedef typename matrix_base<NumericT>::size_type size_type;
784 
785  std::vector<NumericT> tmp(gpu_matrix.internal_size());
786  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0]));
787 
788  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
789 
790  s << "(";
791  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
792  {
793  s << "(";
794  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
795  {
796  if (gpu_matrix.row_major())
797  s << tmp[row_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
798  else
799  s << tmp[column_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
800 
801  if (j < gpu_matrix.size2() - 1)
802  s << ",";
803  }
804  s << ")";
805  if (i < gpu_matrix.size1() - 1)
806  s << ",";
807  }
808  s << ")";
809  return s;
810 }
811 
817 template<typename LHS, typename RHS, typename OP>
818 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
819 {
821 
822  matrix<ScalarType> temp = expr;
823  s << temp;
824  return s;
825 }
826 
828 template<typename NumericT>
829 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>
831 {
833 }
834 
835 //diag():
836 template<typename NumericT>
837 vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>
838 diag(const matrix_base<NumericT> & A, int k = 0)
839 {
841 }
842 
843 template<typename NumericT>
844 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
845 diag(const vector_base<NumericT> & v, int k = 0)
846 {
848 }
849 
850 // row():
851 template<typename NumericT, typename F>
852 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
853 row(const matrix_base<NumericT, F> & A, unsigned int i)
854 {
855  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
856 }
857 
858 // column():
859 template<typename NumericT, typename F>
860 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
861 column(const matrix_base<NumericT, F> & A, unsigned int j)
862 {
863  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
864 }
865 
867 
868 //
869 //cpu to gpu, generic type:
870 //
876 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
877 void copy(const CPUMatrixT & cpu_matrix,
878  matrix<NumericT, F, AlignmentV> & gpu_matrix )
879 {
880  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
881 
882  //std::cout << "Copying CPUMatrixT!" << std::endl;
883  //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
884  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
885  {
886  gpu_matrix.resize(cpu_matrix.size1(),
887  cpu_matrix.size2(), false);
888  }
889 
890  assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
891 
892  std::vector<NumericT> data(gpu_matrix.internal_size());
893  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
894  {
895  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
896  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
897  }
898 
899  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
900  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
901  //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
902 }
903 
904 //
905 //cpu to gpu, STL type:
906 //
912 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
913 void copy(const std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix,
914  matrix<NumericT, F, AlignmentV> & gpu_matrix )
915 {
916  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
917 
918  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
919  {
920  gpu_matrix.resize(cpu_matrix.size(),
921  cpu_matrix[0].size(),
922  false);
923  }
924 
925  assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
926 
927  std::vector<NumericT> data(gpu_matrix.internal_size());
928  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
929  {
930  assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
931 
932  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
933  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
934  }
935 
936  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
937  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
938 }
939 
940 
941 //
942 //cpu to gpu, raw pointer range (fast_copy):
943 //
952 template<typename NumericT, typename F, unsigned int AlignmentV>
953 void fast_copy(NumericT * cpu_matrix_begin,
954  NumericT * cpu_matrix_end,
955  matrix<NumericT, F, AlignmentV> & gpu_matrix)
956 {
957  if (gpu_matrix.internal_size() == 0)
958  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
959  else
960  {
961  assert( (gpu_matrix.internal_size() >= static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin)) && bool("fast_copy(): Matrix not large enough to fit data!"));
962  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);
963  }
964 }
965 
966 
967 #ifdef VIENNACL_WITH_EIGEN
968 
973 template<typename F, unsigned int AlignmentV>
974 void copy(const Eigen::MatrixXf & cpu_matrix,
975  matrix<float, F, AlignmentV> & gpu_matrix)
976 {
977  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
978 
979  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
980  {
981  gpu_matrix.resize(cpu_matrix.rows(),
982  cpu_matrix.cols(),
983  false);
984  }
985  else
986  {
987  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
988  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
989  && bool("matrix size mismatch")
990  );
991  }
992 
993  std::vector<float> data(gpu_matrix.internal_size());
994  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
995  {
996  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
997  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
998  }
999 
1000  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(float) * data.size(), &(data[0]));
1001  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1002 }
1003 
1009 template<typename F, unsigned int AlignmentV>
1010 void copy(const Eigen::MatrixXd & cpu_matrix,
1011  matrix<double, F, AlignmentV> & gpu_matrix)
1012 {
1013  typedef typename matrix<double, F, AlignmentV>::size_type size_type;
1014 
1015  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1016  {
1017  gpu_matrix.resize(cpu_matrix.rows(),
1018  cpu_matrix.cols(),
1019  false);
1020  }
1021  else
1022  {
1023  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1024  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1025  && bool("matrix size mismatch")
1026  );
1027  }
1028 
1029  std::vector<double> data(gpu_matrix.internal_size());
1030  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1031  {
1032  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1033  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1034  }
1035 
1036  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(double) * data.size(), &(data[0]));
1037  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1038 }
1039 #endif
1040 
1041 #ifdef VIENNACL_WITH_MTL4
1042 
1047 template<typename NumericT, typename T, typename F, unsigned int AlignmentV>
1048 void copy(const mtl::dense2D<NumericT, T>& cpu_matrix,
1049  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1050 {
1051  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
1052 
1053  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1054  {
1055  gpu_matrix.resize(cpu_matrix.num_rows(),
1056  cpu_matrix.num_cols(),
1057  false);
1058  }
1059  else
1060  {
1061  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
1062  && (gpu_matrix.size2() == cpu_matrix.num_cols())
1063  && bool("matrix size mismatch")
1064  );
1065  }
1066 
1067  std::vector<NumericT> data(gpu_matrix.internal_size());
1068  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1069  {
1070  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1071  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
1072  }
1073 
1074  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1075  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1076 }
1077 #endif
1078 
1079 
1080 
1081 
1082 //
1083 //gpu to cpu, generic type
1084 //
1090 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
1091 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1092  CPUMatrixT & cpu_matrix )
1093 {
1094  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1095 
1096  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1097  {
1098  assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
1099 
1100  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1101  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1102 
1103  //now copy entries to cpu_matrix:
1104  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1105  {
1106  assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
1107  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1108  cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1109  }
1110  }
1111 }
1112 
1113 //gpu to cpu, STL type
1119 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
1120 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1121  std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix)
1122 {
1123  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1124 
1125  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1126  {
1127  assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
1128 
1129  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1130  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1131 
1132  //now copy entries to cpu_matrix:
1133  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1134  {
1135  assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
1136 
1137  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1138  cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1139  }
1140  }
1141 }
1142 
1143 //gpu to cpu, raw pointer destination
1151 template<typename NumericT, typename F, unsigned int AlignmentV>
1153  NumericT * cpu_matrix_begin)
1154 {
1155  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin);
1156 }
1157 
1158 
1159 
1161 
1162 
1163 // operator +
1165 template<typename LHS1, typename RHS1, typename OP1,
1166  typename LHS2, typename RHS2, typename OP2>
1167 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1168 const matrix_expression<const LHS2, const RHS2, OP2>,
1169 op_add>
1172 {
1173  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1174  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1175  && bool("Incompatible matrix sizes!"));
1178  op_add>(proxy1, proxy2);
1179 }
1180 
1181 template<typename LHS1, typename RHS1, typename OP1,
1182  typename NumericT>
1183 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1184 const matrix_base<NumericT>,
1185 op_add>
1187  matrix_base<NumericT> const & proxy2)
1188 {
1189  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1190  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1191  && bool("Incompatible matrix sizes!"));
1193  const matrix_base<NumericT>,
1194  op_add>(proxy1, proxy2);
1195 }
1196 
1197 template<typename NumericT,
1198  typename LHS2, typename RHS2, typename OP2>
1199 matrix_expression< const matrix_base<NumericT>,
1200 const matrix_expression<const LHS2, const RHS2, OP2>,
1201 op_add>
1204 {
1205  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1206  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1207  && bool("Incompatible matrix sizes!"));
1210  op_add>(proxy1, proxy2);
1211 }
1212 
1214 template<typename NumericT>
1215 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_add >
1217 {
1219  const matrix_base<NumericT>,
1220  op_add > (m1, m2);
1221 }
1222 
1223 
1224 // operator -
1225 template<typename LHS1, typename RHS1, typename OP1,
1226  typename LHS2, typename RHS2, typename OP2>
1227 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1228 const matrix_expression<const LHS2, const RHS2, OP2>,
1229 op_sub>
1232 {
1233  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1234  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1235  && bool("Incompatible matrix sizes!"));
1238  op_sub>(proxy1, proxy2);
1239 }
1240 
1241 template<typename LHS1, typename RHS1, typename OP1,
1242  typename NumericT>
1243 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1244 const matrix_base<NumericT>,
1245 op_sub>
1247  matrix_base<NumericT> const & proxy2)
1248 {
1249  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1250  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1251  && bool("Incompatible matrix sizes!"));
1253  const matrix_base<NumericT>,
1254  op_sub>(proxy1, proxy2);
1255 }
1256 
1257 template<typename NumericT,
1258  typename LHS2, typename RHS2, typename OP2>
1259 matrix_expression< const matrix_base<NumericT>,
1260 const matrix_expression<const LHS2, const RHS2, OP2>,
1261 op_sub>
1264 {
1265  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1266  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1267  && bool("Incompatible matrix sizes!"));
1270  op_sub>(proxy1, proxy2);
1271 }
1272 
1274 template<typename NumericT>
1275 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_sub >
1277 {
1279  const matrix_base<NumericT>,
1280  op_sub > (m1, m2);
1281 }
1282 
1283 
1284 
1285 // operator *
1291 template<typename S1, typename NumericT>
1293 matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
1294 >::type
1295 operator * (S1 const & value, matrix_base<NumericT> const & m1)
1296 {
1297  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, value);
1298 }
1299 
1301 template<typename NumericT>
1302 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1304 {
1305  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1306 }
1307 
1309 template<typename NumericT>
1310 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1311 operator * (short value, matrix_base<NumericT> const & m1)
1312 {
1313  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1314 }
1315 
1317 template<typename NumericT>
1318 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1320 {
1321  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1322 }
1323 
1325 template<typename NumericT>
1326 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1328 {
1329  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1330 }
1331 
1333 template<typename NumericT>
1334 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1335 operator * (float value, matrix_base<NumericT> const & m1)
1336 {
1337  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1338 }
1339 
1341 template<typename NumericT>
1342 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1343 operator * (double value, matrix_base<NumericT> const & m1)
1344 {
1345  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1346 }
1347 
1348 
1349 
1355 template<typename LHS, typename RHS, typename OP, typename S1>
1357 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1359  S1 const & val)
1360 {
1362 }
1363 
1364 
1370 template<typename S1, typename LHS, typename RHS, typename OP>
1372 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1373 operator * (S1 const & val,
1374  matrix_expression< LHS, RHS, OP> const & proxy)
1375 {
1377 }
1378 
1381 template<typename NumericT, typename S1>
1383 matrix_expression< const matrix_base<NumericT>, const S1, op_mult> >::type
1385 {
1387 }
1388 
1390 template<typename NumericT>
1391 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1393 {
1394  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1395 }
1396 
1398 template<typename NumericT>
1399 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1401 {
1402  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1403 }
1404 
1406 template<typename NumericT>
1407 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1409 {
1410  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1411 }
1412 
1414 template<typename NumericT>
1415 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1417 {
1418  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1419 }
1420 
1422 template<typename NumericT>
1423 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1425 {
1426  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1427 }
1428 
1430 template<typename NumericT>
1431 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1433 {
1434  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1435 }
1436 
1437 
1438 // operator *=
1439 
1441 template<typename NumericT, typename S1>
1442 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1443 operator *= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1444 {
1445  bool is_sign_flip = viennacl::is_flip_sign_scalar<S1>::value;
1447  m1, gpu_val, 1, false, is_sign_flip ? true : false);
1448  return m1;
1449 }
1450 
1452 template<typename NumericT>
1453 matrix_base<NumericT> &
1455 {
1457  m1, NumericT(gpu_val), 1, false, false);
1458  return m1;
1459 }
1460 
1462 template<typename NumericT>
1463 matrix_base<NumericT> &
1465 {
1467  m1, NumericT(gpu_val), 1, false, false);
1468  return m1;
1469 }
1470 
1472 template<typename NumericT>
1473 matrix_base<NumericT> &
1475 {
1477  m1, NumericT(gpu_val), 1, false, false);
1478  return m1;
1479 }
1480 
1482 template<typename NumericT>
1483 matrix_base<NumericT> &
1485 {
1487  m1, NumericT(gpu_val), 1, false, false);
1488  return m1;
1489 }
1490 
1492 template<typename NumericT>
1493 matrix_base<NumericT> &
1495 {
1497  m1, NumericT(gpu_val), 1, false, false);
1498  return m1;
1499 }
1500 
1502 template<typename NumericT>
1503 matrix_base<NumericT> &
1505 {
1507  m1, NumericT(gpu_val), 1, false, false);
1508  return m1;
1509 }
1510 
1511 
1512 
1513 // operator /
1514 
1515 
1521 template<typename LHS, typename RHS, typename OP, typename S1>
1523 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
1525  S1 const & val)
1526 {
1528 }
1529 
1530 
1532 template<typename NumericT, typename S1>
1534 matrix_expression< const matrix_base<NumericT>, const S1, op_div> >::type
1536 {
1538 }
1539 
1541 template<typename NumericT>
1542 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1544 {
1545  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1546 }
1547 
1549 template<typename NumericT>
1550 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1552 {
1553  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1554 }
1555 
1557 template<typename NumericT>
1558 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1560 {
1561  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1562 }
1563 
1565 template<typename NumericT>
1566 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1568 {
1569  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1570 }
1571 
1573 template<typename NumericT>
1574 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1576 {
1577  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1578 }
1579 
1581 template<typename NumericT>
1582 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1584 {
1585  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1586 }
1587 
1588 
1589 
1590 // operator /=
1591 
1593 template<typename NumericT, typename S1>
1594 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1595 operator /= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1596 {
1598  m1, gpu_val, 1, true, false);
1599  return m1;
1600 }
1601 
1603 template<typename NumericT>
1604 matrix_base<NumericT> &
1606 {
1608  m1, NumericT(gpu_val), 1, true, false);
1609  return m1;
1610 }
1611 
1613 template<typename NumericT>
1614 matrix_base<NumericT> &
1616 {
1618  m1, gpu_val, 1, true, false);
1619  return m1;
1620 }
1621 
1623 template<typename NumericT>
1624 matrix_base<NumericT> &
1626 {
1628  m1, gpu_val, 1, true, false);
1629  return m1;
1630 }
1631 
1633 template<typename NumericT>
1634 matrix_base<NumericT> &
1636 {
1638  m1, gpu_val, 1, true, false);
1639  return m1;
1640 }
1641 
1643 template<typename NumericT>
1644 matrix_base<NumericT> &
1646 {
1648  m1, gpu_val, 1, true, false);
1649  return m1;
1650 }
1651 
1653 template<typename NumericT>
1654 matrix_base<NumericT> &
1656 {
1658  m1, gpu_val, 1, true, false);
1659  return m1;
1660 }
1661 
1662 
1663 
1664 
1665 
1666 // outer_prod(v1, v2) * val;
1667 template<typename NumericT, typename S1>
1670 const S1,
1671 op_mult>
1672 >::type
1674  const S1 & val)
1675 {
1677  const S1,
1678  op_mult>(proxy, val);
1679 }
1680 
1681 template<typename NumericT, typename S1>
1684 const NumericT,
1685 op_mult>
1686 >::type
1688  const S1 & val)
1689 {
1691  const NumericT,
1692  op_mult>(proxy, NumericT(val));
1693 }
1694 
1695 // val * outer_prod(v1, v2);
1696 template<typename NumericT, typename S1>
1699 const S1,
1700 op_mult>
1701 >::type
1702 operator*(const S1 & val,
1704 {
1706  const S1,
1707  op_mult>(proxy, val);
1708 }
1709 
1710 template<typename NumericT, typename S1>
1713 const NumericT,
1714 op_mult>
1715 >::type
1716 operator*(const S1 & val,
1718 {
1720  const NumericT,
1721  op_mult>(proxy, NumericT(val));
1722 }
1723 
1724 
1725 
1726 //
1727 // Specify available operations:
1728 //
1729 
1732 namespace linalg
1733 {
1734 namespace detail
1735 {
1736 
1737  // x = y
1738  template<typename T>
1739  struct op_executor<matrix_base<T>, op_assign, matrix_base<T> >
1740  {
1741  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1742  {
1743  viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
1744  }
1745  };
1746 
1747  // x = trans(y)
1748  template<typename T>
1749  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1750  {
1751  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1752  {
1753  matrix_base<T> temp(rhs);
1754  viennacl::linalg::am(lhs, temp, T(1), 1, false, false);
1755  }
1756  };
1757 
1758 
1759  // x += y
1760  template<typename T>
1761  struct op_executor<matrix_base<T>, op_inplace_add, matrix_base<T> >
1762  {
1763  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1764  {
1765  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
1766  }
1767  };
1768 
1769  // x += trans(y)
1770  template<typename T>
1771  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1772  {
1773  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1774  {
1775  matrix_base<T> temp(rhs);
1776  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, false);
1777  }
1778  };
1779 
1780  // x -= y
1781  template<typename T>
1782  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_base<T> >
1783  {
1784  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1785  {
1786  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
1787  }
1788  };
1789 
1790  // x -= trans(y)
1791  template<typename T>
1792  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1793  {
1794  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1795  {
1796  matrix_base<T> temp(rhs);
1797  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, true);
1798  }
1799  };
1800 
1802 
1803 
1804  // x = alpha * y
1805  template<typename T, typename ScalarType>
1806  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1807  {
1808  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1809  {
1810  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
1811  }
1812  };
1813 
1814  // x += alpha * y
1815  template<typename T, typename ScalarType>
1816  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1817  {
1818  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1819  {
1820  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
1821  }
1822  };
1823 
1824  // x -= alpha * y
1825  template<typename T, typename ScalarType>
1826  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1827  {
1828  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1829  {
1830  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
1831  }
1832  };
1833 
1834 
1836 
1837  // x = alpha * vec_expr
1838  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1839  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1840  {
1841  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1842  {
1843  if (lhs.row_major())
1844  {
1845  matrix<T> temp(proxy.lhs());
1846  lhs = temp * proxy.rhs();
1847  }
1848  else
1849  {
1850  matrix<T, column_major> temp(proxy.lhs());
1851  lhs = temp * proxy.rhs();
1852  }
1853  }
1854  };
1855 
1856  // x += alpha * vec_expr
1857  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1858  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1859  {
1860  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1861  {
1862  if (lhs.row_major())
1863  {
1864  matrix<T> temp(proxy.lhs());
1865  lhs += temp * proxy.rhs();
1866  }
1867  else
1868  {
1869  matrix<T, column_major> temp(proxy.lhs());
1870  lhs += temp * proxy.rhs();
1871  }
1872  }
1873  };
1874 
1875  // x -= alpha * vec_expr
1876  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1877  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1878  {
1879  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1880  {
1881  if (lhs.row_major())
1882  {
1883  matrix<T> temp(proxy.lhs());
1884  lhs -= temp * proxy.rhs();
1885  }
1886  else
1887  {
1888  matrix<T, column_major> temp(proxy.lhs());
1889  lhs -= temp * proxy.rhs();
1890  }
1891  }
1892  };
1893 
1894 
1896 
1897  // x = y / alpha
1898  template<typename T, typename ScalarType>
1899  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1900  {
1901  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1902  {
1903  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
1904  }
1905  };
1906 
1907  // x += y / alpha
1908  template<typename T, typename ScalarType>
1909  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1910  {
1911  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1912  {
1913  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
1914  }
1915  };
1916 
1917  // x -= y / alpha
1918  template<typename T, typename ScalarType>
1919  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1920  {
1921  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1922  {
1923  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
1924  }
1925  };
1926 
1927 
1929 
1930  // x = vec_expr / alpha
1931  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1932  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1933  {
1934  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1935  {
1936  if (lhs.row_major())
1937  {
1938  matrix<T> temp(proxy.lhs());
1939  lhs = temp / proxy.rhs();
1940  }
1941  else
1942  {
1943  matrix<T, column_major> temp(proxy.lhs());
1944  lhs = temp / proxy.rhs();
1945  }
1946  }
1947  };
1948 
1949  // x += vec_expr / alpha
1950  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1951  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1952  {
1953  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1954  {
1955  if (lhs.row_major())
1956  {
1957  matrix<T> temp(proxy.lhs());
1958  lhs += temp / proxy.rhs();
1959  }
1960  else
1961  {
1962  matrix<T, column_major> temp(proxy.lhs());
1963  lhs += temp / proxy.rhs();
1964  }
1965  }
1966  };
1967 
1968  // x -= vec_expr / alpha
1969  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1970  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1971  {
1972  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1973  {
1974  if (lhs.row_major())
1975  {
1976  matrix<T, row_major> temp(proxy.lhs());
1977  lhs -= temp / proxy.rhs();
1978  }
1979  else
1980  {
1981  matrix<T, column_major> temp(proxy.lhs());
1982  lhs -= temp / proxy.rhs();
1983  }
1984  }
1985  };
1986 
1987 
1988 
1989  // generic x = vec_expr1 + vec_expr2:
1990  template<typename T, typename LHS, typename RHS>
1991  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
1992  {
1993  // generic x = vec_expr1 + vec_expr2:
1994  template<typename LHS1, typename RHS1>
1995  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
1996  {
1997  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
1998  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
1999 
2000  if (op_aliasing_lhs || op_aliasing_rhs)
2001  {
2002  matrix_base<T> temp(proxy.lhs());
2003  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2004  lhs = temp;
2005  }
2006  else
2007  {
2008  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2009  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2010  }
2011  }
2012 
2013  // x = y + z
2014  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2015  {
2017  proxy.lhs(), T(1), 1, false, false,
2018  proxy.rhs(), T(1), 1, false, false);
2019  }
2020 
2021  // x = alpha * y + z
2022  template<typename ScalarType>
2023  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2024  const matrix_base<T>,
2025  op_add> const & proxy)
2026  {
2028  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2029  proxy.rhs(), T(1), 1, false, false);
2030  }
2031 
2032  // x = y / alpha + z
2033  template<typename ScalarType>
2034  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2035  const matrix_base<T>,
2036  op_add> const & proxy)
2037  {
2039  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2040  proxy.rhs(), T(1), 1, false, false);
2041  }
2042 
2043  // x = y + beta * z
2044  template<typename ScalarType>
2045  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2046  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2047  op_add> const & proxy)
2048  {
2050  proxy.lhs(), T(1), 1, false, false,
2051  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2052  }
2053 
2054  // x = y + z / beta
2055  template<typename ScalarType>
2056  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2057  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2058  op_add> const & proxy)
2059  {
2061  proxy.lhs(), T(1), 1, false, false,
2062  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2063  }
2064 
2065  // x = alpha * y + beta * z
2066  template<typename ScalarType1, typename ScalarType2>
2067  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2068  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2069  op_add> const & proxy)
2070  {
2072  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2073  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2074  }
2075 
2076  // x = alpha * y + z / beta
2077  template<typename ScalarType1, typename ScalarType2>
2078  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2079  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2080  op_add> const & proxy)
2081  {
2083  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2084  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2085  }
2086 
2087  // x = y / alpha + beta * z
2088  template<typename ScalarType1, typename ScalarType2>
2089  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2090  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2091  op_add> const & proxy)
2092  {
2094  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2095  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2096  }
2097 
2098  // x = y / alpha + z / beta
2099  template<typename ScalarType1, typename ScalarType2>
2100  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2101  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2102  op_add> const & proxy)
2103  {
2105  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2106  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2107  }
2108  };
2109 
2110  // dense = sparse * dense
2111  template<typename T, typename LHS, typename RHS>
2112  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
2113  {
2114  template< typename SparseMatrixType>
2115  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2117  viennacl::op_prod> const & proxy)
2118  {
2119  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2120  }
2121 
2122  // dense = sparse * trans(dense)
2123  template< typename SparseMatrixType >
2124  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2128  viennacl::op_prod> const & proxy)
2129  {
2130  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2131  }
2132 
2133  };
2134 
2135  // generic x += mat_expr1 + mat_expr2 (destination is a matrix_base<T>, right-hand side is an op_add matrix expression):
      // NOTE(review): in each specialised overload below the first body line (listing lines 2162, 2173, 2184, ...) is absent
      // from this extract, so only the trailing argument list is visible. Presumably it is the call head receiving lhs
      // (likely viennacl::linalg::ambm_m(lhs, ...)) - confirm against upstream before compiling.
      // Visible argument pattern per operand: (matrix, scalar, 1, <true only for '/' operands>, <true only for negated operands>).
2136  template<typename T, typename LHS, typename RHS>
2137  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
2138  {
2139  // generic x += mat_expr1 + mat_expr2: if lhs aliases either operand, build (expr1 + expr2) in a temporary and add that; otherwise add both operands to lhs one after the other
2140  template<typename LHS1, typename RHS1>
2141  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2142  {
2143  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2144  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2145 
2146  if (op_aliasing_lhs || op_aliasing_rhs)
2147  {
2148  matrix_base<T> temp(proxy.lhs());
2149  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2150  lhs += temp;
2151  }
2152  else
2153  {
2154  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2155  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2156  }
2157  }
2158 
2159  // x += y + z
2160  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2161  {
2163  proxy.lhs(), T(1), 1, false, false,
2164  proxy.rhs(), T(1), 1, false, false);
2165  }
2166 
2167  // x += alpha * y + z
2168  template<typename ScalarType>
2169  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2170  const matrix_base<T>,
2171  op_add> const & proxy)
2172  {
2174  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2175  proxy.rhs(), T(1), 1, false, false);
2176  }
2177 
2178  // x += y / alpha + z
2179  template<typename ScalarType>
2180  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2181  const matrix_base<T>,
2182  op_add> const & proxy)
2183  {
2185  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2186  proxy.rhs(), T(1), 1, false, false);
2187  }
2188 
2189  // x += y + beta * z
2190  template<typename ScalarType>
2191  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2192  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2193  op_add> const & proxy)
2194  {
2196  proxy.lhs(), T(1), 1, false, false,
2197  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2198  }
2199 
2200  // x += y + z / beta
2201  template<typename ScalarType>
2202  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2203  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2204  op_add> const & proxy)
2205  {
2207  proxy.lhs(), T(1), 1, false, false,
2208  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2209  }
2210 
2211  // x += alpha * y + beta * z
2212  template<typename ScalarType1, typename ScalarType2>
2213  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2214  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2215  op_add> const & proxy)
2216  {
2218  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2219  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2220  }
2221 
2222  // x += alpha * y + z / beta
2223  template<typename ScalarType1, typename ScalarType2>
2224  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2225  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2226  op_add> const & proxy)
2227  {
2229  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2230  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2231  }
2232 
2233  // x += y / alpha + beta * z
2234  template<typename ScalarType1, typename ScalarType2>
2235  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2236  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2237  op_add> const & proxy)
2238  {
2240  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2241  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2242  }
2243 
2244  // x += y / alpha + z / beta
2245  template<typename ScalarType1, typename ScalarType2>
2246  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2247  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2248  op_add> const & proxy)
2249  {
2251  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2252  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2253  }
2254  };
2255 
2256 
2257 
2258  // generic x -= mat_expr1 + mat_expr2 (destination is a matrix_base<T>, right-hand side is an op_add matrix expression):
      // NOTE(review): in each specialised overload below the first body line (listing lines 2285, 2296, 2307, ...) is absent
      // from this extract, so only the trailing argument list is visible. Presumably it is the call head receiving lhs
      // (likely viennacl::linalg::ambm_m(lhs, ...)) - confirm against upstream before compiling.
      // Visible argument pattern per operand: (matrix, scalar, 1, <true only for '/' operands>, <true for every operand here, since both are subtracted>).
2259  template<typename T, typename LHS, typename RHS>
2260  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
2261  {
2262  // generic x -= mat_expr1 + mat_expr2: if lhs aliases either operand, build (expr1 + expr2) in a temporary and subtract that; otherwise subtract both operands from lhs one after the other
2263  template<typename LHS1, typename RHS1>
2264  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2265  {
2266  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2267  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2268 
2269  if (op_aliasing_lhs || op_aliasing_rhs)
2270  {
2271  matrix_base<T> temp(proxy.lhs());
2272  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2273  lhs -= temp;
2274  }
2275  else
2276  {
2277  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2278  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2279  }
2280  }
2281 
2282  // x -= y + z
2283  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2284  {
2286  proxy.lhs(), T(1), 1, false, true,
2287  proxy.rhs(), T(1), 1, false, true);
2288  }
2289 
2290  // x -= alpha * y + z
2291  template<typename ScalarType>
2292  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2293  const matrix_base<T>,
2294  op_add> const & proxy)
2295  {
2297  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2298  proxy.rhs(), T(1), 1, false, true);
2299  }
2300 
2301  // x -= y / alpha + z
2302  template<typename ScalarType>
2303  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2304  const matrix_base<T>,
2305  op_add> const & proxy)
2306  {
2308  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2309  proxy.rhs(), T(1), 1, false, true);
2310  }
2311 
2312  // x -= y + beta * z
2313  template<typename ScalarType>
2314  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2315  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2316  op_add> const & proxy)
2317  {
2319  proxy.lhs(), T(1), 1, false, true,
2320  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2321  }
2322 
2323  // x -= y + z / beta
2324  template<typename ScalarType>
2325  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2326  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2327  op_add> const & proxy)
2328  {
2330  proxy.lhs(), T(1), 1, false, true,
2331  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2332  }
2333 
2334  // x -= alpha * y + beta * z
2335  template<typename ScalarType1, typename ScalarType2>
2336  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2337  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2338  op_add> const & proxy)
2339  {
2341  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2342  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2343  }
2344 
2345  // x -= alpha * y + z / beta
2346  template<typename ScalarType1, typename ScalarType2>
2347  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2348  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2349  op_add> const & proxy)
2350  {
2352  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2353  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2354  }
2355 
2356  // x -= y / alpha + beta * z
2357  template<typename ScalarType1, typename ScalarType2>
2358  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2359  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2360  op_add> const & proxy)
2361  {
2363  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2364  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2365  }
2366 
2367  // x -= y / alpha + z / beta
2368  template<typename ScalarType1, typename ScalarType2>
2369  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2370  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2371  op_add> const & proxy)
2372  {
2374  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2375  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2376  }
2377  };
2378 
2379 
2380 
2382 
2383 
2384 
2385  // generic x = mat_expr1 - mat_expr2 (destination is a matrix_base<T>, right-hand side is an op_sub matrix expression):
      // NOTE(review): in each specialised overload below the first body line (listing lines 2412, 2423, 2434, ...) is absent
      // from this extract, so only the trailing argument list is visible. Presumably it is the call head receiving lhs
      // (likely viennacl::linalg::ambm(lhs, ...)) - confirm against upstream before compiling.
      // Visible argument pattern per operand: (matrix, scalar, 1, <true only for '/' operands>, <true only for the subtracted second operand>).
2386  template<typename T, typename LHS, typename RHS>
2387  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
2388  {
2389  // generic x = mat_expr1 - mat_expr2: if lhs aliases either operand, build (expr1 - expr2) in a temporary and assign that; otherwise assign expr1 to lhs and then subtract expr2 in place
2390  template<typename LHS1, typename RHS1>
2391  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2392  {
2393  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2394  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2395 
2396  if (op_aliasing_lhs || op_aliasing_rhs)
2397  {
2398  matrix_base<T> temp(proxy.lhs());
2399  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2400  lhs = temp;
2401  }
2402  else
2403  {
2404  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2405  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2406  }
2407  }
2408 
2409  // x = y - z
2410  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2411  {
2413  proxy.lhs(), T(1), 1, false, false,
2414  proxy.rhs(), T(1), 1, false, true);
2415  }
2416 
2417  // x = alpha * y - z
2418  template<typename ScalarType>
2419  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2420  const matrix_base<T>,
2421  op_sub> const & proxy)
2422  {
2424  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2425  proxy.rhs(), T(1), 1, false, true);
2426  }
2427 
2428  // x = y / alpha - z
2429  template<typename ScalarType>
2430  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2431  const matrix_base<T>,
2432  op_sub> const & proxy)
2433  {
2435  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2436  proxy.rhs(), T(1), 1, false, true);
2437  }
2438 
2439  // x = y - beta * z
2440  template<typename ScalarType>
2441  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2442  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2443  op_sub> const & proxy)
2444  {
2446  proxy.lhs(), T(1), 1, false, false,
2447  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2448  }
2449 
2450  // x = y - z / beta
2451  template<typename ScalarType>
2452  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2453  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2454  op_sub> const & proxy)
2455  {
2457  proxy.lhs(), T(1), 1, false, false,
2458  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2459  }
2460 
2461  // x = alpha * y - beta * z
2462  template<typename ScalarType1, typename ScalarType2>
2463  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2464  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2465  op_sub> const & proxy)
2466  {
2468  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2469  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2470  }
2471 
2472  // x = alpha * y - z / beta
2473  template<typename ScalarType1, typename ScalarType2>
2474  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2475  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2476  op_sub> const & proxy)
2477  {
2479  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2480  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2481  }
2482 
2483  // x = y / alpha - beta * z
2484  template<typename ScalarType1, typename ScalarType2>
2485  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2486  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2487  op_sub> const & proxy)
2488  {
2490  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2491  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2492  }
2493 
2494  // x = y / alpha - z / beta
2495  template<typename ScalarType1, typename ScalarType2>
2496  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2497  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2498  op_sub> const & proxy)
2499  {
2501  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2502  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2503  }
2504  };
2505 
2506 
2507  // generic x += mat_expr1 - mat_expr2 (destination is a matrix_base<T>, right-hand side is an op_sub matrix expression):
      // NOTE(review): in each specialised overload below the first body line (listing lines 2534, 2545, 2556, ...) is absent
      // from this extract, so only the trailing argument list is visible. Presumably it is the call head receiving lhs
      // (likely viennacl::linalg::ambm_m(lhs, ...)) - confirm against upstream before compiling.
      // Visible argument pattern per operand: (matrix, scalar, 1, <true only for '/' operands>, <true only for the subtracted second operand>).
2508  template<typename T, typename LHS, typename RHS>
2509  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
2510  {
2511  // generic x += mat_expr1 - mat_expr2: if lhs aliases either operand, build (expr1 - expr2) in a temporary and add that; otherwise add expr1 to lhs and then subtract expr2
2512  template<typename LHS1, typename RHS1>
2513  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2514  {
2515  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2516  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2517 
2518  if (op_aliasing_lhs || op_aliasing_rhs)
2519  {
2520  matrix_base<T> temp(proxy.lhs());
2521  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2522  lhs += temp;
2523  }
2524  else
2525  {
2526  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2527  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2528  }
2529  }
2530 
2531  // x += y - z
2532  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2533  {
2535  proxy.lhs(), T(1), 1, false, false,
2536  proxy.rhs(), T(1), 1, false, true);
2537  }
2538 
2539  // x += alpha * y - z
2540  template<typename ScalarType>
2541  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2542  const matrix_base<T>,
2543  op_sub> const & proxy)
2544  {
2546  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2547  proxy.rhs(), T(1), 1, false, true);
2548  }
2549 
2550  // x += y / alpha - z
2551  template<typename ScalarType>
2552  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2553  const matrix_base<T>,
2554  op_sub> const & proxy)
2555  {
2557  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2558  proxy.rhs(), T(1), 1, false, true);
2559  }
2560 
2561  // x += y - beta * z
2562  template<typename ScalarType>
2563  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2564  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2565  op_sub> const & proxy)
2566  {
2568  proxy.lhs(), T(1), 1, false, false,
2569  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2570  }
2571 
2572  // x += y - z / beta
2573  template<typename ScalarType>
2574  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2575  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2576  op_sub> const & proxy)
2577  {
2579  proxy.lhs(), T(1), 1, false, false,
2580  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2581  }
2582 
2583  // x += alpha * y - beta * z
2584  template<typename ScalarType1, typename ScalarType2>
2585  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2586  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2587  op_sub> const & proxy)
2588  {
2590  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2591  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2592  }
2593 
2594  // x += alpha * y - z / beta
2595  template<typename ScalarType1, typename ScalarType2>
2596  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2597  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2598  op_sub> const & proxy)
2599  {
2601  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2602  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2603  }
2604 
2605  // x += y / alpha - beta * z
2606  template<typename ScalarType1, typename ScalarType2>
2607  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2608  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2609  op_sub> const & proxy)
2610  {
2612  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2613  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2614  }
2615 
2616  // x += y / alpha - z / beta
2617  template<typename ScalarType1, typename ScalarType2>
2618  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2619  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2620  op_sub> const & proxy)
2621  {
2623  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2624  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2625  }
2626  };
2627 
2628 
2629 
2630  // generic x -= mat_expr1 - mat_expr2 (destination is a matrix_base<T>, right-hand side is an op_sub matrix expression):
      // NOTE(review): in each specialised overload below the first body line (listing lines 2657, 2668, 2679, ...) is absent
      // from this extract, so only the trailing argument list is visible. Presumably it is the call head receiving lhs
      // (likely viennacl::linalg::ambm_m(lhs, ...)) - confirm against upstream before compiling.
      // Visible argument pattern per operand: (matrix, scalar, 1, <true only for '/' operands>, <true only for the first operand, which ends up subtracted>).
2631  template<typename T, typename LHS, typename RHS>
2632  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
2633  {
2634  // generic x -= mat_expr1 - mat_expr2: if lhs aliases either operand, build (expr1 - expr2) in a temporary and subtract that; otherwise subtract expr1 from lhs and then add expr2 back
2635  template<typename LHS1, typename RHS1>
2636  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2637  {
2638  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2639  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2640 
2641  if (op_aliasing_lhs || op_aliasing_rhs)
2642  {
2643  matrix_base<T> temp(proxy.lhs());
2644  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2645  lhs -= temp;
2646  }
2647  else
2648  {
2649  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2650  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2651  }
2652  }
2653 
2654  // x -= y - z
2655  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2656  {
2658  proxy.lhs(), T(1), 1, false, true,
2659  proxy.rhs(), T(1), 1, false, false);
2660  }
2661 
2662  // x -= alpha * y - z
2663  template<typename ScalarType>
2664  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2665  const matrix_base<T>,
2666  op_sub> const & proxy)
2667  {
2669  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2670  proxy.rhs(), T(1), 1, false, false);
2671  }
2672 
2673  // x -= y / alpha - z
2674  template<typename ScalarType>
2675  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2676  const matrix_base<T>,
2677  op_sub> const & proxy)
2678  {
2680  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2681  proxy.rhs(), T(1), 1, false, false);
2682  }
2683 
2684  // x -= y - beta * z
2685  template<typename ScalarType>
2686  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2687  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2688  op_sub> const & proxy)
2689  {
2691  proxy.lhs(), T(1), 1, false, true,
2692  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2693  }
2694 
2695  // x -= y - z / beta
2696  template<typename ScalarType>
2697  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2698  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2699  op_sub> const & proxy)
2700  {
2702  proxy.lhs(), T(1), 1, false, true,
2703  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2704  }
2705 
2706  // x -= alpha * y - beta * z
2707  template<typename ScalarType1, typename ScalarType2>
2708  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2709  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2710  op_sub> const & proxy)
2711  {
2713  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2714  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2715  }
2716 
2717  // x -= alpha * y - z / beta
2718  template<typename ScalarType1, typename ScalarType2>
2719  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2720  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2721  op_sub> const & proxy)
2722  {
2724  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2725  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2726  }
2727 
2728  // x -= y / alpha - beta * z
2729  template<typename ScalarType1, typename ScalarType2>
2730  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2731  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2732  op_sub> const & proxy)
2733  {
2735  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2736  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2737  }
2738 
2739  // x -= y / alpha - z / beta
2740  template<typename ScalarType1, typename ScalarType2>
2741  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2742  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2743  op_sub> const & proxy)
2744  {
2746  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2747  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2748  }
2749  };
2750 
2751 
2753 
// Assignment of an op_vector_diag expression (vector operand, int operand) to a matrix_base<T>.
// The int operand is presumably the index of the target diagonal - confirm against matrix_diag_from_vector().
2754  template<typename T, typename LHS>
2755  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
2756  {
      // Forwards the vector, the int operand and the destination matrix to matrix_diag_from_vector().
2757  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
2758  {
2759  viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
2760  }
2761  };
2762 
2763 
// Assignment of an op_matrix_diag expression (matrix operand, int operand) to a vector_base<T>.
// The int operand is presumably the index of the diagonal to extract - confirm against matrix_diag_to_vector().
2764  template<typename T, typename LHS>
2765  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
2766  {
      // Forwards the matrix, the int operand and the destination vector to matrix_diag_to_vector().
2767  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const int, op_matrix_diag> const & proxy)
2768  {
2769  viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
2770  }
2771  };
2772 
// Assignment of an op_row expression (matrix operand, unsigned int operand) to a vector_base<T>.
// The unsigned int operand is presumably the row index - confirm against matrix_row().
2773  template<typename T, typename LHS>
2774  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
2775  {
      // Forwards the matrix, the unsigned int operand and the destination vector to matrix_row().
2776  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_row> const & proxy)
2777  {
2778  viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
2779  }
2780  };
2781 
2782 
// Assignment of an op_column expression (matrix operand, unsigned int operand) to a vector_base<T>.
// The unsigned int operand is presumably the column index - confirm against matrix_column().
2783  template<typename T, typename LHS>
2784  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
2785  {
      // Forwards the matrix, the unsigned int operand and the destination vector to matrix_column().
2786  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_column> const & proxy)
2787  {
2788  viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
2789  }
2790  };
2791 
2792 
2794 
2795  // generic x = mat_expr1 .* mat_expr2 (element-wise binary operation OP): operands that are themselves expressions are first evaluated into matrix_base<T> temporaries, then element_op() is called on plain matrices
2796  template<typename T, typename LHS, typename RHS, typename OP>
2797  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2798  {
2799  // x = y .* z (both operands are plain matrices: hand the proxy to element_op() unchanged)
2800  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2801  {
2802  viennacl::linalg::element_op(lhs, proxy);
2803  }
2804 
2805  // x = y .* mat_expr (right operand is evaluated into a temporary first)
2806  template<typename LHS2, typename RHS2, typename OP2>
2807  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2808  {
2809  matrix_base<T> temp(proxy.rhs());
2810  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2811  }
2812 
2813  // x = mat_expr .* z (left operand is evaluated into a temporary first)
2814  template<typename LHS1, typename RHS1, typename OP1>
2815  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2816  {
2817  matrix_base<T> temp(proxy.lhs());
2818  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2819  }
2820 
2821  // x = mat_expr .* mat_expr (both operands are evaluated into temporaries first)
2822  template<typename LHS1, typename RHS1, typename OP1,
2823  typename LHS2, typename RHS2, typename OP2>
2824  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2825  const matrix_expression<const LHS2, const RHS2, OP2>,
2826  op_element_binary<OP> > const & proxy)
2827  {
2828  matrix_base<T> temp1(proxy.lhs());
2829  matrix_base<T> temp2(proxy.rhs());
2830  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2831  }
2832  };
2833 
2834  // generic x += mat_expr1 .* mat_expr2 (element-wise binary operation OP): the element-wise result is always formed in a temporary matrix, which is then added to lhs
2835  template<typename T, typename LHS, typename RHS, typename OP>
2836  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2837  {
2838  // x += y .* z (the whole proxy is evaluated into a temporary)
2839  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2840  {
2841  matrix_base<T> temp(proxy);
2842  lhs += temp;
2843  }
2844 
2845  // x += y .* mat_expr (temp holds the evaluated right operand; temp2 receives the element-wise result, created with temp's sizes and lhs's row_major() flag and context)
2846  template<typename LHS2, typename RHS2, typename OP2>
2847  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2848  {
2849  matrix_base<T> temp(proxy.rhs());
2850  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2851  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2852  lhs += temp2;
2853  }
2854 
2855  // x += mat_expr .* z (temp holds the evaluated left operand; temp2 receives the element-wise result)
2856  template<typename LHS1, typename RHS1, typename OP1>
2857  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2858  {
2859  matrix_base<T> temp(proxy.lhs());
2860  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2861  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2862  lhs += temp2;
2863  }
2864 
2865  // x += mat_expr .* mat_expr (temp1/temp2 hold the evaluated operands; temp3 receives the element-wise result)
2866  template<typename LHS1, typename RHS1, typename OP1,
2867  typename LHS2, typename RHS2, typename OP2>
2868  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2869  const matrix_expression<const LHS2, const RHS2, OP2>,
2870  op_element_binary<OP> > const & proxy)
2871  {
2872  matrix_base<T> temp1(proxy.lhs());
2873  matrix_base<T> temp2(proxy.rhs());
2874  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2875  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2876  lhs += temp3;
2877  }
2878  };
2879 
2880  // generic x -= mat_expr1 .* mat_expr2 (element-wise binary operation OP): the element-wise result is always formed in a temporary matrix, which is then subtracted from lhs
2881  template<typename T, typename LHS, typename RHS, typename OP>
2882  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2883  {
2884 
2885  // x -= y .* z (the whole proxy is evaluated into a temporary)
2886  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2887  {
2888  matrix_base<T> temp(proxy);
2889  lhs -= temp;
2890  }
2891 
2892  // x -= y .* mat_expr (temp holds the evaluated right operand; temp2 receives the element-wise result, created with temp's sizes and lhs's row_major() flag and context)
2893  template<typename LHS2, typename RHS2, typename OP2>
2894  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2895  {
2896  matrix_base<T> temp(proxy.rhs());
2897  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2898  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2899  lhs -= temp2;
2900  }
2901 
2902  // x -= mat_expr .* z (temp holds the evaluated left operand; temp2 receives the element-wise result)
2903  template<typename LHS1, typename RHS1, typename OP1>
2904  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2905  {
2906  matrix_base<T> temp(proxy.lhs());
2907  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2908  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2909  lhs -= temp2;
2910  }
2911 
2912  // x -= mat_expr .* mat_expr (temp1/temp2 hold the evaluated operands; temp3 receives the element-wise result)
2913  template<typename LHS1, typename RHS1, typename OP1,
2914  typename LHS2, typename RHS2, typename OP2>
2915  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2916  const matrix_expression<const LHS2, const RHS2, OP2>,
2917  op_element_binary<OP> > const & proxy)
2918  {
2919  matrix_base<T> temp1(proxy.lhs());
2920  matrix_base<T> temp2(proxy.rhs());
2921  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2922  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2923  lhs -= temp3;
2924  }
2925  };
2926 
2928 
// generic x = OP(mat_expr) (element-wise unary operation OP): the unary proxy carries the same operand type on both sides.
2929  template<typename T, typename LHS, typename RHS, typename OP>
2930  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2931  {
2932  // x = OP(y) (operand is a plain matrix: hand the proxy to element_op() unchanged)
2933  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2934  {
2935  viennacl::linalg::element_op(lhs, proxy);
2936  }
2937 
2938  // x = OP(mat_expr) (the nested expression is evaluated once, from proxy.rhs(), into temp, which is then used for both operand slots)
2939  template<typename LHS2, typename RHS2, typename OP2>
2940  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2941  const matrix_expression<const LHS2, const RHS2, OP2>,
2942  op_element_unary<OP> > const & proxy)
2943  {
2944  matrix_base<T> temp(proxy.rhs());
2945  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp));
2946  }
2947  };
2948 
// generic x += OP(mat_expr) (element-wise unary operation OP): the result is formed in a temporary, which is then added to lhs.
2949  template<typename T, typename LHS, typename RHS, typename OP>
2950  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2951  {
2952  // x += OP(y) (the whole proxy is evaluated into a temporary)
2953  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2954  {
2955  matrix_base<T> temp(proxy);
2956  lhs += temp;
2957  }
2958 
2959  // x += OP(mat_expr) (the nested expression is evaluated from proxy.rhs() into temp, OP is applied to temp in place, then temp is added to lhs)
2960  template<typename LHS2, typename RHS2, typename OP2>
2961  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2962  const matrix_expression<const LHS2, const RHS2, OP2>,
2963  op_element_unary<OP> > const & proxy)
2964  {
2965  matrix_base<T> temp(proxy.rhs());
2966  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
2967  lhs += temp;
2968  }
2969  };
2970 
// Executor for x -= OP(...), where OP is an element-wise unary operation applied to a dense matrix or to a matrix expression.
2971  template<typename T, typename LHS, typename RHS, typename OP>
2972  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2973  {
2974  // x -= OP(y)
      // The proxy is materialized into the temporary 'temp', which is then subtracted from lhs.
2975  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
2976  {
2977  matrix_base<T> temp(proxy);
2978  lhs -= temp;
2979  }
2980 
2981  // x -= OP(mat_expr)
      // The operand expression is materialized into 'temp', OP is applied to 'temp' in place, and the result is subtracted from lhs.
2982  template<typename LHS2, typename RHS2, typename OP2>
2983  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2984  const matrix_expression<const LHS2, const RHS2, OP2>,
2985  op_element_unary<OP> > const & proxy)
2986  {
2987  matrix_base<T> temp(proxy.rhs());
2988  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
2989  lhs -= temp;
2990  }
2991  };
2992 
2993 
2994 
2996 
2997  // C = A * B
      // Executor for assigning a dense matrix-matrix product to a dense matrix.
2998  template<typename T>
2999  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3000  {
3001  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3002  {
      // check for C = C * B or C = A * C: the product is then built in a temporary, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs = temp;
      }
      else
3003  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3004  }
3005  };
3006 
3007  // C = A * B^T
      // Executor for assigning the product of a dense matrix and a transposed dense matrix to a dense matrix.
3008  template<typename T>
3009  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>,
3010  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3011  op_mat_mat_prod> >
3012  {
3013  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3014  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3015  op_mat_mat_prod> const & rhs)
3016  {
      // check for C = C * B^T or C = A * C^T: the product is then built in a temporary, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs = temp;
      }
      else
3017  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3018  }
3019  };
3020 
3021  // C = A^T * B
      // Executor for assigning the product of a transposed dense matrix and a dense matrix to a dense matrix.
3022  template<typename T>
3023  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3024  const matrix_base<T>,
3025  op_mat_mat_prod> >
3026  {
3027  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3028  const matrix_base<T>,
3029  op_mat_mat_prod> const & rhs)
3030  {
      // check for C = C^T * B or C = A^T * C: the product is then built in a temporary, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs = temp;
      }
      else
3031  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3032  }
3033  };
3034 
3035  // C = A^T * B^T
      // Executor for assigning the product of two transposed dense matrices to a dense matrix.
3036  template<typename T>
3037  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3038  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3039  op_mat_mat_prod> >
3040  {
3041  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3042  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3043  op_mat_mat_prod> const & rhs)
3044  {
      // check for C = C^T * B^T or C = A^T * C^T: the product is then built in a temporary, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs = temp;
      }
      else
3045  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3046  }
3047  };
3048 
3049 
3050  // C += A * B
      // Executor for adding a dense matrix-matrix product to a dense matrix.
3051  template<typename T>
3052  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3053  {
3054  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3055  {
      // check for C += C * B or C += A * C: the product is then built in a temporary and added afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs += temp;
      }
      else
3056  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3057  }
3058  };
3059 
3060  // C += A * B^T
      // Executor for adding the product of a dense matrix and a transposed dense matrix to a dense matrix.
3061  template<typename T>
3062  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>,
3063  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3064  op_mat_mat_prod> >
3065  {
3066  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3067  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3068  op_mat_mat_prod> const & rhs)
3069  {
      // check for C += C * B^T or C += A * C^T: the product is then built in a temporary and added afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs += temp;
      }
      else
3070  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3071  }
3072  };
3073 
3074  // C += A^T * B
      // Executor for adding the product of a transposed dense matrix and a dense matrix to a dense matrix.
3075  template<typename T>
3076  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3077  const matrix_base<T>,
3078  op_mat_mat_prod> >
3079  {
3080  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3081  const matrix_base<T>,
3082  op_mat_mat_prod> const & rhs)
3083  {
      // check for C += C^T * B or C += A^T * C: the product is then built in a temporary and added afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs += temp;
      }
      else
3084  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3085  }
3086  };
3087 
3088  // C += A^T * B^T
      // Executor for adding the product of two transposed dense matrices to a dense matrix.
3089  template<typename T>
3090  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3091  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3092  op_mat_mat_prod> >
3093  {
3094  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3095  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3096  op_mat_mat_prod> const & rhs)
3097  {
      // check for C += C^T * B^T or C += A^T * C^T: the product is then built in a temporary and added afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs += temp;
      }
      else
3098  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3099  }
3100  };
3101 
3102 
3103  // C -= A * B
      // Executor for subtracting a dense matrix-matrix product from a dense matrix.
3104  template<typename T>
3105  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3106  {
3107  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3108  {
      // check for C -= C * B or C -= A * C: the product is then built in a temporary and subtracted afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs -= temp;
      }
      else
3109  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3110  }
3111  };
3112 
3113  // C -= A * B^T
      // Executor for subtracting the product of a dense matrix and a transposed dense matrix from a dense matrix.
3114  template<typename T>
3115  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>,
3116  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3117  op_mat_mat_prod> >
3118  {
3119  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3120  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3121  op_mat_mat_prod> const & rhs)
3122  {
      // check for C -= C * B^T or C -= A * C^T: the product is then built in a temporary and subtracted afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs -= temp;
      }
      else
3123  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3124  }
3125  };
3126 
3127  // C -= A^T * B
      // Executor for subtracting the product of a transposed dense matrix and a dense matrix from a dense matrix.
3128  template<typename T>
3129  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3130  const matrix_base<T>,
3131  op_mat_mat_prod> >
3132  {
3133  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3134  const matrix_base<T>,
3135  op_mat_mat_prod> const & rhs)
3136  {
      // check for C -= C^T * B or C -= A^T * C: the product is then built in a temporary and subtracted afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs -= temp;
      }
      else
3137  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3138  }
3139  };
3140 
3141  // C -= A^T * B^T
      // Executor for subtracting the product of two transposed dense matrices from a dense matrix.
3142  template<typename T>
3143  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3144  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3145  op_mat_mat_prod> >
3146  {
3147  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3148  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3149  op_mat_mat_prod> const & rhs)
3150  {
      // check for C -= C^T * B^T or C -= A^T * C^T: the product is then built in a temporary and subtracted afterwards, because writing
      // into a matrix that is still being read as an operand is presumably unsafe (cf. the x = A * x check for vectors in this file).
      // NOTE(review): assumes op_aliasing() overloads taking a matrix_base left-hand side (including matrix expressions as second argument) exist in op_executor.hpp - confirm.
      if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
      {
        matrix_base<T> temp(rhs);
        lhs -= temp;
      }
      else
3151  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3152  }
3153  };
3154 
3156 
3157  // y = A * x
      // Executor for assigning a dense matrix-vector product to a vector.
3158  template<typename T>
3159  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3160  {
3161  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3162  {
3163  // check for x = A * x
      // If the result vector aliases the input vector, a temporary is constructed from the product expression and then assigned to lhs.
3164  if (op_aliasing(lhs, rhs.rhs()))
3165  {
3166  vector_base<T> temp(rhs);
3167  lhs = temp;
3168  }
3169  else
      // No aliasing reported: prod_impl is called with lhs as the result vector.
3170  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3171  }
3172  };
3173 
3174  // y = A^T * x
      // Executor for assigning the product of a transposed dense matrix and a vector to a vector.
3175  template<typename T>
3176  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3177  const vector_base<T>,
3178  op_prod> >
3179  {
3180  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3181  const vector_base<T>,
3182  op_prod> const & rhs)
3183  {
3184  // check for x = A^T * x
      // If the result vector aliases the input vector, a temporary is constructed from the product expression and then assigned to lhs.
3185  if (op_aliasing(lhs, rhs.rhs()))
3186  {
3187  vector_base<T> temp(rhs);
3188  lhs = temp;
3189  }
3190  else
      // No aliasing reported: prod_impl is called with lhs as the result vector.
3191  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3192  }
3193  };
3194 
3195 
3196  // y += A * x
      // Executor for adding a dense matrix-vector product to a vector.
3197  template<typename T>
3198  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3199  {
3200  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3201  {
      // A temporary is always constructed from the product expression and then added to lhs.
3202  vector_base<T> temp(rhs);
3203  lhs += temp;
3204  }
3205  };
3206 
3207  // y += A^T * x
      // Executor for adding the product of a transposed dense matrix and a vector to a vector.
3208  template<typename T>
3209  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3210  const vector_base<T>,
3211  op_prod> >
3212  {
3213  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3214  const vector_base<T>,
3215  op_prod> const & rhs)
3216  {
      // A temporary is always constructed from the product expression and then added to lhs.
3217  vector_base<T> temp(rhs);
3218  lhs += temp;
3219  }
3220  };
3221 
3222 
3223  // y -= A * x
      // Executor for subtracting a dense matrix-vector product from a vector.
3224  template<typename T>
3225  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3226  {
3227  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3228  {
      // A temporary is always constructed from the product expression and then subtracted from lhs.
3229  vector_base<T> temp(rhs);
3230  lhs -= temp;
3231  }
3232  };
3233 
3234  // y -= A^T * x
      // Executor for subtracting the product of a transposed dense matrix and a vector from a vector.
3235  template<typename T>
3236  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3237  const vector_base<T>,
3238  op_prod> >
3239  {
3240  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3241  const vector_base<T>,
3242  op_prod> const & rhs)
3243  {
      // A temporary is always constructed from the product expression and then subtracted from lhs.
3244  vector_base<T> temp(rhs);
3245  lhs -= temp;
3246  }
3247  };
3248 
3249 
3250 
3252 
3253  // A = v1 * v2^T
      // Executor for assigning the outer product of two vectors to a dense matrix.
3254  template<typename T>
3255  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3256  {
3257  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3258  {
      // scaled_rank_1_update accumulates (mat += alpha * vec1 * vec2^T), so lhs is zeroed first to obtain an assignment.
3259  lhs.clear();
      // Arguments after lhs: alpha = T(1.0), len_alpha = 1, reciprocal_alpha = false, flip_sign_alpha = false.
3260  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3261  }
3262  };
3263 
3264  // A = alpha * v1 * v2^T
      // Executor for assigning a scaled outer product of two vectors to a dense matrix.
3265  template<typename T, typename ScalarType>
3266  struct op_executor<matrix_base<T>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3267  const ScalarType,
3268  op_mult> >
3269  {
3270  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3271  const ScalarType,
3272  op_mult> const & rhs)
3273  {
      // scaled_rank_1_update accumulates (mat += alpha * vec1 * vec2^T), so lhs is zeroed first to obtain an assignment.
3274  lhs.clear();
      // rhs.rhs() is the scalar factor; rhs.lhs() is the inner outer-product expression holding v1 and v2.
3275  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3276  }
3277  };
3278 
3279  // A += v1 * v2^T
      // Executor for adding the outer product of two vectors to a dense matrix.
3280  template<typename T>
3281  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3282  {
3283  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3284  {
      // Arguments after lhs: alpha = T(1.0), len_alpha = 1, reciprocal_alpha = false, flip_sign_alpha = false.
3285  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3286  }
3287  };
3288 
3289  // A += alpha * v1 * v2^T
      // Executor for adding a scaled outer product of two vectors to a dense matrix.
3290  template<typename T, typename ScalarType>
3291  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3292  const ScalarType,
3293  op_mult> >
3294  {
3295  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3296  const ScalarType,
3297  op_mult> const & rhs)
3298  {
      // rhs.rhs() is the scalar factor; rhs.lhs() is the inner outer-product expression holding v1 and v2.
3299  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3300  }
3301  };
3302 
3303  // A -= v1 * v2^T
      // Executor for subtracting the outer product of two vectors from a dense matrix.
3304  template<typename T>
3305  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3306  {
3307  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3308  {
      // The subtraction is expressed through flip_sign_alpha = true (fifth argument) with alpha = T(1.0).
3309  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
3310  }
3311  };
3312 
3313  // A -= alpha * v1 * v2^T
      // Executor for subtracting a scaled outer product of two vectors from a dense matrix.
3314  template<typename T, typename ScalarType>
3315  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3316  const ScalarType,
3317  op_mult> >
3318  {
3319  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3320  const ScalarType,
3321  op_mult> const & rhs)
3322  {
      // The subtraction is expressed through flip_sign_alpha = true (fifth argument); rhs.rhs() is the scalar factor.
3323  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
3324  }
3325  };
3326 
3327 
3328 } // namespace detail
3329 
3330 } // namespace linalg
3331 
3334 } //namespace viennacl
3335 
3336 #endif
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30
bool operator==(self_type const &other)
Definition: matrix.hpp:106
A tag class representing multiplication by a scalar.
Definition: forwards.h:91
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
Wraps a CUDA or host buffer provided by the user.
Definition: matrix.hpp:684
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:313
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:235
Implementations of dense matrix related operations including matrix-vector products.
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:233
self_type & operator=(const self_type &other)
Definition: matrix.hpp:243
matrix(zero_matrix< NumericT > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:723
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:483
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:461
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Various little tools used here and there in ViennaCL.
matrix_expression< const self_type, const NumericT, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:591
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:194
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:188
A tag class representing subtraction.
Definition: forwards.h:89
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
viennacl::context context() const
Definition: matrix_def.hpp:46
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:84
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
A tag class representing division.
Definition: forwards.h:97
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
entry_proxy< NumericT > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:440
static vcl_size_t size1(LHS &lhs, RHS &)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator/=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1595
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT >, const S1, op_mult >>::type operator*(S1 const &value, matrix_base< NumericT > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1295
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:238
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Forward declaration of dense matrix classes.
bool op_aliasing(vector_base< NumericT > const &, B const &)
Definition: op_executor.hpp:36
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
bool operator!=(self_type const &other)
Definition: matrix.hpp:107
matrix(scalar_matrix< NumericT > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:730
vcl_size_t size1() const
Returns the size of the result vector.
Definition: matrix.hpp:72
self_type operator++(int)
Definition: matrix.hpp:104
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:185
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:766
size_type size2() const
Definition: matrix_def.hpp:45
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator*=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1443
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:225
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix_def.hpp:117
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &v)
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:713
size_type size1() const
Definition: matrix_def.hpp:44
vcl_size_t size2() const
Definition: matrix.hpp:73
void resize(MatrixType &matrix, vcl_size_t rows, vcl_size_t cols)
Generic resize routine for resizing a matrix (ViennaCL, uBLAS, etc.) to a new size/dimension.
Definition: size.hpp:59
MatrixT::value_type value_type
Definition: matrix.hpp:96
void clear()
Resets all entries to zero.
Definition: matrix.hpp:597
MatrixT & operator()(void) const
Definition: matrix.hpp:112
Implementations of operations using sparse matrices.
A tag class representing addition.
Definition: forwards.h:87
matrix(const self_type &other)
Definition: matrix.hpp:743
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:62
void scaled_rank_1_update(matrix_base< NumericT > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1524
Determines whether a given expression has a row-major matrix layout.
std::size_t vcl_size_t
Definition: forwards.h:74
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:217
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:667
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:235
matrix(identity_matrix< NumericT > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:716
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:838
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:212
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slice only.
Definition: matrix_def.hpp:81
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &v)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:215
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
base_type::size_type size_type
Definition: matrix.hpp:664
matrix(const base_type &other)
Definition: matrix.hpp:736
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:69
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
matrix_iterator(MatrixT &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:98
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:93
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:133
A dense matrix class.
Definition: forwards.h:368
bool row_major() const
Definition: matrix_def.hpp:239
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
bool row_major(T const &)
Definition: row_major.hpp:38
float ScalarType
Definition: fft_1d.cpp:42
value_type operator*(void)
Definition: matrix.hpp:102
A tag class representing transposed matrices.
Definition: forwards.h:219
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type internal_row_count, size_type cols, size_type internal_col_count)
Wraps a CUDA or host buffer provided by the user including padding of rows and columns.
Definition: matrix.hpp:700
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
self_type & operator*=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:480
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
viennacl::matrix< float > m1
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:229
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:675
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:330
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:861
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)
self_type & operator/=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:536
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void resize(size_type rows, size_type columns, bool preserve=true)
Definition: matrix.hpp:601
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
memory_types
Definition: forwards.h:344
static vcl_size_t size2(LHS &, RHS &rhs)
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT > &A)
Dispatcher interface for A = diag(v, k)
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:66
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:191
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:232
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Implementation of the ViennaCL scalar class.
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:608
A collection of compile time type deductions.
A tag for row-major storage of a dense matrix.
Definition: forwards.h:303
self_type & operator++(void)
Definition: matrix.hpp:103
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:99
A tag indicating iteration along increasing columns index of a matrix.
Definition: matrix.hpp:87
Simple enable-if variant that uses the SFINAE pattern.
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)