ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
compressed_matrix.hpp
1 #ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
2 #define VIENNACL_COMPRESSED_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <list>
27 #include <map>
28 #include "viennacl/forwards.h"
29 #include "viennacl/vector.hpp"
30 
31 #include "viennacl/linalg/sparse_matrix_operations.hpp"
32 
33 #include "viennacl/tools/tools.hpp"
34 #include "viennacl/tools/entry_proxy.hpp"
35 
36 #ifdef VIENNACL_WITH_UBLAS
37 #include <boost/numeric/ublas/matrix_sparse.hpp>
38 #endif
39 
40 namespace viennacl
41 {
42 namespace detail
43 {
44 
49  template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
50  void copy_impl(const CPUMatrixT & cpu_matrix,
51  compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
52  vcl_size_t nonzeros)
53  {
54  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
55  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
56 
57  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1);
58  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros);
59  std::vector<NumericT> elements(nonzeros);
60 
61  vcl_size_t row_index = 0;
62  vcl_size_t data_index = 0;
63 
64  for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1();
65  row_it != cpu_matrix.end1();
66  ++row_it)
67  {
68  row_buffer.set(row_index, data_index);
69  ++row_index;
70 
71  for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin();
72  col_it != row_it.end();
73  ++col_it)
74  {
75  col_buffer.set(data_index, col_it.index2());
76  elements[data_index] = *col_it;
77  ++data_index;
78  }
79  data_index = viennacl::tools::align_to_multiple<vcl_size_t>(data_index, AlignmentV); //take care of alignment
80  }
81  row_buffer.set(row_index, data_index);
82 
83  gpu_matrix.set(row_buffer.get(),
84  col_buffer.get(),
85  &elements[0],
86  cpu_matrix.size1(),
87  cpu_matrix.size2(),
88  nonzeros);
89  }
90 }
91 
92 //
93 // host to device:
94 //
95 
96 //provide copy-operation:
111 template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
112 void copy(const CPUMatrixT & cpu_matrix,
113  compressed_matrix<NumericT, AlignmentV> & gpu_matrix )
114 {
115  if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
116  {
117  //determine nonzeros:
118  vcl_size_t num_entries = 0;
119  for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1();
120  row_it != cpu_matrix.end1();
121  ++row_it)
122  {
123  vcl_size_t entries_per_row = 0;
124  for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin();
125  col_it != row_it.end();
126  ++col_it)
127  {
128  ++entries_per_row;
129  }
130  num_entries += viennacl::tools::align_to_multiple<vcl_size_t>(entries_per_row, AlignmentV);
131  }
132 
133  if (num_entries == 0) //we copy an empty matrix
134  num_entries = 1;
135 
136  //set up matrix entries:
137  viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, num_entries);
138  }
139 }
140 
141 
142 //adapted for std::vector< std::map < > > argument:
148 template<typename SizeT, typename NumericT, unsigned int AlignmentV>
149 void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix,
150  compressed_matrix<NumericT, AlignmentV> & gpu_matrix )
151 {
152  vcl_size_t nonzeros = 0;
153  vcl_size_t max_col = 0;
154  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
155  {
156  if (cpu_matrix[i].size() > 0)
157  nonzeros += ((cpu_matrix[i].size() - 1) / AlignmentV + 1) * AlignmentV;
158  if (cpu_matrix[i].size() > 0)
159  max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
160  }
161 
162  viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1),
163  gpu_matrix,
164  nonzeros);
165 }
166 
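// --- Illustrative usage example (not part of the original header) -------------------------
// Assembling a small sparse matrix in an STL container and transferring it to the device
// with the copy() overload above. Sizes and values are arbitrary placeholders.
//
//   std::vector< std::map<unsigned int, float> > host_A(4);   // 4x4 matrix, one map per row
//   host_A[0][0] = 1.0f;  host_A[0][3] = 2.0f;
//   host_A[2][1] = 3.0f;
//
//   viennacl::compressed_matrix<float> device_A(4, 4);
//   viennacl::copy(host_A, device_A);                         // host -> device
// -------------------------------------------------------------------------------------------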
167 #ifdef VIENNACL_WITH_UBLAS
168 
172 template<typename ScalarType, typename F, vcl_size_t IB, typename IA, typename TA>
173 void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix,
174  viennacl::compressed_matrix<ScalarType, 1> & gpu_matrix )
175 {
176  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
177  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
178 
179  //we just need to copy the CSR arrays:
180  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1);
181  for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i)
182  row_buffer.set(i, ublas_matrix.index1_data()[i]);
183 
184  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz());
185  for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
186  col_buffer.set(i, ublas_matrix.index2_data()[i]);
187 
188  gpu_matrix.set(row_buffer.get(),
189  col_buffer.get(),
190  &(ublas_matrix.value_data()[0]),
191  ublas_matrix.size1(),
192  ublas_matrix.size2(),
193  ublas_matrix.nnz());
194 
195 }
196 #endif
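// --- Illustrative usage example (requires VIENNACL_WITH_UBLAS) ----------------------------
// A Boost.uBLAS compressed_matrix can be transferred directly; its CSR index and value
// arrays are copied one-to-one by the overload above.
//
//   boost::numeric::ublas::compressed_matrix<double> ublas_A(100, 100);
//   ublas_A(0, 0) = 4.2;   // ... fill further entries ...
//
//   viennacl::compressed_matrix<double> vcl_A(100, 100);
//   viennacl::copy(ublas_A, vcl_A);
// -------------------------------------------------------------------------------------------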
197 
198 #ifdef VIENNACL_WITH_EIGEN
199 
203 template<typename NumericT, int flags, unsigned int AlignmentV>
204 void copy(const Eigen::SparseMatrix<NumericT, flags> & eigen_matrix,
205  compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
206 {
207  assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
208  assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );
209 
210  std::vector< std::map<unsigned int, NumericT> > stl_matrix(eigen_matrix.rows());
211 
212  for (int k=0; k < eigen_matrix.outerSize(); ++k)
213  for (typename Eigen::SparseMatrix<NumericT, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
214  stl_matrix[it.row()][it.col()] = it.value();
215 
216  copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, eigen_matrix.rows(), eigen_matrix.cols()), gpu_matrix);
217 }
218 #endif
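// --- Illustrative usage example (requires VIENNACL_WITH_EIGEN) ----------------------------
// An Eigen::SparseMatrix is converted to an intermediate STL representation by the overload
// above, so it can be passed directly after assembly.
//
//   Eigen::SparseMatrix<float> eigen_A(50, 50);
//   eigen_A.insert(0, 1) = 1.5f;   // ... fill further entries ...
//   eigen_A.makeCompressed();
//
//   viennacl::compressed_matrix<float> vcl_A(50, 50);
//   viennacl::copy(eigen_A, vcl_A);
// -------------------------------------------------------------------------------------------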
219 
220 
221 #ifdef VIENNACL_WITH_MTL4
222 
226 template<typename NumericT, unsigned int AlignmentV>
227 void copy(const mtl::compressed2D<NumericT> & cpu_matrix,
228  compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
229 {
230  assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
231  assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );
232 
233  typedef mtl::compressed2D<NumericT> MatrixType;
234 
235  std::vector< std::map<unsigned int, NumericT> > stl_matrix(cpu_matrix.num_rows());
236 
237  using mtl::traits::range_generator;
239 
240  // Choose between row and column traversal
241  typedef typename min<range_generator<mtl::tag::row, MatrixType>,
242  range_generator<mtl::tag::col, MatrixType> >::type range_type;
243  range_type my_range;
244 
245  // Type of outer cursor
246  typedef typename range_type::type c_type;
247  // Type of inner cursor
248  typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;
249 
250  // Define the property maps
251  typename mtl::traits::row<MatrixType>::type row(cpu_matrix);
252  typename mtl::traits::col<MatrixType>::type col(cpu_matrix);
253  typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix);
254 
255  // Now iterate over the matrix
256  for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
257  for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
258  stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);
259 
260  copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix);
261 }
262 #endif
263 
264 
265 
266 
267 
268 
269 
270 //
271 // device to host:
272 //
282 template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
283 void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
284  CPUMatrixT & cpu_matrix )
285 {
286  assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
287  assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
288 
289  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
290  {
291  //get raw data from memory:
292  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1);
293  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
294  std::vector<NumericT> elements(gpu_matrix.nnz());
295 
296  //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl;
297 
298  viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
299  viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
300  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0]));
301 
302  //fill the cpu_matrix:
303  vcl_size_t data_index = 0;
304  for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
305  {
306  while (data_index < row_buffer[row])
307  {
308  if (col_buffer[data_index] >= gpu_matrix.size2())
309  {
310  std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
311  return;
312  }
313 
314  if (std::fabs(elements[data_index]) > static_cast<NumericT>(0))
315  cpu_matrix(row-1, static_cast<vcl_size_t>(col_buffer[data_index])) = elements[data_index];
316  ++data_index;
317  }
318  }
319  }
320 }
321 
322 
328 template<typename NumericT, unsigned int AlignmentV>
329 void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
330  std::vector< std::map<unsigned int, NumericT> > & cpu_matrix)
331 {
332  tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size());
333  copy(gpu_matrix, temp);
334 }
335 
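// --- Illustrative usage example (not part of the original header) -------------------------
// Reading a matrix back from the device into an STL container. The container must already
// hold one map per row, since its size determines the dimensions of the adapter used above.
//
//   viennacl::compressed_matrix<float> device_A(4, 4);
//   // ... fill device_A, e.g. via viennacl::copy() from a host matrix ...
//
//   std::vector< std::map<unsigned int, float> > host_A(device_A.size1());
//   viennacl::copy(device_A, host_A);                         // device -> host
// -------------------------------------------------------------------------------------------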
336 #ifdef VIENNACL_WITH_UBLAS
337 
341 template<typename ScalarType, unsigned int AlignmentV, typename F, vcl_size_t IB, typename IA, typename TA>
342 void copy(const compressed_matrix<ScalarType, AlignmentV> & gpu_matrix,
343  boost::numeric::ublas::compressed_matrix<ScalarType> & ublas_matrix)
344 {
345  assert( (viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
346  assert( (viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
347 
348  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
349  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
350 
351  viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
352  viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
353 
354  ublas_matrix.clear();
355  ublas_matrix.reserve(gpu_matrix.nnz());
356 
357  ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz());
358 
359  for (vcl_size_t i=0; i<ublas_matrix.size1() + 1; ++i)
360  ublas_matrix.index1_data()[i] = row_buffer[i];
361 
362  for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
363  ublas_matrix.index2_data()[i] = col_buffer[i];
364 
365  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(ScalarType) * gpu_matrix.nnz(), &(ublas_matrix.value_data()[0]));
366 
367 }
368 #endif
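// --- Illustrative usage example (requires VIENNACL_WITH_UBLAS) ----------------------------
// Copying back to uBLAS, assuming vcl_A is a viennacl::compressed_matrix<double> already
// filled on the device. The target must have matching dimensions; the overload above then
// writes the CSR arrays directly after reserve()/set_filled().
//
//   boost::numeric::ublas::compressed_matrix<double> ublas_B(vcl_A.size1(), vcl_A.size2());
//   viennacl::copy(vcl_A, ublas_B);
// -------------------------------------------------------------------------------------------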
369 
370 #ifdef VIENNACL_WITH_EIGEN
371 
372 template<typename NumericT, int flags, unsigned int AlignmentV>
373 void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
374  Eigen::SparseMatrix<NumericT, flags> & eigen_matrix)
375 {
376  assert( (static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
377  assert( (static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );
378 
379  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
380  {
381  //get raw data from memory:
382  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
383  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
384  std::vector<NumericT> elements(gpu_matrix.nnz());
385 
386  viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
387  viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
388  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0]));
389 
390  eigen_matrix.setZero();
391  vcl_size_t data_index = 0;
392  for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
393  {
394  while (data_index < row_buffer[row])
395  {
396  assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
397  if (elements[data_index] != static_cast<NumericT>(0.0))
398  eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index];
399  ++data_index;
400  }
401  }
402  }
403 }
404 #endif
405 
406 
407 
408 #ifdef VIENNACL_WITH_MTL4
409 
410 template<typename NumericT, unsigned int AlignmentV>
411 void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
412  mtl::compressed2D<NumericT> & mtl4_matrix)
413 {
414  assert( (static_cast<vcl_size_t>(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
415  assert( (static_cast<vcl_size_t>(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );
416 
417  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
418  {
419 
420  //get raw data from memory:
421  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
422  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
423  std::vector<NumericT> elements(gpu_matrix.nnz());
424 
425  viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
426  viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
427  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0]));
428 
429  //set_to_zero(mtl4_matrix);
430  //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2());
431 
432  mtl::matrix::inserter< mtl::compressed2D<NumericT> > ins(mtl4_matrix);
433  vcl_size_t data_index = 0;
434  for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
435  {
436  while (data_index < row_buffer[row])
437  {
438  assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
439  if (elements[data_index] != static_cast<NumericT>(0.0))
440  ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<NumericT> >::value_type(elements[data_index]);
441  ++data_index;
442  }
443  }
444  }
445 }
446 #endif
447 
448 
449 
450 
451 
453 
458 template<class NumericT, unsigned int AlignmentV /* see VCLForwards.h */>
459 class compressed_matrix
460 {
461 public:
462  typedef viennacl::backend::mem_handle                                                            handle_type;
463  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>   value_type;
464  typedef vcl_size_t                                                                               size_type;
465 
467  compressed_matrix() : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) {}
468 
476  explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context())
477  : rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0)
478  {
479  row_buffer_.switch_active_handle_id(ctx.memory_type());
480  col_buffer_.switch_active_handle_id(ctx.memory_type());
481  elements_.switch_active_handle_id(ctx.memory_type());
482  row_blocks_.switch_active_handle_id(ctx.memory_type());
483 
484 #ifdef VIENNACL_WITH_OPENCL
485  if (ctx.memory_type() == OPENCL_MEMORY)
486  {
487  row_buffer_.opencl_handle().context(ctx.opencl_context());
488  col_buffer_.opencl_handle().context(ctx.opencl_context());
489  elements_.opencl_handle().context(ctx.opencl_context());
490  row_blocks_.opencl_handle().context(ctx.opencl_context());
491  }
492 #endif
493  if (rows > 0)
494  {
495  viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx);
496  }
497  if (nonzeros > 0)
498  {
499  viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx);
500  viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, ctx);
501  }
502  }
503 
510  explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx)
511  : rows_(rows), cols_(cols), nonzeros_(0), row_block_num_(0)
512  {
513  row_buffer_.switch_active_handle_id(ctx.memory_type());
514  col_buffer_.switch_active_handle_id(ctx.memory_type());
515  elements_.switch_active_handle_id(ctx.memory_type());
516  row_blocks_.switch_active_handle_id(ctx.memory_type());
517 
518 #ifdef VIENNACL_WITH_OPENCL
519  if (ctx.memory_type() == OPENCL_MEMORY)
520  {
521  row_buffer_.opencl_handle().context(ctx.opencl_context());
522  col_buffer_.opencl_handle().context(ctx.opencl_context());
523  elements_.opencl_handle().context(ctx.opencl_context());
524  row_blocks_.opencl_handle().context(ctx.opencl_context());
525  }
526 #endif
527  if (rows > 0)
528  {
529  viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx);
530  }
531  }
532 
537  explicit compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0)
538  {
539  row_buffer_.switch_active_handle_id(ctx.memory_type());
540  col_buffer_.switch_active_handle_id(ctx.memory_type());
541  elements_.switch_active_handle_id(ctx.memory_type());
542  row_blocks_.switch_active_handle_id(ctx.memory_type());
543 
544 #ifdef VIENNACL_WITH_OPENCL
545  if (ctx.memory_type() == OPENCL_MEMORY)
546  {
547  row_buffer_.opencl_handle().context(ctx.opencl_context());
548  col_buffer_.opencl_handle().context(ctx.opencl_context());
549  elements_.opencl_handle().context(ctx.opencl_context());
550  row_blocks_.opencl_handle().context(ctx.opencl_context());
551  }
552 #endif
553  }
554 
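// --- Illustrative usage example (not part of the original header) -------------------------
// Selecting the memory domain at construction time. viennacl::MAIN_MEMORY requests a
// host-side (OpenMP) matrix; an OpenCL or CUDA context can be passed in the same way when
// the respective backend is enabled.
//
//   viennacl::context host_ctx(viennacl::MAIN_MEMORY);
//   viennacl::compressed_matrix<float> A(1000, 1000, host_ctx);   // rows, cols, context
// -------------------------------------------------------------------------------------------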
555 
556 #ifdef VIENNACL_WITH_OPENCL
557 
566  explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
567  vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) :
568  rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0)
569  {
570  row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
571  row_buffer_.opencl_handle() = mem_row_buffer;
572  row_buffer_.opencl_handle().inc(); //prevents the user-provided memory from being deleted once the matrix object is destroyed
573  row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1));
574 
575  col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
576  col_buffer_.opencl_handle() = mem_col_buffer;
577  col_buffer_.opencl_handle().inc(); //prevents the user-provided memory from being deleted once the matrix object is destroyed
578  col_buffer_.raw_size(sizeof(cl_uint) * nonzeros);
579 
580  elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
581  elements_.opencl_handle() = mem_elements;
582  elements_.opencl_handle().inc(); //prevents the user-provided memory from being deleted once the matrix object is destroyed
583  elements_.raw_size(sizeof(NumericT) * nonzeros);
584 
585  //generate block information for CSR-adaptive:
586  generate_row_block_information();
587  }
588 #endif
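// --- Illustrative usage example (OpenCL backend only, not part of the original header) ----
// Wrapping user-provided cl_mem buffers that already hold valid CSR data. The handles are
// reference-counted via inc() above, so the buffers stay alive while the wrapper exists.
// The buffer names and sizes below are placeholders.
//
//   // cl_mem my_rows, my_cols, my_values;   // set up and filled by the user beforehand
//   viennacl::compressed_matrix<float> A(my_rows, my_cols, my_values,
//                                        num_rows, num_cols, num_nonzeros);
// -------------------------------------------------------------------------------------------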
589 
590 
592  compressed_matrix & operator=(compressed_matrix const & other)
593  {
594  assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") );
595  assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") );
596 
597  rows_ = other.size1();
598  cols_ = other.size2();
599  nonzeros_ = other.nnz();
600  row_block_num_ = other.row_block_num_;
601 
602  viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_);
603  viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_);
604  viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_blocks_, row_blocks_);
605  viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_);
606 
607  return *this;
608  }
609 
610 
624  void set(const void * row_jumper,
625  const void * col_buffer,
626  const NumericT * elements,
627  vcl_size_t rows,
628  vcl_size_t cols,
629  vcl_size_t nonzeros)
630  {
631  assert( (rows > 0) && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!"));
632  assert( (cols > 0) && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!"));
633  assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!"));
634  //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl;
635 
636  //row_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
637  viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper);
638 
639  //col_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
640  viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer);
641 
642  //elements_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY);
643  viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, viennacl::traits::context(elements_), elements);
644 
645  nonzeros_ = nonzeros;
646  rows_ = rows;
647  cols_ = cols;
648 
649  //generate block information for CSR-adaptive:
650  generate_row_block_information();
651  }
652 
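// --- Illustrative usage example (not part of the original header) -------------------------
// Populating the matrix directly from raw CSR arrays. The row and column arrays are passed
// as void pointers and are interpreted with the index size of the active backend (plain
// unsigned int in the default host case).
//
//   unsigned int row_jumper[] = {0, 2, 3};           // 2x3 matrix with 3 nonzeros
//   unsigned int col_ids[]    = {0, 2, 1};
//   float        values[]     = {1.0f, 2.0f, 3.0f};
//
//   viennacl::compressed_matrix<float> A;
//   A.set(row_jumper, col_ids, values, 2, 3, 3);
// -------------------------------------------------------------------------------------------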
654  void reserve(vcl_size_t new_nonzeros)
655  {
656  if (new_nonzeros > nonzeros_)
657  {
658  handle_type col_buffer_old;
659  handle_type elements_old;
660  viennacl::backend::memory_shallow_copy(col_buffer_, col_buffer_old);
661  viennacl::backend::memory_shallow_copy(elements_, elements_old);
662 
663  viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_);
664  viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_));
665  viennacl::backend::memory_create(elements_, sizeof(NumericT) * new_nonzeros, viennacl::traits::context(elements_));
666 
667  viennacl::backend::memory_copy(col_buffer_old, col_buffer_, 0, 0, size_deducer.element_size() * nonzeros_);
668  viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(NumericT)* nonzeros_);
669 
670  nonzeros_ = new_nonzeros;
671  }
672  }
673 
680  void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true)
681  {
682  assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!"));
683 
684  if (new_size1 != rows_ || new_size2 != cols_)
685  {
686  std::vector<std::map<unsigned int, NumericT> > stl_sparse_matrix;
687  if (rows_ > 0)
688  {
689  if (preserve)
690  {
691  stl_sparse_matrix.resize(rows_);
692  viennacl::copy(*this, stl_sparse_matrix);
693  } else
694  stl_sparse_matrix[0][0] = 0;
695  } else {
696  stl_sparse_matrix.resize(new_size1);
697  stl_sparse_matrix[0][0] = 0; //enforces nonzero array sizes if matrix was initially empty
698  }
699 
700  stl_sparse_matrix.resize(new_size1);
701 
702  //discard entries with column index larger than new_size2
703  if (new_size2 < cols_ && rows_ > 0)
704  {
705  for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i)
706  {
707  std::list<unsigned int> to_delete;
708  for (typename std::map<unsigned int, NumericT>::iterator it = stl_sparse_matrix[i].begin();
709  it != stl_sparse_matrix[i].end();
710  ++it)
711  {
712  if (it->first >= new_size2)
713  to_delete.push_back(it->first);
714  }
715 
716  for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
717  stl_sparse_matrix[i].erase(*it);
718  }
719  }
720 
721  viennacl::copy(stl_sparse_matrix, *this);
722 
723  rows_ = new_size1;
724  cols_ = new_size2;
725  }
726  }
727 
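// --- Illustrative usage example (not part of the original header) -------------------------
// Growing a matrix while keeping its entries. resize() goes through a temporary STL
// representation, so it is intended for setup code rather than performance-critical paths.
//
//   viennacl::compressed_matrix<float> A(10, 10);
//   // ... fill A ...
//   A.resize(20, 20, true);    // preserve existing entries
// -------------------------------------------------------------------------------------------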
729  void clear()
730  {
731  viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, rows_ + 1);
732  viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(col_buffer_, 1);
733  std::vector<NumericT> host_elements(1);
734 
735  viennacl::backend::memory_create(row_buffer_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get());
736  viennacl::backend::memory_create(col_buffer_, host_col_buffer.element_size() * 1, viennacl::traits::context(col_buffer_), host_col_buffer.get());
737  viennacl::backend::memory_create(elements_, sizeof(NumericT) * 1, viennacl::traits::context(elements_), &(host_elements[0]));
738 
739  nonzeros_ = 0;
740  }
741 
743  entry_proxy<NumericT> operator()(vcl_size_t i, vcl_size_t j)
744  {
745  assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!"));
746 
747  vcl_size_t index = element_index(i, j);
748 
749  // check for element in sparsity pattern
750  if (index < nonzeros_)
751  return entry_proxy<NumericT>(index, elements_);
752 
753  // Element not found. Copying required. Very slow, but direct entry manipulation is painful anyway...
754  std::vector< std::map<unsigned int, NumericT> > cpu_backup(rows_);
755  tools::sparse_matrix_adapter<NumericT> adapted_cpu_backup(cpu_backup, rows_, cols_);
756  viennacl::copy(*this, adapted_cpu_backup);
757  cpu_backup[i][static_cast<unsigned int>(j)] = 0.0;
758  viennacl::copy(adapted_cpu_backup, *this);
759 
760  index = element_index(i, j);
761 
762  assert(index < nonzeros_);
763 
764  return entry_proxy<NumericT>(index, elements_);
765  }
766 
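// --- Illustrative usage example (not part of the original header) -------------------------
// Direct read/write access to individual entries. If the entry is not yet part of the
// sparsity pattern, the whole matrix is copied to the host and back, so this is convenient
// for small fixes but far too slow for assembling a matrix entry by entry.
//
//   viennacl::compressed_matrix<float> A(5, 5);
//   A(2, 3) = 7.0f;            // inserts the entry if necessary (slow path)
// -------------------------------------------------------------------------------------------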
768  const vcl_size_t & size1() const { return rows_; }
770  const vcl_size_t & size2() const { return cols_; }
772  const vcl_size_t & nnz() const { return nonzeros_; }
774  const vcl_size_t & blocks1() const { return row_block_num_; }
775 
777  const handle_type & handle1() const { return row_buffer_; }
779  const handle_type & handle2() const { return col_buffer_; }
781  const handle_type & handle3() const { return row_blocks_; }
783  const handle_type & handle() const { return elements_; }
784 
786  handle_type & handle1() { return row_buffer_; }
788  handle_type & handle2() { return col_buffer_; }
790  handle_type & handle3() { return row_blocks_; }
792  handle_type & handle() { return elements_; }
793 
798  void switch_memory_context(viennacl::context new_ctx)
799  {
800  viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx);
801  viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx);
802  viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx);
803  }
804 
806  viennacl::memory_types memory_context() const
807  {
808  return row_buffer_.get_active_handle_id();
809  }
810 
811 private:
812 
814  vcl_size_t element_index(vcl_size_t i, vcl_size_t j)
815  {
816  //read row indices
817  viennacl::backend::typesafe_host_array<unsigned int> row_indices(row_buffer_, 2);
818  viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.element_size()*2, row_indices.get());
819 
820  //get column indices for row i:
821  viennacl::backend::typesafe_host_array<unsigned int> col_indices(col_buffer_, row_indices[1] - row_indices[0]);
822  viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], row_indices.element_size()*col_indices.size(), col_indices.get());
823 
824  for (vcl_size_t k=0; k<col_indices.size(); ++k)
825  {
826  if (col_indices[k] == j)
827  return row_indices[0] + k;
828  }
829 
830  // if not found, return index past the end of the matrix (cf. matrix.end() in the spirit of the STL)
831  return nonzeros_;
832  }
833 
834  void generate_row_block_information()
835  {
836  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(row_buffer_, rows_ + 1);
837  viennacl::backend::memory_read(row_buffer_, 0, row_buffer.raw_size(), row_buffer.get());
838 
839  viennacl::backend::typesafe_host_array<unsigned int> row_blocks(row_buffer_, rows_ + 1);
840 
841  vcl_size_t num_entries_in_current_batch = 0;
842 
843  const vcl_size_t shared_mem_size = 1024; // number of column indices loaded to shared memory, number of floating point values loaded to shared memory
844 
845  row_block_num_ = 0;
846  row_blocks.set(0, 0);
847  for (vcl_size_t i=0; i<rows_; ++i)
848  {
849  vcl_size_t entries_in_row = vcl_size_t(row_buffer[i+1]) - vcl_size_t(row_buffer[i]);
850  num_entries_in_current_batch += entries_in_row;
851 
852  if (num_entries_in_current_batch > shared_mem_size)
853  {
854  vcl_size_t rows_in_batch = i - row_blocks[row_block_num_];
855  if (rows_in_batch > 0) // at least one full row is in the batch. Use current row in next batch.
856  row_blocks.set(++row_block_num_, i--);
857  else // row is larger than buffer in shared memory
858  row_blocks.set(++row_block_num_, i+1);
859  num_entries_in_current_batch = 0;
860  }
861  }
862  if (num_entries_in_current_batch > 0)
863  row_blocks.set(++row_block_num_, rows_);
864 
865  if (row_block_num_ > 0) //matrix might be empty...
866  viennacl::backend::memory_create(row_blocks_,
867  row_blocks.element_size() * (row_block_num_ + 1),
868  viennacl::traits::context(row_buffer_), row_blocks.get());
869 
870  }
871 
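// --- Illustrative note (not part of the original header) ----------------------------------
// The loop above partitions the rows into blocks for the CSR-adaptive SpMV kernels: rows are
// accumulated until a block would hold more than shared_mem_size (1024) nonzeros, and a
// single row exceeding that limit becomes a block of its own. For example, with three rows
// holding 600, 600 and 2000 nonzeros, the blocks become {row 0}, {row 1} and {row 2}:
// 600 + 600 exceeds 1024, so row 1 is pushed to the next block, and row 2 is oversized and
// stands alone.
// -------------------------------------------------------------------------------------------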
872  // /** @brief Copy constructor is not available yet. */
873  //compressed_matrix(compressed_matrix const &);
874 
875 
876  vcl_size_t rows_;
877  vcl_size_t cols_;
878  vcl_size_t nonzeros_;
879  vcl_size_t row_block_num_;
880  handle_type row_buffer_;
881  handle_type row_blocks_;
882  handle_type col_buffer_;
883  handle_type elements_;
884 };
885 
886 
887 
888 //
889 // Specify available operations:
890 //
891 
894 namespace linalg
895 {
896 namespace detail
897 {
898  // x = A * y
899  template<typename T, unsigned int A>
900  struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
901  {
902  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
903  {
904  // check for the special case x = A * x
905  if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
906  {
907  viennacl::vector<T> temp(lhs);
908  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
909  lhs = temp;
910  }
911  else
912  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
913  }
914  };
915 
916  template<typename T, unsigned int A>
917  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
918  {
919  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
920  {
921  viennacl::vector<T> temp(lhs);
922  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
923  lhs += temp;
924  }
925  };
926 
927  template<typename T, unsigned int A>
928  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
929  {
930  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
931  {
932  viennacl::vector<T> temp(lhs);
933  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
934  lhs -= temp;
935  }
936  };
937 
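// --- Illustrative note (not part of the original header) ----------------------------------
// The op_executor specializations in this namespace are what make sparse matrix-vector
// expressions dispatch to prod_impl(); the assignment variant additionally guards against
// the aliasing case x = A * x. Assuming viennacl/linalg/prod.hpp is included:
//
//   viennacl::compressed_matrix<float> A(1000, 1000);
//   viennacl::vector<float> x(1000), y(1000);
//
//   x  = viennacl::linalg::prod(A, y);   // op_assign
//   x += viennacl::linalg::prod(A, y);   // op_inplace_add
//   x -= viennacl::linalg::prod(A, y);   // op_inplace_sub
// -------------------------------------------------------------------------------------------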
938 
939  // x = A * vec_op
940  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
941  struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
942  {
943  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
944  {
945  viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
946  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
947  }
948  };
949 
950  // x += A * vec_op
951  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
952  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> >
953  {
954  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
955  {
956  viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
957  viennacl::vector<T> temp_result(lhs);
958  viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
959  lhs += temp_result;
960  }
961  };
962 
963  // x -= A * vec_op
964  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
965  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
966  {
967  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
968  {
969  viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
970  viennacl::vector<T> temp_result(lhs);
971  viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
972  lhs -= temp_result;
973  }
974  };
975 
976 } // namespace detail
977 } // namespace linalg
979 } // namespace viennacl
980 
981 #endif