#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
#define VIENNACL_COMPRESSED_MATRIX_HPP_

#ifdef VIENNACL_WITH_UBLAS
#include <boost/numeric/ublas/matrix_sparse.hpp>
#endif
template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
void copy_impl(const CPUMatrixT & cpu_matrix,
               compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
               vcl_size_t nonzeros)
{
  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1);
  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros);
  std::vector<NumericT> elements(nonzeros);

  vcl_size_t row_index  = 0;
  vcl_size_t data_index = 0;

  for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1();
       row_it != cpu_matrix.end1();
       ++row_it)
  {
    row_buffer.set(row_index, data_index);
    ++row_index;

    for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      col_buffer.set(data_index, col_it.index2());
      elements[data_index] = *col_it;
      ++data_index;
    }
    data_index = viennacl::tools::align_to_multiple<vcl_size_t>(data_index, AlignmentV); //take care of alignment
  }
  row_buffer.set(row_index, data_index);

  gpu_matrix.set(row_buffer.get(),
                 col_buffer.get(),
                 &elements[0],
                 cpu_matrix.size1(),
                 cpu_matrix.size2(),
                 nonzeros);
}
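
// Editor's note: a small worked example of the CSR layout built above (not part of
// the original source). For AlignmentV == 1 and the 3x3 matrix
//
//     [ 1 0 2 ]
//     [ 0 0 3 ]
//     [ 4 5 6 ]
//
// the arrays handed to gpu_matrix.set() are
//
//     row_buffer = { 0, 2, 3, 6 }        // row i occupies entries [row_buffer[i], row_buffer[i+1])
//     col_buffer = { 0, 2, 2, 0, 1, 2 }  // column index of each stored entry
//     elements   = { 1, 2, 3, 4, 5, 6 }  // the nonzero values themselves
//
// For AlignmentV > 1, each row is padded so that the next row starts at a multiple of AlignmentV.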
/** @brief Copies a sparse host matrix to the compressed_matrix, counting the nonzeros first. */
template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
void copy(const CPUMatrixT & cpu_matrix,
          compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
{
  if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
  {
    //determine the number of (aligned) nonzeros:
    vcl_size_t num_entries = 0;
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      vcl_size_t entries_per_row = 0;
      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
        ++entries_per_row;

      num_entries += viennacl::tools::align_to_multiple<vcl_size_t>(entries_per_row, AlignmentV);
    }

    if (num_entries == 0) //we copy an empty matrix
      num_entries = 1;

    //set up the matrix entries:
    copy_impl(cpu_matrix, gpu_matrix, num_entries);
  }
}
/** @brief Copies a sparse matrix in the std::vector< std::map<> > STL format to the compressed_matrix. */
template<typename SizeT, typename NumericT, unsigned int AlignmentV>
void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix,
          compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
{
  vcl_size_t nonzeros = 0;
  vcl_size_t max_col  = 0;
  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
  {
    if (cpu_matrix[i].size() > 0)
    {
      nonzeros += ((cpu_matrix[i].size() - 1) / AlignmentV + 1) * AlignmentV; //round up to the alignment
      max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); //rbegin() holds the largest column index in row i
    }
  }

  copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1),
            gpu_matrix,
            nonzeros);
}
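
// Hedged usage sketch (editor's addition; the matrix values are illustrative only):
//
//   std::vector< std::map<unsigned int, double> > host_A(4);  // 4 rows
//   host_A[0][0] = 1.0; host_A[0][3] = 2.0;
//   host_A[2][1] = 3.0;
//
//   viennacl::compressed_matrix<double> vcl_A;
//   viennacl::copy(host_A, vcl_A);   // host -> device, using the overload above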
#ifdef VIENNACL_WITH_UBLAS
/** @brief Copies a sparse matrix from uBLAS to the compressed_matrix by reusing the CSR arrays directly. */
template<typename ScalarType, typename F, vcl_size_t IB, typename IA, typename TA>
void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix,
          compressed_matrix<ScalarType, 1> & gpu_matrix)
{
  //the CSR arrays can be copied over directly:
  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1);
  for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i)
    row_buffer.set(i, ublas_matrix.index1_data()[i]);

  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz());
  for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
    col_buffer.set(i, ublas_matrix.index2_data()[i]);

  gpu_matrix.set(row_buffer.get(),
                 col_buffer.get(),
                 &(ublas_matrix.value_data()[0]),
                 ublas_matrix.size1(),
                 ublas_matrix.size2(),
                 ublas_matrix.nnz());
}
#endif
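
// Hedged usage sketch (editor's addition), assuming ViennaCL was configured with
// VIENNACL_WITH_UBLAS; the matrix entries are made up:
//
//   boost::numeric::ublas::compressed_matrix<double> ublas_A(100, 100);
//   ublas_A(0, 0) = 1.0;
//   ublas_A(1, 2) = -2.5;
//
//   viennacl::compressed_matrix<double> vcl_A;
//   viennacl::copy(ublas_A, vcl_A);  // reuses the uBLAS CSR arrays directly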
#ifdef VIENNACL_WITH_EIGEN
/** @brief Copies a sparse matrix from Eigen to the compressed_matrix. */
template<typename NumericT, int flags, unsigned int AlignmentV>
void copy(const Eigen::SparseMatrix<NumericT, flags> & eigen_matrix,
          compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
{
  assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

  std::vector< std::map<unsigned int, NumericT> > stl_matrix(eigen_matrix.rows());

  for (int k=0; k < eigen_matrix.outerSize(); ++k)
    for (typename Eigen::SparseMatrix<NumericT, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
      stl_matrix[it.row()][it.col()] = it.value();

  copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, eigen_matrix.rows(), eigen_matrix.cols()), gpu_matrix);
}
#endif
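
// Hedged usage sketch (editor's addition), assuming VIENNACL_WITH_EIGEN is defined;
// setFromTriplets() is the usual Eigen way of assembling the host matrix:
//
//   Eigen::SparseMatrix<double> eigen_A(100, 100);
//   std::vector< Eigen::Triplet<double> > triplets;
//   triplets.push_back(Eigen::Triplet<double>(0, 0, 1.0));
//   eigen_A.setFromTriplets(triplets.begin(), triplets.end());
//
//   viennacl::compressed_matrix<double> vcl_A;
//   viennacl::copy(eigen_A, vcl_A);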
#ifdef VIENNACL_WITH_MTL4
/** @brief Copies a sparse matrix from MTL4 to the compressed_matrix. */
template<typename NumericT, unsigned int AlignmentV>
void copy(const mtl::compressed2D<NumericT> & cpu_matrix,
          compressed_matrix<NumericT, AlignmentV> & gpu_matrix)
{
  assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

  typedef mtl::compressed2D<NumericT>  MatrixType;

  std::vector< std::map<unsigned int, NumericT> > stl_matrix(cpu_matrix.num_rows());

  using mtl::traits::range_generator;
  using mtl::traits::range::min;

  //choose between row-wise and column-wise traversal, whichever MTL4 reports as cheaper:
  typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                       range_generator<mtl::tag::col, MatrixType> >::type  range_type;
  range_type my_range;

  typedef typename range_type::type                                          c_type;   //outer cursor
  typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type  ic_type;  //inner (nonzero) cursor

  //property maps for row index, column index and value:
  typename mtl::traits::row<MatrixType>::type         row(cpu_matrix);
  typename mtl::traits::col<MatrixType>::type         col(cpu_matrix);
  typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix);

  for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
    for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
      stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

  copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix);
}
#endif
/** @brief Copies the compressed_matrix back to a sparse matrix type on the host. */
template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
          CPUMatrixT & cpu_matrix)
{
  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
  {
    //get the raw CSR data from the device:
    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
    viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
    std::vector<NumericT> elements(gpu_matrix.nnz());

    viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(NumericT) * elements.size(), &(elements[0]));

    //fill the host matrix:
    vcl_size_t data_index = 0;
    for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        if (col_buffer[data_index] >= gpu_matrix.size2())
        {
          std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
          return;
        }

        if (std::fabs(elements[data_index]) > static_cast<NumericT>(0))
          cpu_matrix(row-1, static_cast<vcl_size_t>(col_buffer[data_index])) = elements[data_index];
        ++data_index;
      }
    }
  }
}
/** @brief Copies the compressed_matrix to the std::vector< std::map<> > STL format on the host. */
template<typename NumericT, unsigned int AlignmentV>
void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
          std::vector< std::map<unsigned int, NumericT> > & cpu_matrix)
{
  tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2());
  copy(gpu_matrix, temp);
}
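
// Editor's note: together with the host->device overloads above, this enables a
// simple round trip for inspection or debugging (hedged sketch):
//
//   std::vector< std::map<unsigned int, double> > host_A(vcl_A.size1());
//   viennacl::copy(vcl_A, host_A);   // device -> host
//   // ... inspect or modify host_A ...
//   viennacl::copy(host_A, vcl_A);   // host -> device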
#ifdef VIENNACL_WITH_UBLAS
/** @brief Copies the compressed_matrix back to a uBLAS compressed_matrix. */
template<typename ScalarType, unsigned int AlignmentV, typename F, vcl_size_t IB, typename IA, typename TA>
void copy(compressed_matrix<ScalarType, AlignmentV> const & gpu_matrix,
          boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix)
{
  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());

  viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
  viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());

  ublas_matrix.clear();
  ublas_matrix.reserve(gpu_matrix.nnz());

  ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz());

  for (vcl_size_t i=0; i<ublas_matrix.size1() + 1; ++i)
    ublas_matrix.index1_data()[i] = row_buffer[i];

  for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i)
    ublas_matrix.index2_data()[i] = col_buffer[i];

  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(ScalarType) * gpu_matrix.nnz(), &(ublas_matrix.value_data()[0]));
}
#endif
#ifdef VIENNACL_WITH_EIGEN
/** @brief Copies the compressed_matrix back to an Eigen sparse matrix. */
template<typename NumericT, int flags, unsigned int AlignmentV>
void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
          Eigen::SparseMatrix<NumericT, flags> & eigen_matrix)
{
  assert( (static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
  {
    //get the raw CSR data from the device:
    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
    viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
    std::vector<NumericT> elements(gpu_matrix.nnz());

    viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(NumericT) * elements.size(), &(elements[0]));

    eigen_matrix.setZero();

    vcl_size_t data_index = 0;
    for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
        if (elements[data_index] != static_cast<NumericT>(0.0))
          eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index];
        ++data_index;
      }
    }
  }
}
#endif
#ifdef VIENNACL_WITH_MTL4
/** @brief Copies the compressed_matrix back to an MTL4 compressed2D matrix. */
template<typename NumericT, unsigned int AlignmentV>
void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix,
          mtl::compressed2D<NumericT> & mtl4_matrix)
{
  assert( (static_cast<vcl_size_t>(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (static_cast<vcl_size_t>(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") );

  if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
  {
    //get the raw CSR data from the device:
    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1);
    viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz());
    std::vector<NumericT> elements(gpu_matrix.nnz());

    viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get());
    viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(NumericT) * elements.size(), &(elements[0]));

    //fill the MTL4 matrix through an inserter:
    mtl::matrix::inserter< mtl::compressed2D<NumericT> > ins(mtl4_matrix);

    vcl_size_t data_index = 0;
    for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer"));
        if (elements[data_index] != static_cast<NumericT>(0.0))
          ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<NumericT> >::value_type(elements[data_index]);
        ++data_index;
      }
    }
  }
}
#endif
/** @brief A sparse square matrix in compressed sparse rows format. */
template<class NumericT, unsigned int AlignmentV>
class compressed_matrix
{
public:
  typedef viennacl::backend::mem_handle  handle_type;

  /** @brief Construction with the supplied number of rows, columns and (optionally) nonzeros. */
  explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context())
    : rows_(rows), cols_(cols), nonzeros_(nonzeros)
  {
#ifdef VIENNACL_WITH_OPENCL
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      row_buffer_.opencl_handle().context(ctx.opencl_context());
      col_buffer_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
    // ... (buffer allocation elided) ...
  }

  /** @brief Construction with the supplied number of rows and columns; no nonzeros are allocated yet. */
  explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx)
    : rows_(rows), cols_(cols), nonzeros_(0)
  {
#ifdef VIENNACL_WITH_OPENCL
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      row_buffer_.opencl_handle().context(ctx.opencl_context());
      col_buffer_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
    // ... (row buffer allocation elided) ...
  }

  /** @brief Construction of an empty matrix associated with the supplied context. */
  explicit compressed_matrix(viennacl::context ctx)
    : rows_(0), cols_(0), nonzeros_(0)
  {
#ifdef VIENNACL_WITH_OPENCL
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      row_buffer_.opencl_handle().context(ctx.opencl_context());
      col_buffer_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
  }
#ifdef VIENNACL_WITH_OPENCL
  /** @brief Construction from existing OpenCL memory buffers holding CSR data. */
  explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
                             vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros)
    : rows_(rows), cols_(cols), nonzeros_(nonzeros)
  {
    row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
    row_buffer_.opencl_handle() = mem_row_buffer;
    row_buffer_.opencl_handle().inc();  //prevents the user-provided memory from being freed when the matrix is destroyed
    row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1));

    col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
    col_buffer_.opencl_handle() = mem_col_buffer;
    col_buffer_.opencl_handle().inc();  //prevents the user-provided memory from being freed when the matrix is destroyed
    col_buffer_.raw_size(sizeof(cl_uint) * nonzeros);

    elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
    elements_.opencl_handle() = mem_elements;
    elements_.opencl_handle().inc();    //prevents the user-provided memory from being freed when the matrix is destroyed
    elements_.raw_size(sizeof(NumericT) * nonzeros);
  }
#endif
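
  // Hedged sketch (editor's addition): wrapping existing user-managed OpenCL buffers.
  // mem_rows, mem_cols and mem_vals are assumed to be valid cl_mem handles that
  // already hold CSR data (cl_uint row jumpers, cl_uint column indices, double values):
  //
  //   viennacl::compressed_matrix<double> A(mem_rows, mem_cols, mem_vals,
  //                                         num_rows, num_cols, num_nonzeros);
  //
  // Because the constructor calls inc() on each handle, the caller may release its
  // own reference without invalidating the matrix.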
  /** @brief Assignment of a compressed matrix, possibly from another memory domain. */
  compressed_matrix & operator=(compressed_matrix const & other)
  {
    assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") );
    assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") );

    rows_ = other.size1();
    cols_ = other.size2();
    nonzeros_ = other.nnz();

    viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_);
    viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_);
    viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_);

    return *this;
  }
  /** @brief Sets the row, column and value arrays of the compressed matrix. */
  void set(const void * row_jumper,
           const void * col_buffer,
           const NumericT * elements,
           vcl_size_t rows,
           vcl_size_t cols,
           vcl_size_t nonzeros)
  {
    assert( (rows > 0)     && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!"));
    assert( (cols > 0)     && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!"));
    assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!"));

    // ... (the three arrays are written to the active memory domain) ...

    rows_ = rows;
    cols_ = cols;
    nonzeros_ = nonzeros;
  }
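
  // Hedged usage sketch (editor's addition): raw CSR arrays for the 2x3 matrix
  // [ [1 0 2], [0 3 0] ]; set() expects the index arrays in the integer type of the
  // active memory domain (unsigned int is assumed here):
  //
  //   unsigned int row_jumper[] = { 0, 2, 3 };
  //   unsigned int cols[]       = { 0, 2, 1 };
  //   double       vals[]       = { 1.0, 2.0, 3.0 };
  //   A.set(row_jumper, cols, vals, 2, 3, 3);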
  /** @brief Allocates memory for the supplied number of nonzeros; old values are preserved. */
  void reserve(vcl_size_t new_nonzeros)
  {
    if (new_nonzeros > nonzeros_)
    {
      // ... (re-allocate col_buffer_ and elements_, copying over the existing entries) ...
      nonzeros_ = new_nonzeros;
    }
  }
  /** @brief Resizes the matrix; existing entries can optionally be preserved. */
  void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true)
  {
    assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!"));

    if (new_size1 != rows_ || new_size2 != cols_)
    {
      std::vector<std::map<unsigned int, NumericT> > stl_sparse_matrix;
      if (rows_ > 0)
      {
        stl_sparse_matrix.resize(rows_);
        if (preserve)
          viennacl::copy(*this, stl_sparse_matrix);
        stl_sparse_matrix[0][0] = 0;  //enforces nonzero array sizes
      }
      else
      {
        stl_sparse_matrix.resize(new_size1);
        stl_sparse_matrix[0][0] = 0;  //enforces nonzero array sizes
      }

      stl_sparse_matrix.resize(new_size1);

      //discard all entries whose column index no longer fits:
      if (new_size2 < cols_ && rows_ > 0)
      {
        for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i)
        {
          std::list<unsigned int> to_delete;
          for (typename std::map<unsigned int, NumericT>::iterator it = stl_sparse_matrix[i].begin();
               it != stl_sparse_matrix[i].end();
               ++it)
          {
            if (it->first >= new_size2)
              to_delete.push_back(it->first);
          }
          for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
            stl_sparse_matrix[i].erase(*it);
        }
      }

      // ... (copy the adjusted host matrix back to *this and update rows_ and cols_) ...
    }
  }
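
  // Editor's note (hedged): resize() round-trips through a host-side
  // std::vector< std::map<> > representation, so it is convenient but not cheap:
  //
  //   A.resize(2 * A.size1(), 2 * A.size2());   // grow, preserving entries
  //   A.resize(5, 5, false);                    // resize, discarding old entries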
  /** @brief Resets all entries back to zero without changing the size of the matrix. */
  void clear()
  {
    std::vector<NumericT> host_elements(1);
    // ... (zeroed host buffers are written back to the active memory domain) ...
  }
  /** @brief Returns a proxy to the (i,j)-th entry; nonexistent entries are inserted first (expensive!). */
  entry_proxy<NumericT> operator()(vcl_size_t i, vcl_size_t j)
  {
    assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!"));
    vcl_size_t index = element_index(i, j);
    if (index < nonzeros_)  //entry already exists
      return entry_proxy<NumericT>(index, elements_);

    //entry does not exist yet: add it to the sparsity pattern via a (costly) host round-trip:
    std::vector< std::map<unsigned int, NumericT> > cpu_backup(rows_);
    viennacl::copy(*this, cpu_backup);
    cpu_backup[i][static_cast<unsigned int>(j)] = 0.0;
    viennacl::copy(cpu_backup, *this);

    index = element_index(i, j);
    assert(index < nonzeros_);
    return entry_proxy<NumericT>(index, elements_);
  }
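
  // Editor's note (hedged): writing through the proxy is convenient, but inserting a
  // new entry triggers the host round-trip above. Prefer assembling on the host and
  // using viennacl::copy() for bulk setup:
  //
  //   viennacl::compressed_matrix<double> A(10, 10);
  //   A(2, 3) = 42.0;   // inserts (2,3) into the sparsity pattern if necessary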
  /** @brief Switches the memory domain of the matrix, e.g. from OpenCL to host memory. */
  void switch_memory_context(viennacl::context new_ctx)
  {
    viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx);
    viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx);
    viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx);
  }
private:
  //returns the flat index of entry (i,j) in the value array, or nonzeros_ if the entry does not exist:
  vcl_size_t element_index(vcl_size_t i, vcl_size_t j)
  {
    //read the row jumpers for rows i and i+1:
    viennacl::backend::typesafe_host_array<unsigned int> row_indices(row_buffer_, 2);
    viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.raw_size(), row_indices.get());

    //read the column indices belonging to row i:
    viennacl::backend::typesafe_host_array<unsigned int> col_indices(col_buffer_, row_indices[1] - row_indices[0]);
    viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], col_indices.element_size()*col_indices.size(), col_indices.get());

    for (vcl_size_t k=0; k<col_indices.size(); ++k)
    {
      if (col_indices[k] == j)
        return row_indices[0] + k;
    }
    return nonzeros_;  //entry not found
  }

  vcl_size_t rows_;
  vcl_size_t cols_;
  vcl_size_t nonzeros_;
  handle_type row_buffer_;
  handle_type col_buffer_;
  handle_type elements_;
};
template<typename T, unsigned int A>
struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
  {
    //check for the special case x = A * x, which requires a temporary:
    if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
    {
      viennacl::vector<T> temp(lhs);
      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
      lhs = temp;
    }
    else
      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
  }
};
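
// Editor's note (hedged): this specialization is what dispatches the following
// operator syntax to viennacl::linalg::prod_impl(), with a temporary introduced
// automatically when the result aliases the argument:
//
//   viennacl::compressed_matrix<double> A(100, 100);
//   viennacl::vector<double> x(100);
//   viennacl::vector<double> y(100);
//
//   y = viennacl::linalg::prod(A, x);   // sparse matrix-vector product
//   x = viennacl::linalg::prod(A, x);   // safe: handled via the temporary above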
template<typename T, unsigned int A>
struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
  {
    // ... (computes A * x into a temporary via prod_impl(), then lhs += temp) ...
  }
};
template<typename T, unsigned int A>
struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
  {
    // ... (computes A * x into a temporary via prod_impl(), then lhs -= temp) ...
  }
};
template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
  {
    // ... (evaluates the vector expression rhs.rhs() into a temporary, then multiplies into lhs) ...
  }
};
template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
  {
    // ... (evaluates rhs.rhs() into a temporary, multiplies, then adds the result to lhs) ...
  }
};
template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
{
  static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
  {
    // ... (evaluates rhs.rhs() into a temporary, multiplies, then subtracts the result from lhs) ...
  }
};

#endif // VIENNACL_COMPRESSED_MATRIX_HPP_