1 #ifndef VIENNACL_MATRIX_PROXY_HPP_
2 #define VIENNACL_MATRIX_PROXY_HPP_
37 template<
typename MatrixType>
38 class matrix_range :
public matrix_base<typename MatrixType::cpu_value_type>
40 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
41 typedef matrix_range<MatrixType> self_type;
54 range const & row_range,
61 range const & row_range,
73 using base_type::operator=;
77 template<
typename MatrixType>
85 range const & row_range,
92 range const & row_range,
104 template<
typename CPUMatrixT,
typename NumericT>
105 void copy(
const CPUMatrixT & cpu_matrix,
108 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
109 && (cpu_matrix.size2() == gpu_matrix_range.size2())
110 &&
bool(
"Matrix size mismatch!"));
112 if ( gpu_matrix_range.start2() != 0)
114 std::vector<NumericT> entries(gpu_matrix_range.size2());
117 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
119 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
120 entries[j] = cpu_matrix(i,j);
122 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
123 vcl_size_t num_entries = gpu_matrix_range.size2();
131 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
134 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
135 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
136 entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j);
138 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
139 vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
146 template<
typename CPUMatrixT,
typename NumericT>
147 void copy(
const CPUMatrixT & cpu_matrix,
150 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
151 && (cpu_matrix.size2() == gpu_matrix_range.size2())
152 &&
bool(
"Matrix size mismatch!"));
154 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.size1())
156 std::vector<NumericT> entries(gpu_matrix_range.size1());
159 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
161 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
162 entries[i] = cpu_matrix(i,j);
164 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
165 vcl_size_t num_entries = gpu_matrix_range.size1();
173 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
176 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
177 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
178 entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j);
180 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
181 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
195 template<
typename CPUMatrixT,
typename NumericT>
197 CPUMatrixT & cpu_matrix)
199 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
200 && (cpu_matrix.size2() == gpu_matrix_range.size2())
201 &&
bool(
"Matrix size mismatch!"));
203 if ( gpu_matrix_range.start2() != 0)
205 std::vector<NumericT> entries(gpu_matrix_range.size2());
208 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
210 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
211 vcl_size_t num_entries = gpu_matrix_range.size2();
215 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
216 cpu_matrix(i,j) = entries[j];
222 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
224 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
228 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
229 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
230 cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j];
237 template<
typename CPUMatrixT,
typename NumericT>
239 CPUMatrixT & cpu_matrix)
241 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
242 && (cpu_matrix.size2() == gpu_matrix_range.size2())
243 &&
bool(
"Matrix size mismatch!"));
245 if ( gpu_matrix_range.start1() != 0)
247 std::vector<NumericT> entries(gpu_matrix_range.size1());
250 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
252 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
253 vcl_size_t num_entries = gpu_matrix_range.size1();
257 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
258 cpu_matrix(i,j) = entries[i];
264 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
267 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
268 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
272 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
273 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
274 cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()];
283 template<
typename MatrixType>
286 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
292 template<
typename MatrixType>
295 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
319 template<
typename MatrixType>
320 class matrix_slice :
public matrix_base<typename MatrixType::cpu_value_type>
322 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
323 typedef matrix_slice<MatrixType> self_type;
336 slice const & row_slice,
343 slice const & row_slice,
355 using base_type::operator=;
359 template<
typename MatrixType>
367 slice const & row_slice,
374 slice const & row_slice,
387 template<
typename CPUMatrixT,
typename NumericT>
388 void copy(
const CPUMatrixT & cpu_matrix,
391 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
392 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
393 &&
bool(
"Matrix size mismatch!"));
395 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
397 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
399 std::vector<NumericT> entries(num_entries);
402 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
404 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
407 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
408 entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j);
416 template<
typename CPUMatrixT,
typename NumericT>
417 void copy(
const CPUMatrixT & cpu_matrix,
420 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
421 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
422 &&
bool(
"Matrix size mismatch!"));
425 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
427 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
429 std::vector<NumericT> entries(num_entries);
432 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
434 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
438 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
439 entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j);
454 template<
typename CPUMatrixT,
typename NumericT>
456 CPUMatrixT & cpu_matrix)
458 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
459 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
460 &&
bool(
"Matrix size mismatch!"));
462 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
464 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
466 std::vector<NumericT> entries(num_entries);
469 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
471 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
475 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
476 cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()];
484 template<
typename CPUMatrixT,
typename NumericT>
486 CPUMatrixT & cpu_matrix)
488 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
489 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
490 &&
bool(
"Matrix size mismatch!"));
492 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
494 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
496 std::vector<NumericT> entries(num_entries);
499 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
501 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
505 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
506 cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()];
516 template<
typename MatrixType>
519 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
524 template<
typename MatrixType>
527 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
532 template<
typename MatrixType>
535 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
viennacl::tools::shared_ptr< char > handle_type
MatrixType::handle_type handle_type
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
DistanceT difference_type
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
matrix_range(matrix_range< MatrixType > const &A, range const &row_range, range const &col_range)
Class for representing strided submatrices of a bigger matrix A.
range::size_type size_type
MatrixType::value_type value_type
MatrixType::handle_type handle_type
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
MatrixType::handle_type handle_type
size_type stride2() const
Returns the stride (step between consecutive selected entries) in the column direction.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
range::difference_type difference_type
Forward declaration of dense matrix classes.
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
matrix_slice(self_type const &other)
MatrixType::value_type value_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
range::size_type size_type
matrix_range(self_type const &A, range const &row_range, range const &col_range)
result_of::size_type< T >::type start(T const &obj)
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
range::difference_type difference_type
const value_type & const_reference
size_type stride1() const
Returns the stride (step between consecutive selected entries) in the row direction.
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
matrix_range(self_type const &other)
size_type size2() const
Returns the number of columns.
handle_type & handle()
Returns the OpenCL handle, non-const-version.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
size_type size1() const
Returns the number of rows.
MatrixType::handle_type handle_type
matrix_slice(self_type const &A, slice const &row_slice, slice const &col_slice)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementation of a slice object for use with proxy objects.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Implementation of a range object for use with proxy objects.
size_type start2() const
Returns the index of the first column of the (sub)matrix within the underlying buffer.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Class for representing non-strided submatrices of a bigger matrix A.
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
const value_type & const_reference
A slice class that refers to a strided set of indices: 'start', 'start' + 'stride', 'start' + 2*'stride', ..., with 'size' entries in total.
A tag for row-major storage of a dense matrix.
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
size_type start1() const
Returns the index of the first row of the (sub)matrix within the underlying buffer.
matrix_slice(matrix_slice< MatrixType > const &A, slice const &row_slice, slice const &col_slice)