58 template<
typename NumericT>
61 if (std::fabs(s1 - s2) > 0)
62 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
66 template<
typename NumericT>
69 std::vector<NumericT> v2_cpu(v2.
size());
73 for (std::size_t i=0;i<v1.size(); ++i)
75 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
76 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
80 if (v2_cpu[i] > 0.0001)
83 std::cout <<
"Error at entry " << i <<
": " << v1[i] <<
" vs. " << v2[i] << std::endl;
89 NumericT inf_norm = 0;
90 for (std::size_t i=0;i<v2_cpu.size(); ++i)
91 inf_norm = std::max<NumericT>(inf_norm, std::fabs(v2_cpu[i]));
96 template<
typename NumericT>
100 for (std::size_t i=0; i<A2.
size1(); ++i)
101 for (std::size_t j=0; j<A2.
size2(); ++j)
109 return diff(host_values, vcl_device_values);
113 template<
typename HostContainerT,
typename DeviceContainerT,
typename NumericT>
114 void check(HostContainerT
const & host_container, DeviceContainerT
const & device_container,
115 std::string current_stage, NumericT epsilon)
117 current_stage.resize(25,
' ');
118 std::cout <<
"Testing operation: " << current_stage;
119 NumericT rel_error = std::fabs(
diff(host_container, device_container));
121 if (rel_error > epsilon)
123 std::cout << std::endl;
124 std::cout <<
"# Error at operation: " << current_stage << std::endl;
125 std::cout <<
" diff: " << rel_error << std::endl;
128 std::cout <<
"PASS" << std::endl;
// Plain-assignment operation: overwrites lhs with rhs using operator=.
// lhs is taken by mutable reference and modified in place; rhs is read-only.
// NOTE(review): presumably a static member of the op_assign tag type (struct header
// not visible in this listing) - confirm against the full file.
134 template<
typename LHS,
typename RHS>
135 static void apply(LHS & lhs, RHS
const & rhs) { lhs = rhs; }
// Returns the textual symbol of the plain-assignment operation ("=").
// test_gemm splices OpT::str() into the stage label it passes to check().
137 static std::string
str() {
return "="; }
// In-place addition operation: accumulates rhs into lhs using operator+=.
// lhs is taken by mutable reference and modified in place; rhs is read-only.
// NOTE(review): presumably a static member of the op_plus_assign tag type (struct header
// not visible in this listing) - confirm against the full file.
142 template<
typename LHS,
typename RHS>
143 static void apply(LHS & lhs, RHS
const & rhs) { lhs += rhs; }
// Returns the textual symbol of the in-place addition operation ("+=").
// test_gemm splices OpT::str() into the stage label it passes to check().
145 static std::string
str() {
return "+="; }
// In-place subtraction operation: subtracts rhs from lhs using operator-=.
// lhs is taken by mutable reference and modified in place; rhs is read-only.
// NOTE(review): presumably a static member of the op_minus_assign tag type (struct header
// not visible in this listing) - confirm against the full file.
150 template<
typename LHS,
typename RHS>
151 static void apply(LHS & lhs, RHS
const & rhs) { lhs -= rhs; }
// Returns the textual symbol of the in-place subtraction operation ("-=").
// test_gemm splices OpT::str() into the stage label it passes to check().
153 static std::string
str() {
return "-="; }
160 template<
typename OpT,
typename NumericT,
typename HostMatrixT,
typename DeviceMatrixT>
162 HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
163 DeviceMatrixT & device_A, std::string name_A,
164 DeviceMatrixT & device_B, std::string name_B,
165 DeviceMatrixT & device_C,
bool copy_from_A,
166 bool trans_first,
bool trans_second)
168 for (std::size_t i = 0; i<host_A.size(); ++i)
169 for (std::size_t j = 0; j<host_A[i].size(); ++j)
171 host_A[i][j] = random<NumericT>();
172 host_B[i][j] = random<NumericT>();
182 for (std::size_t i = 0; i<host_A.size(); ++i)
183 for (std::size_t j = 0; j<host_A[i].size(); ++j)
186 for (std::size_t k = 0; k<host_A[i].size(); ++k)
187 tmp += (trans_first ? host_A[k][i] : host_A[i][k])
188 * (trans_second ? host_B[j][k] : host_B[k][j]);
189 OpT::apply(host_C[i][j], tmp);
192 if (trans_first && trans_second)
195 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
"^T"), epsilon);
197 else if (trans_first && !trans_second)
200 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
""), epsilon);
202 else if (!trans_first && trans_second)
205 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
"^T"), epsilon);
210 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
""), epsilon);
216 template<
typename OpT,
typename NumericT,
typename HostMatrixT,
typename DeviceMatrixT>
218 HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
219 DeviceMatrixT & device_A, std::string name_A,
220 DeviceMatrixT & device_B, std::string name_B,
221 DeviceMatrixT & device_C,
bool copy_from_A)
223 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
false,
false);
224 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
false,
true);
225 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
true,
false);
226 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
true,
true);
232 template<
typename NumericT>
241 std::vector<NumericT> std_x(N);
242 std::vector<NumericT> std_y(N);
243 std::vector<NumericT> std_z(N);
245 for (std::size_t i=0; i<std_x.size(); ++i)
246 std_x[i] = NumericT(i + 1);
247 for (std::size_t i=0; i<std_y.size(); ++i)
248 std_y[i] = NumericT(i*i + 1);
249 for (std::size_t i=0; i<std_z.size(); ++i)
250 std_z[i] = NumericT(2 * i + 1);
262 check(std_x, vcl_x,
"x = x", epsilon);
265 std_x[0] = std_x[2]; std_x[1] = std_x[3];
267 check(std_x, vcl_x,
"x = x (range)", epsilon);
273 std::vector<std::vector<NumericT> > std_A(N, std::vector<NumericT>(N, NumericT(1)));
274 std::vector<std::vector<NumericT> > std_B(N, std::vector<NumericT>(N, NumericT(2)));
275 std::vector<std::vector<NumericT> > std_C(N, std::vector<NumericT>(N, NumericT(3)));
287 check(std_A, vcl_A,
"A = A", epsilon);
290 std_A[0][0] = std_A[0][2]; std_A[0][1] = std_A[0][3];
292 check(std_A, vcl_A,
"A = A (range)", epsilon);
295 for (std::size_t i = 0; i<std_y.size(); ++i)
298 for (std::size_t j = 0; j<std_x.size(); ++j)
299 val += std_A[i][j] * std_x[j];
303 check(std_y, vcl_x,
"x = A*x", epsilon);
306 std::vector< std::map<unsigned int, NumericT> > std_Asparse(N);
308 for (std::size_t i=0; i<std_Asparse.size(); ++i)
311 std_Asparse[i][i-1] = random<NumericT>();
312 std_Asparse[i][i] = NumericT(1) + random<NumericT>();
313 if (i < std_Asparse.size() - 1)
314 std_Asparse[i][i+1] = random<NumericT>();
330 for (std::size_t i=0; i<std_Asparse.size(); ++i)
333 for (
typename std::map<unsigned int, NumericT>::const_iterator it = std_Asparse[i].begin(); it != std_Asparse[i].end(); ++it)
334 val += it->second * std_x[it->first];
340 check(std_y, vcl_x,
"x = A*x (sparse, csr)", epsilon);
344 check(std_y, vcl_x,
"x = A*x (sparse, coo)", epsilon);
348 check(std_y, vcl_x,
"x = A*x (sparse, ell)", epsilon);
352 check(std_y, vcl_x,
"x = A*x (sparse, sell)", epsilon);
356 check(std_y, vcl_x,
"x = A*x (sparse, hyb)", epsilon);
357 std::cout << std::endl;
363 test_gemm<op_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
364 test_gemm<op_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
365 test_gemm<op_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
366 std::cout << std::endl;
368 test_gemm<op_plus_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
369 test_gemm<op_plus_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
370 test_gemm<op_plus_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
371 std::cout << std::endl;
373 test_gemm<op_minus_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
374 test_gemm<op_minus_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
375 test_gemm<op_minus_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
376 std::cout << std::endl;
385 for (std::size_t i = 0; i<std_A.size(); ++i)
386 for (std::size_t j = 0; j<std_A[i].size(); ++j)
389 for (std::size_t k = 0; k<std_A[i].size(); ++k)
390 tmp += std_Asparse[i][k] * std_A[k][j];
396 check(std_C, vcl_A,
"A = csr*A", epsilon);
400 check(std_C, vcl_A,
"A = coo*A", epsilon);
404 check(std_C, vcl_A,
"A = ell*A", epsilon);
412 check(std_C, vcl_A,
"A = hyb*A", epsilon);
416 for (std::size_t i = 0; i<std_A.size(); ++i)
417 for (std::size_t j = 0; j<std_A[i].size(); ++j)
420 for (std::size_t k = 0; k<std_A[i].size(); ++k)
421 tmp += std_Asparse[i][k] * std_A[j][k];
427 check(std_C, vcl_A,
"A = csr*A^T", epsilon);
431 check(std_C, vcl_A,
"A = coo*A^T", epsilon);
435 check(std_C, vcl_A,
"A = ell*A^T", epsilon);
443 check(std_C, vcl_A,
"A = hyb*A^T", epsilon);
454 std::cout << std::endl;
455 std::cout <<
"----------------------------------------------" << std::endl;
456 std::cout <<
"----------------------------------------------" << std::endl;
457 std::cout <<
"## Test :: Self-Assignment" << std::endl;
458 std::cout <<
"----------------------------------------------" << std::endl;
459 std::cout <<
"----------------------------------------------" << std::endl;
460 std::cout << std::endl;
462 int retval = EXIT_SUCCESS;
464 std::cout << std::endl;
465 std::cout <<
"----------------------------------------------" << std::endl;
466 std::cout << std::endl;
468 typedef float NumericT;
469 NumericT epsilon =
static_cast<NumericT
>(1E-4);
470 std::cout <<
"# Testing setup:" << std::endl;
471 std::cout <<
" eps: " << epsilon << std::endl;
472 std::cout <<
" numeric: float" << std::endl;
473 retval = test<NumericT>(epsilon);
474 if ( retval == EXIT_SUCCESS )
475 std::cout <<
"# Test passed" << std::endl;
479 std::cout << std::endl;
480 std::cout <<
"----------------------------------------------" << std::endl;
481 std::cout << std::endl;
485 std::cout << std::endl;
486 std::cout <<
"------- Test completed --------" << std::endl;
487 std::cout << std::endl;
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
void test_gemm(NumericT epsilon, HostMatrixT &host_A, HostMatrixT &host_B, HostMatrixT &host_C, DeviceMatrixT &device_A, std::string name_A, DeviceMatrixT &device_B, std::string name_B, DeviceMatrixT &device_C, bool copy_from_A, bool trans_first, bool trans_second)
A reader and writer for the matrix market format is implemented here.
NumericT diff(NumericT const &s1, viennacl::scalar< NumericT > const &s2)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Implementation of the coordinate_matrix class.
static void apply(LHS &lhs, RHS const &rhs)
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Sparse matrix class using the ELLPACK format for storing the nonzeros.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Implementations of incomplete factorization preconditioners. Convenience header file.
Sparse matrix class using the sliced ELLPACK format with parameters $C$, $\sigma$.
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
int test(NumericT epsilon)
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
size_type size2() const
Returns the number of columns.
Implementation of the ell_matrix class.
size_type size1() const
Returns the number of rows.
Proxy classes for vectors.
Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number o...
Proxy classes for matrices.
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void apply(LHS &lhs, RHS const &rhs)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
static void apply(LHS &lhs, RHS const &rhs)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
void check(HostContainerT const &host_container, DeviceContainerT const &device_container, std::string current_stage, NumericT epsilon)
Common routines used within ILU-type preconditioners.
Implementation of the ViennaCL scalar class.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)