30 #include <boost/numeric/ublas/io.hpp>
31 #include <boost/numeric/ublas/triangular.hpp>
32 #include <boost/numeric/ublas/matrix_sparse.hpp>
33 #include <boost/numeric/ublas/matrix.hpp>
34 #include <boost/numeric/ublas/matrix_proxy.hpp>
35 #include <boost/numeric/ublas/lu.hpp>
36 #include <boost/numeric/ublas/io.hpp>
43 #define VIENNACL_HAVE_UBLAS 1
52 #include "viennacl/device_specific/code_generator.hpp"
57 using namespace boost::numeric;
// Bounds for the "large" block sizes: 32 up to 128.
61 static const unsigned int min_large_block_size = 32;
62 static const unsigned int max_large_block_size = 128;
// Evaluates log(max/min) / log(2) + 1 and truncates to an unsigned integer.
// With the values above the quotient max/min is the integer 4, so the intended result is 3.
// NOTE(review): the value depends on truncating a floating-point ratio; if
// log(4)/log(2) ever rounds just below 2.0 the cast yields 2 instead of 3 — confirm,
// or compute the count with integer arithmetic.
63 static const unsigned int n_large_blocks =
static_cast<unsigned int>(std::log(max_large_block_size/min_large_block_size)/std::log(2.0)+1.0);
// Bounds for the alignment values: 1 up to 8.
65 static const unsigned int min_alignment = 1;
66 static const unsigned int max_alignment = 8;
// The largest "small" block size is defined to be equal to the largest alignment.
68 static const unsigned int max_small_block_size = max_alignment;
// Scalar difference helper: returns the signed difference (s1 - s2) divided by
// the larger of the two magnitudes, i.e. a relative difference.
// NOTE(review): the function signature and any guard for the case where both
// values are zero (zero denominator) are on lines not shown in this listing —
// confirm against the full file.
73 template<
typename ScalarType>
78 return (s1 - s2) /
std::max(fabs(s1), fabs(s2));
// Matrix difference helper: compares mat1 entry by entry against mat2_cpu,
// a ublas matrix created with the dimensions of mat2.
// NOTE(review): the signature, the step that fills mat2_cpu from mat2, and the
// reduction of the per-entry values into the return value are on lines not
// shown in this listing — confirm against the full file.
82 template<
typename ScalarType,
typename VCLMatrixType>
// Working copy with the same shape as mat2 (size1 x size2).
85 ublas::matrix<ScalarType> mat2_cpu(mat2.size1(), mat2.size2());
// Visit every (i, j) entry of mat2_cpu.
91 for (
unsigned int i = 0; i < mat2_cpu.size1(); ++i)
93 for (
unsigned int j = 0; j < mat2_cpu.size2(); ++j)
// Relative deviation of this entry: absolute difference divided by the larger magnitude.
// NOTE(review): when both entries are exactly zero this is 0/0 — check how the
// surrounding (unseen) code treats that case.
95 act = fabs(mat2_cpu(i,j) - mat1(i,j)) /
std::max( fabs(mat2_cpu(i, j)), fabs(mat1(i,j)) );
// Worker for the matrix-matrix product tests.
// Takes reference matrices (A, A_trans, B, B_trans, C) together with their
// ViennaCL counterparts (vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, ...) and runs
// four product variants in sequence. Each variant follows the same pattern:
//   1. print a banner,
//   2. enqueue a generated statement via generate_enqueue_statement,
//   3. compute act_diff = |diff(C, vcl_C)|,
//   4. if act_diff exceeds epsilon, print an error and set retval to EXIT_FAILURE.
// retval starts as EXIT_SUCCESS and is only ever changed to EXIT_FAILURE in the
// visible lines, so a failure in one variant does not stop the later variants.
// NOTE(review): the construction of `statement`, the reference computation of C,
// the remaining parameters and the return statement are on lines not shown here.
114 template<
typename NumericT,
typename Epsilon,
115 typename ReferenceMatrixTypeA,
typename ReferenceMatrixTypeB,
typename ReferenceMatrixTypeC,
116 typename MatrixTypeA,
typename MatrixTypeB,
typename MatrixTypeC>
119 ReferenceMatrixTypeA
const &
A, ReferenceMatrixTypeA
const & A_trans,
120 ReferenceMatrixTypeB
const & B, ReferenceMatrixTypeB
const & B_trans,
121 ReferenceMatrixTypeC & C,
123 MatrixTypeA
const & vcl_A, MatrixTypeA
const & vcl_A_trans,
124 MatrixTypeB
const & vcl_B, MatrixTypeB
const & vcl_B_trans,
// Overall result; flipped to EXIT_FAILURE by any failing variant below.
128 int retval = EXIT_SUCCESS;
// --- Variant 1: A * B ---
133 std::cout <<
"Testing C = alpha*prod(A,B) + beta*C ..." << std::endl;
// Enqueue the generated kernel for the statement, passing its first array entry.
138 viennacl::device_specific::generate_enqueue_statement(statement, statement.
array()[0]);
// Magnitude of the difference between the reference result and vcl_C.
140 act_diff = std::fabs(
diff(C, vcl_C));
141 if ( act_diff > epsilon )
// NOTE(review): this error text is identical for all four variants, so the log
// only identifies the failing variant through the preceding banner.
143 std::cout <<
"# Error at operation: matrix-matrix product" << std::endl;
144 std::cout <<
" diff: " << act_diff << std::endl;
145 retval = EXIT_FAILURE;
148 std::cout <<
"Test C = A * B passed!" << std::endl;
// --- Variant 2: trans(A) * B ---
152 std::cout <<
"Testing C = alpha*trans(A) * B + beta*C ..." << std::endl;
156 viennacl::device_specific::generate_enqueue_statement(statement, statement.
array()[0]);
158 act_diff = std::fabs(
diff(C, vcl_C));
159 if ( act_diff > epsilon )
161 std::cout <<
"# Error at operation: matrix-matrix product" << std::endl;
162 std::cout <<
" diff: " << act_diff << std::endl;
163 retval = EXIT_FAILURE;
165 else std::cout <<
"Test C = trans(A) * B passed!" << std::endl;
// --- Variant 3: A * trans(B) ---
168 std::cout <<
"Testing C = alpha*A * trans(B) + beta*C ..." << std::endl;
172 viennacl::device_specific::generate_enqueue_statement(statement, statement.
array()[0]);
174 act_diff = std::fabs(
diff(C, vcl_C));
175 if ( act_diff > epsilon )
177 std::cout <<
"# Error at operation: matrix-matrix product" << std::endl;
178 std::cout <<
" diff: " << act_diff << std::endl;
179 retval = EXIT_FAILURE;
181 else std::cout <<
"Test C = A * trans(B) passed!" << std::endl;
// --- Variant 4: trans(A) * trans(B) ---
184 std::cout <<
"Testing C = alpha*trans(A) * trans(B) + beta*C ..." << std::endl;
188 viennacl::device_specific::generate_enqueue_statement(statement, statement.
array()[0]);
190 act_diff = std::fabs(
diff(C, vcl_C));
191 if ( act_diff > epsilon )
193 std::cout <<
"# Error at operation: matrix-matrix product" << std::endl;
194 std::cout <<
" diff: " << act_diff << std::endl;
195 retval = EXIT_FAILURE;
197 else std::cout <<
"Test C = trans(A) * trans(B) passed!" << std::endl;
// Driver for one combination of layout parameters (F_A, F_B, F_C).
// Builds the reference matrices A, B, C with ublas, fills them with scaled
// random values, forms the transposes of A and B, and forwards everything to
// the worker overload test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, ...).
// NOTE(review): the signature, the creation of the ViennaCL matrices, the
// remaining call arguments and the return statement are on lines not shown here.
204 template<
typename NumericT,
typename F_A,
typename F_B,
typename F_C,
typename Epsilon>
// Problem dimensions are multiples (2x, 3x, 4x) of max_large_block_size.
209 std::size_t matrix_size1 = 2*max_large_block_size;
210 std::size_t matrix_size2 = 3*max_large_block_size;
211 std::size_t matrix_size3 = 4*max_large_block_size;
// Shapes chosen so that A (size1 x size2) times B (size2 x size3) matches C (size1 x size3).
216 ublas::matrix<NumericT>
A(matrix_size1, matrix_size2);
217 ublas::matrix<NumericT> B(matrix_size2, matrix_size3);
218 ublas::matrix<NumericT> C(matrix_size1, matrix_size3);
// Fill every entry of A, B and C with 0.1 * random<NumericT>().
221 for (std::size_t i = 0; i < A.size1(); ++i)
222 for (std::size_t j = 0; j < A.size2(); ++j)
223 A(i,j) =
static_cast<NumericT>(0.1) * random<NumericT>();
224 for (std::size_t i = 0; i < B.size1(); ++i)
225 for (std::size_t j = 0; j < B.size2(); ++j)
226 B(i,j) =
static_cast<NumericT>(0.1) * random<NumericT>();
227 for (std::size_t i = 0; i < C.size1(); ++i)
228 for (std::size_t j = 0; j < C.size2(); ++j)
229 C(i,j) =
static_cast<NumericT>(0.1) * random<NumericT>();
// Explicit transposed copies, passed to the worker as A_trans / B_trans.
232 ublas::matrix<NumericT> A_trans =
trans(A);
233 ublas::matrix<NumericT> B_trans =
trans(B);
261 std::cout <<
"--- Part 1: Testing matrix-matrix products ---" << std::endl;
269 std::cout <<
"Now using A=matrix, B=matrix, C=matrix" << std::endl;
// Run the four product variants; ret receives the worker's status code.
270 ret = test_prod<NumericT>(epsilon,
271 A, A_trans, B, B_trans, C,
// NOTE(review): the action taken on failure is on a line not shown here.
275 if (ret != EXIT_SUCCESS)
// Runs test_prod for all eight combinations of row_major / column_major layout
// for A, B and C, printing a banner before each combination.
// After each call the status in ret is compared against EXIT_SUCCESS.
// NOTE(review): the declaration of ret, the statement executed when a
// combination fails, and the final return are on lines not shown here.
281 template<
typename NumericT,
typename Epsilon >
282 int test(Epsilon
const& epsilon)
// --- C row-major: the four A/B layout combinations ---
286 std::cout <<
"///////////////////////////////////////" << std::endl;
287 std::cout <<
"/// Now testing A=row, B=row, C=row ///" << std::endl;
288 std::cout <<
"///////////////////////////////////////" << std::endl;
289 ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::row_major>(epsilon);
290 if (ret != EXIT_SUCCESS)
293 std::cout <<
"///////////////////////////////////////" << std::endl;
294 std::cout <<
"/// Now testing A=col, B=row, C=row ///" << std::endl;
295 std::cout <<
"///////////////////////////////////////" << std::endl;
296 ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::row_major>(epsilon);
297 if (ret != EXIT_SUCCESS)
300 std::cout <<
"///////////////////////////////////////" << std::endl;
301 std::cout <<
"/// Now testing A=row, B=col, C=row ///" << std::endl;
302 std::cout <<
"///////////////////////////////////////" << std::endl;
303 ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::row_major>(epsilon);
304 if (ret != EXIT_SUCCESS)
307 std::cout <<
"///////////////////////////////////////" << std::endl;
308 std::cout <<
"/// Now testing A=col, B=col, C=row ///" << std::endl;
309 std::cout <<
"///////////////////////////////////////" << std::endl;
310 ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::row_major>(epsilon);
311 if (ret != EXIT_SUCCESS)
// --- C column-major: the same four A/B layout combinations ---
315 std::cout <<
"///////////////////////////////////////" << std::endl;
316 std::cout <<
"/// Now testing A=row, B=row, C=col ///" << std::endl;
317 std::cout <<
"///////////////////////////////////////" << std::endl;
318 ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::column_major>(epsilon);
319 if (ret != EXIT_SUCCESS)
322 std::cout <<
"///////////////////////////////////////" << std::endl;
323 std::cout <<
"/// Now testing A=col, B=row, C=col ///" << std::endl;
324 std::cout <<
"///////////////////////////////////////" << std::endl;
325 ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::column_major>(epsilon);
326 if (ret != EXIT_SUCCESS)
329 std::cout <<
"///////////////////////////////////////" << std::endl;
330 std::cout <<
"/// Now testing A=row, B=col, C=col ///" << std::endl;
331 std::cout <<
"///////////////////////////////////////" << std::endl;
332 ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::column_major>(epsilon);
333 if (ret != EXIT_SUCCESS)
336 std::cout <<
"///////////////////////////////////////" << std::endl;
337 std::cout <<
"/// Now testing A=col, B=col, C=col ///" << std::endl;
338 std::cout <<
"///////////////////////////////////////" << std::endl;
339 ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::column_major>(epsilon);
340 if (ret != EXIT_SUCCESS)
// Test entry point: for each device in `dev`, prints a banner, seeds the C
// random generator, then runs test<NumericT>() with a float tolerance (1.0E-3)
// and, in the section starting at the VIENNACL_WITH_OPENCL guard, with a
// double tolerance (1.0E-11).
// NOTE(review): the definition of `dev`, the NumericT typedefs, the braces that
// scope each section, the failure branches and the final return are on lines
// not shown in this listing.
348 int main(
int argc,
char* argv[])
// Command-line arguments collected into a vector; no use of `args` is visible here.
350 std::vector<std::string> args(argv,argv+argc);
351 int retval = EXIT_SUCCESS;
353 typedef std::vector<viennacl::ocl::device> devices_type;
// One full test pass per device.
359 for (devices_type::iterator it = dev.begin(); it != dev.end(); ++it){
360 std::cout << std::endl;
361 std::cout <<
"----------------------------------------------" << std::endl;
362 std::cout <<
"----------------------------------------------" << std::endl;
363 std::cout <<
"## Test :: Generated BLAS 3 routines" << std::endl;
364 std::cout <<
"----------------------------------------------" << std::endl;
365 std::cout <<
"----------------------------------------------" << std::endl;
366 std::cout << std::endl;
// NOTE(review): this declares a second `retval` after the loop's opening brace,
// which appears to shadow the one declared before the loop. If main returns the
// outer variable, test failures recorded here would never reach the process
// exit code — confirm against the full file.
368 int retval = EXIT_SUCCESS;
// Seed rand() from the wall clock, so the random test data differs between runs.
370 srand(static_cast<unsigned int>(time(NULL)));
372 std::cout << std::endl;
373 std::cout <<
"----------------------------------------------" << std::endl;
374 std::cout << std::endl;
// --- Single-precision pass (the setup banner below reports "float") ---
377 NumericT epsilon =
NumericT(1.0E-3);
378 std::cout <<
"# Testing setup:" << std::endl;
382 std::cout <<
" eps: " << epsilon << std::endl;
383 std::cout <<
" numeric: float" << std::endl;
384 retval = test<NumericT>(epsilon);
385 if ( retval == EXIT_SUCCESS )
386 std::cout <<
"# Test passed" << std::endl;
390 std::cout << std::endl;
391 std::cout <<
"----------------------------------------------" << std::endl;
392 std::cout << std::endl;
// --- Double-precision pass (the setup banner below reports "double") ---
// NOTE(review): the matching #endif and any runtime double-support check are
// on lines not shown here.
393 #ifdef VIENNACL_WITH_OPENCL
399 NumericT epsilon = 1.0E-11;
400 std::cout <<
"# Testing setup:" << std::endl;
401 std::cout <<
" eps: " << epsilon << std::endl;
402 std::cout <<
" numeric: double" << std::endl;
403 retval = test<NumericT>(epsilon);
404 if ( retval == EXIT_SUCCESS )
405 std::cout <<
"# Test passed" << std::endl;
409 std::cout << std::endl;
410 std::cout <<
"----------------------------------------------" << std::endl;
411 std::cout << std::endl;
414 std::cout << std::endl;
415 std::cout <<
"------- Test completed --------" << std::endl;
416 std::cout << std::endl;
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar< int > s2
**********************************ViennaCL Change Logs *************************************Version x ***Version While the work for the upcoming release is in full this maintenance release fixes a couple of bugs and performance regressions reported to x and this increases the performance of matrix matrix multiplications to GFLOPs in single precision on a GeForce GTX Thanks to Paul Dufort for bringing this to our attention Added support for the operation A
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
A tag class representing assignment.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
container_type const & array() const
T max(const T &lhs, const T &rhs)
Maximum.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
int main(int argc, char *argv[])
viennacl::scalar< float > s1
int test(Epsilon const &epsilon)
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Implementations of dense direct solvers are found here.
Proxy classes for matrices.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
int test_prod(Epsilon const &epsilon, ReferenceMatrixTypeA const &A, ReferenceMatrixTypeA const &A_trans, ReferenceMatrixTypeB const &B, ReferenceMatrixTypeB const &B_trans, ReferenceMatrixTypeC &C, MatrixTypeA const &vcl_A, MatrixTypeA const &vcl_A_trans, MatrixTypeB const &vcl_B, MatrixTypeB const &vcl_B_trans, MatrixTypeC &vcl_C)
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Implementation of the ViennaCL scalar class.
Added support for prod(A, B)