ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
scheduler_matrix_matrix.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 
28 //
29 // *** Boost
30 //
31 #include <boost/numeric/ublas/io.hpp>
32 #include <boost/numeric/ublas/triangular.hpp>
33 #include <boost/numeric/ublas/matrix_sparse.hpp>
34 #include <boost/numeric/ublas/matrix.hpp>
35 #include <boost/numeric/ublas/matrix_proxy.hpp>
36 #include <boost/numeric/ublas/lu.hpp>
37 #include <boost/numeric/ublas/io.hpp>
38 
39 //
40 // *** ViennaCL
41 //
42 //#define VIENNACL_DEBUG_ALL
43 //#define VIENNACL_DEBUG_BUILD
44 #define VIENNACL_WITH_UBLAS 1
#include "viennacl/scalar.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/matrix_proxy.hpp"        // matrix_range / matrix_slice
#include "viennacl/vector.hpp"
#include "viennacl/linalg/prod.hpp"
#include "viennacl/scheduler/execute.hpp"   // scheduler::statement, scheduler::execute
#include "viennacl/scheduler/io.hpp"
#include "examples/tutorial/Random.hpp"     // random<T>() -- NOTE(review): path assumed from the 1.6.x source tree; confirm
53 
56 
57 //
58 // -------------------------------------------------------------
59 //
60 using namespace boost::numeric;
61 //
62 // -------------------------------------------------------------
63 //
64 template<typename ScalarType>
66 {
68  if (s1 != s2)
69  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
70  return 0;
71 }
72 
73 template<typename ScalarType>
74 ScalarType diff(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
75 {
76  ublas::vector<ScalarType> v2_cpu(v2.size());
77  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
78  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
79 
80  for (std::size_t i=0;i<v1.size(); ++i)
81  {
82  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
83  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
84  else
85  v2_cpu[i] = 0.0;
86  }
87 
88  return norm_inf(v2_cpu);
89 }
90 
91 
92 template<typename ScalarType, typename VCLMatrixType>
93 ScalarType diff(ublas::matrix<ScalarType> & mat1, VCLMatrixType & mat2)
94 {
95  ublas::matrix<ScalarType> mat2_cpu(mat2.size1(), mat2.size2());
96  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
97  viennacl::copy(mat2, mat2_cpu);
98  ScalarType ret = 0;
99  ScalarType act = 0;
100 
101  for (unsigned int i = 0; i < mat2_cpu.size1(); ++i)
102  {
103  for (unsigned int j = 0; j < mat2_cpu.size2(); ++j)
104  {
105  act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) );
106  if (act > ret)
107  ret = act;
108  }
109  }
110  //std::cout << ret << std::endl;
111  return ret;
112 }
113 
114 
115 
116 
117 
118 
119 //
120 // Part 1: Matrix-matrix multiplications
121 //
122 
123 
124 template< typename NumericT, typename Epsilon,
125  typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC,
126  typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
127 int test_prod(Epsilon const& epsilon,
128 
129  ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans,
130  ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans,
131  ReferenceMatrixTypeC & C,
132 
133  MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans,
134  MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans,
135  MatrixTypeC & vcl_C
136  )
137 {
138  int retval = EXIT_SUCCESS;
139  NumericT act_diff = 0;
140 
141 
142  // Test: C +-= A * B --------------------------------------------------------------------------
143  C = viennacl::linalg::prod(A, B);
144  {
145  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, vcl_B));
146  viennacl::scheduler::execute(my_statement);
147  }
148  act_diff = std::fabs(diff(C, vcl_C));
149 
150  if ( act_diff > epsilon )
151  {
152  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
153  std::cout << " diff: " << act_diff << std::endl;
154  retval = EXIT_FAILURE;
155  }
156  else
157  std::cout << "Test C = A * B passed!" << std::endl;
158 
159 
160  C += viennacl::linalg::prod(A, B);
161  {
163  viennacl::scheduler::execute(my_statement);
164  }
165  act_diff = std::fabs(diff(C, vcl_C));
166 
167  if ( act_diff > epsilon )
168  {
169  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
170  std::cout << " diff: " << act_diff << std::endl;
171  retval = EXIT_FAILURE;
172  }
173  else
174  std::cout << "Test C += A * B passed!" << std::endl;
175 
176  C -= viennacl::linalg::prod(A, B);
177  {
179  viennacl::scheduler::execute(my_statement);
180  }
181  act_diff = std::fabs(diff(C, vcl_C));
182 
183  if ( act_diff > epsilon )
184  {
185  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
186  std::cout << " diff: " << act_diff << std::endl;
187  retval = EXIT_FAILURE;
188  }
189  else
190  std::cout << "Test C -= A * B passed!" << std::endl;
191 
192 
193 
194 
195 
196  // Test: C +-= A * trans(B) --------------------------------------------------------------------------
197  C = boost::numeric::ublas::prod(A, trans(B_trans));
198  {
199  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
200  viennacl::scheduler::execute(my_statement);
201  }
202  act_diff = std::fabs(diff(C, vcl_C));
203 
204  if ( act_diff > epsilon )
205  {
206  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
207  std::cout << " diff: " << act_diff << std::endl;
208  retval = EXIT_FAILURE;
209  }
210  else
211  std::cout << "Test C = A * trans(B) passed!" << std::endl;
212 
213 
214  C += boost::numeric::ublas::prod(A, trans(B_trans));
215  {
216  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
217  viennacl::scheduler::execute(my_statement);
218  }
219  act_diff = std::fabs(diff(C, vcl_C));
220 
221  if ( act_diff > epsilon )
222  {
223  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
224  std::cout << " diff: " << act_diff << std::endl;
225  retval = EXIT_FAILURE;
226  }
227  else
228  std::cout << "Test C += A * trans(B) passed!" << std::endl;
229 
230 
231  C -= boost::numeric::ublas::prod(A, trans(B_trans));
232  {
233  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)));
234  viennacl::scheduler::execute(my_statement);
235  }
236  act_diff = std::fabs(diff(C, vcl_C));
237 
238  if ( act_diff > epsilon )
239  {
240  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
241  std::cout << " diff: " << act_diff << std::endl;
242  retval = EXIT_FAILURE;
243  }
244  else
245  std::cout << "Test C -= A * trans(B) passed!" << std::endl;
246 
247 
248 
249  // Test: C +-= trans(A) * B --------------------------------------------------------------------------
250  C = boost::numeric::ublas::prod(trans(A_trans), B);
251  {
252  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
253  viennacl::scheduler::execute(my_statement);
254  }
255  act_diff = std::fabs(diff(C, vcl_C));
256 
257  if ( act_diff > epsilon )
258  {
259  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
260  std::cout << " diff: " << act_diff << std::endl;
261  retval = EXIT_FAILURE;
262  }
263  else
264  std::cout << "Test C = trans(A) * B passed!" << std::endl;
265 
266 
267  C += boost::numeric::ublas::prod(trans(A_trans), B);
268  {
269  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
270  viennacl::scheduler::execute(my_statement);
271  }
272  act_diff = std::fabs(diff(C, vcl_C));
273 
274  if ( act_diff > epsilon )
275  {
276  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
277  std::cout << " diff: " << act_diff << std::endl;
278  retval = EXIT_FAILURE;
279  }
280  else
281  std::cout << "Test C += trans(A) * B passed!" << std::endl;
282 
283 
284  C -= boost::numeric::ublas::prod(trans(A_trans), B);
285  {
286  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B));
287  viennacl::scheduler::execute(my_statement);
288  }
289  act_diff = std::fabs(diff(C, vcl_C));
290 
291  if ( act_diff > epsilon )
292  {
293  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
294  std::cout << " diff: " << act_diff << std::endl;
295  retval = EXIT_FAILURE;
296  }
297  else
298  std::cout << "Test C -= trans(A) * B passed!" << std::endl;
299 
300 
301 
302 
303 
304  // Test: C +-= trans(A) * trans(B) --------------------------------------------------------------------------
305  C = boost::numeric::ublas::prod(trans(A_trans), trans(B_trans));
306  {
307  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
308  viennacl::scheduler::execute(my_statement);
309  }
310  act_diff = std::fabs(diff(C, vcl_C));
311 
312  if ( act_diff > epsilon )
313  {
314  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
315  std::cout << " diff: " << act_diff << std::endl;
316  retval = EXIT_FAILURE;
317  }
318  else
319  std::cout << "Test C = trans(A) * trans(B) passed!" << std::endl;
320 
321  C += boost::numeric::ublas::prod(trans(A_trans), trans(B_trans));
322  {
323  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
324  viennacl::scheduler::execute(my_statement);
325  }
326  act_diff = std::fabs(diff(C, vcl_C));
327 
328  if ( act_diff > epsilon )
329  {
330  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
331  std::cout << " diff: " << act_diff << std::endl;
332  retval = EXIT_FAILURE;
333  }
334  else
335  std::cout << "Test C += trans(A) * trans(B) passed!" << std::endl;
336 
337 
338  C -= boost::numeric::ublas::prod(trans(A_trans), trans(B_trans));
339  {
340  viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)));
341  viennacl::scheduler::execute(my_statement);
342  }
343  act_diff = std::fabs(diff(C, vcl_C));
344 
345  if ( act_diff > epsilon )
346  {
347  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
348  std::cout << " diff: " << act_diff << std::endl;
349  retval = EXIT_FAILURE;
350  }
351  else
352  std::cout << "Test C -= trans(A) * trans(B) passed!" << std::endl;
353 
354 
355 
356 
357  return retval;
358 }
359 
360 
361 
362 template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon >
363 int test_prod(Epsilon const& epsilon)
364 {
365  int ret;
366 
367  std::size_t matrix_size1 = 29; //some odd number, not too large
368  std::size_t matrix_size2 = 47; //some odd number, not too large
369  std::size_t matrix_size3 = 33; //some odd number, not too large
370  //std::size_t matrix_size1 = 128; //some odd number, not too large
371  //std::size_t matrix_size2 = 64; //some odd number, not too large
372  //std::size_t matrix_size3 = 128; //some odd number, not too large
373  //std::size_t matrix_size1 = 256; // for testing AMD kernels
374  //std::size_t matrix_size2 = 256; // for testing AMD kernels
375  //std::size_t matrix_size3 = 256; // for testing AMD kernels
376 
377  // --------------------------------------------------------------------------
378 
379  // ublas reference:
380  ublas::matrix<NumericT> A(matrix_size1, matrix_size2);
381  ublas::matrix<NumericT> big_A = ublas::scalar_matrix<NumericT>(4*matrix_size1, 4*matrix_size2, NumericT(3.1415));
382 
383  ublas::matrix<NumericT> B(matrix_size2, matrix_size3);
384  ublas::matrix<NumericT> big_B = ublas::scalar_matrix<NumericT>(4*matrix_size2, 4*matrix_size3, NumericT(42.0));
385 
386  ublas::matrix<NumericT> C(matrix_size1, matrix_size3);
387 
388  //fill A and B:
389  for (unsigned int i = 0; i < A.size1(); ++i)
390  for (unsigned int j = 0; j < A.size2(); ++j)
391  A(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
392  for (unsigned int i = 0; i < B.size1(); ++i)
393  for (unsigned int j = 0; j < B.size2(); ++j)
394  B(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
395 
396  ublas::matrix<NumericT> A_trans = trans(A);
397  ublas::matrix<NumericT> big_A_trans = trans(big_A);
398 
399  ublas::matrix<NumericT> B_trans = trans(B);
400  ublas::matrix<NumericT> big_B_trans = trans(big_B);
401 
402  //
403  // ViennaCL objects
404  //
405 
406  // A
407  viennacl::range range1_A(matrix_size1, 2*matrix_size1);
408  viennacl::range range2_A(matrix_size2, 2*matrix_size2);
409  viennacl::slice slice1_A(matrix_size1, 2, matrix_size1);
410  viennacl::slice slice2_A(matrix_size2, 3, matrix_size2);
411 
412  viennacl::matrix<NumericT, F_A> vcl_A(matrix_size1, matrix_size2);
413  viennacl::copy(A, vcl_A);
414 
415  viennacl::matrix<NumericT, F_A> vcl_big_range_A(4*matrix_size1, 4*matrix_size2);
416  viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A(vcl_big_range_A, range1_A, range2_A);
417  viennacl::copy(A, vcl_range_A);
418 
419  viennacl::matrix<NumericT, F_A> vcl_big_slice_A(4*matrix_size1, 4*matrix_size2);
420  viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A);
421  viennacl::copy(A, vcl_slice_A);
422 
423 
424  // A^T
425  viennacl::matrix<NumericT, F_A> vcl_A_trans(matrix_size2, matrix_size1);
426  viennacl::copy(A_trans, vcl_A_trans);
427 
428  viennacl::matrix<NumericT, F_A> vcl_big_range_A_trans(4*matrix_size2, 4*matrix_size1);
429  viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A_trans(vcl_big_range_A_trans, range2_A, range1_A);
430  viennacl::copy(A_trans, vcl_range_A_trans);
431 
432  viennacl::matrix<NumericT, F_A> vcl_big_slice_A_trans(4*matrix_size2, 4*matrix_size1);
433  viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A_trans(vcl_big_slice_A_trans, slice2_A, slice1_A);
434  viennacl::copy(A_trans, vcl_slice_A_trans);
435 
436 
437 
438  // B
439  viennacl::range range1_B(2*matrix_size2, 3*matrix_size2);
440  viennacl::range range2_B(2*matrix_size3, 3*matrix_size3);
441  viennacl::slice slice1_B(matrix_size2, 3, matrix_size2);
442  viennacl::slice slice2_B(matrix_size3, 2, matrix_size3);
443 
444  viennacl::matrix<NumericT, F_B> vcl_B(matrix_size2, matrix_size3);
445  viennacl::copy(B, vcl_B);
446 
447  viennacl::matrix<NumericT, F_B> vcl_big_range_B(4*matrix_size2, 4*matrix_size3);
448  viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B(vcl_big_range_B, range1_B, range2_B);
449  viennacl::copy(B, vcl_range_B);
450 
451  viennacl::matrix<NumericT, F_B> vcl_big_slice_B(4*matrix_size2, 4*matrix_size3);
452  viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B);
453  viennacl::copy(B, vcl_slice_B);
454 
455 
456  // B^T
457 
458  viennacl::matrix<NumericT, F_B> vcl_B_trans(matrix_size3, matrix_size2);
459  viennacl::copy(B_trans, vcl_B_trans);
460 
461  viennacl::matrix<NumericT, F_B> vcl_big_range_B_trans(4*matrix_size3, 4*matrix_size2);
462  viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B_trans(vcl_big_range_B_trans, range2_B, range1_B);
463  viennacl::copy(B_trans, vcl_range_B_trans);
464 
465  viennacl::matrix<NumericT, F_B> vcl_big_slice_B_trans(4*matrix_size3, 4*matrix_size2);
466  viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B_trans(vcl_big_slice_B_trans, slice2_B, slice1_B);
467  viennacl::copy(B_trans, vcl_slice_B_trans);
468 
469 
470  // C
471 
472  viennacl::range range1_C(matrix_size1-1, 2*matrix_size1-1);
473  viennacl::range range2_C(matrix_size3-1, 2*matrix_size3-1);
474  viennacl::slice slice1_C(matrix_size1-1, 3, matrix_size1);
475  viennacl::slice slice2_C(matrix_size3-1, 3, matrix_size3);
476 
477  viennacl::matrix<NumericT, F_C> vcl_C(matrix_size1, matrix_size3);
478 
479  viennacl::matrix<NumericT, F_C> vcl_big_range_C(4*matrix_size1, 4*matrix_size3);
480  viennacl::matrix_range<viennacl::matrix<NumericT, F_C> > vcl_range_C(vcl_big_range_C, range1_C, range2_C);
481 
482  viennacl::matrix<NumericT, F_C> vcl_big_slice_C(4*matrix_size1, 4*matrix_size3);
483  viennacl::matrix_slice<viennacl::matrix<NumericT, F_C> > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C);
484 
485 
486  std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl;
487 
491 
492  //
493  //
494  std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl;
495  ret = test_prod<NumericT>(epsilon,
496  A, A_trans, B, B_trans, C,
497  vcl_A, vcl_A_trans,
498  vcl_B, vcl_B_trans,
499  vcl_C);
500  if (ret != EXIT_SUCCESS)
501  return ret;
502 
503 
504  //
505  //
506  std::cout << "Now using A=matrix, B=matrix, C=range" << std::endl;
507  ret = test_prod<NumericT>(epsilon,
508  A, A_trans, B, B_trans, C,
509  vcl_A, vcl_A_trans,
510  vcl_B, vcl_B_trans,
511  vcl_range_C);
512  if (ret != EXIT_SUCCESS)
513  return ret;
514 
515  //
516  //
517  std::cout << "Now using A=matrix, B=matrix, C=slice" << std::endl;
518  ret = test_prod<NumericT>(epsilon,
519  A, A_trans, B, B_trans, C,
520  vcl_A, vcl_A_trans,
521  vcl_B, vcl_B_trans,
522  vcl_slice_C);
523  if (ret != EXIT_SUCCESS)
524  return ret;
525 
526 
527 
528  //
529  //
530  std::cout << "Now using A=matrix, B=range, C=matrix" << std::endl;
531  ret = test_prod<NumericT>(epsilon,
532  A, A_trans, B, B_trans, C,
533  vcl_A, vcl_A_trans,
534  vcl_range_B, vcl_range_B_trans,
535  vcl_C);
536  if (ret != EXIT_SUCCESS)
537  return ret;
538 
539 
540  //
541  //
542  std::cout << "Now using A=matrix, B=range, C=range" << std::endl;
543  ret = test_prod<NumericT>(epsilon,
544  A, A_trans, B, B_trans, C,
545  vcl_A, vcl_A_trans,
546  vcl_range_B, vcl_range_B_trans,
547  vcl_range_C);
548  if (ret != EXIT_SUCCESS)
549  return ret;
550 
551  //
552  //
553  std::cout << "Now using A=matrix, B=range, C=slice" << std::endl;
554  ret = test_prod<NumericT>(epsilon,
555  A, A_trans, B, B_trans, C,
556  vcl_A, vcl_A_trans,
557  vcl_range_B, vcl_range_B_trans,
558  vcl_slice_C);
559  if (ret != EXIT_SUCCESS)
560  return ret;
561 
562 
563  //
564  //
565  std::cout << "Now using A=matrix, B=slice, C=matrix" << std::endl;
566  ret = test_prod<NumericT>(epsilon,
567  A, A_trans, B, B_trans, C,
568  vcl_A, vcl_A_trans,
569  vcl_slice_B, vcl_slice_B_trans,
570  vcl_C);
571  if (ret != EXIT_SUCCESS)
572  return ret;
573 
574 
575  //
576  //
577  std::cout << "Now using A=matrix, B=slice, C=range" << std::endl;
578  ret = test_prod<NumericT>(epsilon,
579  A, A_trans, B, B_trans, C,
580  vcl_A, vcl_A_trans,
581  vcl_slice_B, vcl_slice_B_trans,
582  vcl_range_C);
583  if (ret != EXIT_SUCCESS)
584  return ret;
585 
586  //
587  //
588  std::cout << "Now using A=matrix, B=slice, C=slice" << std::endl;
589  ret = test_prod<NumericT>(epsilon,
590  A, A_trans, B, B_trans, C,
591  vcl_A, vcl_A_trans,
592  vcl_slice_B, vcl_slice_B_trans,
593  vcl_slice_C);
594  if (ret != EXIT_SUCCESS)
595  return ret;
596 
597 
601 
602  //
603  //
604  std::cout << "Now using A=range, B=matrix, C=matrix" << std::endl;
605  ret = test_prod<NumericT>(epsilon,
606  A, A_trans, B, B_trans, C,
607  vcl_range_A, vcl_range_A_trans,
608  vcl_B, vcl_B_trans,
609  vcl_C);
610  if (ret != EXIT_SUCCESS)
611  return ret;
612 
613 
614  //
615  //
616  std::cout << "Now using A=range, B=matrix, C=range" << std::endl;
617  ret = test_prod<NumericT>(epsilon,
618  A, A_trans, B, B_trans, C,
619  vcl_range_A, vcl_range_A_trans,
620  vcl_B, vcl_B_trans,
621  vcl_range_C);
622  if (ret != EXIT_SUCCESS)
623  return ret;
624 
625  //
626  //
627  std::cout << "Now using A=range, B=matrix, C=slice" << std::endl;
628  ret = test_prod<NumericT>(epsilon,
629  A, A_trans, B, B_trans, C,
630  vcl_range_A, vcl_range_A_trans,
631  vcl_B, vcl_B_trans,
632  vcl_slice_C);
633  if (ret != EXIT_SUCCESS)
634  return ret;
635 
636 
637 
638  //
639  //
640  std::cout << "Now using A=range, B=range, C=matrix" << std::endl;
641  ret = test_prod<NumericT>(epsilon,
642  A, A_trans, B, B_trans, C,
643  vcl_range_A, vcl_range_A_trans,
644  vcl_range_B, vcl_range_B_trans,
645  vcl_C);
646  if (ret != EXIT_SUCCESS)
647  return ret;
648 
649 
650  //
651  //
652  std::cout << "Now using A=range, B=range, C=range" << std::endl;
653  ret = test_prod<NumericT>(epsilon,
654  A, A_trans, B, B_trans, C,
655  vcl_range_A, vcl_range_A_trans,
656  vcl_range_B, vcl_range_B_trans,
657  vcl_range_C);
658  if (ret != EXIT_SUCCESS)
659  return ret;
660 
661  //
662  //
663  std::cout << "Now using A=range, B=range, C=slice" << std::endl;
664  ret = test_prod<NumericT>(epsilon,
665  A, A_trans, B, B_trans, C,
666  vcl_range_A, vcl_range_A_trans,
667  vcl_range_B, vcl_range_B_trans,
668  vcl_slice_C);
669  if (ret != EXIT_SUCCESS)
670  return ret;
671 
672 
673  //
674  //
675  std::cout << "Now using A=range, B=slice, C=matrix" << std::endl;
676  ret = test_prod<NumericT>(epsilon,
677  A, A_trans, B, B_trans, C,
678  vcl_range_A, vcl_range_A_trans,
679  vcl_slice_B, vcl_slice_B_trans,
680  vcl_C);
681  if (ret != EXIT_SUCCESS)
682  return ret;
683 
684 
685  //
686  //
687  std::cout << "Now using A=range, B=slice, C=range" << std::endl;
688  ret = test_prod<NumericT>(epsilon,
689  A, A_trans, B, B_trans, C,
690  vcl_range_A, vcl_range_A_trans,
691  vcl_slice_B, vcl_slice_B_trans,
692  vcl_range_C);
693  if (ret != EXIT_SUCCESS)
694  return ret;
695 
696  //
697  //
698  std::cout << "Now using A=range, B=slice, C=slice" << std::endl;
699  ret = test_prod<NumericT>(epsilon,
700  A, A_trans, B, B_trans, C,
701  vcl_range_A, vcl_range_A_trans,
702  vcl_slice_B, vcl_slice_B_trans,
703  vcl_slice_C);
704  if (ret != EXIT_SUCCESS)
705  return ret;
706 
707 
708 
712 
713  //
714  //
715  std::cout << "Now using A=slice, B=matrix, C=matrix" << std::endl;
716  ret = test_prod<NumericT>(epsilon,
717  A, A_trans, B, B_trans, C,
718  vcl_slice_A, vcl_slice_A_trans,
719  vcl_B, vcl_B_trans,
720  vcl_C);
721  if (ret != EXIT_SUCCESS)
722  return ret;
723 
724 
725  //
726  //
727  std::cout << "Now using A=slice, B=matrix, C=range" << std::endl;
728  ret = test_prod<NumericT>(epsilon,
729  A, A_trans, B, B_trans, C,
730  vcl_slice_A, vcl_slice_A_trans,
731  vcl_B, vcl_B_trans,
732  vcl_range_C);
733  if (ret != EXIT_SUCCESS)
734  return ret;
735 
736  //
737  //
738  std::cout << "Now using A=slice, B=matrix, C=slice" << std::endl;
739  ret = test_prod<NumericT>(epsilon,
740  A, A_trans, B, B_trans, C,
741  vcl_slice_A, vcl_slice_A_trans,
742  vcl_B, vcl_B_trans,
743  vcl_slice_C);
744  if (ret != EXIT_SUCCESS)
745  return ret;
746 
747 
748 
749  //
750  //
751  std::cout << "Now using A=slice, B=range, C=matrix" << std::endl;
752  ret = test_prod<NumericT>(epsilon,
753  A, A_trans, B, B_trans, C,
754  vcl_slice_A, vcl_slice_A_trans,
755  vcl_range_B, vcl_range_B_trans,
756  vcl_C);
757  if (ret != EXIT_SUCCESS)
758  return ret;
759 
760 
761  //
762  //
763  std::cout << "Now using A=slice, B=range, C=range" << std::endl;
764  ret = test_prod<NumericT>(epsilon,
765  A, A_trans, B, B_trans, C,
766  vcl_slice_A, vcl_slice_A_trans,
767  vcl_range_B, vcl_range_B_trans,
768  vcl_range_C);
769  if (ret != EXIT_SUCCESS)
770  return ret;
771 
772  //
773  //
774  std::cout << "Now using A=slice, B=range, C=slice" << std::endl;
775  ret = test_prod<NumericT>(epsilon,
776  A, A_trans, B, B_trans, C,
777  vcl_slice_A, vcl_slice_A_trans,
778  vcl_range_B, vcl_range_B_trans,
779  vcl_slice_C);
780  if (ret != EXIT_SUCCESS)
781  return ret;
782 
783 
784  //
785  //
786  std::cout << "Now using A=slice, B=slice, C=matrix" << std::endl;
787  ret = test_prod<NumericT>(epsilon,
788  A, A_trans, B, B_trans, C,
789  vcl_slice_A, vcl_slice_A_trans,
790  vcl_slice_B, vcl_slice_B_trans,
791  vcl_C);
792  if (ret != EXIT_SUCCESS)
793  return ret;
794 
795 
796  //
797  //
798  std::cout << "Now using A=slice, B=slice, C=range" << std::endl;
799  ret = test_prod<NumericT>(epsilon,
800  A, A_trans, B, B_trans, C,
801  vcl_slice_A, vcl_slice_A_trans,
802  vcl_slice_B, vcl_slice_B_trans,
803  vcl_range_C);
804  if (ret != EXIT_SUCCESS)
805  return ret;
806 
807  //
808  //
809  std::cout << "Now using A=slice, B=slice, C=slice" << std::endl;
810  ret = test_prod<NumericT>(epsilon,
811  A, A_trans, B, B_trans, C,
812  vcl_slice_A, vcl_slice_A_trans,
813  vcl_slice_B, vcl_slice_B_trans,
814  vcl_slice_C);
815  if (ret != EXIT_SUCCESS)
816  return ret;
817 
818 
819  return ret;
820 
821 }
822 
823 
824 //
825 // Control functions
826 //
827 
828 
829 
830 template< typename NumericT, typename Epsilon >
831 int test(Epsilon const& epsilon)
832 {
833  int ret;
834 
835  std::cout << "///////////////////////////////////////" << std::endl;
836  std::cout << "/// Now testing A=row, B=row, C=row ///" << std::endl;
837  std::cout << "///////////////////////////////////////" << std::endl;
838  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::row_major>(epsilon);
839  if (ret != EXIT_SUCCESS)
840  return ret;
841 
842  std::cout << "///////////////////////////////////////" << std::endl;
843  std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl;
844  std::cout << "///////////////////////////////////////" << std::endl;
845  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::column_major>(epsilon);
846  if (ret != EXIT_SUCCESS)
847  return ret;
848 
849  std::cout << "///////////////////////////////////////" << std::endl;
850  std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl;
851  std::cout << "///////////////////////////////////////" << std::endl;
852  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::row_major>(epsilon);
853  if (ret != EXIT_SUCCESS)
854  return ret;
855 
856  std::cout << "///////////////////////////////////////" << std::endl;
857  std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl;
858  std::cout << "///////////////////////////////////////" << std::endl;
859  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::column_major>(epsilon);
860  if (ret != EXIT_SUCCESS)
861  return ret;
862 
863  std::cout << "///////////////////////////////////////" << std::endl;
864  std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl;
865  std::cout << "///////////////////////////////////////" << std::endl;
866  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::row_major>(epsilon);
867  if (ret != EXIT_SUCCESS)
868  return ret;
869 
870  std::cout << "///////////////////////////////////////" << std::endl;
871  std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl;
872  std::cout << "///////////////////////////////////////" << std::endl;
873  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::column_major>(epsilon);
874  if (ret != EXIT_SUCCESS)
875  return ret;
876 
877  std::cout << "///////////////////////////////////////" << std::endl;
878  std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl;
879  std::cout << "///////////////////////////////////////" << std::endl;
880  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::row_major>(epsilon);
881  if (ret != EXIT_SUCCESS)
882  return ret;
883 
884  std::cout << "///////////////////////////////////////" << std::endl;
885  std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl;
886  std::cout << "///////////////////////////////////////" << std::endl;
887  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::column_major>(epsilon);
888  if (ret != EXIT_SUCCESS)
889  return ret;
890 
891 
892 
893  return ret;
894 }
895 
896 //
897 // -------------------------------------------------------------
898 //
899 int main()
900 {
901  std::cout << std::endl;
902  std::cout << "----------------------------------------------" << std::endl;
903  std::cout << "----------------------------------------------" << std::endl;
904  std::cout << "## Test :: BLAS 3 routines" << std::endl;
905  std::cout << "----------------------------------------------" << std::endl;
906  std::cout << "----------------------------------------------" << std::endl;
907  std::cout << std::endl;
908 
909  int retval = EXIT_SUCCESS;
910 
911  std::cout << std::endl;
912  std::cout << "----------------------------------------------" << std::endl;
913  std::cout << std::endl;
914  {
915  typedef float NumericT;
916  NumericT epsilon = NumericT(1.0E-3);
917  std::cout << "# Testing setup:" << std::endl;
918  std::cout << " eps: " << epsilon << std::endl;
919  std::cout << " numeric: float" << std::endl;
920  retval = test<NumericT>(epsilon);
921  if ( retval == EXIT_SUCCESS )
922  std::cout << "# Test passed" << std::endl;
923  else
924  return retval;
925  }
926  std::cout << std::endl;
927  std::cout << "----------------------------------------------" << std::endl;
928  std::cout << std::endl;
929 #ifdef VIENNACL_WITH_OPENCL
931 #endif
932  {
933  {
934  typedef double NumericT;
935  NumericT epsilon = 1.0E-11;
936  std::cout << "# Testing setup:" << std::endl;
937  std::cout << " eps: " << epsilon << std::endl;
938  std::cout << " numeric: double" << std::endl;
939  retval = test<NumericT>(epsilon);
940  if ( retval == EXIT_SUCCESS )
941  std::cout << "# Test passed" << std::endl;
942  else
943  return retval;
944  }
945  std::cout << std::endl;
946  std::cout << "----------------------------------------------" << std::endl;
947  std::cout << std::endl;
948  }
949 
950  std::cout << std::endl;
951  std::cout << "------- Test completed --------" << std::endl;
952  std::cout << std::endl;
953 
954 
955  return retval;
956 }
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:442
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
Some helper routines for reading/writing/printing scheduler expressions.
A tag class representing assignment.
Definition: forwards.h:80
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A dense matrix class.
Definition: forwards.h:374
void execute(statement const &s)
Definition: execute.hpp:279
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
A tag class representing inplace addition.
Definition: forwards.h:82
viennacl::vector< float > v1
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
int test_prod(Epsilon const &epsilon, ReferenceMatrixTypeA const &A, ReferenceMatrixTypeA const &A_trans, ReferenceMatrixTypeB const &B, ReferenceMatrixTypeB const &B_trans, ReferenceMatrixTypeC &C, MatrixTypeA const &vcl_A, MatrixTypeA const &vcl_A_trans, MatrixTypeB const &vcl_B, MatrixTypeB const &vcl_B_trans, MatrixTypeC &vcl_C)
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
Implementations of dense direct solvers are found here.
Proxy classes for matrices.
A tag class representing inplace subtraction.
Definition: forwards.h:84
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
float ScalarType
Definition: fft_1d.cpp:42
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
int test(Epsilon const &epsilon)
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:439
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:504
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
A slice class that refers to a strided selection of indices, described by a start index, a stride, and a size.
Definition: forwards.h:428
Implementation of the ViennaCL scalar class.