ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
generator_blas3.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2012, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 #ifndef NDEBUG
19  #define NDEBUG
20 #endif
21 
22 //
23 // *** System
24 //
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <limits>
#include <string>
#include <vector>
26 
27 //
28 // *** Boost
29 //
30 #include <boost/numeric/ublas/io.hpp>
31 #include <boost/numeric/ublas/triangular.hpp>
32 #include <boost/numeric/ublas/matrix_sparse.hpp>
33 #include <boost/numeric/ublas/matrix.hpp>
34 #include <boost/numeric/ublas/matrix_proxy.hpp>
35 #include <boost/numeric/ublas/lu.hpp>
36 #include <boost/numeric/ublas/io.hpp>
37 
38 //
39 // *** ViennaCL
40 //
41 //#define VIENNACL_DEBUG_ALL
42 //#define VIENNACL_DEBUG_BUILD
43 #define VIENNACL_HAVE_UBLAS 1
44 #include "viennacl/scalar.hpp"
45 #include "viennacl/matrix.hpp"
47 #include "viennacl/vector.hpp"
48 #include "viennacl/linalg/prod.hpp"
52 #include "viennacl/device_specific/code_generator.hpp"
53 #include "list"
54 //
55 // -------------------------------------------------------------
56 //
57 using namespace boost::numeric;
58 //
59 // -------------------------------------------------------------
60 //
/** @brief Compile-time floor(log2(N)) for N >= 1.
*
* N == 0 is mapped to 0 so that the recursion terminates for every argument.
*/
template<unsigned int N>
struct static_floor_log2
{
  enum { value = 1 + static_floor_log2<N / 2>::value };
};

template<>
struct static_floor_log2<1>
{
  enum { value = 0 };
};

template<>
struct static_floor_log2<0>
{
  enum { value = 0 };
};

static const unsigned int min_large_block_size = 32;
static const unsigned int max_large_block_size = 128;
// floor(log2(max/min)) + 1, i.e. how many of the sizes min, 2*min, 4*min, ... do not exceed max.
// Evaluated in integer arithmetic at compile time: no floating-point rounding, no <cmath> dependency.
static const unsigned int n_large_blocks =
    static_cast<unsigned int>(static_floor_log2<max_large_block_size / min_large_block_size>::value) + 1u;

static const unsigned int min_alignment = 1;
static const unsigned int max_alignment = 8;

static const unsigned int max_small_block_size = max_alignment;
69 
70 //
71 // -------------------------------------------------------------
72 
73 template<typename ScalarType>
75 {
77  if (s1 != s2)
78  return (s1 - s2) / std::max(fabs(s1), fabs(s2));
79  return 0;
80 }
81 
82 template<typename ScalarType, typename VCLMatrixType>
83 ScalarType diff(ublas::matrix<ScalarType> & mat1, VCLMatrixType & mat2)
84 {
85  ublas::matrix<ScalarType> mat2_cpu(mat2.size1(), mat2.size2());
86  viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8)
87  viennacl::copy(mat2, mat2_cpu);
88  double ret = 0;
89  double act = 0;
90 
91  for (unsigned int i = 0; i < mat2_cpu.size1(); ++i)
92  {
93  for (unsigned int j = 0; j < mat2_cpu.size2(); ++j)
94  {
95  act = fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( fabs(mat2_cpu(i, j)), fabs(mat1(i,j)) );
96  if (act > ret)
97  ret = act;
98  }
99  }
100  //std::cout << ret << std::endl;
101  return ScalarType(ret);
102 }
103 
104 
105 
106 
107 
108 
109 //
110 // Part 1: Matrix-matrix multiplications
111 //
112 
113 
114 template< typename NumericT, typename Epsilon,
115  typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC,
116  typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
117 int test_prod(Epsilon const& epsilon,
118 
119  ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans,
120  ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans,
121  ReferenceMatrixTypeC & C,
122 
123  MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans,
124  MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans,
125  MatrixTypeC & vcl_C
126  )
127 {
128  int retval = EXIT_SUCCESS;
129  NumericT act_diff = 0;
130  NumericT alpha = NumericT(3.14);
131  NumericT beta = NumericT(4.51);
132 
133 std::cout << "Testing C = alpha*prod(A,B) + beta*C ..." << std::endl;
134 {
135  C = alpha*viennacl::linalg::prod(A, B) + beta*C;
136 
137  viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), alpha*viennacl::linalg::prod(vcl_A,vcl_B)+beta*vcl_C);
138  viennacl::device_specific::generate_enqueue_statement(statement, statement.array()[0]);
140  act_diff = std::fabs(diff(C, vcl_C));
141  if ( act_diff > epsilon )
142  {
143  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
144  std::cout << " diff: " << act_diff << std::endl;
145  retval = EXIT_FAILURE;
146  }
147  else
148  std::cout << "Test C = A * B passed!" << std::endl;
149 }
150 
151 
152  std::cout << "Testing C = alpha*trans(A) * B + beta*C ..." << std::endl;
153  {
154  C = alpha*boost::numeric::ublas::prod(trans(A_trans), B) + beta*C;
155  viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), alpha*viennacl::linalg::prod(trans(vcl_A_trans),vcl_B) + beta*vcl_C);
156  viennacl::device_specific::generate_enqueue_statement(statement, statement.array()[0]);
158  act_diff = std::fabs(diff(C, vcl_C));
159  if ( act_diff > epsilon )
160  {
161  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
162  std::cout << " diff: " << act_diff << std::endl;
163  retval = EXIT_FAILURE;
164  }
165  else std::cout << "Test C = trans(A) * B passed!" << std::endl;
166  }
167 
168 std::cout << "Testing C = alpha*A * trans(B) + beta*C ..." << std::endl;
169 {
170  C = boost::numeric::ublas::prod(A,trans(B_trans)) + beta*C;
171  viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A,trans(vcl_B_trans)) + beta*vcl_C);
172  viennacl::device_specific::generate_enqueue_statement(statement, statement.array()[0]);
174  act_diff = std::fabs(diff(C, vcl_C));
175  if ( act_diff > epsilon )
176  {
177  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
178  std::cout << " diff: " << act_diff << std::endl;
179  retval = EXIT_FAILURE;
180  }
181  else std::cout << "Test C = A * trans(B) passed!" << std::endl;
182 }
183 
184 std::cout << "Testing C = alpha*trans(A) * trans(B) + beta*C ..." << std::endl;
185 {
186  C = boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)) + beta*C;
187  viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans),trans(vcl_B_trans)) + beta*vcl_C);
188  viennacl::device_specific::generate_enqueue_statement(statement, statement.array()[0]);
190  act_diff = std::fabs(diff(C, vcl_C));
191  if ( act_diff > epsilon )
192  {
193  std::cout << "# Error at operation: matrix-matrix product" << std::endl;
194  std::cout << " diff: " << act_diff << std::endl;
195  retval = EXIT_FAILURE;
196  }
197  else std::cout << "Test C = trans(A) * trans(B) passed!" << std::endl;
198 }
199 
200 
201  return retval;
202 }
203 
204 template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon>
205 int test_prod(Epsilon const& epsilon)
206 {
207  int ret;
208 
209  std::size_t matrix_size1 = 2*max_large_block_size;
210  std::size_t matrix_size2 = 3*max_large_block_size;
211  std::size_t matrix_size3 = 4*max_large_block_size;
212 
213  // --------------------------------------------------------------------------
214 
215  // ublas reference:
216  ublas::matrix<NumericT> A(matrix_size1, matrix_size2);
217  ublas::matrix<NumericT> B(matrix_size2, matrix_size3);
218  ublas::matrix<NumericT> C(matrix_size1, matrix_size3);
219 
220  //fill A and B:
221  for (std::size_t i = 0; i < A.size1(); ++i)
222  for (std::size_t j = 0; j < A.size2(); ++j)
223  A(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
224  for (std::size_t i = 0; i < B.size1(); ++i)
225  for (std::size_t j = 0; j < B.size2(); ++j)
226  B(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
227  for (std::size_t i = 0; i < C.size1(); ++i)
228  for (std::size_t j = 0; j < C.size2(); ++j)
229  C(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
230 
231 
232  ublas::matrix<NumericT> A_trans = trans(A);
233  ublas::matrix<NumericT> B_trans = trans(B);
234 
235  //
236  // ViennaCL objects
237  //
238 
239 
240 
241  // A
242  viennacl::matrix<NumericT, F_A> vcl_A(matrix_size1, matrix_size2);
243  viennacl::copy(A, vcl_A);
244 
245  // A^T
246  viennacl::matrix<NumericT, F_A> vcl_A_trans(matrix_size2, matrix_size1);
247  viennacl::copy(A_trans, vcl_A_trans);
248 
249  // B
250  viennacl::matrix<NumericT, F_B> vcl_B(matrix_size2, matrix_size3);
251  viennacl::copy(B, vcl_B);
252 
253  // B^T
254  viennacl::matrix<NumericT, F_B> vcl_B_trans(matrix_size3, matrix_size2);
255  viennacl::copy(B_trans, vcl_B_trans);
256 
257  // C
258  viennacl::matrix<NumericT, F_C> vcl_C(matrix_size1, matrix_size3);
259  viennacl::copy(C, vcl_C);
260 
261  std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl;
262 
266 
267  //
268  //
269  std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl;
270  ret = test_prod<NumericT>(epsilon,
271  A, A_trans, B, B_trans, C,
272  vcl_A, vcl_A_trans,
273  vcl_B, vcl_B_trans,
274  vcl_C);
275  if (ret != EXIT_SUCCESS)
276  return ret;
277 
278  return EXIT_SUCCESS;
279 }
280 
281 template< typename NumericT, typename Epsilon >
282 int test(Epsilon const& epsilon)
283 {
284  int ret;
285 
286  std::cout << "///////////////////////////////////////" << std::endl;
287  std::cout << "/// Now testing A=row, B=row, C=row ///" << std::endl;
288  std::cout << "///////////////////////////////////////" << std::endl;
289  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::row_major>(epsilon);
290  if (ret != EXIT_SUCCESS)
291  return ret;
292 
293  std::cout << "///////////////////////////////////////" << std::endl;
294  std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl;
295  std::cout << "///////////////////////////////////////" << std::endl;
296  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::row_major>(epsilon);
297  if (ret != EXIT_SUCCESS)
298  return ret;
299 
300  std::cout << "///////////////////////////////////////" << std::endl;
301  std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl;
302  std::cout << "///////////////////////////////////////" << std::endl;
303  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::row_major>(epsilon);
304  if (ret != EXIT_SUCCESS)
305  return ret;
306 
307  std::cout << "///////////////////////////////////////" << std::endl;
308  std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl;
309  std::cout << "///////////////////////////////////////" << std::endl;
310  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::row_major>(epsilon);
311  if (ret != EXIT_SUCCESS)
312  return ret;
313 
314 
315  std::cout << "///////////////////////////////////////" << std::endl;
316  std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl;
317  std::cout << "///////////////////////////////////////" << std::endl;
318  ret = test_prod<NumericT, viennacl::row_major, viennacl::row_major, viennacl::column_major>(epsilon);
319  if (ret != EXIT_SUCCESS)
320  return ret;
321 
322  std::cout << "///////////////////////////////////////" << std::endl;
323  std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl;
324  std::cout << "///////////////////////////////////////" << std::endl;
325  ret = test_prod<NumericT, viennacl::column_major, viennacl::row_major, viennacl::column_major>(epsilon);
326  if (ret != EXIT_SUCCESS)
327  return ret;
328 
329  std::cout << "///////////////////////////////////////" << std::endl;
330  std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl;
331  std::cout << "///////////////////////////////////////" << std::endl;
332  ret = test_prod<NumericT, viennacl::row_major, viennacl::column_major, viennacl::column_major>(epsilon);
333  if (ret != EXIT_SUCCESS)
334  return ret;
335 
336  std::cout << "///////////////////////////////////////" << std::endl;
337  std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl;
338  std::cout << "///////////////////////////////////////" << std::endl;
339  ret = test_prod<NumericT, viennacl::column_major, viennacl::column_major, viennacl::column_major>(epsilon);
340  if (ret != EXIT_SUCCESS)
341  return ret;
342 
343 
344 
345  return ret;
346 }
347 
348 int main(int argc, char* argv[])
349 {
350  std::vector<std::string> args(argv,argv+argc);
351  int retval = EXIT_SUCCESS;
352 
353  typedef std::vector<viennacl::ocl::device> devices_type;
354 
355  //platforms_type platforms = viennacl::ocl::get_platforms();
356  //size_t num_platforms = platforms.size();
357 
358  devices_type dev = viennacl::ocl::current_context().devices();
359  for (devices_type::iterator it = dev.begin(); it != dev.end(); ++it){
360  std::cout << std::endl;
361  std::cout << "----------------------------------------------" << std::endl;
362  std::cout << "----------------------------------------------" << std::endl;
363  std::cout << "## Test :: Generated BLAS 3 routines" << std::endl;
364  std::cout << "----------------------------------------------" << std::endl;
365  std::cout << "----------------------------------------------" << std::endl;
366  std::cout << std::endl;
367 
368  int retval = EXIT_SUCCESS;
369 
370  srand(static_cast<unsigned int>(time(NULL)));
371 
372  std::cout << std::endl;
373  std::cout << "----------------------------------------------" << std::endl;
374  std::cout << std::endl;
375  {
376  typedef float NumericT;
377  NumericT epsilon = NumericT(1.0E-3);
378  std::cout << "# Testing setup:" << std::endl;
379 
380  std::cout << viennacl::ocl::current_device().info() << std::endl;
381 
382  std::cout << " eps: " << epsilon << std::endl;
383  std::cout << " numeric: float" << std::endl;
384  retval = test<NumericT>(epsilon);
385  if ( retval == EXIT_SUCCESS )
386  std::cout << "# Test passed" << std::endl;
387  else
388  return retval;
389  }
390  std::cout << std::endl;
391  std::cout << "----------------------------------------------" << std::endl;
392  std::cout << std::endl;
393  #ifdef VIENNACL_WITH_OPENCL
395  #endif
396  {
397  {
398  typedef double NumericT;
399  NumericT epsilon = 1.0E-11;
400  std::cout << "# Testing setup:" << std::endl;
401  std::cout << " eps: " << epsilon << std::endl;
402  std::cout << " numeric: double" << std::endl;
403  retval = test<NumericT>(epsilon);
404  if ( retval == EXIT_SUCCESS )
405  std::cout << "# Test passed" << std::endl;
406  else
407  return retval;
408  }
409  std::cout << std::endl;
410  std::cout << "----------------------------------------------" << std::endl;
411  std::cout << std::endl;
412  }
413 
414  std::cout << std::endl;
415  std::cout << "------- Test completed --------" << std::endl;
416  std::cout << std::endl;
417  }
418 
419 
420 
421 
422  return retval;
423 }
424 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
Definition: backend.hpp:213
A tag class representing assignment.
Definition: forwards.h:80
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
A dense matrix class.
Definition: forwards.h:374
container_type const & array() const
Definition: forwards.h:530
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
int main(int argc, char *argv[])
int test(Epsilon const &epsilon)
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
Implementations of dense direct solvers are found here.
Proxy classes for matrices.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:504
int test_prod(Epsilon const &epsilon, ReferenceMatrixTypeA const &A, ReferenceMatrixTypeA const &A_trans, ReferenceMatrixTypeB const &B, ReferenceMatrixTypeB const &B_trans, ReferenceMatrixTypeC &C, MatrixTypeA const &vcl_A, MatrixTypeA const &vcl_A_trans, MatrixTypeB const &vcl_B, MatrixTypeB const &vcl_B_trans, MatrixTypeC &vcl_C)
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Definition: context.hpp:105
Implementation of the ViennaCL scalar class.