ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
blas3.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
25 //disable debug mechanisms to have a fair comparison with ublas:
26 #ifndef NDEBUG
27  #define NDEBUG
28 #endif
29 
// System headers
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>
32 
33 
34 // ublas headers
35 #include <boost/numeric/ublas/io.hpp>
36 #include <boost/numeric/ublas/triangular.hpp>
37 #include <boost/numeric/ublas/matrix_sparse.hpp>
38 #include <boost/numeric/ublas/matrix.hpp>
39 #include <boost/numeric/ublas/matrix_proxy.hpp>
40 #include <boost/numeric/ublas/lu.hpp>
41 #include <boost/numeric/ublas/io.hpp>
42 
43 
44 // Must be set if you want to use ViennaCL algorithms on ublas objects
45 #define VIENNACL_WITH_UBLAS 1
46 
47 
48 // ViennaCL headers
49 #include "viennacl/scalar.hpp"
50 #include "viennacl/vector.hpp"
51 #include "viennacl/matrix.hpp"
52 #include "viennacl/linalg/prod.hpp"
53 
54 
55 // Some helper functions for this tutorial:
56 #include "Random.hpp"
57 #include "vector-io.hpp"
58 
59 #include "../benchmarks/benchmark-utils.hpp"
60 
61 #define BLAS3_MATRIX_SIZE 400
62 
63 using namespace boost::numeric;
64 
65 
#ifndef VIENNACL_WITH_OPENCL
/**
*  Placeholder for the OpenCL device list when the tutorial is built without OpenCL.
*  main() only queries the number of entries, so reporting a single entry makes
*  the per-device loop execute exactly once.
**/
struct dummy
{
  /** Number of pseudo-devices: always one. */
  std::size_t size() const
  {
    return static_cast<std::size_t>(1);
  }
};
#endif
76 
80 int main()
81 {
82  typedef float ScalarType;
83 
84  Timer timer;
85  double exec_time;
86 
90  ublas::matrix<ScalarType> ublas_A(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
91  ublas::matrix<ScalarType, ublas::column_major> ublas_B(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
92  ublas::matrix<ScalarType> ublas_C(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
93  ublas::matrix<ScalarType> ublas_C1(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
94 
95  for (unsigned int i = 0; i < ublas_A.size1(); ++i)
96  for (unsigned int j = 0; j < ublas_A.size2(); ++j)
97  ublas_A(i,j) = random<ScalarType>();
98 
99  for (unsigned int i = 0; i < ublas_B.size1(); ++i)
100  for (unsigned int j = 0; j < ublas_B.size2(); ++j)
101  ublas_B(i,j) = random<ScalarType>();
102 
106  //viennacl::ocl::set_context_device_type(0, viennacl::ocl::gpu_tag()); //uncomment this is you wish to use GPUs only
110 
116  std::cout << "--- Computing matrix-matrix product using ublas ---" << std::endl;
117  timer.start();
118  ublas_C = ublas::prod(ublas_A, ublas_B);
119  exec_time = timer.get();
120  std::cout << " - Execution time: " << exec_time << std::endl;
121 
126  std::cout << std::endl << "--- Computing matrix-matrix product on each available compute device using ViennaCL ---" << std::endl;
127 #ifdef VIENNACL_WITH_OPENCL
128  std::vector<viennacl::ocl::device> devices = viennacl::ocl::current_context().devices();
129 #else
130  dummy devices;
131 #endif
132 
133  for (std::size_t device_id=0; device_id<devices.size(); ++device_id)
134  {
135 #ifdef VIENNACL_WITH_OPENCL
136  viennacl::ocl::current_context().switch_device(devices[device_id]);
137  std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl;
138 #endif
139 
143  viennacl::copy(ublas_A, vcl_A);
144  viennacl::copy(ublas_B, vcl_B);
145  vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
147  timer.start();
148  vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
150  exec_time = timer.get();
151  std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl;
152 
156  viennacl::copy(vcl_C, ublas_C1);
157 
158  std::cout << " - Checking result... ";
159  bool check_ok = true;
160  for (std::size_t i = 0; i < ublas_A.size1(); ++i)
161  {
162  for (std::size_t j = 0; j < ublas_A.size2(); ++j)
163  {
164  if ( std::fabs(ublas_C1(i,j) - ublas_C(i,j)) / ublas_C(i,j) > 1e-4 )
165  {
166  check_ok = false;
167  break;
168  }
169  }
170  if (!check_ok)
171  break;
172  }
173  if (check_ok)
174  std::cout << "[OK]" << std::endl << std::endl;
175  else
176  std::cout << "[FAILED]" << std::endl << std::endl;
177 
178  }
179 
183  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
184  return EXIT_SUCCESS;
185 }
186 
void switch_device(vcl_size_t i)
Switches the current device to the i-th device in this context.
Definition: context.hpp:118
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
Definition: backend.hpp:213
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
void start()
A dense matrix class.
Definition: forwards.h:374
double get() const
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
int main()
Definition: blas3.cpp:80
std::string name() const
Device name string.
Definition: device.hpp:566
#define BLAS3_MATRIX_SIZE
Definition: blas3range.cpp:65
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Definition: context.hpp:105
Implementation of the ViennaCL scalar class.