ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
solver.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 /*
19 *
20 * Benchmark: Iterative solver tests (solver.cpp and solver.cu are identical, the latter being required for compilation using CUDA nvcc)
21 *
22 */
23 
24 
25 #ifndef NDEBUG
26  #define NDEBUG
27 #endif
28 
29 #include <boost/numeric/ublas/matrix_sparse.hpp>
30 #include <boost/numeric/ublas/io.hpp>
31 #include <boost/numeric/ublas/operation_sparse.hpp>
32 
33 #define VIENNACL_WITH_UBLAS 1
34 
35 #include "viennacl/scalar.hpp"
36 #include "viennacl/vector.hpp"
39 #include "viennacl/ell_matrix.hpp"
40 #include "viennacl/hyb_matrix.hpp"
41 #include "viennacl/context.hpp"
42 
43 #include "viennacl/linalg/cg.hpp"
46 
47 #include "viennacl/linalg/ilu.hpp"
51 
52 #ifdef VIENNACL_WITH_OPENCL
54 #endif
55 
57 
58 
59 #include <iostream>
60 #include <vector>
61 #include "benchmark-utils.hpp"
62 
63 
64 using namespace boost::numeric;
65 
66 #define BENCHMARK_RUNS 1
67 
68 
69 template<typename ScalarType>
70 ScalarType diff_inf(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
71 {
72  ublas::vector<ScalarType> v2_cpu(v2.size());
73  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
74 
75  for (unsigned int i=0;i<v1.size(); ++i)
76  {
77  if ( std::max( fabs(v2_cpu[i]), fabs(v1[i]) ) > 0 )
78  v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) );
79  else
80  v2_cpu[i] = 0.0;
81  }
82 
83  return norm_inf(v2_cpu);
84 }
85 
86 template<typename ScalarType>
87 ScalarType diff_2(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
88 {
89  ublas::vector<ScalarType> v2_cpu(v2.size());
90  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
91 
92  return norm_2(v1 - v2_cpu) / norm_2(v1);
93 }
94 
95 
96 template<typename MatrixType, typename VectorType, typename SolverTag, typename PrecondTag>
97 void run_solver(MatrixType const & matrix, VectorType const & rhs, VectorType const & ref_result, SolverTag const & solver, PrecondTag const & precond, long ops)
98 {
99  Timer timer;
100  VectorType result(rhs);
101  VectorType residual(rhs);
103 
104  timer.start();
105  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
106  {
107  result = viennacl::linalg::solve(matrix, rhs, solver, precond);
108  }
110  double exec_time = timer.get();
111  std::cout << "Exec. time: " << exec_time << std::endl;
112  std::cout << "Est. "; printOps(static_cast<double>(ops), exec_time / BENCHMARK_RUNS);
113  residual -= viennacl::linalg::prod(matrix, result);
114  std::cout << "Relative residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;
115  std::cout << "Estimated rel. residual: " << solver.error() << std::endl;
116  std::cout << "Iterations: " << solver.iters() << std::endl;
117  result -= ref_result;
118  std::cout << "Relative deviation from result: " << viennacl::linalg::norm_2(result) / viennacl::linalg::norm_2(ref_result) << std::endl;
119 }
120 
121 
122 template<typename ScalarType>
124 {
125  Timer timer;
126  double exec_time;
127 
128  ScalarType std_factor1 = static_cast<ScalarType>(3.1415);
129  ScalarType std_factor2 = static_cast<ScalarType>(42.0);
130  viennacl::scalar<ScalarType> vcl_factor1(std_factor1, ctx);
131  viennacl::scalar<ScalarType> vcl_factor2(std_factor2, ctx);
132 
133  ublas::vector<ScalarType> ublas_vec1;
134  ublas::vector<ScalarType> ublas_vec2;
135  ublas::vector<ScalarType> ublas_result;
136  unsigned int solver_iters = 100;
137  unsigned int solver_krylov_dim = 20;
138  double solver_tolerance = 1e-6;
139 
140  ublas::compressed_matrix<ScalarType> ublas_matrix;
141  if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx"))
142  {
143  std::cout << "Error reading Matrix file" << std::endl;
144  return EXIT_FAILURE;
145  }
146  //unsigned int cg_mat_size = cg_mat.size();
147  std::cout << "done reading matrix" << std::endl;
148 
149  ublas_result = ublas::scalar_vector<ScalarType>(ublas_matrix.size1(), ScalarType(1.0));
150  ublas_vec1 = ublas::prod(ublas_matrix, ublas_result);
151  ublas_vec2 = ublas_vec1;
152 
153  viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
154  viennacl::coordinate_matrix<ScalarType> vcl_coordinate_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
155  viennacl::ell_matrix<ScalarType> vcl_ell_matrix(ctx);
156  viennacl::hyb_matrix<ScalarType> vcl_hyb_matrix(ctx);
157 
158  viennacl::vector<ScalarType> vcl_vec1(ublas_vec1.size(), ctx);
159  viennacl::vector<ScalarType> vcl_vec2(ublas_vec1.size(), ctx);
160  viennacl::vector<ScalarType> vcl_result(ublas_vec1.size(), ctx);
161 
162 
163  //cpu to gpu:
164  viennacl::copy(ublas_matrix, vcl_compressed_matrix);
165  viennacl::copy(ublas_matrix, vcl_coordinate_matrix);
166  viennacl::copy(ublas_matrix, vcl_ell_matrix);
167  viennacl::copy(ublas_matrix, vcl_hyb_matrix);
168  viennacl::copy(ublas_vec1, vcl_vec1);
169  viennacl::copy(ublas_vec2, vcl_vec2);
170  viennacl::copy(ublas_result, vcl_result);
171 
172 
173  std::cout << "------- Jacobi preconditioner ----------" << std::endl;
177 
178  std::cout << "------- Row-Scaling preconditioner ----------" << std::endl;
182 
186  std::cout << "------- ICHOL0 on CPU (ublas) ----------" << std::endl;
187 
188  timer.start();
190  exec_time = timer.get();
191  std::cout << "Setup time: " << exec_time << std::endl;
192 
193  timer.start();
194  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
195  ublas_ichol0.apply(ublas_vec1);
196  exec_time = timer.get();
197  std::cout << "ublas time: " << exec_time << std::endl;
198 
199  std::cout << "------- ICHOL0 with ViennaCL ----------" << std::endl;
200 
201  timer.start();
203  exec_time = timer.get();
204  std::cout << "Setup time: " << exec_time << std::endl;
205 
207  timer.start();
208  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
209  vcl_ichol0.apply(vcl_vec1);
211  exec_time = timer.get();
212  std::cout << "ViennaCL time: " << exec_time << std::endl;
213 
214 
218  std::cout << "------- ILU0 on with ublas ----------" << std::endl;
219 
220  timer.start();
222  exec_time = timer.get();
223  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
224  timer.start();
225  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
226  ublas_ilu0.apply(ublas_vec1);
227  exec_time = timer.get();
228  std::cout << "ublas ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
229 
230 
231  std::cout << "------- ILU0 with ViennaCL ----------" << std::endl;
232 
233  timer.start();
235  exec_time = timer.get();
236  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
237 
239  timer.start();
240  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
241  vcl_ilu0.apply(vcl_vec1);
243  exec_time = timer.get();
244  std::cout << "ViennaCL ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
245 
246  timer.start();
247  viennacl::linalg::ilu0_tag ilu0_with_level_scheduling; ilu0_with_level_scheduling.use_level_scheduling(true);
248  viennacl::linalg::ilu0_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilu0_level_scheduling(vcl_compressed_matrix, ilu0_with_level_scheduling);
249  exec_time = timer.get();
250  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
251 
253  timer.start();
254  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
255  vcl_ilu0_level_scheduling.apply(vcl_vec1);
257  exec_time = timer.get();
258  std::cout << "ViennaCL ILU0 substitution time (with level scheduling): " << exec_time << std::endl;
259 
260 
261 
263 
264  std::cout << "------- Block-ILU0 with ublas ----------" << std::endl;
265 
266  ublas_vec1 = ublas_vec2;
267  viennacl::copy(ublas_vec1, vcl_vec1);
268 
269  timer.start();
271  viennacl::linalg::ilu0_tag> ublas_block_ilu0(ublas_matrix, viennacl::linalg::ilu0_tag());
272  exec_time = timer.get();
273  std::cout << "Setup time: " << exec_time << std::endl;
274 
275  timer.start();
276  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
277  ublas_block_ilu0.apply(ublas_vec1);
278  exec_time = timer.get();
279  std::cout << "ublas time: " << exec_time << std::endl;
280 
281  std::cout << "------- Block-ILU0 with ViennaCL ----------" << std::endl;
282 
283  timer.start();
285  viennacl::linalg::ilu0_tag> vcl_block_ilu0(vcl_compressed_matrix, viennacl::linalg::ilu0_tag());
286  exec_time = timer.get();
287  std::cout << "Setup time: " << exec_time << std::endl;
288 
289  //vcl_block_ilu0.apply(vcl_vec1); //warm-up
291  timer.start();
292  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
293  vcl_block_ilu0.apply(vcl_vec1);
295  exec_time = timer.get();
296  std::cout << "ViennaCL time: " << exec_time << std::endl;
297 
299 
300  std::cout << "------- ILUT with ublas ----------" << std::endl;
301 
302  ublas_vec1 = ublas_vec2;
303  viennacl::copy(ublas_vec1, vcl_vec1);
304 
305  timer.start();
307  exec_time = timer.get();
308  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
309  timer.start();
310  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
311  ublas_ilut.apply(ublas_vec1);
312  exec_time = timer.get();
313  std::cout << "ublas ILUT substitution time (no level scheduling): " << exec_time << std::endl;
314 
315 
316  std::cout << "------- ILUT with ViennaCL ----------" << std::endl;
317 
318  timer.start();
320  exec_time = timer.get();
321  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
322 
324  timer.start();
325  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
326  vcl_ilut.apply(vcl_vec1);
328  exec_time = timer.get();
329  std::cout << "ViennaCL ILUT substitution time (no level scheduling): " << exec_time << std::endl;
330 
331  timer.start();
332  viennacl::linalg::ilut_tag ilut_with_level_scheduling; ilut_with_level_scheduling.use_level_scheduling(true);
333  viennacl::linalg::ilut_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilut_level_scheduling(vcl_compressed_matrix, ilut_with_level_scheduling);
334  exec_time = timer.get();
335  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
336 
338  timer.start();
339  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
340  vcl_ilut_level_scheduling.apply(vcl_vec1);
342  exec_time = timer.get();
343  std::cout << "ViennaCL ILUT substitution time (with level scheduling): " << exec_time << std::endl;
344 
345 
347 
348  std::cout << "------- Block-ILUT with ublas ----------" << std::endl;
349 
350  ublas_vec1 = ublas_vec2;
351  viennacl::copy(ublas_vec1, vcl_vec1);
352 
353  timer.start();
355  viennacl::linalg::ilut_tag> ublas_block_ilut(ublas_matrix, viennacl::linalg::ilut_tag());
356  exec_time = timer.get();
357  std::cout << "Setup time: " << exec_time << std::endl;
358 
359  //ublas_block_ilut.apply(ublas_vec1);
360  timer.start();
361  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
362  ublas_block_ilut.apply(ublas_vec1);
363  exec_time = timer.get();
364  std::cout << "ublas time: " << exec_time << std::endl;
365 
366  std::cout << "------- Block-ILUT with ViennaCL ----------" << std::endl;
367 
368  timer.start();
370  viennacl::linalg::ilut_tag> vcl_block_ilut(vcl_compressed_matrix, viennacl::linalg::ilut_tag());
371  exec_time = timer.get();
372  std::cout << "Setup time: " << exec_time << std::endl;
373 
374  //vcl_block_ilut.apply(vcl_vec1); //warm-up
376  timer.start();
377  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
378  vcl_block_ilut.apply(vcl_vec1);
380  exec_time = timer.get();
381  std::cout << "ViennaCL time: " << exec_time << std::endl;
382 
383 
387  long cg_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + 6 * ublas_vec2.size()));
388 
389  viennacl::linalg::cg_tag cg_solver(solver_tolerance, solver_iters);
390 
391  std::cout << "------- CG solver (no preconditioner) using ublas ----------" << std::endl;
392  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
393 
394  std::cout << "------- CG solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
395  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
396 
397 #ifdef VIENNACL_WITH_OPENCL
398  bool is_double = (sizeof(ScalarType) == sizeof(double));
399  if (is_double)
400  {
401  std::cout << "------- CG solver, mixed precision (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
402  viennacl::linalg::mixed_precision_cg_tag mixed_precision_cg_solver(solver_tolerance, solver_iters);
403 
404  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, mixed_precision_cg_solver, viennacl::linalg::no_precond(), cg_ops);
405  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, mixed_precision_cg_solver, viennacl::linalg::no_precond(), cg_ops);
406  }
407 #endif
408 
409  std::cout << "------- CG solver (no preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
410  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
411 
412  std::cout << "------- CG solver (no preconditioner) via ViennaCL, ell_matrix ----------" << std::endl;
413  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
414 
415  std::cout << "------- CG solver (no preconditioner) via ViennaCL, hyb_matrix ----------" << std::endl;
416  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
417 
418  std::cout << "------- CG solver (ICHOL0 preconditioner) using ublas ----------" << std::endl;
419  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ichol0, cg_ops);
420 
421  std::cout << "------- CG solver (ICHOL0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
422  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ichol0, cg_ops);
423 
424 
425  std::cout << "------- CG solver (ILU0 preconditioner) using ublas ----------" << std::endl;
426  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilu0, cg_ops);
427 
428  std::cout << "------- CG solver (ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
429  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilu0, cg_ops);
430 
431 
432  std::cout << "------- CG solver (Block-ILU0 preconditioner) using ublas ----------" << std::endl;
433  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilu0, cg_ops);
434 
435  std::cout << "------- CG solver (Block-ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
436  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilu0, cg_ops);
437 
438  std::cout << "------- CG solver (ILUT preconditioner) using ublas ----------" << std::endl;
439  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilut, cg_ops);
440 
441  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
442  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
443 
444  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
445  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
446 
447  std::cout << "------- CG solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
448  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilut, cg_ops);
449 
450  std::cout << "------- CG solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
451  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilut, cg_ops);
452 
453  std::cout << "------- CG solver (Jacobi preconditioner) using ublas ----------" << std::endl;
454  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_jacobi, cg_ops);
455 
456  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
457  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_csr, cg_ops);
458 
459  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
460  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_coo, cg_ops);
461 
462 
463  std::cout << "------- CG solver (row scaling preconditioner) using ublas ----------" << std::endl;
464  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_row_scaling, cg_ops);
465 
466  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
467  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_csr, cg_ops);
468 
469  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
470  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_coo, cg_ops);
471 
472 
476 
477  long bicgstab_ops = static_cast<long>(solver_iters * (2 * ublas_matrix.nnz() + 13 * ublas_vec2.size()));
478 
479  viennacl::linalg::bicgstab_tag bicgstab_solver(solver_tolerance, solver_iters);
480 
481  std::cout << "------- BiCGStab solver (no preconditioner) using ublas ----------" << std::endl;
482  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
483 
484  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
485  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
486 
487  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
488  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
489 
490 
491  std::cout << "------- BiCGStab solver (ILUT preconditioner) using ublas ----------" << std::endl;
492  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_ilut, bicgstab_ops);
493 
494  std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
495  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
496 
497  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
498  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_block_ilut, bicgstab_ops);
499 
500 #ifdef VIENNACL_WITH_OPENCL
501  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
502  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_block_ilut, bicgstab_ops);
503 #endif
504 
505 // std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
506 // run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
507 
508  std::cout << "------- BiCGStab solver (Jacobi preconditioner) using ublas ----------" << std::endl;
509  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_jacobi, bicgstab_ops);
510 
511  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
512  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_csr, bicgstab_ops);
513 
514  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
515  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_coo, bicgstab_ops);
516 
517 
518  std::cout << "------- BiCGStab solver (row scaling preconditioner) using ublas ----------" << std::endl;
519  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_row_scaling, bicgstab_ops);
520 
521  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
522  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_csr, bicgstab_ops);
523 
524  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
525  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_coo, bicgstab_ops);
526 
527 
531 
532  long gmres_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + (solver_iters * 2 + 7) * ublas_vec2.size()));
533 
534  viennacl::linalg::gmres_tag gmres_solver(solver_tolerance, solver_iters, solver_krylov_dim);
535 
536  std::cout << "------- GMRES solver (no preconditioner) using ublas ----------" << std::endl;
537  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
538 
539  std::cout << "------- GMRES solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
540  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
541 
542  std::cout << "------- GMRES solver (no preconditioner) on GPU, coordinate_matrix ----------" << std::endl;
543  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
544 
545 
546  std::cout << "------- GMRES solver (ILUT preconditioner) using ublas ----------" << std::endl;
547  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_ilut, gmres_ops);
548 
549  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
550  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
551 
552  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
553  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
554 
555 
556  std::cout << "------- GMRES solver (Jacobi preconditioner) using ublas ----------" << std::endl;
557  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_jacobi, gmres_ops);
558 
559  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
560  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_csr, gmres_ops);
561 
562  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
563  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_coo, gmres_ops);
564 
565 
566  std::cout << "------- GMRES solver (row scaling preconditioner) using ublas ----------" << std::endl;
567  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_row_scaling, gmres_ops);
568 
569  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
570  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_csr, gmres_ops);
571 
572  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
573  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_coo, gmres_ops);
574 
575  return EXIT_SUCCESS;
576 }
577 
578 int main()
579 {
580  std::cout << std::endl;
581  std::cout << "----------------------------------------------" << std::endl;
582  std::cout << " Device Info" << std::endl;
583  std::cout << "----------------------------------------------" << std::endl;
584 
585 #ifdef VIENNACL_WITH_OPENCL
587  std::vector<viennacl::ocl::device> const & devices = pf.devices();
588 
589  // Set first device to first context:
590  viennacl::ocl::setup_context(0, devices[0]);
591 
592  // Set second device for second context (use the same device for the second context if only one device available):
593  if (devices.size() > 1)
594  viennacl::ocl::setup_context(1, devices[1]);
595  else
596  viennacl::ocl::setup_context(1, devices[0]);
597 
598  std::cout << viennacl::ocl::current_device().info() << std::endl;
600 #else
601  viennacl::context ctx;
602 #endif
603 
604  std::cout << "---------------------------------------------------------------------------" << std::endl;
605  std::cout << "---------------------------------------------------------------------------" << std::endl;
606  std::cout << " Benchmark for Execution Times of Iterative Solvers provided with ViennaCL " << std::endl;
607  std::cout << "---------------------------------------------------------------------------" << std::endl;
608  std::cout << " Note that the purpose of this benchmark is not to run solvers until" << std::endl;
609  std::cout << " convergence. Instead, only the execution times of a few iterations are" << std::endl;
610  std::cout << " recorded. Residual errors are only printed for information." << std::endl << std::endl;
611 
612 
613  std::cout << std::endl;
614  std::cout << "----------------------------------------------" << std::endl;
615  std::cout << "----------------------------------------------" << std::endl;
616  std::cout << "## Benchmark :: Solver" << std::endl;
617  std::cout << "----------------------------------------------" << std::endl;
618  std::cout << std::endl;
619  std::cout << " -------------------------------" << std::endl;
620  std::cout << " # benchmarking single-precision" << std::endl;
621  std::cout << " -------------------------------" << std::endl;
622  run_benchmark<float>(ctx);
623 #ifdef VIENNACL_WITH_OPENCL
625 #endif
626  {
627  std::cout << std::endl;
628  std::cout << " -------------------------------" << std::endl;
629  std::cout << " # benchmarking double-precision" << std::endl;
630  std::cout << " -------------------------------" << std::endl;
631  run_benchmark<double>(ctx);
632  }
633  return 0;
634 }
635 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
A reader and writer for the matrix market format is implemented here.
std::vector< platform > get_platforms()
Definition: platform.hpp:124
ILU0 preconditioner class, can be supplied to solve()-routines.
Definition: ilu0.hpp:146
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Incomplete Cholesky preconditioner class with static pattern (ICHOL0), can be supplied to solve()-rou...
Definition: ichol.hpp:127
int run_benchmark(viennacl::context ctx)
Definition: solver.cpp:123
A tag for incomplete Cholesky factorization with static pattern (ILU0)
Definition: ichol.hpp:43
Jacobi preconditioner class, can be supplied to solve()-routines. Generic version for non-ViennaCL ma...
Wrapper class for an OpenCL platform.
Definition: platform.hpp:45
A tag for incomplete LU factorization with static pattern (ILU0)
Definition: ilu0.hpp:58
std::vector< device > devices(cl_device_type dtype=CL_DEVICE_TYPE_DEFAULT)
Returns the available devices of the supplied device type.
Definition: platform.hpp:91
The stabilized bi-conjugate gradient method is implemented here.
ScalarType diff_2(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:87
Implementations of incomplete Cholesky factorization preconditioners with static nonzero pattern...
ScalarType diff_inf(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:70
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
bool use_level_scheduling() const
Definition: ilut.hpp:76
void start()
A tag for a jacobi preconditioner.
A block ILU preconditioner class, can be supplied to solve()-routines.
Definition: block_ilu.hpp:132
double get() const
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
Implementation of a simple Jacobi preconditioner.
Jacobi-type preconditioner class, can be supplied to solve()-routines. This is a diagonal preconditio...
Definition: row_scaling.hpp:87
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
void printOps(double num_ops, double exec_time)
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:49
Implementation of the coordinate_matrix class.
Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
void apply(VectorT &vec) const
Definition: ilut.hpp:266
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
Implementations of the generalized minimum residual method are in this file.
#define BENCHMARK_RUNS
Definition: solver.cpp:66
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
Implementations of incomplete factorization preconditioners. Convenience header file.
A tag for incomplete LU factorization with threshold (ILUT)
Definition: ilut.hpp:45
Implementation of the compressed_matrix class.
A tag for a row scaling preconditioner which merely normalizes the equation system such that each row...
Definition: row_scaling.hpp:40
void run_solver(MatrixType const &matrix, VectorType const &rhs, VectorType const &ref_result, SolverTag const &solver, PrecondTag const &precond, long ops)
Definition: solver.cpp:97
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
ILUT preconditioner class, can be supplied to solve()-routines.
Definition: ilut.hpp:252
int main()
Definition: solver.cpp:578
The conjugate gradient method is implemented here.
void apply(VectorT &vec) const
Definition: ilu0.hpp:160
Implementation of the ell_matrix class.
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
void apply(VectorT &vec) const
Definition: block_ilu.hpp:174
A row normalization preconditioner is implemented here.
void apply(VectorT &vec) const
Definition: ichol.hpp:141
viennacl::vector< int > v2
bool use_level_scheduling() const
Definition: ilu0.hpp:63
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
float ScalarType
Definition: fft_1d.cpp:42
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48
A sparse square matrix in compressed sparse rows format.
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
Definition: bicgstab.hpp:47
long read_matrix_market_file(MatrixT &mat, const char *file, long index_base=1)
Reads a sparse matrix from a file (MatrixMarket format)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.
Definition: backend.hpp:225
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Implementation of the ViennaCL scalar class.
void setup_context(long i, std::vector< cl_device_id > const &devices)
Convenience function for setting devices for a context.
Definition: backend.hpp:231
The conjugate gradient method using mixed precision is implemented here. Experimental.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
viennacl::vector< NumericT > solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:88