ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bicgstab.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_BICGSTAB_HPP_
2 #define VIENNACL_LINALG_BICGSTAB_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <numeric>
28 
29 #include "viennacl/forwards.h"
30 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/prod.hpp"
35 #include "viennacl/traits/size.hpp"
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 
48 {
49 public:
56  bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t max_iters_before_restart = 200)
57  : tol_(tol), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {}
58 
60  double tolerance() const { return tol_; }
62  vcl_size_t max_iterations() const { return iterations_; }
64  vcl_size_t max_iterations_before_restart() const { return iterations_before_restart_; }
65 
67  vcl_size_t iters() const { return iters_taken_; }
68  void iters(vcl_size_t i) const { iters_taken_ = i; }
69 
71  double error() const { return last_error_; }
73  void error(double e) const { last_error_ = e; }
74 
75 private:
76  double tol_;
77  vcl_size_t iterations_;
78  vcl_size_t iterations_before_restart_;
79 
80  //return values from solver
81  mutable vcl_size_t iters_taken_;
82  mutable double last_error_;
83 };
84 
85 
87 template<typename MatrixT, typename NumericT>
88 viennacl::vector<NumericT> solve(MatrixT const & A, //MatrixType const & A,
90  bicgstab_tag const & tag,
92 {
94 
95  viennacl::vector<NumericT> residual = rhs;
97  viennacl::vector<NumericT> r0star = rhs;
101 
102  // Layout of temporary buffer:
103  // chunk 0: <residual, r_0^*>
104  // chunk 1: <As, As>
105  // chunk 2: <As, s>
106  // chunk 3: <Ap, r_0^*>
107  // chunk 4: <As, r_0^*>
108  // chunk 5: <s, s>
109  vcl_size_t buffer_size_per_vector = 256;
110  vcl_size_t num_buffer_chunks = 6;
111  viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
112  std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
113 
114  NumericT norm_rhs_host = viennacl::linalg::norm_2(residual);
115  NumericT beta;
116  NumericT alpha;
117  NumericT omega;
118  NumericT residual_norm = norm_rhs_host;
119  inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;
120 
121  NumericT r_dot_r0 = 0;
122  NumericT As_dot_As = 0;
123  NumericT As_dot_s = 0;
124  NumericT Ap_dot_r0 = 0;
125  NumericT As_dot_r0 = 0;
126  NumericT s_dot_s = 0;
127 
128  if (norm_rhs_host <= 0) //solution is zero if RHS norm is zero
129  return result;
130 
131  for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
132  {
133  tag.iters(i+1);
134  // Ap = A*p_j
135  // Ap_dot_r0 = <Ap, r_0^*>
137  inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector);
138 
140 
142  //
144  //viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
145  //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0));
146 
147  //alpha = residual_dot_r0 / Ap_dot_r0;
148 
150  //s = residual - alpha * Ap;
151 
153  // s = r - alpha * Ap
154  // <s, s> first stage
155  // dump alpha at end of inner_prod_buffer
157  inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector);
158 
159  // As = A*s_j
160  // As_dot_As = <As, As>
161  // As_dot_s = <As, s>
162  // As_dot_r0 = <As, r_0^*>
164  inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector);
165 
167 
168  viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
169 
170  r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_size_per_vector, NumericT(0));
171  As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, NumericT(0));
172  As_dot_s = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, NumericT(0));
173  Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, NumericT(0));
174  As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, NumericT(0));
175  s_dot_s = std::accumulate(host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 6 * buffer_size_per_vector, NumericT(0));
176 
177  alpha = r_dot_r0 / Ap_dot_r0;
178  beta = -1.0 * As_dot_r0 / Ap_dot_r0;
179  omega = As_dot_s / As_dot_As;
180 
181  residual_norm = std::sqrt(s_dot_s - 2.0 * omega * As_dot_s + omega * omega * As_dot_As);
182  if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
183  break;
184 
185  // x_{j+1} = x_j + alpha * p_j + omega * s_j
186  // r_{j+1} = s_j - omega * t_j
187  // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j)
188  // and compute first stage of r_dot_r0 = <r_{j+1}, r_0^*> for use in next iteration
189  viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s,
190  residual, As,
191  beta, Ap,
192  r0star, inner_prod_buffer, buffer_size_per_vector);
193  }
194 
195  //store last error estimate:
196  tag.error(residual_norm / norm_rhs_host);
197 
198  return result;
199 }
200 
201 
211 template<typename MatrixT, typename VectorT>
212 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag)
213 {
214  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
215  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
216  VectorT result = rhs;
217  viennacl::traits::clear(result);
218 
219  VectorT residual = rhs;
220  VectorT p = rhs;
221  VectorT r0star = rhs;
222  VectorT tmp0 = rhs;
223  VectorT tmp1 = rhs;
224  VectorT s = rhs;
225 
226  CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
227  CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host;
228  CPU_NumericType beta;
229  CPU_NumericType alpha;
230  CPU_NumericType omega;
231  //ScalarType inner_prod_temp; //temporary variable for inner product computation
232  CPU_NumericType new_ip_rr0star = 0;
233  CPU_NumericType residual_norm = norm_rhs_host;
234 
235  if (norm_rhs_host <= 0) //solution is zero if RHS norm is zero
236  return result;
237 
238  bool restart_flag = true;
239  vcl_size_t last_restart = 0;
240  for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
241  {
242  if (restart_flag)
243  {
244  residual = rhs;
245  residual -= viennacl::linalg::prod(matrix, result);
246  p = residual;
247  r0star = residual;
248  ip_rr0star = viennacl::linalg::norm_2(residual);
249  ip_rr0star *= ip_rr0star;
250  restart_flag = false;
251  last_restart = i;
252  }
253 
254  tag.iters(i+1);
255  tmp0 = viennacl::linalg::prod(matrix, p);
256  alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
257 
258  s = residual - alpha*tmp0;
259 
260  tmp1 = viennacl::linalg::prod(matrix, s);
261  CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
262  omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
263 
264  result += alpha * p + omega * s;
265  residual = s - omega * tmp1;
266 
267  new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
268  residual_norm = viennacl::linalg::norm_2(residual);
269  if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
270  break;
271 
272  beta = new_ip_rr0star / ip_rr0star * alpha/omega;
273  ip_rr0star = new_ip_rr0star;
274 
275  if ( (ip_rr0star <= 0 && ip_rr0star >= 0)
276  || (omega <= 0 && omega >= 0)
277  || (i - last_restart > tag.max_iterations_before_restart())
278  ) //search direction degenerate. A restart might help
279  restart_flag = true;
280 
281  // Execution of
282  // p = residual + beta * (p - omega*tmp0);
283  // without introducing temporary vectors:
284  p -= omega * tmp0;
285  p = residual + beta * p;
286  }
287 
288  //store last error estimate:
289  tag.error(residual_norm / norm_rhs_host);
290 
291  return result;
292 }
293 
294 template<typename MatrixT, typename VectorT>
295 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, viennacl::linalg::no_precond)
296 {
297  return solve(matrix, rhs, tag);
298 }
299 
310 template<typename MatrixT, typename VectorT, typename PreconditionerT>
311 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond)
312 {
313  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
314  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
315  VectorT result = rhs;
316  viennacl::traits::clear(result);
317 
318  VectorT residual = rhs;
319  VectorT r0star = residual; //can be chosen arbitrarily in fact
320  VectorT tmp0 = rhs;
321  VectorT tmp1 = rhs;
322  VectorT s = rhs;
323 
324  VectorT p = residual;
325 
326  CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual);
327  CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
328  CPU_NumericType beta;
329  CPU_NumericType alpha;
330  CPU_NumericType omega;
331  CPU_NumericType new_ip_rr0star = 0;
332  CPU_NumericType residual_norm = norm_rhs_host;
333 
334  if (!norm_rhs_host) //solution is zero if RHS norm is zero
335  return result;
336 
337  bool restart_flag = true;
338  vcl_size_t last_restart = 0;
339  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
340  {
341  if (restart_flag)
342  {
343  residual = rhs;
344  residual -= viennacl::linalg::prod(matrix, result);
345  precond.apply(residual);
346  p = residual;
347  r0star = residual;
348  ip_rr0star = viennacl::linalg::norm_2(residual);
349  ip_rr0star *= ip_rr0star;
350  restart_flag = false;
351  last_restart = i;
352  }
353 
354  tag.iters(i+1);
355  tmp0 = viennacl::linalg::prod(matrix, p);
356  precond.apply(tmp0);
357  alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
358 
359  s = residual - alpha*tmp0;
360 
361  tmp1 = viennacl::linalg::prod(matrix, s);
362  precond.apply(tmp1);
363  CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
364  omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
365 
366  result += alpha * p + omega * s;
367  residual = s - omega * tmp1;
368 
369  residual_norm = viennacl::linalg::norm_2(residual);
370  if (residual_norm / norm_rhs_host < tag.tolerance())
371  break;
372 
373  new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
374 
375  beta = new_ip_rr0star / ip_rr0star * alpha/omega;
376  ip_rr0star = new_ip_rr0star;
377 
378  if (!ip_rr0star || !omega || i - last_restart > tag.max_iterations_before_restart()) //search direction degenerate. A restart might help
379  restart_flag = true;
380 
381  // Execution of
382  // p = residual + beta * (p - omega*tmp0);
383  // without introducing temporary vectors:
384  p -= omega * tmp0;
385  p = residual + beta * p;
386 
387  //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl;
388  }
389 
390  //store last error estimate:
391  tag.error(residual_norm / norm_rhs_host);
392 
393  return result;
394 }
395 
396 }
397 }
398 
399 #endif
vcl_size_t max_iterations_before_restart() const
Returns the maximum number of iterations before a restart.
Definition: bicgstab.hpp:64
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
bicgstab_tag(double tol=1e-8, vcl_size_t max_iters=400, vcl_size_t max_iters_before_restart=200)
The constructor.
Definition: bicgstab.hpp:56
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: bicgstab.hpp:71
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
double tolerance() const
Returns the relative tolerance.
Definition: bicgstab.hpp:60
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Performs a joint vector update operation needed for an efficient pipelined BiCGStab algorithm...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
Extracts the underlying context from objects.
vcl_size_t max_iterations() const
Returns the maximum number of iterations.
Definition: bicgstab.hpp:62
std::size_t vcl_size_t
Definition: forwards.h:74
Generic clear functionality for different vector and matrix types.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:238
void pipelined_bicgstab_prod(MatrixT const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined BiCGStab algorithm.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: bicgstab.hpp:73
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
vcl_size_t iters() const
Return the number of solver iterations:
Definition: bicgstab.hpp:67
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined BiCGStab algorithm.
void iters(vcl_size_t i) const
Definition: bicgstab.hpp:68
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
Definition: bicgstab.hpp:47
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
A collection of compile time type deductions.
viennacl::vector< NumericT > solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:88
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)