ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
block_ilu.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
2 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include "viennacl/forwards.h"
28 #include "viennacl/tools/tools.hpp"
32 
33 #include <map>
34 
35 namespace viennacl
36 {
37 namespace linalg
38 {
39 namespace detail
40 {
42  template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t>
44  {
45  public:
46  ilu_vector_range(VectorT & v,
47  SizeT start_index,
48  SizeT vec_size
49  ) : vec_(v), start_(start_index), size_(vec_size) {}
50 
51  NumericT & operator()(SizeT index)
52  {
53  assert(index < size_ && bool("Index out of bounds!"));
54  return vec_[start_ + index];
55  }
56 
57  NumericT & operator[](SizeT index)
58  {
59  assert(index < size_ && bool("Index out of bounds!"));
60  return vec_[start_ + index];
61  }
62 
63  SizeT size() const { return size_; }
64 
65  private:
66  VectorT & vec_;
67  SizeT start_;
68  SizeT size_;
69  };
70 
78  template<typename NumericT>
80  viennacl::compressed_matrix<NumericT> & diagonal_block_A,
81  vcl_size_t start_index,
82  vcl_size_t stop_index
83  )
84  {
85  assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
86  assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
87  assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
88 
89  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
90  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
91  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
92 
93  NumericT * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle());
94  unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
95  unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
96 
97  vcl_size_t output_counter = 0;
98  for (vcl_size_t row = start_index; row < stop_index; ++row)
99  {
100  unsigned int buffer_col_start = A_row_buffer[row];
101  unsigned int buffer_col_end = A_row_buffer[row+1];
102 
103  output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);
104 
105  for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
106  {
107  unsigned int col = A_col_buffer[buf_index];
108  if (col < start_index)
109  continue;
110 
111  if (col >= static_cast<unsigned int>(stop_index))
112  continue;
113 
114  output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
115  output_elements[output_counter] = A_elements[buf_index];
116  ++output_counter;
117  }
118  output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
119  }
120  }
121 
122 } // namespace detail
123 
124 
125 
131 template<typename MatrixT, typename ILUTag>
133 {
134 typedef typename MatrixT::value_type ScalarType;
135 
136 public:
137  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
138 
139 
140  block_ilu_precond(MatrixT const & mat,
141  ILUTag const & tag,
142  vcl_size_t num_blocks = 8
143  ) : tag_(tag), LU_blocks(num_blocks)
144  {
145  // Set up vector of block indices:
146  block_indices_.resize(num_blocks);
147  for (vcl_size_t i=0; i<num_blocks; ++i)
148  {
149  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
150  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
151 
152  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
153  }
154 
155  //initialize preconditioner:
156  //std::cout << "Start CPU precond" << std::endl;
157  init(mat);
158  //std::cout << "End CPU precond" << std::endl;
159  }
160 
161  block_ilu_precond(MatrixT const & mat,
162  ILUTag const & tag,
163  index_vector_type const & block_boundaries
164  ) : tag_(tag), block_indices_(block_boundaries), LU_blocks(block_boundaries.size())
165  {
166  //initialize preconditioner:
167  //std::cout << "Start CPU precond" << std::endl;
168  init(mat);
169  //std::cout << "End CPU precond" << std::endl;
170  }
171 
172 
173  template<typename VectorT>
174  void apply(VectorT & vec) const
175  {
176  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
177  {
178  detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, LU_blocks[i].size2());
179 
180  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
181  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
182  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
183 
184  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
185  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
186  }
187  }
188 
189 private:
190  void init(MatrixT const & A)
191  {
193  viennacl::compressed_matrix<ScalarType> mat(host_context);
194 
195  viennacl::copy(A, mat);
196 
197  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
198 
199 #ifdef VIENNACL_WITH_OPENMP
200  #pragma omp parallel for
201 #endif
202  for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
203  {
204  vcl_size_t i = static_cast<vcl_size_t>(i2);
205  // Step 1: Extract blocks
206  vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
207  vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
208  viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
209 
210  detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
211 
212  // Step 2: Precondition blocks:
213  viennacl::switch_memory_context(LU_blocks[i], host_context);
214  preconditioner_dispatch(mat_block, LU_blocks[i], tag_);
215  }
216 
217  }
218 
219  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
222  {
223  LU = mat_block;
225  }
226 
227  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
230  {
231  std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1());
232 
233  viennacl::linalg::precondition(mat_block, temp, tag_);
234 
235  viennacl::copy(temp, LU);
236  }
237 
238  ILUTag const & tag_;
239  index_vector_type block_indices_;
240  std::vector< viennacl::compressed_matrix<ScalarType> > LU_blocks;
241 };
242 
243 
244 
245 
246 
251 template<typename NumericT, unsigned int AlignmentV, typename ILUTagT>
252 class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT>
253 {
255 
256 public:
257  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
258 
259 
261  ILUTagT const & tag,
262  vcl_size_t num_blocks = 8
263  ) : tag_(tag),
264  block_indices_(num_blocks),
265  gpu_block_indices_(),
266  gpu_L_trans_(0, 0, viennacl::traits::context(mat)),
267  gpu_U_trans_(0, 0, viennacl::traits::context(mat)),
268  gpu_D_(mat.size1(), viennacl::traits::context(mat)),
269  LU_blocks_(num_blocks)
270  {
271  // Set up vector of block indices:
272  block_indices_.resize(num_blocks);
273  for (vcl_size_t i=0; i<num_blocks; ++i)
274  {
275  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
276  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
277 
278  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
279  }
280 
281  //initialize preconditioner:
282  //std::cout << "Start CPU precond" << std::endl;
283  init(mat);
284  //std::cout << "End CPU precond" << std::endl;
285  }
286 
288  ILUTagT const & tag,
289  index_vector_type const & block_boundaries
290  ) : tag_(tag),
291  block_indices_(block_boundaries),
292  gpu_block_indices_(),
293  gpu_L_trans_(0, 0, viennacl::traits::context(mat)),
294  gpu_U_trans_(0, 0, viennacl::traits::context(mat)),
295  gpu_D_(mat.size1(), viennacl::traits::context(mat)),
296  LU_blocks_(block_boundaries.size())
297  {
298  //initialize preconditioner:
299  //std::cout << "Start CPU precond" << std::endl;
300  init(mat);
301  //std::cout << "End CPU precond" << std::endl;
302  }
303 
304 
305  void apply(vector<NumericT> & vec) const
306  {
307  viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
308  vec,
310 
311  viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
312  vec,
314 
315  //apply_cpu(vec);
316  }
317 
318 
319 private:
320 
321  void init(MatrixType const & A)
322  {
324  viennacl::compressed_matrix<NumericT> mat(host_context);
325 
326  mat = A;
327 
328  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
329 
330 #ifdef VIENNACL_WITH_OPENMP
331  #pragma omp parallel for
332 #endif
333  for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
334  {
335  // Step 1: Extract blocks
336  vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first;
337  vcl_size_t block_nnz = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
338  viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context);
339 
340  detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second);
341 
342  // Step 2: Precondition blocks:
343  viennacl::switch_memory_context(LU_blocks_[static_cast<vcl_size_t>(i)], host_context);
344  preconditioner_dispatch(mat_block, LU_blocks_[static_cast<vcl_size_t>(i)], tag_);
345  }
346 
347  /*
348  * copy resulting preconditioner back to GPU:
349  */
350 
354 
355  viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size());
356  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
357  {
358  block_indices_uint.set(2*i, block_indices_[i].first);
359  block_indices_uint.set(2*i + 1, block_indices_[i].second);
360  }
361 
362  viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());
363 
364  blocks_to_device(mat.size1());
365 
366  }
367 
368  // Copy computed preconditioned blocks to OpenCL device
369  void blocks_to_device(vcl_size_t matrix_size)
370  {
371  std::vector< std::map<unsigned int, NumericT> > L_transposed(matrix_size);
372  std::vector< std::map<unsigned int, NumericT> > U_transposed(matrix_size);
373  std::vector<NumericT> entries_D(matrix_size);
374 
375  //
376  // Transpose individual blocks into a single large matrix:
377  //
378  for (vcl_size_t block_index = 0; block_index < LU_blocks_.size(); ++block_index)
379  {
380  MatrixType const & current_block = LU_blocks_[block_index];
381 
382  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle1());
383  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle2());
384  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(current_block.handle());
385 
386  vcl_size_t block_start = block_indices_[block_index].first;
387 
388  //transpose L and U:
389  for (vcl_size_t row = 0; row < current_block.size1(); ++row)
390  {
391  unsigned int buffer_col_start = row_buffer[row];
392  unsigned int buffer_col_end = row_buffer[row+1];
393 
394  for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
395  {
396  unsigned int col = col_buffer[buf_index];
397 
398  if (row > col) //entry for L
399  L_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
400  else if (row == col)
401  entries_D[row + block_start] = elements[buf_index];
402  else //entry for U
403  U_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
404  }
405  }
406  }
407 
408  //
409  // Move data to GPU:
410  //
411  tools::const_sparse_matrix_adapter<NumericT, unsigned int> adapted_L_transposed(L_transposed, matrix_size, matrix_size);
412  tools::const_sparse_matrix_adapter<NumericT, unsigned int> adapted_U_transposed(U_transposed, matrix_size, matrix_size);
413  viennacl::copy(adapted_L_transposed, gpu_L_trans_);
414  viennacl::copy(adapted_U_transposed, gpu_U_trans_);
415  viennacl::copy(entries_D, gpu_D_);
416  }
417 
418  void preconditioner_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
421  {
422  LU = mat_block;
424  }
425 
426  void preconditioner_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
429  {
430  std::vector< std::map<unsigned int, NumericT> > temp(mat_block.size1());
431 
432  viennacl::linalg::precondition(mat_block, temp, tag_);
433 
434  viennacl::copy(temp, LU);
435  }
436 
437 
438  ILUTagT const & tag_;
439  index_vector_type block_indices_;
440  viennacl::backend::mem_handle gpu_block_indices_;
444 
445  std::vector<MatrixType> LU_blocks_;
446 };
447 
448 
449 }
450 }
451 
452 
453 
454 
455 #endif
456 
457 
458 
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92
const vcl_size_t & size1() const
Returns the number of rows.
Various little tools used here and there in ViennaCL.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag for incomplete LU factorization with static pattern (ILU0)
Definition: ilu0.hpp:58
void precondition(viennacl::compressed_matrix< NumericT > &A, ilu0_tag const &)
Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices...
Definition: ilu0.hpp:78
This file provides the forward declarations for the main types used within ViennaCL.
Implementations of incomplete factorization preconditioners with static nonzero pattern.
A block ILU preconditioner class, can be supplied to solve()-routines.
Definition: block_ilu.hpp:132
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
void extract_block_matrix(viennacl::compressed_matrix< NumericT > const &A, viennacl::compressed_matrix< NumericT > &diagonal_block_A, vcl_size_t start_index, vcl_size_t stop_index)
Extracts a diagonal block from a larger system matrix.
Definition: block_ilu.hpp:79
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
std::vector< std::pair< vcl_size_t, vcl_size_t > > index_vector_type
Definition: block_ilu.hpp:137
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
A tag for incomplete LU factorization with threshold (ILUT)
Definition: ilut.hpp:45
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
block_ilu_precond(MatrixT const &mat, ILUTag const &tag, vcl_size_t num_blocks=8)
Definition: block_ilu.hpp:140
ilu_vector_range(VectorT &v, SizeT start_index, SizeT vec_size)
Definition: block_ilu.hpp:46
std::size_t vcl_size_t
Definition: forwards.h:74
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void apply(VectorT &vec) const
Definition: block_ilu.hpp:174
block_ilu_precond(MatrixType const &mat, ILUTagT const &tag, index_vector_type const &block_boundaries)
Definition: block_ilu.hpp:287
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
Implementations of an incomplete factorization preconditioner with threshold (ILUT) ...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
block_ilu_precond(MatrixType const &mat, ILUTagT const &tag, vcl_size_t num_blocks=8)
Definition: block_ilu.hpp:260
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void set(vcl_size_t index, U value)
Definition: util.hpp:115
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
Helper range class for representing a subvector of a larger buffer.
Definition: block_ilu.hpp:43
Common routines used within ILU-type preconditioners.
block_ilu_precond(MatrixT const &mat, ILUTag const &tag, index_vector_type const &block_boundaries)
Definition: block_ilu.hpp:161
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Definition: mem_handle.hpp:118
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixType >::value >::type block_inplace_solve(const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::backend::mem_handle const &block_index_array, vcl_size_t num_blocks, viennacl::vector_base< ScalarType > const &mat_diagonal, viennacl::vector_base< ScalarType > &vec, SOLVERTAG tag)
void switch_memory_context(T &obj, viennacl::context new_ctx)
Generic convenience routine for migrating data of an object to a new memory domain.
Definition: memory.hpp:622