ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_DIRECT_SOLVE_HPP
2 #define VIENNACL_LINALG_HOST_BASED_DIRECT_SOLVE_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/vector.hpp"
26 #include "viennacl/matrix.hpp"
27 
29 
30 namespace viennacl
31 {
32 namespace linalg
33 {
34 namespace host_based
35 {
36 
37 namespace detail
38 {
39  //
40  // Upper solve:
41  //
42  template<typename MatrixT1, typename MatrixT2>
43  void upper_inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, bool unit_diagonal)
44  {
45  typedef typename MatrixT2::value_type value_type;
46 
47  for (vcl_size_t i = 0; i < A_size; ++i)
48  {
49  vcl_size_t current_row = A_size - i - 1;
50 
51  for (vcl_size_t j = current_row + 1; j < A_size; ++j)
52  {
53  value_type A_element = A(current_row, j);
54  for (vcl_size_t k=0; k < B_size; ++k)
55  B(current_row, k) -= A_element * B(j, k);
56  }
57 
58  if (!unit_diagonal)
59  {
60  value_type A_diag = A(current_row, current_row);
61  for (vcl_size_t k=0; k < B_size; ++k)
62  B(current_row, k) /= A_diag;
63  }
64  }
65  }
66 
67  template<typename MatrixT1, typename MatrixT2>
68  void inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, viennacl::linalg::unit_upper_tag)
69  {
70  upper_inplace_solve_matrix(A, B, A_size, B_size, true);
71  }
72 
73  template<typename MatrixT1, typename MatrixT2>
74  void inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, viennacl::linalg::upper_tag)
75  {
76  upper_inplace_solve_matrix(A, B, A_size, B_size, false);
77  }
78 
79  //
80  // Lower solve:
81  //
82  template<typename MatrixT1, typename MatrixT2>
83  void lower_inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, bool unit_diagonal)
84  {
85  typedef typename MatrixT2::value_type value_type;
86 
87  for (vcl_size_t i = 0; i < A_size; ++i)
88  {
89  for (vcl_size_t j = 0; j < i; ++j)
90  {
91  value_type A_element = A(i, j);
92  for (vcl_size_t k=0; k < B_size; ++k)
93  B(i, k) -= A_element * B(j, k);
94  }
95 
96  if (!unit_diagonal)
97  {
98  value_type A_diag = A(i, i);
99  for (vcl_size_t k=0; k < B_size; ++k)
100  B(i, k) /= A_diag;
101  }
102  }
103  }
104 
105  template<typename MatrixT1, typename MatrixT2>
106  void inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, viennacl::linalg::unit_lower_tag)
107  {
108  lower_inplace_solve_matrix(A, B, A_size, B_size, true);
109  }
110 
111  template<typename MatrixT1, typename MatrixT2>
112  void inplace_solve_matrix(MatrixT1 & A, MatrixT2 & B, vcl_size_t A_size, vcl_size_t B_size, viennacl::linalg::lower_tag)
113  {
114  lower_inplace_solve_matrix(A, B, A_size, B_size, false);
115  }
116 
117 }
118 
119 //
120 // Note: By convention, all size checks are performed in the calling frontend. No need to double-check here.
121 //
122 
124 
131 template<typename NumericT, typename SolverTagT>
132 void inplace_solve(const matrix_base<NumericT> & A, bool trans_A,
133  matrix_base<NumericT> & B, bool trans_B,
134  SolverTagT)
135 {
136  typedef NumericT value_type;
137 
138  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
139  value_type * data_B = detail::extract_raw_pointer<value_type>(B);
140 
141  vcl_size_t A_start1 = viennacl::traits::start1(A);
142  vcl_size_t A_start2 = viennacl::traits::start2(A);
145  vcl_size_t A_size1 = viennacl::traits::size1(A);
146  vcl_size_t A_size2 = viennacl::traits::size2(A);
147  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
148  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
149 
150  vcl_size_t B_start1 = viennacl::traits::start1(B);
151  vcl_size_t B_start2 = viennacl::traits::start2(B);
154  vcl_size_t B_size1 = viennacl::traits::size1(B);
155  vcl_size_t B_size2 = viennacl::traits::size2(B);
156  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
157  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
158 
159 
160  if (!trans_A && !trans_B)
161  {
162  if (A.row_major() && B.row_major())
163  {
164  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
165  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
166 
167  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
168  }
169  else if (A.row_major() && !B.row_major())
170  {
171  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
172  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
173 
174  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
175  }
176  else if (!A.row_major() && B.row_major())
177  {
178  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
179  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
180 
181  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
182  }
183  else
184  {
185  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
186  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
187 
188  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
189  }
190  }
191  else if (!trans_A && trans_B)
192  {
193  if (A.row_major() && B.row_major())
194  {
195  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
196  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
197 
198  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
199  }
200  else if (A.row_major() && !B.row_major())
201  {
202  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
203  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
204 
205  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
206  }
207  else if (!A.row_major() && B.row_major())
208  {
209  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
210  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
211 
212  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
213  }
214  else
215  {
216  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
217  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
218 
219  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
220  }
221  }
222  else if (trans_A && !trans_B)
223  {
224  if (A.row_major() && B.row_major())
225  {
226  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
227  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
228 
229  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size2, SolverTagT());
230  }
231  else if (A.row_major() && !B.row_major())
232  {
233  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
234  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
235 
236  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size2, SolverTagT());
237  }
238  else if (!A.row_major() && B.row_major())
239  {
240  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
241  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
242 
243  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size2, SolverTagT());
244  }
245  else
246  {
247  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
248  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
249 
250  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size2, SolverTagT());
251  }
252  }
253  else if (trans_A && trans_B)
254  {
255  if (A.row_major() && B.row_major())
256  {
257  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
258  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
259 
260  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size1, SolverTagT());
261  }
262  else if (A.row_major() && !B.row_major())
263  {
264  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
265  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
266 
267  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size1, SolverTagT());
268  }
269  else if (!A.row_major() && B.row_major())
270  {
271  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
272  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
273 
274  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size1, SolverTagT());
275  }
276  else
277  {
278  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
279  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
280 
281  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size1, B_size1, SolverTagT());
282  }
283  }
284 }
285 
291 template<typename NumericT, typename SolverTagT>
294  SolverTagT)
295 {
296  typedef NumericT value_type;
297 
298  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
299  value_type * data_B = const_cast<value_type *>(detail::extract_raw_pointer<value_type>(proxy_B.lhs()));
300 
301  vcl_size_t A_start1 = viennacl::traits::start1(A);
302  vcl_size_t A_start2 = viennacl::traits::start2(A);
305  vcl_size_t A_size2 = viennacl::traits::size2(A);
306  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
307  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
308 
309  vcl_size_t B_start1 = viennacl::traits::start1(proxy_B.lhs());
310  vcl_size_t B_start2 = viennacl::traits::start2(proxy_B.lhs());
311  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy_B.lhs());
312  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy_B.lhs());
313  vcl_size_t B_size1 = viennacl::traits::size1(proxy_B.lhs());
314  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy_B.lhs());
315  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy_B.lhs());
316 
317 
318  if (A.row_major() && proxy_B.lhs().row_major())
319  {
320  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
321  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
322 
323  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
324  }
325  else if (A.row_major() && !proxy_B.lhs().row_major())
326  {
327  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
328  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
329 
330  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
331  }
332  else if (!A.row_major() && proxy_B.lhs().row_major())
333  {
334  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
335  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
336 
337  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
338  }
339  else
340  {
341  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
342  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
343 
344  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
345  }
346 }
347 
348 //upper triangular solver for transposed lower triangular matrices
354 template<typename NumericT, typename SolverTagT>
357  SolverTagT)
358 {
359  typedef NumericT value_type;
360 
361  value_type const * data_A = detail::extract_raw_pointer<value_type>(proxy_A.lhs());
362  value_type * data_B = const_cast<value_type *>(detail::extract_raw_pointer<value_type>(B));
363 
364  vcl_size_t A_start1 = viennacl::traits::start1(proxy_A.lhs());
365  vcl_size_t A_start2 = viennacl::traits::start2(proxy_A.lhs());
366  vcl_size_t A_inc1 = viennacl::traits::stride1(proxy_A.lhs());
367  vcl_size_t A_inc2 = viennacl::traits::stride2(proxy_A.lhs());
368  vcl_size_t A_size2 = viennacl::traits::size2(proxy_A.lhs());
369  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(proxy_A.lhs());
370  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(proxy_A.lhs());
371 
372  vcl_size_t B_start1 = viennacl::traits::start1(B);
373  vcl_size_t B_start2 = viennacl::traits::start2(B);
376  vcl_size_t B_size2 = viennacl::traits::size2(B);
377  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
378  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
379 
380  if (proxy_A.lhs().row_major() && B.row_major())
381  {
382  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
383  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
384 
385  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
386  }
387  else if (proxy_A.lhs().row_major() && !B.row_major())
388  {
389  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
390  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
391 
392  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
393  }
394  else if (!proxy_A.lhs().row_major() && B.row_major())
395  {
396  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
397  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
398 
399  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
400  }
401  else
402  {
403  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
404  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
405 
406  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size2, SolverTagT());
407  }
408 }
409 
415 template<typename NumericT, typename SolverTagT>
418  SolverTagT)
419 {
420  typedef NumericT value_type;
421 
422  value_type const * data_A = detail::extract_raw_pointer<value_type>(proxy_A.lhs());
423  value_type * data_B = const_cast<value_type *>(detail::extract_raw_pointer<value_type>(proxy_B.lhs()));
424 
425  vcl_size_t A_start1 = viennacl::traits::start1(proxy_A.lhs());
426  vcl_size_t A_start2 = viennacl::traits::start2(proxy_A.lhs());
427  vcl_size_t A_inc1 = viennacl::traits::stride1(proxy_A.lhs());
428  vcl_size_t A_inc2 = viennacl::traits::stride2(proxy_A.lhs());
429  vcl_size_t A_size2 = viennacl::traits::size2(proxy_A.lhs());
430  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(proxy_A.lhs());
431  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(proxy_A.lhs());
432 
433  vcl_size_t B_start1 = viennacl::traits::start1(proxy_B.lhs());
434  vcl_size_t B_start2 = viennacl::traits::start2(proxy_B.lhs());
435  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy_B.lhs());
436  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy_B.lhs());
437  vcl_size_t B_size1 = viennacl::traits::size1(proxy_B.lhs());
438  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy_B.lhs());
439  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy_B.lhs());
440 
441  if (proxy_A.lhs().row_major() && proxy_B.lhs().row_major())
442  {
443  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
444  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
445 
446  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
447  }
448  else if (proxy_A.lhs().row_major() && !proxy_B.lhs().row_major())
449  {
450  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
451  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
452 
453  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
454  }
455  else if (!proxy_A.lhs().row_major() && proxy_B.lhs().row_major())
456  {
457  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
458  detail::matrix_array_wrapper<value_type, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
459 
460  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
461  }
462  else
463  {
464  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
465  detail::matrix_array_wrapper<value_type, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
466 
467  detail::inplace_solve_matrix(wrapper_A, wrapper_B, A_size2, B_size1, SolverTagT());
468  }
469 }
470 
471 //
472 // Solve on vector
473 //
474 
475 namespace detail
476 {
477  //
478  // Upper solve:
479  //
480  template<typename MatrixT, typename VectorT>
481  void upper_inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, bool unit_diagonal)
482  {
483  typedef typename VectorT::value_type value_type;
484 
485  for (vcl_size_t i = 0; i < A_size; ++i)
486  {
487  vcl_size_t current_row = A_size - i - 1;
488 
489  for (vcl_size_t j = current_row + 1; j < A_size; ++j)
490  {
491  value_type A_element = A(current_row, j);
492  b(current_row) -= A_element * b(j);
493  }
494 
495  if (!unit_diagonal)
496  b(current_row) /= A(current_row, current_row);
497  }
498  }
499 
500  template<typename MatrixT, typename VectorT>
501  void inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, viennacl::linalg::unit_upper_tag)
502  {
503  upper_inplace_solve_vector(A, b, A_size, true);
504  }
505 
506  template<typename MatrixT, typename VectorT>
507  void inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, viennacl::linalg::upper_tag)
508  {
509  upper_inplace_solve_vector(A, b, A_size, false);
510  }
511 
512  //
513  // Lower solve:
514  //
515  template<typename MatrixT, typename VectorT>
516  void lower_inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, bool unit_diagonal)
517  {
518  typedef typename VectorT::value_type value_type;
519 
520  for (vcl_size_t i = 0; i < A_size; ++i)
521  {
522  for (vcl_size_t j = 0; j < i; ++j)
523  {
524  value_type A_element = A(i, j);
525  b(i) -= A_element * b(j);
526  }
527 
528  if (!unit_diagonal)
529  b(i) /= A(i, i);
530  }
531  }
532 
533  template<typename MatrixT, typename VectorT>
534  void inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, viennacl::linalg::unit_lower_tag)
535  {
536  lower_inplace_solve_vector(A, b, A_size, true);
537  }
538 
539  template<typename MatrixT, typename VectorT>
540  void inplace_solve_vector(MatrixT & A, VectorT & b, vcl_size_t A_size, viennacl::linalg::lower_tag)
541  {
542  lower_inplace_solve_vector(A, b, A_size, false);
543  }
544 
545 }
546 
547 template<typename NumericT, typename SolverTagT>
548 void inplace_solve(const matrix_base<NumericT> & mat, bool trans_mat,
549  vector_base<NumericT> & vec,
550  SolverTagT)
551 {
552  typedef NumericT value_type;
553 
554  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
555  value_type * data_v = detail::extract_raw_pointer<value_type>(vec);
556 
557  vcl_size_t A_start1 = viennacl::traits::start1(mat);
558  vcl_size_t A_start2 = viennacl::traits::start2(mat);
561  vcl_size_t A_size2 = viennacl::traits::size2(mat);
562  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
563  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
564 
567 
568  if (mat.row_major())
569  {
570  if (trans_mat)
571  {
572  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
573  detail::vector_array_wrapper<value_type> wrapper_v(data_v, start1, inc1);
574 
575  detail::inplace_solve_vector(wrapper_A, wrapper_v, A_size2, SolverTagT());
576  }
577  else
578  {
579  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
580  detail::vector_array_wrapper<value_type> wrapper_v(data_v, start1, inc1);
581 
582  detail::inplace_solve_vector(wrapper_A, wrapper_v, A_size2, SolverTagT());
583  }
584  }
585  else
586  {
587  if (trans_mat)
588  {
589  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
590  detail::vector_array_wrapper<value_type> wrapper_v(data_v, start1, inc1);
591 
592  detail::inplace_solve_vector(wrapper_A, wrapper_v, A_size2, SolverTagT());
593  }
594  else
595  {
596  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
597  detail::vector_array_wrapper<value_type> wrapper_v(data_v, start1, inc1);
598 
599  detail::inplace_solve_vector(wrapper_A, wrapper_v, A_size2, SolverTagT());
600  }
601  }
602 }
603 
604 
605 } // namespace host_based
606 } // namespace linalg
607 } // namespace viennacl
608 
609 #endif
Helper class for accessing a strided subvector of a larger vector.
Definition: common.hpp:50
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
Implementation of the dense matrix class.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
void lower_inplace_solve_vector(MatrixT &A, VectorT &b, vcl_size_t A_size, bool unit_diagonal)
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a lower triangular matrix.
Definition: forwards.h:809
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
void inplace_solve(const matrix_base< NumericT > &A, bool trans_A, matrix_base< NumericT > &B, bool trans_B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
void upper_inplace_solve_vector(MatrixT &A, VectorT &b, vcl_size_t A_size, bool unit_diagonal)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
void lower_inplace_solve_matrix(MatrixT1 &A, MatrixT2 &B, vcl_size_t A_size, vcl_size_t B_size, bool unit_diagonal)
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
Helper array for accessing a strided submatrix embedded in a larger matrix.
Definition: common.hpp:73
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
std::size_t vcl_size_t
Definition: forwards.h:74
Common routines for single-threaded or OpenMP-enabled execution on CPU.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:239
void upper_inplace_solve_matrix(MatrixT1 &A, MatrixT2 &B, vcl_size_t A_size, vcl_size_t B_size, bool unit_diagonal)
void inplace_solve_vector(MatrixT &A, VectorT &b, vcl_size_t A_size, viennacl::linalg::unit_upper_tag)
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
A tag class representing transposed matrices.
Definition: forwards.h:219
void inplace_solve_matrix(MatrixT1 &A, MatrixT2 &B, vcl_size_t A_size, vcl_size_t B_size, viennacl::linalg::unit_upper_tag)
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:824