ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_DIRECT_SOLVE_HPP
2 #define VIENNACL_LINALG_CUDA_DIRECT_SOLVE_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/vector.hpp"
27 #include "viennacl/matrix.hpp"
28 
29 
31 
32 
33 namespace viennacl
34 {
35 namespace linalg
36 {
37 namespace cuda
38 {
39 
40 template<typename NumericT>
42  const NumericT * A,
43  unsigned int A_start1, unsigned int A_start2,
44  unsigned int A_inc1, unsigned int A_inc2,
45  unsigned int A_size1, unsigned int A_size2,
46  unsigned int A_internal_size1, unsigned int A_internal_size2,
47  bool row_major_A,
48  bool transpose_A,
49 
50  NumericT * B,
51  unsigned int B_start1, unsigned int B_start2,
52  unsigned int B_inc1, unsigned int B_inc2,
53  unsigned int B_size1, unsigned int B_size2,
54  unsigned int B_internal_size1, unsigned int B_internal_size2,
55  bool row_major_B,
56  bool transpose_B,
57 
58  bool unit_diagonal)
59 {
60  NumericT temp;
61  NumericT entry_A;
62 
63  for (unsigned int row_cnt = 0; row_cnt < A_size1; ++row_cnt)
64  {
65  unsigned int row = A_size1 - 1 - row_cnt;
66 
67  if (!unit_diagonal)
68  {
69  __syncthreads();
70 
71  if (threadIdx.x == 0)
72  {
73  if (row_major_B && transpose_B)
74  B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (row * B_inc2 + B_start2)] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
75  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
76  else if (row_major_B && !transpose_B)
77  B[(row * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
78  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
79  else if (!row_major_B && transpose_B)
80  B[(blockIdx.x * B_inc1 + B_start1) + (row * B_inc2 + B_start2) * B_internal_size1] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
81  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
82  else //if (!row_major_B && !transpose_B)
83  B[(row * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
84  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
85  }
86  }
87 
88  __syncthreads();
89 
90  if (row_major_B && transpose_B)
91  temp = B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (row * B_inc2 + B_start2)];
92  else if (row_major_B && !transpose_B)
93  temp = B[(row * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)];
94  else if (!row_major_B && transpose_B)
95  temp = B[(blockIdx.x * B_inc1 + B_start1) + (row * B_inc2 + B_start2) * B_internal_size1];
96  else //if (!row_major_B && !transpose_B)
97  temp = B[(row * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1];
98 
99  //eliminate column of op(A) with index 'row' in parallel: " << std::endl;
100  for (unsigned int elim = threadIdx.x; elim < row; elim += blockDim.x)
101  {
102  if (row_major_A && transpose_A)
103  entry_A = A[(row * A_inc1 + A_start1) * A_internal_size2 + (elim * A_inc2 + A_start2)];
104  else if (row_major_A && !transpose_A)
105  entry_A = A[(elim * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)];
106  else if (!row_major_A && transpose_A)
107  entry_A = A[(row * A_inc1 + A_start1) + (elim * A_inc2 + A_start2) * A_internal_size1];
108  else //if (!row_major_A && !transpose_A)
109  entry_A = A[(elim * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1];
110 
111  if (row_major_B && transpose_B)
112  B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (elim * B_inc2 + B_start2)] -= temp * entry_A;
113  else if (row_major_B && !transpose_B)
114  B[(elim * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)] -= temp * entry_A;
115  else if (!row_major_B && transpose_B)
116  B[(blockIdx.x * B_inc1 + B_start1) + (elim * B_inc2 + B_start2) * B_internal_size1] -= temp * entry_A;
117  else //if (!row_major_B && !transpose_B)
118  B[(elim * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1] -= temp * entry_A;
119 
120  }
121  }
122 }
123 
124 
125 
126 template<typename NumericT>
128  const NumericT * A,
129  unsigned int A_start1, unsigned int A_start2,
130  unsigned int A_inc1, unsigned int A_inc2,
131  unsigned int A_size1, unsigned int A_size2,
132  unsigned int A_internal_size1, unsigned int A_internal_size2,
133  bool row_major_A,
134  bool transpose_A,
135 
136  NumericT * B,
137  unsigned int B_start1, unsigned int B_start2,
138  unsigned int B_inc1, unsigned int B_inc2,
139  unsigned int B_size1, unsigned int B_size2,
140  unsigned int B_internal_size1, unsigned int B_internal_size2,
141  bool row_major_B,
142  bool transpose_B,
143 
144  bool unit_diagonal)
145 {
146  NumericT temp;
147  NumericT entry_A;
148 
149  for (unsigned int row = 0; row < A_size1; ++row)
150  {
151 
152  if (!unit_diagonal)
153  {
154  __syncthreads();
155 
156  if (threadIdx.x == 0)
157  {
158  if (row_major_B && transpose_B)
159  B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (row * B_inc2 + B_start2)] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
160  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
161  else if (row_major_B && !transpose_B)
162  B[(row * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
163  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
164  else if (!row_major_B && transpose_B)
165  B[(blockIdx.x * B_inc1 + B_start1) + (row * B_inc2 + B_start2) * B_internal_size1] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
166  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
167  else //if (!row_major_B && !transpose_B)
168  B[(row * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1] /= (row_major_A) ? A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]
169  : A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1];
170  }
171  }
172 
173  __syncthreads();
174 
175  if (row_major_B && transpose_B)
176  temp = B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (row * B_inc2 + B_start2)];
177  else if (row_major_B && !transpose_B)
178  temp = B[(row * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)];
179  else if (!row_major_B && transpose_B)
180  temp = B[(blockIdx.x * B_inc1 + B_start1) + (row * B_inc2 + B_start2) * B_internal_size1];
181  else //if (!row_major_B && !transpose_B)
182  temp = B[(row * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1];
183 
184  //eliminate column of op(A) with index 'row' in parallel: " << std::endl;
185  for (unsigned int elim = row + threadIdx.x + 1; elim < A_size1; elim += blockDim.x)
186  {
187  if (row_major_A && transpose_A)
188  entry_A = A[(row * A_inc1 + A_start1) * A_internal_size2 + (elim * A_inc2 + A_start2)];
189  else if (row_major_A && !transpose_A)
190  entry_A = A[(elim * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)];
191  else if (!row_major_A && transpose_A)
192  entry_A = A[(row * A_inc1 + A_start1) + (elim * A_inc2 + A_start2) * A_internal_size1];
193  else //if (!row_major_A && !transpose_A)
194  entry_A = A[(elim * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1];
195 
196  if (row_major_B && transpose_B)
197  B[(blockIdx.x * B_inc1 + B_start1) * B_internal_size2 + (elim * B_inc2 + B_start2)] -= temp * entry_A;
198  else if (row_major_B && !transpose_B)
199  B[(elim * B_inc1 + B_start1) * B_internal_size2 + (blockIdx.x * B_inc2 + B_start2)] -= temp * entry_A;
200  else if (!row_major_B && transpose_B)
201  B[(blockIdx.x * B_inc1 + B_start1) + (elim * B_inc2 + B_start2) * B_internal_size1] -= temp * entry_A;
202  else //if (!row_major_B && !transpose_B)
203  B[(elim * B_inc1 + B_start1) + (blockIdx.x * B_inc2 + B_start2) * B_internal_size1] -= temp * entry_A;
204 
205  }
206  }
207 }
208 
209 
210 
211 
212 
213 
214 namespace detail
215 {
216  template<typename TagT>
217  bool is_unit_solve(TagT const & tag) { return false; }
218 
219  inline bool is_unit_solve(viennacl::linalg::unit_lower_tag) { return true; }
220  inline bool is_unit_solve(viennacl::linalg::unit_upper_tag) { return true; }
221 
222  template<typename TagT>
223  bool is_upper_solve(TagT const & tag) { return false; }
224 
225  inline bool is_upper_solve(viennacl::linalg::upper_tag) { return true; }
226  inline bool is_upper_solve(viennacl::linalg::unit_upper_tag) { return true; }
227 
228  template<typename Matrix1T, typename Matrix2T, typename SolverTagT>
229  void inplace_solve_impl(Matrix1T const & A, bool transpose_A,
230  Matrix2T & B, bool transpose_B,
231  SolverTagT const & tag)
232  {
233  typedef typename viennacl::result_of::cpu_value_type<Matrix1T>::type value_type;
234 
235  dim3 threads(128);
236  dim3 grid( transpose_B ? B.size1() : B.size2() );
237 
238  if (is_upper_solve(tag))
239  {
240  matrix_matrix_upper_solve_kernel<<<grid,threads>>>(detail::cuda_arg<value_type>(A),
241  static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)),
242  static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)),
243  static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)),
244  static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)),
245  bool(A.row_major()),
246  transpose_A,
247 
248  detail::cuda_arg<value_type>(B),
249  static_cast<unsigned int>(viennacl::traits::start1(B)), static_cast<unsigned int>(viennacl::traits::start2(B)),
250  static_cast<unsigned int>(viennacl::traits::stride1(B)), static_cast<unsigned int>(viennacl::traits::stride2(B)),
251  static_cast<unsigned int>(viennacl::traits::size1(B)), static_cast<unsigned int>(viennacl::traits::size2(B)),
252  static_cast<unsigned int>(viennacl::traits::internal_size1(B)), static_cast<unsigned int>(viennacl::traits::internal_size2(B)),
253  bool(B.row_major()),
254  transpose_B,
255 
256  is_unit_solve(tag)
257  );
258  }
259  else
260  {
261  matrix_matrix_lower_solve_kernel<<<grid,threads>>>(detail::cuda_arg<value_type>(A),
262  static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)),
263  static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)),
264  static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)),
265  static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)),
266  bool(A.row_major()),
267  transpose_A,
268 
269  detail::cuda_arg<value_type>(B),
270  static_cast<unsigned int>(viennacl::traits::start1(B)), static_cast<unsigned int>(viennacl::traits::start2(B)),
271  static_cast<unsigned int>(viennacl::traits::stride1(B)), static_cast<unsigned int>(viennacl::traits::stride2(B)),
272  static_cast<unsigned int>(viennacl::traits::size1(B)), static_cast<unsigned int>(viennacl::traits::size2(B)),
273  static_cast<unsigned int>(viennacl::traits::internal_size1(B)), static_cast<unsigned int>(viennacl::traits::internal_size2(B)),
274  bool(B.row_major()),
275  transpose_B,
276 
277  is_unit_solve(tag)
278  );
279  }
280 
281  }
282 }
283 
284 
285 //
286 // Note: By convention, all size checks are performed in the calling frontend. No need to double-check here.
287 //
288 
290 
298 template<typename NumericT, typename SolverTagT>
299 void inplace_solve(const matrix_base<NumericT> & A, bool trans_A,
300  matrix_base<NumericT> & B, bool trans_B,
301  SolverTagT tag)
302 {
303  detail::inplace_solve_impl(A, trans_A,
304  B, trans_B, tag);
305 }
306 
307 
308 //
309 // Solve on vector
310 //
311 
312 template<typename NumericT>
314  NumericT const * A,
315  unsigned int A_start1, unsigned int A_start2,
316  unsigned int A_inc1, unsigned int A_inc2,
317  unsigned int A_size1, unsigned int A_size2,
318  unsigned int A_internal_size1, unsigned int A_internal_size2,
319  NumericT * v,
320  unsigned int v_start,
321  unsigned int v_inc,
322  unsigned int v_size,
323 
324  unsigned int options)
325 {
326  NumericT temp;
327  unsigned int unit_diagonal_flag = (options & (1 << 0));
328  unsigned int transposed_access_A = (options & (1 << 1));
329  unsigned int is_lower_solve = (options & (1 << 2));
330  unsigned int row;
331  for (unsigned int rows_processed = 0; rows_processed < A_size1; ++rows_processed) //Note: A required to be square
332  {
333  row = is_lower_solve ? rows_processed : ((A_size1 - rows_processed) - 1);
334  if (!unit_diagonal_flag)
335  {
336  __syncthreads();
337  if (threadIdx.x == 0)
338  v[row * v_inc + v_start] /= A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)];
339  }
340 
341  __syncthreads();
342 
343  temp = v[row * v_inc + v_start];
344 
345  for (int elim = (is_lower_solve ? (row + threadIdx.x + 1) : threadIdx.x);
346  elim < (is_lower_solve ? A_size1 : row);
347  elim += blockDim.x)
348  v[elim * v_inc + v_start] -= temp * A[transposed_access_A ? ((row * A_inc1 + A_start1) * A_internal_size2 + (elim * A_inc2 + A_start2))
349  : ((elim * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2))];
350  }
351 }
352 
353 
354 template<typename NumericT>
356  NumericT const * A,
357  unsigned int A_start1, unsigned int A_start2,
358  unsigned int A_inc1, unsigned int A_inc2,
359  unsigned int A_size1, unsigned int A_size2,
360  unsigned int A_internal_size1, unsigned int A_internal_size2,
361  NumericT * v,
362  unsigned int v_start,
363  unsigned int v_inc,
364  unsigned int v_size,
365  unsigned int options)
366 {
367  NumericT temp;
368  unsigned int unit_diagonal_flag = (options & (1 << 0));
369  unsigned int transposed_access_A = (options & (1 << 1));
370  unsigned int is_lower_solve = (options & (1 << 2));
371  unsigned int row;
372  for (unsigned int rows_processed = 0; rows_processed < A_size1; ++rows_processed) //Note: A required to be square
373  {
374  row = is_lower_solve ? rows_processed : ((A_size1 - rows_processed) - 1);
375  if (!unit_diagonal_flag)
376  {
377  __syncthreads();
378  if (threadIdx.x == 0)
379  v[row * v_inc + v_start] /= A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1];
380  }
381 
382  __syncthreads();
383 
384  temp = v[row * v_inc + v_start];
385 
386  for (int elim = (is_lower_solve ? (row + threadIdx.x + 1) : threadIdx.x);
387  elim < (is_lower_solve ? A_size1 : row);
388  elim += blockDim.x)
389  v[elim * v_inc + v_start] -= temp * A[transposed_access_A ? ((row * A_inc1 + A_start1) + (elim * A_inc2 + A_start2) * A_internal_size1)
390  : ((elim * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1)];
391  }
392 }
393 
394 
395 namespace detail
396 {
397  inline unsigned int get_option_for_solver_tag(viennacl::linalg::upper_tag) { return 0; }
398  inline unsigned int get_option_for_solver_tag(viennacl::linalg::unit_upper_tag) { return (1 << 0); }
399  inline unsigned int get_option_for_solver_tag(viennacl::linalg::lower_tag) { return (1 << 2); }
400  inline unsigned int get_option_for_solver_tag(viennacl::linalg::unit_lower_tag) { return (1 << 2) | (1 << 0); }
401 
402  template<typename MatrixT, typename VectorT>
403  void inplace_solve_vector_impl(MatrixT const & mat,
404  VectorT & vec,
405  unsigned int options)
406  {
407  typedef typename viennacl::result_of::cpu_value_type<MatrixT>::type value_type;
408 
409  if (mat.row_major())
410  {
411  triangular_substitute_inplace_row_kernel<<<1, 128>>>(detail::cuda_arg<value_type>(mat),
412  static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)),
413  static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)),
414  static_cast<unsigned int>(viennacl::traits::size1(mat)), static_cast<unsigned int>(viennacl::traits::size2(mat)),
415  static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)),
416  detail::cuda_arg<value_type>(vec),
417  static_cast<unsigned int>(viennacl::traits::start(vec)),
418  static_cast<unsigned int>(viennacl::traits::stride(vec)),
419  static_cast<unsigned int>(viennacl::traits::size(vec)),
420  options
421  );
422  }
423  else
424  {
425  triangular_substitute_inplace_col_kernel<<<1, 128>>>(detail::cuda_arg<value_type>(mat),
426  static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)),
427  static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)),
428  static_cast<unsigned int>(viennacl::traits::size1(mat)), static_cast<unsigned int>(viennacl::traits::size2(mat)),
429  static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)),
430  detail::cuda_arg<value_type>(vec),
431  static_cast<unsigned int>(viennacl::traits::start(vec)),
432  static_cast<unsigned int>(viennacl::traits::stride(vec)),
433  static_cast<unsigned int>(viennacl::traits::size(vec)),
434  options
435  );
436  }
437  }
438 
439 }
440 
447 template<typename NumericT, typename SolverTagT>
448 void inplace_solve(const matrix_base<NumericT> & mat, bool trans_mat,
449  vector_base<NumericT> & vec,
450  SolverTagT)
451 {
452  unsigned int options = detail::get_option_for_solver_tag(SolverTagT());
453  if (trans_mat)
454  options |= 0x02;
455 
456  detail::inplace_solve_vector_impl(mat, vec, options);
457 }
458 
459 
460 }
461 }
462 }
463 
464 #endif
void inplace_solve_impl(Matrix1T const &A, bool transpose_A, Matrix2T &B, bool transpose_B, SolverTagT const &tag)
void inplace_solve_vector_impl(MatrixT const &mat, VectorT &vec, unsigned int options)
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
__global__ void triangular_substitute_inplace_row_kernel(NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
Implementation of the dense matrix class.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a lower triangular matrix.
Definition: forwards.h:809
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
__global__ void matrix_matrix_upper_solve_kernel(const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
bool is_unit_solve(TagT const &tag)
__global__ void matrix_matrix_lower_solve_kernel(const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
void inplace_solve(const matrix_base< NumericT > &A, bool trans_A, matrix_base< NumericT > &B, bool trans_B, SolverTagT tag)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
__global__ void triangular_substitute_inplace_col_kernel(NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:238
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
Common routines for CUDA execution.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
unsigned int get_option_for_solver_tag(viennacl::linalg::upper_tag)
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:824
bool is_upper_solve(TagT const &tag)