ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
2 #define VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
#include <sstream>
#include <string>

#include "viennacl/vector.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/ocl/kernel.hpp"
#include "viennacl/ocl/device.hpp"
#include "viennacl/ocl/handle.hpp"
#include "viennacl/linalg/opencl/kernels/matrix_solve.hpp"
31 
32 namespace viennacl
33 {
34 namespace linalg
35 {
36 namespace opencl
37 {
38 
39 namespace detail
40 {
42  inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_upper_tag) { return (1 << 0); }
43  inline cl_uint get_option_for_solver_tag(viennacl::linalg::lower_tag) { return (1 << 2); }
44  inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_lower_tag) { return (1 << 2) | (1 << 0); }
45 
46  template<typename MatrixT1, typename MatrixT2, typename KernelT>
47  void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, KernelT & k)
48  {
49  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
50  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
52  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
54  viennacl::traits::opencl_handle(B),
55  cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
57  cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
59  )
60  );
61  }
62 }
63 
64 
65 //
66 // Note: By convention, all size checks are performed in the calling frontend. No need to double-check here.
67 //
68 
70 
77 template<typename NumericT, typename SolverTagT>
78 void inplace_solve(const matrix_base<NumericT> & A, bool A_trans,
79  matrix_base<NumericT> & B, bool B_trans,
80  SolverTagT)
81 {
82  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
83 
84  std::string program_name;
85  if (A.row_major() && B.row_major())
86  {
88  KernelClass::init(ctx);
89  program_name = KernelClass::program_name();
90  }
91  else if (A.row_major() && !B.row_major())
92  {
94  KernelClass::init(ctx);
95  program_name = KernelClass::program_name();
96  }
97  else if (!A.row_major() && B.row_major())
98  {
100  KernelClass::init(ctx);
101  program_name = KernelClass::program_name();
102  }
103  else
104  {
106  KernelClass::init(ctx);
107  program_name = KernelClass::program_name();
108  }
109 
110  std::stringstream ss;
111  if (A_trans) ss << "trans_";
112  ss << SolverTagT::name();
113  if (B_trans) ss << "_trans";
114  ss << "_solve";
115 
116  viennacl::ocl::kernel & k = ctx.get_kernel(program_name, ss.str());
117 
118  if (B_trans)
119  k.global_work_size(0, B.size1() * k.local_work_size());
120  else
121  k.global_work_size(0, B.size2() * k.local_work_size());
123 }
124 
125 
126 
127 //
128 // Solve on vector
129 //
130 
131 template<typename NumericT, typename SOLVERTAG>
132 void inplace_solve(matrix_base<NumericT> const & A, bool A_trans,
134  SOLVERTAG)
135 {
136  cl_uint options = detail::get_option_for_solver_tag(SOLVERTAG());
137  if (A_trans)
138  options |= 0x02;
139 
140  viennacl::ocl::kernel & k = detail::legacy_kernel_for_matrix(A, "triangular_substitute_inplace");
141 
143  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
144  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
145  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
146  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
148  viennacl::traits::opencl_handle(x),
149  cl_uint(viennacl::traits::start(x)),
150  cl_uint(viennacl::traits::stride(x)),
151  cl_uint(viennacl::traits::size(x)),
152  options
153  )
154  );
155 }
156 
157 } //namespace opencl
158 } //namespace linalg
159 } //namespace viennacl
160 
161 #endif
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Implementation of the dense matrix class.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix.
Definition: size.hpp:279
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a lower triangular matrix.
Definition: forwards.h:809
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix.
Definition: size.hpp:287
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
Definition: common.hpp:43
cl_uint get_option_for_solver_tag(viennacl::linalg::upper_tag)
OpenCL kernel file for dense matrix solves with multiple right hand side (BLAS level 3) ...
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:245
void inplace_solve_impl(MatrixT1 const &A, MatrixT2 &B, KernelT &k)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
void inplace_solve(const matrix_base< NumericT > &A, bool A_trans, matrix_base< NumericT > &B, bool B_trans, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
Main kernel class for the generation of matrix solve kernels.
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:217
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:215
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:239
void init()
Definition: Random.hpp:25
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:824