ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hyb_matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_HYB_MATRIX_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_HYB_MATRIX_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
10 
13 namespace viennacl
14 {
15 namespace linalg
16 {
17 namespace opencl
18 {
19 namespace kernels
20 {
21 
23 
/** @brief Appends the OpenCL source of the kernel 'vec_mul' to the supplied string.
*
* The emitted kernel lets each work item walk over rows (starting at its global id and
* advancing by the global size). For every row it accumulates contributions read through
* the ell_coords/ell_elements arrays (skipping entries whose value equals zero), then
* contributions read through the csr_rows/csr_cols/csr_elements arrays, and finally writes
* the sum to 'result'. Entries of 'x' and 'result' are addressed as index * layout.y + layout.x.
*
* @param source          String-like object providing append(); the kernel text is appended to it
* @param numeric_string  Name of the scalar type spliced into the kernel text
*/
template<typename StringT>
void generate_hyb_vec_mul(StringT & source, std::string const & numeric_string)
{
  // --- kernel signature ---
  source.append("__kernel void vec_mul( \n"
                " const __global int* ell_coords, \n"
                " const __global ");
  source.append(numeric_string);
  source.append("* ell_elements, \n"
                " const __global uint* csr_rows, \n"
                " const __global uint* csr_cols, \n"
                " const __global ");
  source.append(numeric_string);
  source.append("* csr_elements, \n"
                " const __global ");
  source.append(numeric_string);
  source.append(" * x, \n"
                " uint4 layout_x, \n"
                " __global ");
  source.append(numeric_string);
  source.append(" * result, \n"
                " uint4 layout_result, \n"
                " unsigned int row_num, \n"
                " unsigned int internal_row_num, \n"
                " unsigned int items_per_row, \n"
                " unsigned int aligned_items_per_row) \n"
                "{ \n"
                " uint glb_id = get_global_id(0); \n"
                " uint glb_sz = get_global_size(0); \n");

  // --- loop over the rows handled by this work item ---
  source.append(" for (uint row_id = glb_id; row_id < row_num; row_id += glb_sz) { \n"
                " ");
  source.append(numeric_string);
  source.append(" sum = 0; \n");

  // --- contributions addressed through ell_coords / ell_elements ---
  source.append(" uint offset = row_id; \n"
                " for (uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num) { \n"
                " ");
  source.append(numeric_string);
  source.append(" val = ell_elements[offset]; \n"
                " if (val != (");
  source.append(numeric_string);
  source.append(")0) { \n"
                " int col = ell_coords[offset]; \n"
                " sum += (x[col * layout_x.y + layout_x.x] * val); \n"
                " } \n"
                " } \n");

  // --- contributions addressed through csr_rows / csr_cols / csr_elements ---
  source.append(" uint col_begin = csr_rows[row_id]; \n"
                " uint col_end = csr_rows[row_id + 1]; \n"
                " for (uint item_id = col_begin; item_id < col_end; item_id++) { \n"
                " sum += (x[csr_cols[item_id] * layout_x.y + layout_x.x] * csr_elements[item_id]); \n"
                " } \n");

  // --- write back and close the row loop and the kernel ---
  source.append(" result[row_id * layout_result.y + layout_result.x] = sum; \n"
                " } \n"
                "} \n");
}
70 
71 namespace detail
72 {
73  template<typename StringT>
74  void generate_hyb_matrix_dense_matrix_mul(StringT & source, std::string const & numeric_string,
75  bool B_transposed, bool B_row_major, bool C_row_major)
76  {
77  source.append("__kernel void ");
78  source.append(viennacl::linalg::opencl::detail::sparse_dense_matmult_kernel_name(B_transposed, B_row_major, C_row_major));
79  source.append("( \n");
80  source.append(" const __global int* ell_coords, \n");
81  source.append(" const __global "); source.append(numeric_string); source.append("* ell_elements, \n");
82  source.append(" const __global uint* csr_rows, \n");
83  source.append(" const __global uint* csr_cols, \n");
84  source.append(" const __global "); source.append(numeric_string); source.append("* csr_elements, \n");
85  source.append(" unsigned int row_num, \n");
86  source.append(" unsigned int internal_row_num, \n");
87  source.append(" unsigned int items_per_row, \n");
88  source.append(" unsigned int aligned_items_per_row, \n");
89  source.append(" __global const "); source.append(numeric_string); source.append("* d_mat, \n");
90  source.append(" unsigned int d_mat_row_start, \n");
91  source.append(" unsigned int d_mat_col_start, \n");
92  source.append(" unsigned int d_mat_row_inc, \n");
93  source.append(" unsigned int d_mat_col_inc, \n");
94  source.append(" unsigned int d_mat_row_size, \n");
95  source.append(" unsigned int d_mat_col_size, \n");
96  source.append(" unsigned int d_mat_internal_rows, \n");
97  source.append(" unsigned int d_mat_internal_cols, \n");
98  source.append(" __global "); source.append(numeric_string); source.append(" * result, \n");
99  source.append(" unsigned int result_row_start, \n");
100  source.append(" unsigned int result_col_start, \n");
101  source.append(" unsigned int result_row_inc, \n");
102  source.append(" unsigned int result_col_inc, \n");
103  source.append(" unsigned int result_row_size, \n");
104  source.append(" unsigned int result_col_size, \n");
105  source.append(" unsigned int result_internal_rows, \n");
106  source.append(" unsigned int result_internal_cols) { \n");
107 
108  source.append(" uint glb_id = get_global_id(0); \n");
109  source.append(" uint glb_sz = get_global_size(0); \n");
110 
111  source.append(" for (uint result_col = 0; result_col < result_col_size; ++result_col) { \n");
112  source.append(" for (uint row_id = glb_id; row_id < row_num; row_id += glb_sz) { \n");
113  source.append(" "); source.append(numeric_string); source.append(" sum = 0; \n");
114 
115  source.append(" uint offset = row_id; \n");
116  source.append(" for (uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num) { \n");
117  source.append(" "); source.append(numeric_string); source.append(" val = ell_elements[offset]; \n");
118 
119  source.append(" if (val != ("); source.append(numeric_string); source.append(")0) { \n");
120  source.append(" int col = ell_coords[offset]; \n");
121  if (B_transposed && B_row_major)
122  source.append(" sum += d_mat[ (d_mat_row_start + result_col * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + col * d_mat_col_inc ] * val; \n");
123  else if (B_transposed && !B_row_major)
124  source.append(" sum += d_mat[ (d_mat_row_start + result_col * d_mat_row_inc) + (d_mat_col_start + col * d_mat_col_inc) * d_mat_internal_rows ] * val; \n");
125  else if (!B_transposed && B_row_major)
126  source.append(" sum += d_mat[ (d_mat_row_start + col * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + result_col * d_mat_col_inc ] * val; \n");
127  else
128  source.append(" sum += d_mat[ (d_mat_row_start + col * d_mat_row_inc) + (d_mat_col_start + result_col * d_mat_col_inc) * d_mat_internal_rows ] * val; \n");
129  source.append(" } \n");
130 
131  source.append(" } \n");
132 
133  source.append(" uint col_begin = csr_rows[row_id]; \n");
134  source.append(" uint col_end = csr_rows[row_id + 1]; \n");
135 
136  source.append(" for (uint item_id = col_begin; item_id < col_end; item_id++) { \n");
137  if (B_transposed && B_row_major)
138  source.append(" sum += d_mat[ (d_mat_row_start + result_col * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + csr_cols[item_id] * d_mat_col_inc ] * csr_elements[item_id]; \n");
139  else if (B_transposed && !B_row_major)
140  source.append(" sum += d_mat[ (d_mat_row_start + result_col * d_mat_row_inc) + (d_mat_col_start + csr_cols[item_id] * d_mat_col_inc) * d_mat_internal_rows ] * csr_elements[item_id]; \n");
141  else if (!B_transposed && B_row_major)
142  source.append(" sum += d_mat[ (d_mat_row_start + csr_cols[item_id] * d_mat_row_inc) * d_mat_internal_cols + d_mat_col_start + result_col * d_mat_col_inc ] * csr_elements[item_id]; \n");
143  else
144  source.append(" sum += d_mat[ (d_mat_row_start + csr_cols[item_id] * d_mat_row_inc) + (d_mat_col_start + result_col * d_mat_col_inc) * d_mat_internal_rows ] * csr_elements[item_id]; \n");
145  source.append(" } \n");
146 
147  if (C_row_major)
148  source.append(" result[ (result_row_start + row_id * result_row_inc) * result_internal_cols + result_col_start + result_col * result_col_inc ] = sum; \n");
149  else
150  source.append(" result[ (result_row_start + row_id * result_row_inc) + (result_col_start + result_col * result_col_inc) * result_internal_rows ] = sum; \n");
151  source.append(" } \n");
152  source.append(" } \n");
153  source.append("} \n");
154  }
155 }
156 
157 template<typename StringT>
158 void generate_hyb_matrix_dense_matrix_multiplication(StringT & source, std::string const & numeric_string)
159 {
160  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, false, false, false);
161  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, false, false, true);
162  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, false, true, false);
163  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, false, true, true);
164 
165  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, true, false, false);
166  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, true, false, true);
167  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, true, true, false);
168  detail::generate_hyb_matrix_dense_matrix_mul(source, numeric_string, true, true, true);
169 }
170 
172 
173 // main kernel class
175 template<typename NumericT>
177 {
178  static std::string program_name()
179  {
180  return viennacl::ocl::type_to_string<NumericT>::apply() + "_hyb_matrix";
181  }
182 
183  static void init(viennacl::ocl::context & ctx)
184  {
185  static std::map<cl_context, bool> init_done;
186  if (!init_done[ctx.handle().get()])
187  {
189  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
190 
191  std::string source;
192  source.reserve(1024);
193 
194  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
195 
196  generate_hyb_vec_mul(source, numeric_string);
197  generate_hyb_matrix_dense_matrix_multiplication(source, numeric_string);
198 
199  std::string prog_name = program_name();
200  #ifdef VIENNACL_BUILD_INFO
201  std::cout << "Creating program " << prog_name << std::endl;
202  #endif
203  ctx.add_program(source, prog_name);
204  init_done[ctx.handle().get()] = true;
205  } //if
206  } //init
207 };
208 
209 } // namespace kernels
210 } // namespace opencl
211 } // namespace linalg
212 } // namespace viennacl
213 #endif
214 
Implements an OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
Definition: common.hpp:70
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:613
Common implementations shared by OpenCL-based operations.
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_hyb_matrix_dense_matrix_mul(StringT &source, std::string const &numeric_string, bool B_transposed, bool B_row_major, bool C_row_major)
Definition: hyb_matrix.hpp:74
Representation of an OpenCL kernel in ViennaCL.
static void init(viennacl::ocl::context &ctx)
Definition: hyb_matrix.hpp:183
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_hyb_vec_mul(StringT &source, std::string const &numeric_string)
Definition: hyb_matrix.hpp:25
void generate_hyb_matrix_dense_matrix_multiplication(StringT &source, std::string const &numeric_string)
Definition: hyb_matrix.hpp:158
Main kernel class for generating OpenCL kernels for hyb_matrix.
Definition: hyb_matrix.hpp:176