ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
scalar.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
11 namespace viennacl
12 {
13 namespace linalg
14 {
15 namespace opencl
16 {
17 namespace kernels
18 {
19 
21 
24 {
25  VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute
28 };
29 
32 {
34 
36  std::string assign_op;
39 };
40 
41 // just returns the assignment string
42 template<typename StringT>
43 void generate_asbs_impl3(StringT & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta)
44 {
45  source.append(" *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 ");
46  if (mult_alpha)
47  source.append("* alpha ");
48  else
49  source.append("/ alpha ");
50  if (cfg.b != VIENNACL_ASBS_NONE)
51  {
52  source.append(1, sign_b); source.append(" *s3 ");
53  if (mult_beta)
54  source.append("* beta");
55  else
56  source.append("/ beta");
57  }
58  source.append("; \n");
59 }
60 
61 template<typename StringT>
62 void generate_asbs_impl2(StringT & source, char sign_a, char sign_b, asbs_config const & cfg)
63 {
64  source.append(" if (options2 & (1 << 1)) { \n");
65  if (cfg.b != VIENNACL_ASBS_NONE)
66  {
67  source.append(" if (options3 & (1 << 1)) \n");
68  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false);
69  source.append(" else \n");
70  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
71  }
72  else
73  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
74  source.append(" } else { \n");
75  if (cfg.b != VIENNACL_ASBS_NONE)
76  {
77  source.append(" if (options3 & (1 << 1)) \n");
78  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false);
79  source.append(" else \n");
80  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
81  }
82  else
83  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
84  source.append(" } \n");
85 
86 }
87 
88 template<typename StringT>
89 void generate_asbs_impl(StringT & source, std::string const & numeric_string, asbs_config const & cfg)
90 {
91  source.append("__kernel void as");
92  if (cfg.b != VIENNACL_ASBS_NONE)
93  source.append("bs");
94  if (cfg.assign_op != "=")
95  source.append("_s");
96 
97  if (cfg.a == VIENNACL_ASBS_CPU)
98  source.append("_cpu");
99  else if (cfg.a == VIENNACL_ASBS_GPU)
100  source.append("_gpu");
101 
102  if (cfg.b == VIENNACL_ASBS_CPU)
103  source.append("_cpu");
104  else if (cfg.b == VIENNACL_ASBS_GPU)
105  source.append("_gpu");
106  source.append("( \n");
107  source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n");
108  source.append(" \n");
109  if (cfg.a == VIENNACL_ASBS_CPU)
110  {
111  source.append(" "); source.append(numeric_string); source.append(" fac2, \n");
112  }
113  else if (cfg.a == VIENNACL_ASBS_GPU)
114  {
115  source.append(" __global "); source.append(numeric_string); source.append(" * fac2, \n");
116  }
117  source.append(" unsigned int options2, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
118  source.append(" __global const "); source.append(numeric_string); source.append(" * s2");
119 
120  if (cfg.b != VIENNACL_ASBS_NONE)
121  {
122  source.append(", \n\n");
123  if (cfg.b == VIENNACL_ASBS_CPU)
124  {
125  source.append(" "); source.append(numeric_string); source.append(" fac3, \n");
126  }
127  else if (cfg.b == VIENNACL_ASBS_GPU)
128  {
129  source.append(" __global "); source.append(numeric_string); source.append(" * fac3, \n");
130  }
131  source.append(" unsigned int options3, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
132  source.append(" __global const "); source.append(numeric_string); source.append(" * s3");
133  }
134  source.append(") \n{ \n");
135 
136  if (cfg.a == VIENNACL_ASBS_CPU)
137  {
138  source.append(" "); source.append(numeric_string); source.append(" alpha = fac2; \n");
139  }
140  else if (cfg.a == VIENNACL_ASBS_GPU)
141  {
142  source.append(" "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n");
143  }
144  source.append(" \n");
145 
146  if (cfg.b == VIENNACL_ASBS_CPU)
147  {
148  source.append(" "); source.append(numeric_string); source.append(" beta = fac3; \n");
149  }
150  else if (cfg.b == VIENNACL_ASBS_GPU)
151  {
152  source.append(" "); source.append(numeric_string); source.append(" beta = fac3[0]; \n");
153  }
154 
155  source.append(" if (options2 & (1 << 0)) { \n");
156  if (cfg.b != VIENNACL_ASBS_NONE)
157  {
158  source.append(" if (options3 & (1 << 0)) { \n");
159  generate_asbs_impl2(source, '-', '-', cfg);
160  source.append(" } else { \n");
161  generate_asbs_impl2(source, '-', '+', cfg);
162  source.append(" } \n");
163  }
164  else
165  generate_asbs_impl2(source, '-', '+', cfg);
166  source.append(" } else { \n");
167  if (cfg.b != VIENNACL_ASBS_NONE)
168  {
169  source.append(" if (options3 & (1 << 0)) { \n");
170  generate_asbs_impl2(source, '+', '-', cfg);
171  source.append(" } else { \n");
172  generate_asbs_impl2(source, '+', '+', cfg);
173  source.append(" } \n");
174  }
175  else
176  generate_asbs_impl2(source, '+', '+', cfg);
177 
178  source.append(" } \n");
179  source.append("} \n");
180 }
181 
182 template<typename StringT>
183 void generate_asbs(StringT & source, std::string const & numeric_string)
184 {
185  asbs_config cfg;
186  cfg.assign_op = "=";
187  cfg.with_stride_and_range = true;
188 
189  // as
190  cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
191  cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
192 
193  // asbs
194  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
195  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
196  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
197  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
198 
199  // asbs
200  cfg.assign_op = "+=";
201 
202  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
203  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
204  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
205  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
206 }
207 
/** Appends the source of the OpenCL kernel 'swap' to 'source'.
 *
 * The generated kernel takes two __global pointers s1 and s2 of the given numeric type
 * and exchanges the values they point to via a temporary.
 *
 * @param source          String-like object the kernel source is appended to
 * @param numeric_string  Name of the numeric type used inside the kernel
 */
template<typename StringT>
void generate_scalar_swap(StringT & source, std::string const & numeric_string)
{
  // both kernel arguments share the same declaration prefix
  std::string const global_ptr = " __global " + numeric_string + " * ";

  // signature
  source.append("__kernel void swap( \n");
  source.append(global_ptr + "s1, \n");
  source.append(global_ptr + "s2) \n");

  // body
  source.append("{ \n");
  source.append(" " + numeric_string + " tmp = *s2; \n");
  source.append(" *s2 = *s1; \n");
  source.append(" *s1 = tmp; \n");
  source.append("} \n");
}
220 
222 
223 // main kernel class
225 template<typename NumericT>
226 struct scalar
227 {
228  static std::string program_name()
229  {
231  }
232 
233  static void init(viennacl::ocl::context & ctx)
234  {
235  static std::map<cl_context, bool> init_done;
236  if (!init_done[ctx.handle().get()])
237  {
239  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
240 
241  std::string source;
242  source.reserve(8192);
243 
244  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
245 
246  // fully parametrized kernels:
247  generate_asbs(source, numeric_string);
248  generate_scalar_swap(source, numeric_string);
249 
250 
251  std::string prog_name = program_name();
252  #ifdef VIENNACL_BUILD_INFO
253  std::cout << "Creating program " << prog_name << std::endl;
254  #endif
255  ctx.add_program(source, prog_name);
256  init_done[ctx.handle().get()] = true;
257  } //if
258  } //init
259 };
260 
261 } // namespace kernels
262 } // namespace opencl
263 } // namespace linalg
264 } // namespace viennacl
265 #endif
266 
Implements an OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
void generate_asbs_impl3(StringT &source, char sign_a, char sign_b, asbs_config const &cfg, bool mult_alpha, bool mult_beta)
Definition: scalar.hpp:43
Provides OpenCL-related utilities.
void generate_asbs(StringT &source, std::string const &numeric_string)
Definition: scalar.hpp:183
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:613
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
void generate_scalar_swap(StringT &source, std::string const &numeric_string)
Definition: scalar.hpp:209
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
Main kernel class for generating OpenCL kernels for operations involving viennacl::scalar<>, but not viennacl::vector<> or viennacl::matrix<>.
Definition: scalar.hpp:226
const OCL_TYPE & get() const
Definition: handle.hpp:189
static void init(viennacl::ocl::context &ctx)
Definition: scalar.hpp:233
Configuration struct for generating OpenCL kernels for linear combinations of viennacl::scalar<> obje...
Definition: scalar.hpp:31
void generate_asbs_impl(StringT &source, std::string const &numeric_string, asbs_config const &cfg)
Definition: scalar.hpp:89
asbs_scalar_type
Enumeration for the scalar type in avbv-like operations.
Definition: scalar.hpp:23
Representation of an OpenCL kernel in ViennaCL.
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_asbs_impl2(StringT &source, char sign_a, char sign_b, asbs_config const &cfg)
Definition: scalar.hpp:62