ViennaCL - The Vienna Computing Library  1.6.0
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vector.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
3 
5 
7 
11 
12 #include "viennacl/ocl/kernel.hpp"
14 #include "viennacl/ocl/utils.hpp"
15 
17 
20 
23 namespace viennacl
24 {
25 namespace linalg
26 {
27 namespace opencl
28 {
29 namespace kernels
30 {
31 
32 template<typename NumericT, typename ScalarT>
33 static void generate_inner_prod_impl(device_specific::execution_handler & handler, std::string const & prefix, device_specific::reduction_template::parameters_type const & parameters, vcl_size_t vector_num,
34  viennacl::vector<NumericT> const * x, viennacl::vector<NumericT> const * y, ScalarT const* s)
35 {
36  namespace ds = device_specific;
37  ds::statements_container::data_type statements;
38  for (unsigned int i = 0; i < vector_num; ++i)
39  statements.push_back(scheduler::preset::inner_prod(s, x, y));
40  handler.add(prefix, ds::reduction_template(parameters), ds::statements_container(statements,ds::statements_container::INDEPENDENT));
41 }
42 
43 
44 
45 // main kernel class
47 template<typename NumericT>
48 class vector
49 {
50 private:
51 
52  template<typename ScalarT1, typename ScalarT2>
53  static void generate_avbv_impl2(device_specific::execution_handler & handler, std::string const & prefix, device_specific::vector_axpy_template::parameters_type const & parameters, scheduler::operation_node_type ASSIGN_OP,
54  viennacl::vector_base<NumericT> const * x, viennacl::vector_base<NumericT> const * y, ScalarT1 const * a,
55  viennacl::vector_base<NumericT> const * z, ScalarT2 const * b)
56  {
57  namespace ds = device_specific;
58  handler.add(prefix + "0000", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, false, false));
59  handler.add(prefix + "1000", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, false, false));
60  handler.add(prefix + "0100", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, false, false));
61  handler.add(prefix + "1100", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, false, false));
62  if (b)
63  {
64  handler.add(prefix + "0010", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, true, false));
65  handler.add(prefix + "1010", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, true, false));
66  handler.add(prefix + "0110", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, true, false));
67  handler.add(prefix + "1110", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, true, false));
68 
69  handler.add(prefix + "0001", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, false, true));
70  handler.add(prefix + "1001", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, false, true));
71  handler.add(prefix + "0101", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, false, true));
72  handler.add(prefix + "1101", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, false, true));
73 
74  handler.add(prefix + "0011", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, true, true));
75  handler.add(prefix + "1011", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, true, true));
76  handler.add(prefix + "0111", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, true, true));
77  handler.add(prefix + "1111", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, true, true));
78  }
79  }
80 
81  template<typename ScalarT>
82  static void generate_avbv_impl(device_specific::execution_handler & handler, std::string const & prefix, device_specific::vector_axpy_template::parameters_type const & parameters, scheduler::operation_node_type ASSIGN_OP,
83  viennacl::vector_base<NumericT> const * x, viennacl::vector_base<NumericT> const * y, ScalarT const * ha, viennacl::scalar<NumericT> const * da,
84  viennacl::vector_base<NumericT> const * z, ScalarT const * hb, viennacl::scalar<NumericT> const * db)
85  {
86  //x ASSIGN_OP a*y
87  generate_avbv_impl2(handler, prefix + "hv_", parameters, ASSIGN_OP, x, y, ha, (viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
88  generate_avbv_impl2(handler, prefix + "dv_", parameters, ASSIGN_OP, x, y, da, (viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
89 
90  //x ASSIGN_OP a*y + b*z
91  generate_avbv_impl2(handler, prefix + "hvhv_", parameters, ASSIGN_OP, x, y, ha, z, hb);
92  generate_avbv_impl2(handler, prefix + "dvhv_", parameters, ASSIGN_OP, x, y, da, z, hb);
93  generate_avbv_impl2(handler, prefix + "hvdv_", parameters, ASSIGN_OP, x, y, ha, z, db);
94  generate_avbv_impl2(handler, prefix + "dvdv_", parameters, ASSIGN_OP, x, y, da, z, db);
95  }
96 
97 public:
99  {
100  static std::map<cl_context, device_specific::execution_handler> handlers_map;
101  cl_context h = ctx.handle().get();
102  if (handlers_map.find(h) == handlers_map.end())
103  {
105 
106  namespace ds = viennacl::device_specific;
107  viennacl::ocl::device const & device = ctx.current_device();
108  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector", ctx, device)));
109  ds::execution_handler & handler = handlers_map.at(h);
110 
117  NumericT ha;
118  NumericT hb;
119 
120  ds::vector_axpy_template::parameters_type vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
121  ds::reduction_template::parameters_type reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
122 
123  generate_avbv_impl(handler, "assign_", vector_axpy_params, scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
124  generate_avbv_impl(handler, "ip_add_", vector_axpy_params, scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
125 
126  handler.add("plane_rotation", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::plane_rotation(&x, &y, &ha, &hb));
127  handler.add("swap", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::swap(&x, &y));
128  handler.add("assign_cpu", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::assign_cpu(&x, &scalary));
129 
130  generate_inner_prod_impl(handler, "inner_prod", reduction_params, 1, &x, &y, &da);
131 
132  handler.add("norm_1", ds::reduction_template(reduction_params), scheduler::preset::norm_1(&da, &x));
133  bool is_float_or_double = is_floating_point<NumericT>::value;
134  if (is_float_or_double) //BIND_TO_HANDLE for optimization (will load x once in the internal inner product)
135  handler.add("norm_2", ds::reduction_template(reduction_params, ds::BIND_TO_HANDLE), scheduler::preset::norm_2(&da, &x));
136  handler.add("norm_inf", ds::reduction_template(reduction_params), scheduler::preset::norm_inf(&da, &x));
137  handler.add("index_norm_inf", ds::reduction_template(reduction_params), scheduler::preset::index_norm_inf(&da, &x));
138  handler.add("sum", ds::reduction_template(reduction_params), scheduler::preset::sum(&da, &x));
139  handler.add("max", ds::reduction_template(reduction_params), scheduler::preset::max(&da, &x));
140  handler.add("min", ds::reduction_template(reduction_params), scheduler::preset::min(&da, &x));
141  }
142  return handlers_map.at(h);
143  }
144 };
145 
146 // main kernel class
148 template<typename NumericT>
150 {
151 public:
153  {
154  static std::map<cl_context, device_specific::execution_handler> handlers_map;
155  cl_context h = ctx.handle().get();
156  if (handlers_map.find(h) == handlers_map.end())
157  {
159 
160  namespace ds = viennacl::device_specific;
161 
162  viennacl::ocl::device const & device = ctx.current_device();
163  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector_multi_inner_prod", ctx, device)));
164  ds::execution_handler & handler = handlers_map.at(h);
165 
166  ds::reduction_template::parameters_type reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
167 
168  //Dummy holders for the statements
173 
174  generate_inner_prod_impl(handler, "inner_prod_1", reduction_params, 1, &x, &y, &da);
175  generate_inner_prod_impl(handler, "inner_prod_2", reduction_params, 2, &x, &y, &da);
176  generate_inner_prod_impl(handler, "inner_prod_3", reduction_params, 3, &x, &y, &da);
177  generate_inner_prod_impl(handler, "inner_prod_4", reduction_params, 4, &x, &y, &da);
178  generate_inner_prod_impl(handler, "inner_prod_8", reduction_params, 8, &x, &y, &da);
179  }
180  return handlers_map.at(h);
181  }
182 };
183 
184 // main kernel class
186 template<typename NumericT>
188 {
189 
190 public:
192  {
193  static std::map<cl_context, device_specific::execution_handler> handlers_map;
194  cl_context h = ctx.handle().get();
195  if (handlers_map.find(h) == handlers_map.end())
196  {
198 
199  namespace ds = viennacl::device_specific;
200  using namespace scheduler;
202 
203  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
204  viennacl::ocl::device const & device = ctx.current_device();
205  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector_element", ctx, device)));
206  ds::execution_handler & handler = handlers_map.at(h);
207  ds::vector_axpy_template::parameters_type vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
208 
212 
213  // unary operations
214 #define VIENNACL_ADD_UNARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::unary_element_op(&x, &y, OPTYPE))
215  if (numeric_string == "float" || numeric_string == "double")
216  {
233  }
234  else
235  {
237  }
238 #undef VIENNACL_ADD_UNARY
239 
240  // binary operations
241 #define VIENNACL_ADD_BINARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::binary_element_op(&x, &y, &z, OPTYPE))
244  if (numeric_string == "float" || numeric_string == "double")
245  {
247  }
248 #undef VIENNACL_ADD_BINARY
249 
250  }
251  return handlers_map.at(h);
252  }
253 };
254 
255 } // namespace kernels
256 } // namespace opencl
257 } // namespace linalg
258 } // namespace viennacl
259 #endif
260 
viennacl::ocl::device const & current_device() const
Returns the current device.
Definition: context.hpp:111
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:152
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Implements an OpenCL platform within ViennaCL.
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
Definition: preset.hpp:212
Various little tools used here and there in ViennaCL.
Some helper routines for reading/writing/printing scheduler expressions.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
reduction_template::parameters_type const & reduction_params(ocl::device const &device)
Definition: reduction.hpp:109
Provides OpenCL-related utilities.
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
void add(std::string const &key, template_base const &T, statements_container const &statements)
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:252
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:16
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:224
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:191
statement sum(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:246
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:613
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:149
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:259
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:189
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:433
vector_axpy_template::parameters_type const & vector_axpy_params(ocl::device const &device)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:238
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:218
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:230
std::size_t vcl_size_t
Definition: forwards.h:74
Provides the data structures for dealing with a single statement such as 'x = y + z;'.
Proxy classes for vectors.
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
Definition: vector.hpp:187
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
Definition: preset.hpp:86
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:98
#define VIENNACL_ADD_UNARY(OPTYPE)
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:106
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
Definition: preset.hpp:78
const char * operator_string(scheduler::operation_node_type type)
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:48
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
#define VIENNACL_ADD_BINARY(OPTYPE)
Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc.