1 #ifndef VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
35 #include <cuda_runtime.h>
// Listing fragment of the as_kernel overload whose scaling factor arrives
// through a pointer (fac2). Original lines 49, 52 and everything after 54
// (function braces, the statement guarded by the bit-0 test, and the rest of
// the body) are absent from this listing, so the span is incomplete as shown.
47 template<
typename NumericT>
48 __global__
void as_kernel(NumericT *
s1,
const NumericT * fac2,
unsigned int options2,
const NumericT *
s2)
// Read the factor by dereferencing fac2.
50 NumericT alpha = *fac2;
// Bit 0 of options2 is tested; the guarded statement (original line 52) is
// not visible. Presumably the sign flip matching make_options' flip_sign
// argument -- TODO confirm against the full file.
51 if (options2 & (1 << 0))
// Bit 1 of options2 set: replace alpha by its reciprocal.
53 if (options2 & (1 << 1))
54 alpha = NumericT(1) / alpha;
// Listing fragment of the as_kernel overload whose scaling factor is passed
// by value (fac2). Original lines 61, 64 and everything after 66 are absent
// from this listing (braces, the statement guarded by the bit-0 test, and the
// rest of the body), so the span is incomplete as shown.
59 template<
typename NumericT>
60 __global__
void as_kernel(NumericT *
s1, NumericT fac2,
unsigned int options2,
const NumericT *
s2)
// Local working copy of the by-value factor.
62 NumericT alpha = fac2;
// Bit 0 of options2 is tested; the guarded statement (original line 64) is
// not visible. Presumably a sign flip -- TODO confirm against the full file.
63 if (options2 & (1 << 0))
// Bit 1 of options2 set: replace alpha by its reciprocal.
65 if (options2 & (1 << 1))
66 alpha = NumericT(1) / alpha;
// Listing fragment of the host-side launcher for as_kernel (the tooltip text
// near the end of this listing names it `as`). The return type, function
// name, first parameter, braces, the value_type typedef and the middle kernel
// arguments fall on original lines that are absent here (73-77, 79-83, 85,
// 87, 89-90).
71 template<
typename ScalarT1,
72 typename ScalarT2,
typename NumericT>
78 ScalarT2
const &
s2, NumericT
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha)
// Host-side copy of alpha, zero-initialised first.
84 value_type temporary_alpha = 0;
// Original line 85 is missing; it may guard this assignment -- confirm
// against the full file.
86 temporary_alpha = alpha;
// Launch configuration <<<1, 1>>>: one block containing one thread.
// The arguments between s1 and s2 (original lines 89-90) are not visible.
88 as_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(
s1),
91 detail::cuda_arg<value_type>(s2));
// NOTE(review): no launch-error check is visible after the launch; the
// listing may simply have dropped it -- confirm against the full file.
// Listing fragment of a kernel taking both factors by pointer (fac2, fac3).
// The line carrying the kernel name and first parameter (original line 99) is
// missing; the parameter list matches the asbs_kernel signature quoted in the
// tooltip text near the end of this listing. Braces and the statements guarded
// by the bit-0 tests (original lines 102, 105, 111) are also absent.
98 template<
typename NumericT>
100 const NumericT * fac2,
unsigned int options2,
const NumericT *
s2,
101 const NumericT * fac3,
unsigned int options3,
const NumericT * s3)
// Read the first factor by dereferencing fac2.
103 NumericT alpha = *fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
104 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
106 if (options2 & (1 << 1))
107 alpha = NumericT(1) / alpha;
// Read the second factor by dereferencing fac3.
109 NumericT beta = *fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
110 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
112 if (options3 & (1 << 1))
113 beta = NumericT(1) / beta;
// Overwrite *s1 with the linear combination of *s2 and *s3.
115 *s1 = *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking fac2 by value and fac3 by pointer.
// The line carrying the kernel name (original line 120) is missing; presumably
// an asbs_kernel overload, since the result is stored with plain `=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
119 template<
typename NumericT>
121 NumericT fac2,
unsigned int options2,
const NumericT *
s2,
122 NumericT
const * fac3,
unsigned int options3,
const NumericT * s3)
// Local working copy of the by-value factor.
124 NumericT alpha = fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
125 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
127 if (options2 & (1 << 1))
128 alpha = NumericT(1) / alpha;
// Read the second factor by dereferencing fac3.
130 NumericT beta = *fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
131 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
133 if (options3 & (1 << 1))
134 beta = NumericT(1) / beta;
// Overwrite *s1 with the linear combination of *s2 and *s3.
136 *s1 = *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking fac2 by pointer and fac3 by value.
// The line carrying the kernel name (original line 141) is missing; presumably
// an asbs_kernel overload, since the result is stored with plain `=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
140 template<
typename NumericT>
142 NumericT
const * fac2,
unsigned int options2,
const NumericT *
s2,
143 NumericT fac3,
unsigned int options3,
const NumericT * s3)
// Read the first factor by dereferencing fac2.
145 NumericT alpha = *fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
146 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
148 if (options2 & (1 << 1))
149 alpha = NumericT(1) / alpha;
// Local working copy of the by-value factor.
151 NumericT beta = fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
152 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
154 if (options3 & (1 << 1))
155 beta = NumericT(1) / beta;
// Overwrite *s1 with the linear combination of *s2 and *s3.
157 *s1 = *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking both factors by value (fac2, fac3).
// The line carrying the kernel name (original line 162) is missing; presumably
// an asbs_kernel overload, since the result is stored with plain `=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
161 template<
typename NumericT>
163 NumericT fac2,
unsigned int options2,
const NumericT *
s2,
164 NumericT fac3,
unsigned int options3,
const NumericT * s3)
// Local working copy of the first by-value factor.
166 NumericT alpha = fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
167 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
169 if (options2 & (1 << 1))
170 alpha = NumericT(1) / alpha;
// Local working copy of the second by-value factor.
172 NumericT beta = fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
173 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
175 if (options3 & (1 << 1))
176 beta = NumericT(1) / beta;
// Overwrite *s1 with the linear combination of *s2 and *s3.
178 *s1 = *s2 * alpha + *s3 * beta;
// Listing fragment of the host-side launcher for asbs_kernel (the tooltip
// text near the end of this listing names it `asbs`). The return type,
// function name, first parameter, braces, the value_type typedef, any
// options_beta computation and the middle kernel arguments fall on original
// lines that are absent here (see the gaps in the embedded numbering).
182 template<
typename ScalarT1,
183 typename ScalarT2,
typename NumericT1,
184 typename ScalarT3,
typename NumericT2>
192 ScalarT2
const &
s2, NumericT1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
193 ScalarT3
const & s3, NumericT2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
// Pack the alpha-related flags into a single unsigned via detail::make_options.
197 unsigned int options_alpha =
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
// Host-side copy of alpha, zero-initialised first.
200 value_type temporary_alpha = 0;
// Original line 201 is missing; it may guard this assignment -- confirm.
202 temporary_alpha = alpha;
// Host-side copy of beta, zero-initialised first.
204 value_type temporary_beta = 0;
// Original line 205 is missing; it may guard this assignment -- confirm.
206 temporary_beta = beta;
// Launch configuration <<<1, 1>>>: one block containing one thread.
// The arguments on original lines 209-210 and 212-213 are not visible.
208 asbs_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(
s1),
211 detail::cuda_arg<value_type>(s2),
214 detail::cuda_arg<value_type>(s3) );
// NOTE(review): no launch-error check is visible after the launch; the
// listing may simply have dropped it -- confirm against the full file.
// Listing fragment of a kernel taking both factors by pointer (fac2, fac3).
// The line carrying the kernel name and first parameter (original line 222) is
// missing; the parameter list matches the asbs_s_kernel signature quoted in
// the tooltip text near the end of this listing. Braces and the statements
// guarded by the bit-0 tests (original lines 225, 228, 234) are also absent.
221 template<
typename NumericT>
223 const NumericT * fac2,
unsigned int options2,
const NumericT *
s2,
224 const NumericT * fac3,
unsigned int options3,
const NumericT * s3)
// Read the first factor by dereferencing fac2.
226 NumericT alpha = *fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
227 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
229 if (options2 & (1 << 1))
230 alpha = NumericT(1) / alpha;
// Read the second factor by dereferencing fac3.
232 NumericT beta = *fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
233 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
235 if (options3 & (1 << 1))
236 beta = NumericT(1) / beta;
// Accumulate (+=) the linear combination of *s2 and *s3 into *s1.
238 *s1 += *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking fac2 by value and fac3 by pointer.
// The line carrying the kernel name (original line 243) is missing; presumably
// an asbs_s_kernel overload, since the result is accumulated with `+=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
242 template<
typename NumericT>
244 NumericT fac2,
unsigned int options2,
const NumericT *
s2,
245 NumericT
const * fac3,
unsigned int options3,
const NumericT * s3)
// Local working copy of the by-value factor.
247 NumericT alpha = fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
248 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
250 if (options2 & (1 << 1))
251 alpha = NumericT(1) / alpha;
// Read the second factor by dereferencing fac3.
253 NumericT beta = *fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
254 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
256 if (options3 & (1 << 1))
257 beta = NumericT(1) / beta;
// Accumulate (+=) the linear combination of *s2 and *s3 into *s1.
259 *s1 += *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking fac2 by pointer and fac3 by value.
// The line carrying the kernel name (original line 264) is missing; presumably
// an asbs_s_kernel overload, since the result is accumulated with `+=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
263 template<
typename NumericT>
265 NumericT
const * fac2,
unsigned int options2,
const NumericT *
s2,
266 NumericT fac3,
unsigned int options3,
const NumericT * s3)
// Read the first factor by dereferencing fac2.
268 NumericT alpha = *fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
269 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
271 if (options2 & (1 << 1))
272 alpha = NumericT(1) / alpha;
// Local working copy of the by-value factor.
274 NumericT beta = fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
275 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
277 if (options3 & (1 << 1))
278 beta = NumericT(1) / beta;
// Accumulate (+=) the linear combination of *s2 and *s3 into *s1.
280 *s1 += *s2 * alpha + *s3 * beta;
// Listing fragment of a kernel taking both factors by value (fac2, fac3).
// The line carrying the kernel name (original line 285) is missing; presumably
// an asbs_s_kernel overload, since the result is accumulated with `+=` -- TODO
// confirm. Braces and the statements guarded by the bit-0 tests are absent.
284 template<
typename NumericT>
286 NumericT fac2,
unsigned int options2,
const NumericT *
s2,
287 NumericT fac3,
unsigned int options3,
const NumericT * s3)
// Local working copy of the first by-value factor.
289 NumericT alpha = fac2;
// Bit 0 of options2 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
290 if (options2 & (1 << 0))
// Bit 1 of options2 set: use the reciprocal of alpha.
292 if (options2 & (1 << 1))
293 alpha = NumericT(1) / alpha;
// Local working copy of the second by-value factor.
295 NumericT beta = fac3;
// Bit 0 of options3 tested; guarded statement not visible (presumably a sign
// flip -- TODO confirm).
296 if (options3 & (1 << 0))
// Bit 1 of options3 set: use the reciprocal of beta.
298 if (options3 & (1 << 1))
299 beta = NumericT(1) / beta;
// Accumulate (+=) the linear combination of *s2 and *s3 into *s1.
301 *s1 += *s2 * alpha + *s3 * beta;
// Listing fragment of the host-side launcher for asbs_s_kernel (the tooltip
// text near the end of this listing names it `asbs_s`). The return type,
// function name, first parameter, braces, the value_type typedef, any
// options_beta computation and the middle kernel arguments fall on original
// lines that are absent here (see the gaps in the embedded numbering).
305 template<
typename ScalarT1,
306 typename ScalarT2,
typename NumericT1,
307 typename ScalarT3,
typename NumericT2>
315 ScalarT2
const &
s2, NumericT1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
316 ScalarT3
const & s3, NumericT2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
// Pack the alpha-related flags into a single unsigned via detail::make_options.
320 unsigned int options_alpha =
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
// Host-side copy of alpha, zero-initialised first.
323 value_type temporary_alpha = 0;
// Original line 324 is missing; it may guard this assignment -- confirm.
325 temporary_alpha = alpha;
// Host-side copy of beta, zero-initialised first.
327 value_type temporary_beta = 0;
// Original line 328 is missing; it may guard this assignment -- confirm.
329 temporary_beta = beta;
// NOTE(review): this prints a message to std::cout on every call. None of the
// other launchers visible in this listing do so, which suggests leftover debug
// output; consider removing it in the full file. No <iostream> include is
// visible in this listing either -- confirm it is provided elsewhere.
331 std::cout <<
"Launching asbs_s_kernel..." << std::endl;
// Launch configuration <<<1, 1>>>: one block containing one thread.
// The arguments on original lines 333-334 and 336-337 are not visible.
332 asbs_s_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(
s1),
335 detail::cuda_arg<value_type>(s2),
338 detail::cuda_arg<value_type>(s3) );
// NOTE(review): no launch-error check is visible after the launch; the
// listing may simply have dropped it -- confirm against the full file.
// Only the template header of this definition survives in the listing; the
// declaration and body (original lines 345 onwards) are absent. Presumably the
// scalar_swap_kernel(NumericT *s1, NumericT *s2) kernel quoted in the tooltip
// text near the end of this listing -- TODO confirm against the full file.
344 template<
typename NumericT>
// Listing fragment of the host-side launcher for scalar_swap_kernel (the
// tooltip text near the end of this listing names it `swap`). The return
// type, function name, parameters, braces and the value_type typedef fall on
// original lines 358-364, which are absent here.
357 template<
typename ScalarT1,
typename ScalarT2>
// Launch configuration <<<1, 1>>>: one block containing one thread, passing
// the CUDA arguments obtained from s1 and s2 via detail::cuda_arg.
365 scalar_swap_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(
s1),detail::cuda_arg<value_type>(s2));
// NOTE(review): no launch-error check is visible after the launch; the
// listing may simply have dropped it -- confirm against the full file.
Simple enable-if variant that uses the SFINAE pattern.
unsigned int make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Generic size and resize functionality for different vector and matrix types.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
viennacl::scalar< int > s2
viennacl::scalar< float > s1
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs(ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
__global__ void as_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2)
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs_s(ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
Helper struct for checking whether a type is a viennacl::scalar<>
Common routines for CUDA execution.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type as(ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
__global__ void scalar_swap_kernel(NumericT *s1, NumericT *s2)
__global__ void asbs_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
__global__ void asbs_s_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
viennacl::backend::mem_handle::cuda_handle_type & arg_reference(viennacl::scalar< NumericT > &s, OtherT)
Simple enable-if variant that uses the SFINAE pattern.