1 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
2 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
68 unsigned int iters()
const {
return iters_taken_; }
69 void iters(
unsigned int i)
const { iters_taken_ = i; }
72 double error()
const {
return last_error_; }
74 void error(
double e)
const { last_error_ = e; }
79 unsigned int iterations_;
83 mutable unsigned int iters_taken_;
84 mutable double last_error_;
88 static const char * double_float_conversion_program =
89 "#if defined(cl_khr_fp64)\n"
90 "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
91 "#elif defined(cl_amd_fp64)\n"
92 "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
94 "__kernel void assign_double_to_float(\n"
95 " __global float * vec1,\n"
96 " __global const double * vec2, \n"
97 " unsigned int size) \n"
99 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
100 " vec1[i] = (float)(vec2[i]);\n"
102 "__kernel void inplace_add_float_to_double(\n"
103 " __global double * vec1,\n"
104 " __global const float * vec2, \n"
105 " unsigned int size) \n"
107 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
108 " vec1[i] += (double)(vec2[i]);\n"
121 template<
typename MatrixType,
typename VectorType>
132 VectorType result(rhs);
135 VectorType residual = rhs;
138 CPU_ScalarType new_ip_rr = 0;
139 CPU_ScalarType norm_rhs_squared = ip_rr;
141 if (norm_rhs_squared <= 0)
144 static bool first =
true;
149 ctx.
add_program(double_float_conversion_program,
"double_float_conversion_program");
156 float inner_ip_rr =
static_cast<float>(ip_rr);
157 float new_inner_ip_rr = 0;
158 float initial_inner_rhs_norm_squared =
static_cast<float>(ip_rr);
167 rhs.handle().opencl_handle(),
172 residual_low_precision = p_low_precision;
180 matrix.handle().opencl_handle(),
181 cl_uint(matrix.nnz())
197 result_low_precision += alpha * p_low_precision;
198 residual_low_precision -= alpha * tmp_low_precision;
202 beta = new_inner_ip_rr / inner_ip_rr;
203 inner_ip_rr = new_inner_ip_rr;
205 p_low_precision = residual_low_precision + beta * p_low_precision;
214 result_low_precision.
handle().opencl_handle(),
215 cl_uint(result.size())
220 residual = rhs - residual;
228 residual.handle().opencl_handle(),
229 cl_uint(residual.size())
231 result_low_precision.
clear();
232 residual_low_precision = p_low_precision;
233 initial_inner_rhs_norm_squared =
static_cast<float>(new_ip_rr);
234 inner_ip_rr =
static_cast<float>(new_ip_rr);
239 tag.
error(std::sqrt(new_ip_rr / norm_rhs_squared));
244 template<
typename MatrixType,
typename VectorType>
247 return solve(matrix, rhs, tag);
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
float inner_tolerance() const
Returns the relative tolerance.
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL.
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
A tag class representing the use of no preconditioner.
Implementations of incomplete factorization preconditioners. Convenience header file.
unsigned int max_iterations() const
Returns the maximum number of iterations.
double tolerance() const
Returns the relative tolerance.
Generic clear functionality for different vector and matrix types.
mixed_precision_cg_tag(double tol=1e-8, unsigned int max_iterations=300, float inner_tol=1e-2f)
The constructor.
double error() const
Returns the estimated relative error at the end of the solver run.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Implementations of the OpenCL backend, where all contexts are stored in.
Proxy classes for vectors.
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
void iters(unsigned int i) const
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
void error(double e) const
Sets the estimated relative error at the end of the solver run.
A collection of compile time type deductions.
unsigned int iters() const
Return the number of solver iterations:
const handle_type & handle() const
Returns the memory handle.
Main interface routines for memory management.
viennacl::vector< NumericT > solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.