1 #ifndef VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
51 template<
typename NumericT>
69 template<
typename NumericT>
81 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
90 buffer_size_per_vector,
97 template<
typename NumericT>
107 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
112 unsigned int thread_num = 256;
125 buffer_size_per_vector,
131 template<
typename NumericT>
141 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
145 unsigned int thread_num = 128;
146 unsigned int group_num = 256;
152 A.
handle().opencl_handle(),
156 viennacl::traits::opencl_handle(p),
157 viennacl::traits::opencl_handle(Ap),
160 buffer_size_per_vector,
167 template<
typename NumericT>
177 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
182 unsigned int group_num = 256;
190 A.
handle().opencl_handle(),
191 viennacl::traits::opencl_handle(p),
192 viennacl::traits::opencl_handle(Ap),
195 buffer_size_per_vector,
203 template<
typename NumericT>
213 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
217 unsigned int thread_num = 256;
218 unsigned int group_num = 128;
224 A.
handle().opencl_handle(),
231 viennacl::traits::opencl_handle(p),
232 viennacl::traits::opencl_handle(Ap),
235 buffer_size_per_vector,
245 template<
typename NumericT>
262 cl_uint chunk_size = cl_uint(buffer_chunk_size);
263 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
265 inner_prod_buffer, chunk_size, chunk_offset, vec_size,
270 template<
typename NumericT>
277 (void)buffer_chunk_size;
297 template<
typename NumericT>
312 cl_uint chunk_size = cl_uint(buffer_chunk_size);
313 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
322 inner_prod_buffer, chunk_size, chunk_offset,
331 template<
typename NumericT>
344 cl_uint chunk_size = cl_uint(buffer_chunk_size);
345 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
350 unsigned int thread_num = 256;
363 inner_prod_buffer, chunk_size, chunk_offset,
370 template<
typename NumericT>
383 cl_uint chunk_size = cl_uint(buffer_chunk_size);
384 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
388 unsigned int thread_num = 128;
389 unsigned int group_num = 128;
395 A.
handle().opencl_handle(),
399 viennacl::traits::opencl_handle(p),
400 viennacl::traits::opencl_handle(Ap),
403 inner_prod_buffer, chunk_size, chunk_offset,
411 template<
typename NumericT>
424 cl_uint chunk_size = cl_uint(buffer_chunk_size);
425 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
430 unsigned int group_num = 128;
438 A.
handle().opencl_handle(),
439 viennacl::traits::opencl_handle(p),
440 viennacl::traits::opencl_handle(Ap),
443 inner_prod_buffer, chunk_size, chunk_offset,
452 template<
typename NumericT>
465 cl_uint chunk_size = cl_uint(buffer_chunk_size);
466 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
470 unsigned int thread_num = 256;
471 unsigned int group_num = 128;
477 A.
handle().opencl_handle(),
484 viennacl::traits::opencl_handle(p),
485 viennacl::traits::opencl_handle(Ap),
488 inner_prod_buffer, chunk_size, chunk_offset,
505 template <
typename T>
523 cl_uint size_vk = cl_uint(v_k.
size());
525 cl_uint R_offset = cl_uint(offset_in_R);
526 cl_uint chunk_size = cl_uint(buffer_chunk_size);
527 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
531 inner_prod_buffer, chunk_size,
532 r_dot_vk_buffer, chunk_offset,
538 template <
typename T>
554 cl_uint size_vk = cl_uint(v_k_size);
555 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
556 cl_uint ocl_k = cl_uint(param_k);
557 cl_uint chunk_size = cl_uint(buffer_chunk_size);
559 vi_in_vk_buffer, chunk_size,
564 template <
typename T>
580 cl_uint size_vk = cl_uint(v_k_size);
581 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
582 cl_uint ocl_k = cl_uint(param_k);
583 cl_uint chunk_size = cl_uint(buffer_chunk_size);
584 cl_uint ocl_krylov_dim = cl_uint(krylov_dim);
586 vi_in_vk_buffer, chunk_size,
587 R_buffer, ocl_krylov_dim,
593 template <
typename T>
607 cl_uint size_vk = cl_uint(v_k_size);
608 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
609 cl_uint ocl_k = cl_uint(param_k);
612 krylov_basis, size_vk, internal_size_vk,
618 template <
typename T>
630 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
641 buffer_size_per_vector,
648 template <
typename T>
658 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
663 inner_prod_buffer.
clear();
666 unsigned int thread_num = 128;
679 buffer_size_per_vector,
685 template <
typename T>
695 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
701 unsigned int thread_num = 128;
702 unsigned int group_num = 128;
708 A.
handle().opencl_handle(),
712 viennacl::traits::opencl_handle(p), start_p,
713 viennacl::traits::opencl_handle(Ap), start_Ap,
716 buffer_size_per_vector,
723 template <
typename T>
733 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
740 unsigned int group_num = 128;
748 A.
handle().opencl_handle(),
749 viennacl::traits::opencl_handle(p), start_p,
750 viennacl::traits::opencl_handle(Ap), start_Ap,
753 buffer_size_per_vector,
761 template <
typename T>
771 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
777 unsigned int thread_num = 128;
778 unsigned int group_num = 128;
784 A.
handle().opencl_handle(),
791 viennacl::traits::opencl_handle(p), start_p,
792 viennacl::traits::opencl_handle(Ap), start_Ap,
795 buffer_size_per_vector,
vcl_size_t internal_ellnnz() const
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Main kernel class for generating specialized OpenCL kernels for fast iterative solvers.
Represents an OpenCL device within ViennaCL.
void pipelined_bicgstab_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Generic size and resize functionality for different vector and matrix types.
const handle_type & handle3() const
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
static void init(viennacl::ocl::context &ctx)
const handle_type & handle() const
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
vcl_size_t internal_size1() const
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
const handle_type & handle4() const
vcl_size_t rows_per_block() const
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const handle_type & handle2() const
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
OpenCL kernel file for specialized iterative solver kernels.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Sparse matrix class using the sliced ELLPACK format with parameters C and $\sigma$ (SELL-C-$\sigma$).
Implementation of a smart-pointer-like class for handling OpenCL handles.
void pipelined_cg_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
result_of::size_type< T >::type start(T const &obj)
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t maxnnz() const
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
void clear()
Resets all entries to zero. Does not change the size of the vector.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
size_type size() const
Returns the length of the vector (cf. std::vector)
vcl_size_t ell_nnz() const
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void pipelined_cg_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
const handle_type & handle5() const
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void pipelined_gmres_prod(compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Simple enable-if variant that uses the SFINAE pattern.
A sparse square matrix, where entries are stored as triplets (i, j, val), where i and j are the row and column indices and val is the value of the entry.