ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
viennacl::linalg::cuda Namespace Reference

Holds all CUDA compute kernels used by ViennaCL. More...

Namespaces

 detail
 Helper functions for the CUDA linear algebra backend.
 

Classes

struct  mat_mult_matrix_index
 Helper struct for accessing an element of a row- or column-major matrix. More...
 

Functions

template<typename NumericT >
void bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
__device__ void writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda)
 Write data to global memory. More...
 
template<typename NumericT >
__device__ void compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2)
 Perform final stream compaction before writing data to global memory. More...
 
__device__ void scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Compute addresses to obtain compact list of block start addresses. More...
 
__device__ void scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Perform scan to obtain number of eigenvalues before a specific block. More...
 
__device__ void scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 
template<typename NumericT >
__device__ void storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second)
 
template<typename NumericT >
__global__ void bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d - diagonal elements in global memory; g_s - superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n - size of matrix; lg - lower bound of input interval (e.g. Gerschgorin interval); ug - upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count - number of eigenvalues that are smaller than lg; ug_eig_count - number of eigenvalues that are smaller than ug; epsilon - desired accuracy of eigenvalues to compute. More...
 
template<typename NumericT >
__global__ void bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More...
 
__device__ int floorPow2 (int n)
 
__device__ int ceilPow2 (int n)
 
template<typename NumericT >
__device__ NumericT computeMidpoint (const NumericT left, const NumericT right)
 
template<class S , class T , class NumericT >
__device__ void storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<class S , class T , class NumericT >
__device__ void storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
 Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More...
 
template<class T >
__device__ void createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction)
 
template<class T , class NumericT >
__device__ void compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
 Perform stream compaction for second child intervals. More...
 
template<class T , class S , class NumericT >
__device__ void storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)
 
template<class T , class NumericT >
__device__ void subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
 Subdivide interval if active and not already converged. More...
 
template<typename NumericT >
__global__ void matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT >
__global__ void matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
 Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). More...
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &mat, vector_base< NumericT > &vec, SolverTagT)
 Direct inplace solver for dense triangular systems (non-transposed version) More...
 
__host__ __device__ float2 operator+ (float2 a, float2 b)
 
__host__ __device__ float2 operator- (float2 a, float2 b)
 
template<typename SCALARTYPE >
__device__ float2 operator/ (float2 a, SCALARTYPE b)
 
__device__ float2 operator* (float2 in1, float2 in2)
 
__host__ __device__ double2 operator+ (double2 a, double2 b)
 
__host__ __device__ double2 operator- (double2 a, double2 b)
 
template<typename SCALARTYPE >
__host__ __device__ double2 operator/ (double2 a, SCALARTYPE b)
 
__host__ __device__ double2 operator* (double2 in1, double2 in2)
 
__device__ unsigned int get_reorder_num (unsigned int v, unsigned int bit_size)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 2D algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 2D algorithm for computing Fourier transformation. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign)
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign)
 
template<typename NumericT >
__global__ void zero2 (NumericT *input1, NumericT *input2, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
 Bluestein's algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
 Multiply two complex vectors and store the result in output. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor)
 
template<typename NumericT , unsigned int AlignmentV>
void normalize (viennacl::vector< NumericT, AlignmentV > &input)
 Normalize a vector by its own size. More...
 
template<typename NumericT >
__global__ void transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output)
 Transpose matrix. More...
 
template<typename NumericT >
__global__ void transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
 In-place transpose of a matrix. More...
 
template<typename RealT , typename ComplexT >
__global__ void real_to_complex (const RealT *in, ComplexT *out, unsigned int size)
 
template<typename NumericT >
void real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create complex vector from real vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part). More...
 
template<typename ComplexT , typename RealT >
__global__ void complex_to_real (const ComplexT *in, RealT *out, unsigned int size)
 
template<typename NumericT >
void complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create real vector from complex vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part). More...
 
template<typename NumericT >
__global__ void reverse_inplace (NumericT *vec, uint size)
 
template<typename NumericT >
void reverse (viennacl::vector_base< NumericT > &in)
 Reverse the order of the vector's elements and store the result in the input vector. More...
 
template<typename NumericT >
__global__ void pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset)
 
template<typename NumericT >
void pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename T >
__global__ void pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size)
 
template<typename T >
void pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More...
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k)
 
template<typename T >
void pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
 
template<typename T >
void pipelined_gmres_prod (compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename NumericT , typename SizeT , typename DistanceT >
void trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
 
template<typename NumericT , typename ScalarT >
void am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
void matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false)
 
template<typename NumericT >
void matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s)
 
template<typename NumericT >
void matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
 
template<typename NumericT >
void matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
 
template<typename NumericT , typename SizeT , typename OpT >
void element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
void prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
 Carries out matrix-vector multiplication. More...
 
template<typename NumericT , typename ScalarT >
void prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta)
 Carries out matrix-matrix multiplication. More...
 
template<typename NumericT , typename ScalarT >
void scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
 The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More...
 
template<typename NumericT , typename VectorType >
void bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
 This function stores the diagonal and the superdiagonal of a matrix in two vectors. More...
 
template<typename NumericT >
void copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
 This function copies a row or a column from a matrix to a vector. More...
 
template<typename NumericT >
void house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
 This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D)
 This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
 This function updates the matrix Q, which is needed for the computation of the eigenvectors. More...
 
template<typename NumericT >
void givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
 This function updates the matrix Q. It is part of the tql2 algorithm. More...
 
template<typename NumericT >
void inclusive_scan (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
 This function implements an inclusive scan. More...
 
template<typename NumericT , typename F >
void exclusive_scan (vector_base< NumericT, F > &vec1, vector_base< NumericT, F > &vec2)
 This function implements an exclusive scan. More...
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void bidiag_pack_row_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride)
 
template<typename T >
__global__ void bidiag_pack_column_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride)
 
template<typename T >
__global__ void copy_col_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride)
 
template<typename T >
__global__ void copy_col_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride)
 
template<typename T >
__global__ void copy_row_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride)
 
template<typename T >
__global__ void copy_row_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride)
 
template<typename T >
__global__ void house_update_A_left_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride)
 
template<typename T >
__global__ void house_update_A_left_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride)
 
template<typename T >
__global__ void house_update_A_right_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride)
 
template<typename T >
__global__ void house_update_A_right_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride)
 
template<typename T >
__device__ void col_reduce_lcl_array (T *sums, uint th_Idx, uint bl_Dim)
 
template<typename T >
__global__ void house_update_QL_row_major_kernel (T *QL, T *V, uint size1, uint strideQ)
 
template<typename T >
__global__ void house_update_QL_column_major_kernel (T *QL, T *V, uint size1, uint strideQ)
 
template<typename T >
__global__ void givens_next_row_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i)
 
template<typename T >
__global__ void givens_next_column_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i)
 
template<typename T >
__global__ void inclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS)
 
template<typename T >
__global__ void exclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS)
 
template<typename T >
__global__ void scan_kernel_2 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS, unsigned int InputSize)
 
template<typename T >
__global__ void scan_kernel_3 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS)
 
template<typename T >
__global__ void scan_kernel_4 (T *S, unsigned int startS, unsigned int incS, T *Y, unsigned int startY, unsigned int incY, unsigned int OutputSize)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size)
 Main CUDA kernel for nonnegative matrix factorization of dense matrices. More...
 
template<typename NumericT >
void nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf)
 The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More...
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2)
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void scalar_swap_kernel (NumericT *s1, NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap (ScalarT1 &s1, ScalarT2 &s2)
 Swaps the contents of two scalars, data is copied. More...
 
template<typename NumericT >
__global__ void compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<typename NumericT >
__global__ void compressed_matrix_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<class NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out sparse_matrix-dense_matrix multiplication, with the first matrix being compressed. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out matrix-trans(matrix) multiplication, with the first matrix being compressed and the second transposed. More...
 
template<typename NumericT >
__global__ void compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size)
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename NumericT >
__global__ void compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<typename NumericT >
void prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a compressed_compressed_matrix. More...
 
template<typename NumericT >
__global__ void coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a coordinate_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(COO)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with an ell_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<typename NumericT , typename IndexT >
void prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a sliced_ell_matrix. More...
 
template<typename NumericT >
__global__ void hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a hyb_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT , typename ScalarType1 >
void av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha)
 
template<typename NumericT , typename ScalarT1 >
void vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
 Assigns a constant value to a vector (or a vector range/slice). More...
 
template<typename NumericT >
__global__ void vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
 Swaps the contents of two vectors, data is copied. More...
 
template<typename NumericT >
__global__ void element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT , typename OpT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy)
 Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More...
 
template<typename OpT >
void element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy)
 
template<typename OpT >
void element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
__global__ void inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT , typename ScalarT >
void inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
void inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
__global__ void inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
 
template<typename NumericT >
__global__ void vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT >
void inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result)
 
template<typename NumericT >
__global__ void norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
void norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
void norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
__global__ void vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
void max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the maximum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void max_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the minimum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void min_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
__device__ NumericT cuda_abs (NumericT val)
 
__device__ unsigned long cuda_abs (unsigned long val)
 
__device__ unsigned int cuda_abs (unsigned int val)
 
__device__ unsigned short cuda_abs (unsigned short val)
 
__device__ unsigned char cuda_abs (unsigned char val)
 
template<typename NumericT >
__global__ void index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
 
template<typename NumericT >
vcl_size_t index_norm_inf (vector_base< NumericT > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus. More...
 
template<typename NumericT >
__global__ void plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta)
 
template<typename NumericT >
void plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
 Computes a plane rotation of two vectors. More...
 

Detailed Description

Holds all CUDA compute kernels used by ViennaCL.

Function Documentation

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::am ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 76 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 38 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 74 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 66 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 103 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 127 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 115 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 189 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 262 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 336 of file matrix_operations_col.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm_m ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 202 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 415 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 490 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 564 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 639 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 448 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 523 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 597 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 672 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 145 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 220 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 294 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 369 of file matrix_operations_row.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 77 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT *  s1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  s2 
)

Definition at line 48 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT *  s1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  s2 
)

Definition at line 60 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 191 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT *  s1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  s2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 99 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT *  s1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 120 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT *  s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 141 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT *  s1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 162 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 314 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT *  s1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  s2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 222 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT *  s1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 243 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT *  s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 264 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT *  s1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  s2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  s3 
)

Definition at line 285 of file scalar_operations.hpp.

template<typename NumericT , typename ScalarType1 >
void viennacl::linalg::cuda::av ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 118 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 51 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 84 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 407 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 153 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 216 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 279 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 342 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv_v ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 709 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 457 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT *  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 520 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 583 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT *  vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 646 of file vector_operations.hpp.

template<typename NumericT , typename VectorType >
void viennacl::linalg::cuda::bidiag_pack ( matrix_base< NumericT > &  A,
VectorType &  dh,
VectorType &  sh 
)

This function stores the diagonal and the superdiagonal of a matrix in two vectors.

Parameters
A  The matrix from which the vectors will be extracted.
dh  The vector in which the diagonal of the matrix will be stored.
sh  The vector in which the superdiagonal of the matrix will be stored.

Definition at line 2490 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel ( T *  A,
T *  D,
T *  S,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1435 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel ( T *  A,
T *  D,
T *  S,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1413 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon,
unsigned int *  g_num_one,
unsigned int *  g_num_blocks_mult,
NumericT *  g_left_one,
NumericT *  g_right_one,
unsigned int *  g_pos_one,
NumericT *  g_left_mult,
NumericT *  g_right_mult,
unsigned int *  g_left_count_mult,
unsigned int *  g_right_count_mult,
unsigned int *  g_blocks_mult,
unsigned int *  g_blocks_mult_sum 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d  diagonal elements in global memory
g_s  superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n  size of matrix
lg  lower bound of input interval (e.g. Gerschgorin interval)
ug  upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count  number of eigenvalues that are smaller than lg
ug_eig_count  number of eigenvalues that are smaller than ug
epsilon  desired accuracy of eigenvalues to compute

Definition at line 536 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
unsigned int *  blocks_mult,
unsigned int *  blocks_mult_sum,
NumericT *  g_left,
NumericT *  g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
NumericT *  g_lambda,
unsigned int *  g_pos,
NumericT  precision 
)

Perform second step of bisection algorithm for large matrices for intervals that after the first step contained more than one eigenvalue

Parameters
g_d  diagonal elements of symmetric, tridiagonal matrix
g_s  superdiagonal elements of symmetric, tridiagonal matrix
n  matrix size
blocks_mult  start addresses of blocks of intervals that are processed by one block of threads; each of the intervals contains more than one eigenvalue
blocks_mult_sum  total number of eigenvalues / singleton intervals in one block of intervals
g_left  left limits of intervals
g_right  right limits of intervals
g_left_count  number of eigenvalues less than left limits
g_right_count  number of eigenvalues less than right limits
g_lambda  final eigenvalue
g_pos  index of eigenvalue (in ascending order)
precision  desired precision of eigenvalues

Definition at line 68 of file bisect_kernel_large_multi.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
unsigned int  num_intervals,
NumericT *  g_left,
NumericT *  g_right,
unsigned int *  g_pos,
NumericT  precision 
)

Determine eigenvalues for large matrices for intervals that after the first step contained one eigenvalue

Parameters
g_d  diagonal elements of symmetric, tridiagonal matrix
g_s  superdiagonal elements of symmetric, tridiagonal matrix
n  matrix size
num_intervals  total number of intervals containing one eigenvalue after the first step
g_left  left interval limits
g_right  right interval limits
g_pos  index of interval / number of intervals that are smaller than right interval limit
precision  desired precision of eigenvalues

Definition at line 59 of file bisect_kernel_large_onei.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelSmall ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
NumericT *  g_left,
NumericT *  g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d  diagonal elements in global memory
g_s  superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n  size of matrix
g_left  helper array
g_right  helper array
g_left_count  helper array
g_right_count  helper array
lg  lower bound of input interval (e.g. Gerschgorin interval)
ug  upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count  number of eigenvalues that are smaller than lg
ug_eig_count  number of eigenvalues that are smaller than ug
epsilon  desired accuracy of eigenvalues to compute

Definition at line 61 of file bisect_kernel_small.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 69 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_MultIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 130 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_OneIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 101 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectSmall ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataSmall< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 43 of file bisect_kernel_calls.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::bluestein ( viennacl::vector< NumericT, AlignmentV > &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t   
)

Bluestein's algorithm for computing Fourier transformation.

Currently works only for input data sizes less than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.

Definition at line 621 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_post ( Numeric2T *  Z,
Numeric2T *  out,
unsigned int  size,
NumericT  sign 
)

Definition at line 537 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_pre ( Numeric2T *  input,
Numeric2T *  A,
Numeric2T *  B,
unsigned int  size,
unsigned int  ext_size,
NumericT  sign 
)

Definition at line 563 of file fft_operations.hpp.

__device__ int viennacl::linalg::cuda::ceilPow2 ( int  n)
inline

Compute the next higher power of two of n

Parameters
n  number for which the next higher power of two is sought

Definition at line 66 of file bisect_util.hpp.

template<typename T >
__device__ void viennacl::linalg::cuda::col_reduce_lcl_array ( T *  sums,
uint  th_Idx,
uint  bl_Dim 
)

Definition at line 1651 of file matrix_operations_col.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::compactIntervals ( NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  mid,
NumericT  right,
unsigned int  mid_count,
unsigned int  right_count,
T *  s_compaction_list,
unsigned int  num_threads_active,
unsigned int  is_active_second 
)

Perform stream compaction for second child intervals.

Parameters
s_left  shared memory storage for left interval limits
s_right  shared memory storage for right interval limits
s_left_count  shared memory storage for number of eigenvalues less than left interval limits
s_right_count  shared memory storage for number of eigenvalues less than right interval limits
mid  midpoint of current interval (left of new interval)
right  upper limit of interval
mid_count  eigenvalues less than mid
right_count  eigenvalues less than right
s_compaction_list  list containing the indices where the data has to be stored
num_threads_active  number of active threads / intervals
is_active_second  mark if thread has a second non-empty child interval

Definition at line 440 of file bisect_util.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::compactStreamsFinal ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
unsigned int &  offset_mult_lambda,
NumericT *  s_left,
NumericT *  s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
unsigned short *  s_cl_one,
unsigned short *  s_cl_mult,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper,
unsigned int  is_one_lambda,
unsigned int  is_one_lambda_2,
NumericT &  left,
NumericT &  right,
NumericT &  left_2,
NumericT &  right_2,
unsigned int &  left_count,
unsigned int &  right_count,
unsigned int &  left_count_2,
unsigned int &  right_count_2,
unsigned int  c_block_iend,
unsigned int  c_sum_block,
unsigned int  c_block_iend_2,
unsigned int  c_sum_block_2 
)

Perform final stream compaction before writing data to global memory.

Definition at line 134 of file bisect_kernel_large.hpp.

template<typename ComplexT , typename RealT >
__global__ void viennacl::linalg::cuda::complex_to_real ( const ComplexT *  in,
RealT *  out,
unsigned int  size 
)

Definition at line 808 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::complex_to_real ( viennacl::vector_base< NumericT > const &  in,
viennacl::vector_base< NumericT > &  out,
vcl_size_t  size 
)

Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)

Definition at line 818 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel ( const unsigned int *  row_jumper,
const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
unsigned int  nonzero_rows,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 863 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT *  sp_mat_elements,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 277 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT *  sp_mat_elements,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 431 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  result,
unsigned int  size 
)

Definition at line 593 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_adaptive_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT *  elements,
unsigned int  num_blocks,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 146 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 118 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::computeMidpoint ( const NumericT  left,
const NumericT  right 
)
inline

Compute midpoint of interval [left, right] avoiding overflow if possible

Parameters
left  left / lower limit of interval
right  right / upper limit of interval

Definition at line 89 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvals ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT *  s_d,
NumericT *  s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d  diagonal elements stored in global memory
g_s  superdiagonal elements stored in global memory
n  size of matrix
x  value for which the number of smaller eigenvalues is sought
tid  thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active  number of active intervals / threads that currently process an interval
s_d  scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s  scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged  flag indicating whether the current thread has already converged (that is, the count does not have to be computed)

Definition at line 177 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvalsLarge ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT *  s_d,
NumericT *  s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d  diagonal elements stored in global memory
g_s  superdiagonal elements stored in global memory
n  size of matrix
x  value for which the number of smaller eigenvalues is sought
tid  thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active  number of active intervals / threads that currently process an interval
s_d  scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s  scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged  flag indicating whether the current thread has already converged (that is, the count does not have to be computed)

Definition at line 237 of file bisect_util.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const unsigned int *  group_boundaries,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1180 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const unsigned int *  group_boundaries,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1375 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const unsigned int *  group_boundaries,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result 
)

Definition at line 1080 of file sparse_matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_column_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size,
uint  stride 
)

Definition at line 1477 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_row_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size,
uint  stride 
)

Definition at line 1459 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_column_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size,
uint  stride 
)

Definition at line 1514 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_row_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size,
uint  stride 
)

Definition at line 1495 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::copy_vec ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  V,
vcl_size_t  row_start,
vcl_size_t  col_start,
bool  copy_col 
)

This function copies a row or a column from a matrix to a vector.

Parameters
A: The matrix to copy from.
V: The vector to fill with data.
row_start: The number of the first row to copy.
col_start: The number of the first column to copy.
copy_col: Set to TRUE to copy a column, FALSE to copy a row.

Definition at line 2527 of file matrix_operations.hpp.

template<class T >
__device__ void viennacl::linalg::cuda::createIndicesCompaction ( T *  s_compaction_list_exc,
unsigned int  num_threads_compaction 
)

Create indices for compaction: process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and create for each non-zero element the index at which the new interval belongs in a compact representation of all generated second children.

Parameters
s_compaction_list_exc: list containing the flags indicating which threads generated two children
num_threads_compaction: number of threads to employ for compaction

Definition at line 373 of file bisect_util.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward ( const unsigned int *  row_jumper_U,
const unsigned int *  column_indices_U,
const NumericT *  elements_U,
const NumericT *  diagonal_U,
const unsigned int *  block_offsets,
NumericT *  result,
unsigned int  size 
)

Definition at line 700 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward ( const unsigned int *  row_jumper_L,
const unsigned int *  column_indices_L,
const NumericT *  elements_L,
const unsigned int *  block_offsets,
NumericT *  result,
unsigned int  size 
)

Definition at line 668 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 257 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 110 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
const NumericT *  diagonal_entries,
NumericT *  vector,
unsigned int  size 
)

Definition at line 597 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
const NumericT *  diagonal_entries,
NumericT *  vector,
unsigned int  size 
)

Definition at line 563 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
const NumericT *  diagonal_entries,
NumericT *  vector,
unsigned int  size 
)

Definition at line 429 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 342 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 497 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 367 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 180 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT *  elements,
NumericT *  vector,
unsigned int  size 
)

Definition at line 42 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::cuda_abs ( NumericT  val)

Definition at line 2893 of file vector_operations.hpp.

__device__ unsigned long viennacl::linalg::cuda::cuda_abs ( unsigned long  val)
inline

Definition at line 2894 of file vector_operations.hpp.

__device__ unsigned int viennacl::linalg::cuda::cuda_abs ( unsigned int  val)
inline

Definition at line 2895 of file vector_operations.hpp.

__device__ unsigned short viennacl::linalg::cuda::cuda_abs ( unsigned short  val)
inline

Definition at line 2896 of file vector_operations.hpp.

__device__ unsigned char viennacl::linalg::cuda::cuda_abs ( unsigned char  val)
inline

Definition at line 2897 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::vector< NumericT, AlignmentV > const &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 1D algorithm for computing Fourier transformation.

Works on data of any size. The serial implementation has O(n^2) complexity.

Definition at line 196 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &  in,
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 2D algorithm for computing Fourier transformation.

Works on data of any size. The serial implementation has O(n^2) complexity.

Definition at line 221 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::el_wise_mul_div ( NumericT *  matrix1,
NumericT const *  matrix2,
NumericT const *  matrix3,
unsigned int  size 
)

Main CUDA kernel for nonnegative matrix factorization of dense matrices.

Definition at line 38 of file nmf_operations.hpp.

template<typename NumericT , typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT, SizeT > &  A,
matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 511 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< float, SizeT > &  A,
matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 571 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< double, SizeT > &  A,
matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 631 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 699 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 741 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 783 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 825 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 867 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 909 of file matrix_operations.hpp.

template<typename NumericT , typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &  proxy 
)

Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax).

Parameters
vec1: The result vector (or -range, or -slice)
proxy: The proxy object holding v2, v3 and the operation

Definition at line 933 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 951 of file matrix_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< float > &  vec1,
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 963 of file vector_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< double > &  vec1,
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 993 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 993 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1035 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 1038 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 1065 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1077 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 1093 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1119 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 1121 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 1149 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1161 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 1177 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1203 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 1205 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1233 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1245 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 1260 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1287 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1289 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1317 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1329 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1345 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1371 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1373 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1401 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1429 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1457 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1485 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 755 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 804 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 891 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 835 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 845 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT *  C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 786 of file matrix_operations_row.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const NumericT *  sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1645 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const NumericT *  sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1818 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  col_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)

Definition at line 1575 of file sparse_matrix_operations.hpp.

template<typename NumericT , typename F >
void viennacl::linalg::cuda::exclusive_scan ( vector_base< NumericT, F > &  vec1,
vector_base< NumericT, F > &  vec2 
)

This function implements an exclusive scan.

Parameters
vec1: Input vector; gets overwritten by the routine.
vec2: The output vector.

Definition at line 2788 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::exclusive_scan_kernel_1 ( T *  X,
unsigned int  startX,
unsigned int  incX,
unsigned int  InputSize,
T *  Y,
unsigned int  startY,
unsigned int  incY,
T *  S,
unsigned int  startS,
unsigned int  incS 
)

Definition at line 1868 of file matrix_operations_col.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_direct ( const Numeric2T *  input,
Numeric2T *  output,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 139 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_div_vec_scalar ( Numeric2T *  input1,
unsigned int  size,
NumericT  factor 
)

Definition at line 689 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::fft_mult_vec ( const NumericT *  input1,
const NumericT *  input2,
NumericT *  output,
unsigned int  size 
)

Definition at line 656 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_radix2 ( Numeric2T *  input,
unsigned int  s,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 370 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_radix2_local ( Numeric2T *  input,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 297 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::fft_reorder ( NumericT *  input,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
bool  is_row_major 
)

Definition at line 240 of file fft_operations.hpp.

__device__ int viennacl::linalg::cuda::floorPow2 ( int  n)
inline

Compute the next lower power of two of n

Parameters
n: number for which the next lower power of two is sought

Definition at line 46 of file bisect_util.hpp.

__device__ unsigned int viennacl::linalg::cuda::get_reorder_num ( unsigned int  v,
unsigned int  bit_size 
)
inline

Definition at line 127 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::givens_next ( matrix_base< NumericT > &  Q,
vector_base< NumericT > &  tmp1,
vector_base< NumericT > &  tmp2,
int  l,
int  m 
)

This function updates the matrix Q. It is part of the tql2 algorithm.

Parameters
Q: The matrix to be updated.
tmp1: Vector with data from the tql2 algorithm.
tmp2: Vector with data from the tql2 algorithm.
l: Data from the tql2 algorithm.
m: Data from the tql2 algorithm.

Definition at line 2695 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::givens_next_column_major_kernel ( T *  matr,
T *  cs,
T *  ss,
uint  size,
uint  stride,
uint  start_i,
uint  end_i 
)

Definition at line 1771 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::givens_next_row_major_kernel ( T *  matr,
T *  cs,
T *  ss,
uint  size,
uint  stride,
uint  start_i,
uint  end_i 
)

Definition at line 1724 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_A_left ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  D,
vcl_size_t  start 
)

This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P.

Parameters
A: The matrix to be updated.
D: The normalized Householder vector.
start: The repetition counter.

Definition at line 2588 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_left_column_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1560 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_left_row_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1535 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_A_right ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  D 
)

This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P.

Parameters
A: The matrix to be updated.
D: The normalized Householder vector.

Definition at line 2627 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_right_column_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1618 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_right_row_major_kernel ( T *  A,
T *  V,
uint  row_start,
uint  col_start,
uint  size1,
uint  size2,
uint  stride 
)

Definition at line 1587 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_QL ( matrix_base< NumericT > &  Q,
vector_base< NumericT > &  D,
vcl_size_t  A_size1 
)

This function updates the matrix Q, which is needed for the computation of the eigenvectors.

Parameters
Q: The matrix to be updated.
D: The Householder vector.
A_size1: size1 of matrix A.

Definition at line 2664 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_QL_column_major_kernel ( T *  QL,
T *  V,
uint  size1,
uint  strideQ 
)

Definition at line 1696 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_QL_row_major_kernel ( T *  QL,
T *  V,
uint  size1,
uint  strideQ 
)

Definition at line 1669 of file matrix_operations_col.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 2158 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT *  d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT *  result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 2356 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)

Definition at line 2074 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inclusive_scan ( vector_base< NumericT > &  vec1,
vector_base< NumericT > &  vec2 
)

This function implements an inclusive scan.

Parameters
vec1: Input vector; gets overwritten by the routine.
vec2: The output vector.

Definition at line 2730 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::inclusive_scan_kernel_1 ( T *  X,
unsigned int  startX,
unsigned int  incX,
unsigned int  InputSize,
T *  Y,
unsigned int  startY,
unsigned int  incY,
T *  S,
unsigned int  startS,
unsigned int  incS 
)

Definition at line 1822 of file matrix_operations_col.hpp.

template<typename NumericT >
vcl_size_t viennacl::linalg::cuda::index_norm_inf ( vector_base< NumericT > const &  vec1)

Computes the index of the first entry that is equal to the supremum-norm in modulus.

Parameters
vec1: The vector
Returns
The result. Note that the result must be a CPU scalar (unsigned int), since GPU scalars are floating-point types.

Definition at line 2955 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel ( const NumericT *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int *  result 
)

Definition at line 2900 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
NumericT *  group_results 
)

Definition at line 1821 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
NumericT *  group_results 
)

Definition at line 1860 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT *  y3,
unsigned int  start3,
unsigned int  stride3,
NumericT *  group_results 
)

Definition at line 1905 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel ( const NumericT *  x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT *  y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT *  y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT *  y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT *  y3,
unsigned int  start3,
unsigned int  stride3,
const NumericT *  y4,
unsigned int  start4,
unsigned int  stride4,
const NumericT *  y5,
unsigned int  start5,
unsigned int  stride5,
const NumericT *  y6,
unsigned int  start6,
unsigned int  stride6,
const NumericT *  y7,
unsigned int  start7,
unsigned int  stride7,
NumericT *  group_results 
)

Definition at line 1956 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_cpu ( vector_base< NumericT > const &  vec1,
vector_base< NumericT > const &  vec2,
NumericT &  result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the host)

Definition at line 1785 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< NumericT > const &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT &  result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the GPU)

Definition at line 1753 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< NumericT > const &  x,
vector_tuple< NumericT > const &  vec_tuple,
vector_base< NumericT > &  result 
)

Definition at line 2053 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_kernel ( const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
NumericT *  group_buffer 
)

Definition at line 1507 of file vector_operations.hpp.

template<typename NumericT , typename SolverTagT >
void viennacl::linalg::cuda::inplace_solve ( matrix_base< NumericT > const &  A,
matrix_base< NumericT > &  B,
SolverTagT  tag 
)

Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation).

Parameters
A: The system matrix
B: The matrix of row vectors, to which the solution is directly written
tag: Solver tag for identifying the respective triangular solver
Examples:
blas2.cpp and least-squares.cpp.

Definition at line 253 of file direct_solve.hpp.

template<typename NumericT , typename SolverTagT >
void viennacl::linalg::cuda::inplace_solve ( matrix_base< NumericT > const &  mat,
vector_base< NumericT > &  vec,
SolverTagT   
)

Direct inplace solver for dense triangular systems (non-transposed version)

Parameters
mat: The system matrix proxy
vec: The load vector, to which the solution is directly written

Definition at line 398 of file direct_solve.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 627 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 648 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 670 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 691 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 715 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 736 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 767 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. Is overwritten by the solution.

Definition at line 788 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_assign ( matrix_base< NumericT > &  mat,
NumericT  s,
bool  clear = false 
)

Definition at line 279 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 718 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 736 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 851 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 874 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 897 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 920 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 943 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 966 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 989 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1012 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1035 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1058 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1104 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1081 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1127 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1150 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1173 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1196 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1219 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_column ( const matrix_base< NumericT > &  mat,
unsigned int  j,
vector_base< NumericT > &  vec 
)

Definition at line 472 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diag_from_vector ( const vector_base< NumericT > &  vec,
int  k,
matrix_base< NumericT > &  mat 
)

Definition at line 340 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diag_to_vector ( matrix_base< NumericT > const &  mat,
int  k,
vector_base< NumericT > &  vec 
)

Definition at line 392 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diagonal_assign ( matrix_base< NumericT > &  mat,
NumericT  s 
)

Definition at line 311 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 38 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 125 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 212 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 299 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 749 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 836 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 923 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1010 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1463 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1550 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1637 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1724 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2178 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2265 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2352 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2439 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel ( const NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  unit_diagonal 
)

Definition at line 107 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 393 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 480 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 567 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 654 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1104 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1191 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1278 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1365 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1819 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1906 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1993 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2080 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2535 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2622 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2709 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel ( NumericT  alpha,
const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT *  C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2796 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel ( const NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  unit_diagonal 
)

Definition at line 41 of file direct_solve.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_row ( matrix_base< NumericT > const &  mat,
unsigned int  i,
vector_base< NumericT > &  vec 
)

Definition at line 439 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 749 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 767 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 881 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 904 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 927 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 950 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 973 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 996 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1019 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1042 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1065 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1088 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1134 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1111 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1157 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1180 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1203 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1226 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1249 of file matrix_operations_row.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::max_cpu ( vector_base< NumericT > const &  vec1,
NumericT &  result 
)

Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU.

Parameters
vec1The vector
resultThe result host scalar

Definition at line 2810 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::max_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the maximum of a vector, both reduction stages run on the GPU.

Parameters
vec1The vector
resultThe result GPU scalar

Definition at line 2782 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::min_cpu ( vector_base< NumericT > const &  vec1,
NumericT &  result 
)

Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU.

Parameters
vec1The vector
resultThe result host scalar

Definition at line 2864 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::min_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the minimum of a vector, both reduction stages run on the GPU.

Parameters
vec1The vector
resultThe result GPU scalar

Definition at line 2836 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::multiply_complex ( viennacl::vector< NumericT, AlignmentV > const &  input1,
viennacl::vector< NumericT, AlignmentV > const &  input2,
viennacl::vector< NumericT, AlignmentV > &  output 
)

Multiply two complex vectors and store the result in output.

Definition at line 673 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::nmf ( viennacl::matrix_base< NumericT > const &  V,
viennacl::matrix_base< NumericT > &  W,
viennacl::matrix_base< NumericT > &  H,
viennacl::linalg::nmf_config const &  conf 
)

The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.

Parameters
VInput matrix
WFirst factor
HSecond factor
confA configuration object holding tolerances and the like

Definition at line 59 of file nmf_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_1_cpu ( vector_base< NumericT > const &  vec1,
NumericT &  result 
)

Computes the l^1-norm of a vector.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2622 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_1_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the l^1-norm of a vector.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2604 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_2_cpu ( vector_base< NumericT > const &  vec1,
NumericT &  result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2668 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_2_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2649 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_cpu ( vector_base< NumericT > const &  vec1,
NumericT &  result 
)

Computes the supremum-norm of a vector.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2716 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the supremum-norm of a vector.

Parameters
vec1The vector
resultThe result scalar

Definition at line 2696 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_floats ( const NumericT *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT *  group_buffer 
)

Definition at line 2252 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_integers ( const NumericT *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT *  group_buffer 
)

Definition at line 2345 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers ( const NumericT *  vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT *  group_buffer 
)

Definition at line 2429 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::normalize ( viennacl::vector< NumericT, AlignmentV > &  input)

Normalize the vector by its own size.

Definition at line 699 of file fft_operations.hpp.

__device__ float2 viennacl::linalg::cuda::operator* ( float2  in1,
float2  in2 
)
inline

Definition at line 97 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator* ( double2  in1,
double2  in2 
)
inline

Definition at line 122 of file fft_operations.hpp.

__host__ __device__ float2 viennacl::linalg::cuda::operator+ ( float2  a,
float2  b 
)
inline

Definition at line 79 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator+ ( double2  a,
double2  b 
)
inline

Definition at line 103 of file fft_operations.hpp.

__host__ __device__ float2 viennacl::linalg::cuda::operator- ( float2  a,
float2  b 
)
inline

Definition at line 85 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator- ( double2  a,
double2  b 
)
inline

Definition at line 109 of file fft_operations.hpp.

template<typename SCALARTYPE >
__device__ float2 viennacl::linalg::cuda::operator/ ( float2  a,
SCALARTYPE  b 
)
inline

Definition at line 91 of file fft_operations.hpp.

template<typename SCALARTYPE >
__host__ __device__ double2 viennacl::linalg::cuda::operator/ ( double2  a,
SCALARTYPE  b 
)
inline

Definition at line 116 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const unsigned int *  group_boundaries,
const NumericT *  p,
NumericT *  Ap,
const NumericT *  r0star,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 942 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT *  elements,
unsigned int  num_blocks,
const NumericT *  p,
NumericT *  Ap,
const NumericT *  r0star,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 800 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT *  p,
NumericT *  Ap,
const NumericT *  r0star,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1090 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT *  p,
NumericT *  Ap,
const NumericT *  r0star,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1287 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( compressed_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 908 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( coordinate_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1056 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1153 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( sliced_ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1254 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( hyb_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1363 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT *  elements,
const NumericT *  p,
NumericT *  Ap,
const NumericT *  r0star,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1185 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_update_s ( vector_base< NumericT > &  s,
vector_base< NumericT > &  r,
vector_base< NumericT > const &  Ap,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 695 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel ( NumericT *  s,
NumericT const *  residual,
NumericT const *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  chunk_size,
unsigned int  chunk_offset 
)

Definition at line 638 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel ( NumericT *  result,
NumericT  alpha,
NumericT *  p,
NumericT  omega,
NumericT const *  s,
NumericT *  residual,
NumericT const *  As,
NumericT  beta,
NumericT const *  Ap,
NumericT const *  r0star,
NumericT *  inner_prod_buffer,
unsigned int  size 
)

Definition at line 717 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_vector_update ( vector_base< NumericT > &  result,
NumericT  alpha,
vector_base< NumericT > &  p,
NumericT  omega,
vector_base< NumericT > const &  s,
vector_base< NumericT > &  residual,
vector_base< NumericT > const &  As,
NumericT  beta,
vector_base< NumericT > const &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 768 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
const unsigned int *  group_boundaries,
const NumericT *  p,
NumericT *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 241 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT *  elements,
unsigned int  num_blocks,
const NumericT *  p,
NumericT *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 114 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel ( const unsigned int *  coords,
const NumericT *  elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT *  p,
NumericT *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 373 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT *  ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT *  csr_elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT *  p,
NumericT *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 542 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( compressed_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 213 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( coordinate_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 345 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 428 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( sliced_ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 515 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( hyb_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 610 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT *  elements,
const NumericT *  p,
NumericT *  Ap,
unsigned int  size,
NumericT *  inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 454 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel ( NumericT *  result,
NumericT  alpha,
NumericT *  p,
NumericT *  r,
NumericT const *  Ap,
NumericT  beta,
NumericT *  inner_prod_buffer,
unsigned int  size 
)

Definition at line 44 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_vector_update ( vector_base< NumericT > &  result,
NumericT  alpha,
vector_base< NumericT > &  p,
vector_base< NumericT > &  r,
vector_base< NumericT > const &  Ap,
NumericT  beta,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 85 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 ( vector_base< T > const &  device_krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vcl_size_t  param_k,
vector_base< T > &  vi_in_vk_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 1540 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel ( T const *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
unsigned int  k,
T *  vi_in_vk_buffer,
unsigned int  chunk_size 
)

Definition at line 1488 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 ( vector_base< T > &  device_krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vcl_size_t  param_k,
vector_base< T > const &  vi_in_vk_buffer,
vector_base< T > &  R_buffer,
vcl_size_t  krylov_dim,
vector_base< T > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 1632 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel ( T *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
unsigned int  k,
T const *  vi_in_vk_buffer,
unsigned int  chunk_size,
T *  R_buffer,
unsigned int  krylov_dim,
T *  inner_prod_buffer 
)

Definition at line 1565 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_normalize_vk ( vector_base< T > &  v_k,
vector_base< T > const &  residual,
vector_base< T > &  R_buffer,
vcl_size_t  offset_in_R,
vector_base< T > const &  inner_prod_buffer,
vector_base< T > &  r_dot_vk_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Performs a vector normalization needed for an efficient pipelined GMRES algorithm.

This routine computes for vectors 'r', 'v_k': (1) the second reduction step for ||v_k||; (2) v_k /= ||v_k||; (3) the first reduction step for <r, v_k>.

Definition at line 1457 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel ( T *  vk,
unsigned int  vk_offset,
T const *  residual,
T *  R_buffer,
unsigned int  R_offset,
T const *  inner_prod_buffer,
unsigned int  chunk_size,
T *  r_dot_vk_buffer,
unsigned int  chunk_offset,
unsigned int  size 
)

Definition at line 1395 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( compressed_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1709 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( coordinate_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1731 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( ell_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1753 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( sliced_ell_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1774 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( hyb_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1796 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_update_result ( vector_base< T > &  result,
vector_base< T > const &  residual,
vector_base< T > const &  krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vector_base< T > const &  coefficients,
vcl_size_t  param_k 
)

Definition at line 1684 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel ( T *  result,
T const *  residual,
T const *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
T const *  coefficients,
unsigned int  k 
)

Definition at line 1664 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::plane_rotation ( vector_base< NumericT > &  vec1,
vector_base< NumericT > &  vec2,
NumericT  alpha,
NumericT  beta 
)

Computes a plane rotation of two vectors.

Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)

Parameters
vec1The first vector
vec2The second vector
alphaThe first transformation coefficient
betaThe second transformation coefficient

Definition at line 3015 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::plane_rotation_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
NumericT  alpha,
NumericT  beta 
)

Definition at line 2979 of file vector_operations.hpp.

template<class NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 225 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse_matrix-dense_matrix multiplication, the first matrix being compressed.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_matThe sparse matrix
d_matThe dense matrix
resultThe result matrix

Definition at line 339 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out matrix-trans(matrix) multiplication, the first matrix being compressed and the second transposed.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));

Parameters
sp_matThe sparse matrix
d_matThe transposed dense matrix proxy
resultThe result matrix

Definition at line 494 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_compressed_matrix< NumericT > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a compressed_compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 906 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a coordinate_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 1157 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix (COO)-Dense Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_matThe Sparse Matrix (Coordinate format)
d_matThe Dense Matrix
resultThe Result Matrix

Definition at line 1283 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT > &  mat,
bool  mat_transpose,
const vector_base< NumericT > &  vec,
vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication.

Implementation of the convenience expressions result = prod(mat, vec); and result = prod(trans(mat), vec);

Parameters
matThe matrix
mat_transposeWhether the matrix is to be transposed.
vecThe vector
resultThe result vector

Definition at line 1427 of file matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix (COO)-Dense Transposed Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));

Parameters
sp_matThe Sparse Matrix (Coordinate format)
d_matThe Dense Transposed Matrix
resultThe Result Matrix

Definition at line 1477 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with an ell_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 1623 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format

Parameters
sp_matThe sparse matrix (ELL)
d_matThe dense matrix
resultThe result matrix

Definition at line 1715 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format

Parameters
sp_matThe sparse matrix (ELL)
d_matThe dense matrix
resultThe result matrix

Definition at line 1888 of file sparse_matrix_operations.hpp.

template<typename NumericT , typename IndexT >
void viennacl::linalg::cuda::prod_impl ( const viennacl::sliced_ell_matrix< NumericT, IndexT > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a sliced_ell_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 2047 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
matThe matrix
vecThe vector
resultThe result vector

Definition at line 2132 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, d_mat);

Parameters
mat: The sparse matrix
d_mat: The dense matrix (row- or column-major)
result: The dense result matrix (row- or column-major)

Definition at line 2239 of file sparse_matrix_operations.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT > &  A,
bool  trans_A,
const matrix_base< NumericT > &  B,
bool  trans_B,
matrix_base< NumericT > &  C,
ScalarT  alpha,
ScalarT  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(A, B);

Definition at line 2385 of file matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, trans(d_mat));

Parameters
mat: The sparse matrix
d_mat: Transposed matrix proxy object for the rhs dense matrix (row- or column-major)
result: The dense result matrix (row- or column-major)

Definition at line 2437 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::radix2 ( viennacl::vector< NumericT, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Radix-2 1D algorithm for computing Fourier transformation.

Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.

Definition at line 441 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::radix2 ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Radix-2 2D algorithm for computing Fourier transformation.

Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.

Definition at line 493 of file fft_operations.hpp.

template<typename RealT , typename ComplexT >
__global__ void viennacl::linalg::cuda::real_to_complex ( const RealT *  in,
ComplexT *  out,
unsigned int  size 
)

Definition at line 781 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::real_to_complex ( viennacl::vector_base< NumericT > const &  in,
viennacl::vector_base< NumericT > &  out,
vcl_size_t  size 
)

Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)

Definition at line 796 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::reorder ( viennacl::vector< NumericT, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  bits_datasize,
vcl_size_t  batch_num,
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Definition at line 281 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::reverse ( viennacl::vector_base< NumericT > &  in)

Reverse vector to opposite order and save it in input vector.

Definition at line 846 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::reverse_inplace ( NumericT *  vec,
uint  size 
)

Definition at line 831 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scalar_swap_kernel ( NumericT *  s1,
NumericT *  s2 
)

Definition at line 345 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  val,
unsigned int  options2,
const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1334 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  val,
unsigned int  options2,
const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1374 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  val,
unsigned int  options2,
const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1363 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT *  val,
unsigned int  options2,
const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1403 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::scaled_rank_1_update ( matrix_base< NumericT > &  mat1,
ScalarT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
const vector_base< NumericT > &  vec1,
const vector_base< NumericT > &  vec2 
)

The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.

Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);

Parameters
mat1: The matrix to be updated
alpha: The scaling factor (either a viennacl::scalar<>, float, or double)
len_alpha: Length of the buffer for an eventual final reduction step (currently always '1')
reciprocal_alpha: Use 1/alpha instead of alpha
flip_sign_alpha: Use -alpha instead of alpha
vec1: The first vector
vec2: The second vector

Definition at line 2417 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::scan_kernel_2 ( T *  S_ref,
unsigned int  startS_ref,
unsigned int  incS_ref,
T *  S,
unsigned int  startS,
unsigned int  incS,
unsigned int  InputSize 
)

Definition at line 1918 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::scan_kernel_3 ( T *  S_ref,
unsigned int  startS_ref,
unsigned int  incS_ref,
T *  S,
unsigned int  startS,
unsigned int  incS 
)

Definition at line 1960 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::scan_kernel_4 ( T *  S,
unsigned int  startS,
unsigned int  incS,
T *  Y,
unsigned int  startY,
unsigned int  incY,
unsigned int  OutputSize 
)

Definition at line 1979 of file matrix_operations_col.hpp.

__device__ void viennacl::linalg::cuda::scanCompactBlocksStartAddress ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Compute addresses to obtain compact list of block start addresses.

Definition at line 238 of file bisect_kernel_large.hpp.

__device__ void viennacl::linalg::cuda::scanInitial ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_one,
unsigned short *  s_cl_mult,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Perform initial scan for compaction of intervals containing one and multiple eigenvalues; also do initial scan to build blocks

Definition at line 369 of file bisect_kernel_large.hpp.

__device__ void viennacl::linalg::cuda::scanSumBlocks ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Perform scan to obtain number of eigenvalues before a specific block.

Definition at line 303 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::sliced_ell_matrix_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT *  elements,
const NumericT *  x,
unsigned int  start_x,
unsigned int  inc_x,
unsigned int  size_x,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 2001 of file sparse_matrix_operations.hpp.

template<class S , class T , class NumericT >
__device__ void viennacl::linalg::cuda::storeInterval ( unsigned int  addr,
NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  left,
NumericT  right,
S  left_count,
S  right_count,
NumericT  precision 
)

Check if interval converged and store appropriately

Parameters
addr: address where to store the information of the interval
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
left: lower limit of interval
right: upper limit of interval
left_count: eigenvalues less than left
right_count: eigenvalues less than right
precision: desired precision for eigenvalues

Definition at line 124 of file bisect_util.hpp.

template<class T , class S , class NumericT >
__device__ void viennacl::linalg::cuda::storeIntervalConverged ( NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
NumericT &  left,
NumericT &  mid,
NumericT &  right,
S &  left_count,
S &  mid_count,
S &  right_count,
T *  s_compaction_list_exc,
unsigned int &  compact_second_chunk,
const unsigned int  num_threads_active,
unsigned int &  is_active_second 
)

Definition at line 465 of file bisect_util.hpp.

template<class S , class T , class NumericT >
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervals ( unsigned int  addr,
const unsigned int  num_threads_active,
NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  left,
NumericT  mid,
NumericT  right,
const S  left_count,
const S  mid_count,
const S  right_count,
NumericT  precision,
unsigned int &  compact_second_chunk,
T *  s_compaction_list_exc,
unsigned int &  is_active_second 
)

Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.

Parameters
addr: base address for storing intervals
num_threads_active: number of threads / intervals in current sweep
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
left: lower limit of interval
mid: midpoint of interval
right: upper limit of interval
left_count: eigenvalues less than left
mid_count: eigenvalues less than mid
right_count: eigenvalues less than right
precision: desired precision for eigenvalues
compact_second_chunk: shared mem flag if second chunk is used and ergo requires compaction
s_compaction_list_exc: helper array for stream compaction, s_compaction_list_exc[tid] = 1 when the thread generated two child intervals
is_active_second: mark if thread has a second non-empty child interval

Definition at line 309 of file bisect_util.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervalsLarge ( unsigned int  addr,
const unsigned int  num_threads_active,
NumericT *  s_left,
NumericT *  s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
NumericT  left,
NumericT  mid,
NumericT  right,
const unsigned short  left_count,
const unsigned short  mid_count,
const unsigned short  right_count,
NumericT  epsilon,
unsigned int &  compact_second_chunk,
unsigned short *  s_compaction_list,
unsigned int &  is_active_second 
)

Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread

Definition at line 475 of file bisect_kernel_large.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::subdivideActiveInterval ( const unsigned int  tid,
NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
const unsigned int  num_threads_active,
NumericT &  left,
NumericT &  right,
unsigned int &  left_count,
unsigned int &  right_count,
NumericT &  mid,
unsigned int &  all_threads_converged 
)

Subdivide interval if active and not already converged.

Parameters
tid: id of thread
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
num_threads_active: number of active threads in warp
left: lower limit of interval
right: upper limit of interval
left_count: eigenvalues less than left
right_count: eigenvalues less than right
mid: median of interval
all_threads_converged: shared memory flag if all threads are converged

Definition at line 529 of file bisect_util.hpp.

template<typename ScalarT1 , typename ScalarT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value >::type viennacl::linalg::cuda::swap ( ScalarT1 &  s1,
ScalarT2 &  s2 
)

Swaps the contents of two scalars, data is copied.

Parameters
s1: The first scalar
s2: The second scalar

Definition at line 361 of file scalar_operations.hpp.

template<typename NumericT , typename SizeT , typename DistanceT >
void viennacl::linalg::cuda::trans ( matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &  proxy,
matrix_base< NumericT > &  temp_trans 
)
Examples:
blas2.cpp.

Definition at line 57 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_kernel ( const NumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_stride1,
unsigned int  A_stride2,
NumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
unsigned int  B_stride1,
unsigned int  B_stride2,
bool  data_major 
)

Definition at line 35 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel ( const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1277 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel ( const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1321 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::transpose ( const NumericT *  input,
NumericT *  output,
unsigned int  row_num,
unsigned int  col_num 
)

Definition at line 712 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::transpose ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &  input,
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  output 
)

Transpose matrix.

Definition at line 731 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::transpose ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  input)

In-place transpose of a matrix.

Definition at line 769 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::transpose_inplace ( NumericT *  input,
unsigned int  row_num,
unsigned int  col_num 
)

Definition at line 745 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel ( NumericT const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)

Definition at line 307 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel ( NumericT const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)

Definition at line 266 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1251 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1029 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1056 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1084 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1112 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1140 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1168 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1196 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1224 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1280 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1336 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1308 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1364 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1392 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1420 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1448 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1476 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel ( const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1246 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel ( const NumericT *  A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT *  v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT *  result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1276 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT1 >
void viennacl::linalg::cuda::vector_assign ( vector_base< NumericT > &  vec1,
ScalarT1 const &  alpha,
bool  up_to_internal_size = false 
)

Assign a constant value to a vector (-range/-slice)

Parameters
vec1: The vector to which the value should be assigned
alpha: The value to be assigned
up_to_internal_size: Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer).

Definition at line 777 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_assign_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  internal_size1,
NumericT  alpha 
)

Definition at line 756 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_maxmin_kernel ( const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT *  result 
)

Definition at line 2739 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel ( NumericT const *  vec1,
NumericT *  result,
unsigned int  start_result,
unsigned int  inc_result 
)

Definition at line 2031 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats ( const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT *  result 
)

Definition at line 1547 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers ( const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT *  result 
)

Definition at line 1589 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers ( const NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT *  result 
)

Definition at line 1626 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::vector_swap ( vector_base< NumericT > &  vec1,
vector_base< NumericT > &  vec2 
)

Swaps the contents of two vectors, data is copied.

Parameters
vec1: The first vector (or -range, or -slice)
vec2: The second vector (or -range, or -slice)

Definition at line 827 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_swap_kernel ( NumericT *  vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 800 of file vector_operations.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::writeToGmem ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
const unsigned int  num_blocks_mult,
NumericT *  g_left_one,
NumericT *  g_right_one,
unsigned int *  g_pos_one,
NumericT *  g_left_mult,
NumericT *  g_right_mult,
unsigned int *  g_left_count_mult,
unsigned int *  g_right_count_mult,
NumericT *  s_left,
NumericT *  s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
unsigned int *  g_blocks_mult,
unsigned int *  g_blocks_mult_sum,
unsigned short *  s_compaction_list,
unsigned short *  s_cl_helper,
unsigned int  offset_mult_lambda 
)

Write data to global memory.

Definition at line 53 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::zero2 ( NumericT *  input1,
NumericT *  input2,
unsigned int  size 
)

Definition at line 601 of file fft_operations.hpp.