Holds all CUDA compute kernels used by ViennaCL. More...
Namespaces | |
detail | |
Helper functions for the CUDA linear algebra backend. | |
Classes | |
struct | mat_mult_matrix_index |
Helper struct for accessing an element of a row- or column-major matrix. More... | |
Functions | |
template<typename NumericT > | |
void | bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
template<typename NumericT > | |
__device__ void | writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda) |
Write data to global memory. More... | |
template<typename NumericT > | |
__device__ void | compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2) |
Perform final stream compaction before writing data to global memory. More... | |
__device__ void | scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
Compute addresses to obtain compact list of block start addresses. More... | |
__device__ void | scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
Perform scan to obtain number of eigenvalues before a specific block. More... | |
__device__ void | scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
template<typename NumericT > | |
__device__ void | storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second) |
template<typename NumericT > | |
__global__ void | bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d - diagonal elements in global memory; g_s - superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n - size of matrix; lg - lower bound of input interval (e.g. Gerschgorin interval); ug - upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count - number of eigenvalues that are smaller than lg; ug_eig_count - number of eigenvalues that are smaller than ug; epsilon - desired accuracy of eigenvalues to compute. More... | |
template<typename NumericT > | |
__global__ void | bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision) |
template<typename NumericT > | |
__global__ void | bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision) |
template<typename NumericT > | |
__global__ void | bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More... | |
__device__ int | floorPow2 (int n) |
__device__ int | ceilPow2 (int n) |
template<typename NumericT > | |
__device__ NumericT | computeMidpoint (const NumericT left, const NumericT right) |
template<class S , class T , class NumericT > | |
__device__ void | storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision) |
template<typename NumericT > | |
__device__ unsigned int | computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
template<typename NumericT > | |
__device__ unsigned int | computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
template<class S , class T , class NumericT > | |
__device__ void | storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More... | |
template<class T > | |
__device__ void | createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction) |
template<class T , class NumericT > | |
__device__ void | compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second) |
Perform stream compaction for second child intervals. More... | |
template<class T , class S , class NumericT > | |
__device__ void | storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second) |
template<class T , class NumericT > | |
__device__ void | subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged) |
Subdivide interval if active and not already converged. More... | |
template<typename NumericT > | |
__global__ void | matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal) |
template<typename NumericT > | |
__global__ void | matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal) |
template<typename NumericT , typename SolverTagT > | |
void | inplace_solve (const matrix_base< NumericT > &A, bool trans_A, matrix_base< NumericT > &B, bool trans_B, SolverTagT tag) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). Both A and B can optionally be transposed. More... | |
template<typename NumericT > | |
__global__ void | triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename NumericT > | |
__global__ void | triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename NumericT , typename SolverTagT > | |
void | inplace_solve (const matrix_base< NumericT > &mat, bool trans_mat, vector_base< NumericT > &vec, SolverTagT) |
Direct inplace solver for dense triangular systems (non-transposed version) More... | |
__host__ __device__ float2 | operator+ (float2 a, float2 b) |
__host__ __device__ float2 | operator- (float2 a, float2 b) |
template<typename SCALARTYPE > | |
__device__ float2 | operator/ (float2 a, SCALARTYPE b) |
__device__ float2 | operator* (float2 in1, float2 in2) |
__host__ __device__ double2 | operator+ (double2 a, double2 b) |
__host__ __device__ double2 | operator- (double2 a, double2 b) |
template<typename SCALARTYPE > | |
__host__ __device__ double2 | operator/ (double2 a, SCALARTYPE b) |
__host__ __device__ double2 | operator* (double2 in1, double2 in2) |
__device__ unsigned int | get_reorder_num (unsigned int v, unsigned int bit_size) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Direct 1D algorithm for computing Fourier transformation. More... | |
template<typename NumericT , unsigned int AlignmentV> | |
void | direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Direct 2D algorithm for computing Fourier transformation. More... | |
template<typename NumericT > | |
__global__ void | fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Radix-2 1D algorithm for computing Fourier transformation. More... | |
template<typename NumericT , unsigned int AlignmentV> | |
void | radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Radix-2 2D algorithm for computing Fourier transformation. More... | |
template<typename Numeric2T , typename NumericT > | |
__global__ void | bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign) |
template<typename NumericT > | |
__global__ void | zero2 (NumericT *input1, NumericT *input2, unsigned int size) |
template<typename NumericT , unsigned int AlignmentV> | |
void | bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t) |
Bluestein's algorithm for computing Fourier transformation. More... | |
template<typename NumericT > | |
__global__ void | fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size) |
template<typename NumericT , unsigned int AlignmentV> | |
void | multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output) |
Multiply two complex vectors and store the result in output. More... | |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor) |
template<typename NumericT , unsigned int AlignmentV> | |
void | normalize (viennacl::vector< NumericT, AlignmentV > &input) |
Normalize the vector by its own size. More... | |
template<typename NumericT > | |
__global__ void | transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num) |
template<typename NumericT , unsigned int AlignmentV> | |
void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output) |
Transpose matrix. More... | |
template<typename NumericT > | |
__global__ void | transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num) |
template<typename NumericT , unsigned int AlignmentV> | |
void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input) |
In-place transpose of a matrix. More... | |
template<typename RealT , typename ComplexT > | |
__global__ void | real_to_complex (const RealT *in, ComplexT *out, unsigned int size) |
template<typename NumericT > | |
void | real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
Create a complex vector from a real vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part). More... | |
template<typename ComplexT , typename RealT > | |
__global__ void | complex_to_real (const ComplexT *in, RealT *out, unsigned int size) |
template<typename NumericT > | |
void | complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
Create a real vector from a complex vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part). More... | |
template<typename NumericT > | |
__global__ void | reverse_inplace (NumericT *vec, uint size) |
template<typename NumericT > | |
void | reverse (viennacl::vector_base< NumericT > &in) |
Reverse the vector into the opposite order and store the result in the input vector. More... | |
template<typename NumericT > | |
__global__ void | pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size) |
template<typename NumericT > | |
void | pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size) |
template<typename NumericT > | |
void | pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename T > | |
__global__ void | pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size) |
template<typename T > | |
void | pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More... | |
template<typename T > | |
__global__ void | pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size) |
template<typename T > | |
void | pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size) |
template<typename T > | |
__global__ void | pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
template<typename T > | |
__global__ void | pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k) |
template<typename T > | |
void | pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k) |
template<typename T > | |
void | pipelined_gmres_prod (compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename NumericT , typename SizeT , typename DistanceT > | |
void | trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans) |
template<typename NumericT , typename ScalarT > | |
void | am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
void | matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false) |
template<typename NumericT > | |
void | matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s) |
template<typename NumericT > | |
void | matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat) |
template<typename NumericT > | |
void | matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec) |
template<typename NumericT > | |
void | matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec) |
template<typename NumericT > | |
void | matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec) |
template<typename NumericT , typename SizeT , typename OpT > | |
void | element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename SizeT , typename OpT > | |
void | element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename SizeT , typename OpT > | |
void | element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT > | |
void | prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result) |
Carries out matrix-vector multiplication. More... | |
template<typename NumericT , typename ScalarT > | |
void | prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta) |
Carries out matrix-matrix multiplication. More... | |
template<typename NumericT , typename ScalarT > | |
void | scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More... | |
template<typename NumericT , typename VectorType > | |
void | bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh) |
This function stores the diagonal and the superdiagonal of a matrix in two vectors. More... | |
template<typename NumericT > | |
void | copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col) |
This function copies a row or a column from a matrix to a vector. More... | |
template<typename NumericT > | |
void | house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start) |
This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More... | |
template<typename NumericT > | |
void | house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D) |
This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More... | |
template<typename NumericT > | |
void | house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1) |
This function updates the matrix Q, which is needed for the computation of the eigenvectors. More... | |
template<typename NumericT > | |
void | givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m) |
This function updates the matrix Q. It is part of the tql2 algorithm. More... | |
template<typename NumericT > | |
void | inclusive_scan (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
This function implements an inclusive scan. More... | |
template<typename NumericT , typename F > | |
void | exclusive_scan (vector_base< NumericT, F > &vec1, vector_base< NumericT, F > &vec2) |
This function implements an exclusive scan. More... | |
template<typename NumericT > | |
__global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | bidiag_pack_row_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride) |
template<typename T > | |
__global__ void | bidiag_pack_column_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride) |
template<typename T > | |
__global__ void | copy_col_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
template<typename T > | |
__global__ void | copy_col_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
template<typename T > | |
__global__ void | copy_row_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
template<typename T > | |
__global__ void | copy_row_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
template<typename T > | |
__global__ void | house_update_A_left_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
template<typename T > | |
__global__ void | house_update_A_left_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
template<typename T > | |
__global__ void | house_update_A_right_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
template<typename T > | |
__global__ void | house_update_A_right_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
template<typename T > | |
__device__ void | col_reduce_lcl_array (T *sums, uint th_Idx, uint bl_Dim) |
template<typename T > | |
__global__ void | house_update_QL_row_major_kernel (T *QL, T *V, uint size1, uint strideQ) |
template<typename T > | |
__global__ void | house_update_QL_column_major_kernel (T *QL, T *V, uint size1, uint strideQ) |
template<typename T > | |
__global__ void | givens_next_row_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i) |
template<typename T > | |
__global__ void | givens_next_column_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i) |
template<typename T > | |
__global__ void | inclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS) |
template<typename T > | |
__global__ void | exclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS) |
template<typename T > | |
__global__ void | scan_kernel_2 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS, unsigned int InputSize) |
template<typename T > | |
__global__ void | scan_kernel_3 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS) |
template<typename T > | |
__global__ void | scan_kernel_4 (T *S, unsigned int startS, unsigned int incS, T *Y, unsigned int startY, unsigned int incY, unsigned int OutputSize) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
__device__ unsigned int | row_major_index (unsigned int row, unsigned int col, unsigned int, unsigned int num_cols) |
__device__ unsigned int | col_major_index (unsigned int row, unsigned int col, unsigned int num_rows, unsigned int) |
template<typename NumericT > | |
__global__ void | trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major) |
template<typename NumericT > | |
__global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size) |
Main CUDA kernel for nonnegative matrix factorization of dense matrices. More... | |
template<typename NumericT > | |
void | nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf) |
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More... | |
template<typename NumericT > | |
__global__ void | as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2) |
template<typename NumericT > | |
__global__ void | as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT > | |
viennacl::enable_if < viennacl::is_scalar < ScalarT1 >::value &&viennacl::is_scalar < ScalarT2 >::value &&viennacl::is_any_scalar < NumericT >::value >::type | as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
viennacl::enable_if < viennacl::is_scalar < ScalarT1 >::value &&viennacl::is_scalar < ScalarT2 >::value &&viennacl::is_scalar < ScalarT3 >::value &&viennacl::is_any_scalar < NumericT1 >::value &&viennacl::is_any_scalar < NumericT2 >::value >::type | asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
viennacl::enable_if < viennacl::is_scalar < ScalarT1 >::value &&viennacl::is_scalar < ScalarT2 >::value &&viennacl::is_scalar < ScalarT3 >::value &&viennacl::is_any_scalar < NumericT1 >::value &&viennacl::is_any_scalar < NumericT2 >::value >::type | asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | scalar_swap_kernel (NumericT *s1, NumericT *s2) |
template<typename ScalarT1 , typename ScalarT2 > | |
viennacl::enable_if < viennacl::is_scalar < ScalarT1 >::value &&viennacl::is_scalar < ScalarT2 >::value >::type | swap (ScalarT1 &s1, ScalarT2 &s2) |
Swaps the contents of two scalars; the data is copied. More... | |
template<typename NumericT > | |
__global__ void | compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<class NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a compressed_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out sparse matrix-dense matrix multiplication, with the first matrix being a compressed_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out matrix-trans(matrix) multiplication, with the first matrix being a compressed_matrix and the second a transposed dense matrix. More... | |
template<typename NumericT > | |
__global__ void | compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size) |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixT >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename NumericT > | |
__global__ void | compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<typename NumericT > | |
void | prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix. More... | |
template<typename NumericT > | |
__global__ void | coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a coordinate_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Coordinate Matrix (COO)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Coordinate Matrix (COO)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with an ell_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<typename NumericT , typename IndexT > | |
void | prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a sliced_ell_matrix. More... | |
template<typename NumericT > | |
__global__ void | hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a hyb_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix (HYB)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix (HYB)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
template<typename NumericT > | |
__global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
__global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT , typename ScalarType1 > | |
void | av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha) |
template<typename NumericT , typename ScalarT1 > | |
void | vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false) |
Assign a constant value to a vector (-range/-slice) More... | |
template<typename NumericT > | |
__global__ void | vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
Swaps the contents of two vectors, data is copied. More... | |
template<typename NumericT > | |
__global__ void | element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT , typename OpT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy) |
Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More... | |
template<typename OpT > | |
void | element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy) |
template<typename OpT > | |
void | element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT > | |
__global__ void | inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT , typename ScalarT > | |
void | inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
void | inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
__global__ void | inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result) |
template<typename NumericT > | |
void | inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result) |
template<typename NumericT > | |
__global__ void | norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
void | norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
__global__ void | vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
void | max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the maximum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | max_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
void | min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the minimum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | min_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
__device__ NumericT | cuda_abs (NumericT val) |
__device__ unsigned long | cuda_abs (unsigned long val) |
__device__ unsigned int | cuda_abs (unsigned int val) |
__device__ unsigned short | cuda_abs (unsigned short val) |
__device__ unsigned char | cuda_abs (unsigned char val) |
template<typename NumericT > | |
__global__ void | index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
template<typename NumericT > | |
vcl_size_t | index_norm_inf (vector_base< NumericT > const &vec1) |
Computes the index of the first entry that is equal to the supremum-norm in modulus. More... | |
template<typename NumericT > | |
__global__ void | plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta) |
template<typename NumericT > | |
void | plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta) |
Computes a plane rotation of two vectors. More... | |
Holds all CUDA compute kernels used by ViennaCL.
void viennacl::linalg::cuda::am | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 76 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 38 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 74 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 96 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 133 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::ambm | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT > const & | mat3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 127 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 115 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 189 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 262 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 336 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::ambm_m | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT > const & | mat3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 202 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 415 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 490 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 564 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 639 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 478 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 553 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 627 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 702 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 175 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 250 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 324 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 399 of file matrix_operations_row.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 77 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2 | ||
) |
Definition at line 48 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2 | ||
) |
Definition at line 60 of file scalar_operations.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
ScalarT3 const & | s3, | ||
NumericT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 191 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 99 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT const * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 120 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT const * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 141 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 162 of file scalar_operations.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
ScalarT3 const & | s3, | ||
NumericT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 314 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 222 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT const * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 243 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT const * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 264 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 285 of file scalar_operations.hpp.
void viennacl::linalg::cuda::av | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 118 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 51 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 84 of file vector_operations.hpp.
void viennacl::linalg::cuda::avbv | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< NumericT > const & | vec3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 407 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 153 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 216 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 279 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 342 of file vector_operations.hpp.
void viennacl::linalg::cuda::avbv_v | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< NumericT > const & | vec3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 709 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 457 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 520 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 583 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 646 of file vector_operations.hpp.
void viennacl::linalg::cuda::bidiag_pack | ( | matrix_base< NumericT > & | A, |
VectorType & | dh, | ||
VectorType & | sh | ||
) |
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
A | The matrix from which the vectors will be extracted. |
dh | The vector in which the diagonal of the matrix will be stored. |
sh | The vector in which the superdiagonal of the matrix will be stored. |
Definition at line 2490 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel | ( | T * | A, |
T * | D, | ||
T * | S, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1435 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel | ( | T * | A, |
T * | D, | ||
T * | S, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1413 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const unsigned int | lg_eig_count, | ||
const unsigned int | ug_eig_count, | ||
NumericT | epsilon, | ||
unsigned int * | g_num_one, | ||
unsigned int * | g_num_blocks_mult, | ||
NumericT * | g_left_one, | ||
NumericT * | g_right_one, | ||
unsigned int * | g_pos_one, | ||
NumericT * | g_left_mult, | ||
NumericT * | g_right_mult, | ||
unsigned int * | g_left_count_mult, | ||
unsigned int * | g_right_count_mult, | ||
unsigned int * | g_blocks_mult, | ||
unsigned int * | g_blocks_mult_sum | ||
) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. g_d: diagonal elements in global memory; g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n: size of matrix; lg: lower bound of input interval (e.g. Gerschgorin interval); ug: upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count: number of eigenvalues that are smaller than lg; ug_eig_count: number of eigenvalues that are smaller than ug; epsilon: desired accuracy of eigenvalues to compute.
Definition at line 536 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
unsigned int * | blocks_mult, | ||
unsigned int * | blocks_mult_sum, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_left_count, | ||
unsigned int * | g_right_count, | ||
NumericT * | g_lambda, | ||
unsigned int * | g_pos, | ||
NumericT | precision | ||
) |
Perform second step of bisection algorithm for large matrices for intervals that after the first step contained more than one eigenvalue
g_d | diagonal elements of symmetric, tridiagonal matrix |
g_s | superdiagonal elements of symmetric, tridiagonal matrix |
n | matrix size |
blocks_mult | start addresses of blocks of intervals that are processed by one block of threads, each of the intervals contains more than one eigenvalue |
blocks_mult_sum | total number of eigenvalues / singleton intervals in one block of intervals |
g_left | left limits of intervals |
g_right | right limits of intervals |
g_left_count | number of eigenvalues less than left limits |
g_right_count | number of eigenvalues less than right limits |
g_lambda | final eigenvalue |
g_pos | index of eigenvalue (in ascending order) |
precision | desired precision of eigenvalues |
Definition at line 68 of file bisect_kernel_large_multi.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
unsigned int | num_intervals, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_pos, | ||
NumericT | precision | ||
) |
Determine eigenvalues for large matrices for intervals that after the first step contained one eigenvalue
g_d | diagonal elements of symmetric, tridiagonal matrix |
g_s | superdiagonal elements of symmetric, tridiagonal matrix |
n | matrix size |
num_intervals | total number of intervals containing one eigenvalue after the first step |
g_left | left interval limits |
g_right | right interval limits |
g_pos | index of interval / number of intervals that are smaller than right interval limit |
precision | desired precision of eigenvalues |
Definition at line 59 of file bisect_kernel_large_onei.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelSmall | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_left_count, | ||
unsigned int * | g_right_count, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const unsigned int | lg_eig_count, | ||
const unsigned int | ug_eig_count, | ||
NumericT | epsilon | ||
) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
g_d | diagonal elements in global memory |
g_s | superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0) |
n | size of matrix |
g_left | helper array |
g_right | helper array |
g_left_count | helper array |
g_right_count | helper array |
lg | lower bound of input interval (e.g. Gerschgorin interval) |
ug | upper bound of input interval (e.g. Gerschgorin interval) |
lg_eig_count | number of eigenvalues that are smaller than lg |
ug_eig_count | number of eigenvalues that are smaller than ug |
epsilon | desired accuracy of eigenvalues to compute |
Definition at line 61 of file bisect_kernel_small.hpp.
void viennacl::linalg::cuda::bisectLarge | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const NumericT | precision | ||
) |
Definition at line 69 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectLarge_MultIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | precision | ||
) |
Definition at line 130 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectLarge_OneIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | precision | ||
) |
Definition at line 101 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectSmall | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataSmall< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const NumericT | precision | ||
) |
Definition at line 43 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bluestein | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
viennacl::vector< NumericT, AlignmentV > & | out, | ||
vcl_size_t | |||
) |
Bluestein's algorithm for computing Fourier transformation.
Currently works only for input data sizes less than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.
Definition at line 621 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::bluestein_post | ( | Numeric2T * | Z, |
Numeric2T * | out, | ||
unsigned int | size, | ||
NumericT | sign | ||
) |
Definition at line 537 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::bluestein_pre | ( | Numeric2T * | input, |
Numeric2T * | A, | ||
Numeric2T * | B, | ||
unsigned int | size, | ||
unsigned int | ext_size, | ||
NumericT | sign | ||
) |
Definition at line 563 of file fft_operations.hpp.
|
inline |
Compute the next higher power of two of n
n | number for which the next higher power of two is sought |
Definition at line 66 of file bisect_util.hpp.
|
inline |
Definition at line 37 of file matrix_operations_row.hpp.
__device__ void viennacl::linalg::cuda::col_reduce_lcl_array | ( | T * | sums, |
uint | th_Idx, | ||
uint | bl_Dim | ||
) |
Definition at line 1651 of file matrix_operations_col.hpp.
__device__ void viennacl::linalg::cuda::compactIntervals | ( | NumericT * | s_left, |
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | mid, | ||
NumericT | right, | ||
unsigned int | mid_count, | ||
unsigned int | right_count, | ||
T * | s_compaction_list, | ||
unsigned int | num_threads_active, | ||
unsigned int | is_active_second | ||
) |
Perform stream compaction for second child intervals.
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
mid | midpoint of current interval (left of new interval) |
right | upper limit of interval |
mid_count | eigenvalues less than mid |
right_count | eigenvalues less than right |
s_compaction_list | list containing the indices where the data has to be stored |
num_threads_active | number of active threads / intervals |
is_active_second | mark if thread has a second non-empty child interval |
Definition at line 440 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::compactStreamsFinal | ( | const unsigned int | tid, |
const unsigned int | tid_2, | ||
const unsigned int | num_threads_active, | ||
unsigned int & | offset_mult_lambda, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
unsigned short * | s_cl_one, | ||
unsigned short * | s_cl_mult, | ||
unsigned short * | s_cl_blocking, | ||
unsigned short * | s_cl_helper, | ||
unsigned int | is_one_lambda, | ||
unsigned int | is_one_lambda_2, | ||
NumericT & | left, | ||
NumericT & | right, | ||
NumericT & | left_2, | ||
NumericT & | right_2, | ||
unsigned int & | left_count, | ||
unsigned int & | right_count, | ||
unsigned int & | left_count_2, | ||
unsigned int & | right_count_2, | ||
unsigned int | c_block_iend, | ||
unsigned int | c_sum_block, | ||
unsigned int | c_block_iend_2, | ||
unsigned int | c_sum_block_2 | ||
) |
Perform final stream compaction before writing data to global memory.
Definition at line 134 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::complex_to_real | ( | const ComplexT * | in, |
RealT * | out, | ||
unsigned int | size | ||
) |
Definition at line 808 of file fft_operations.hpp.
void viennacl::linalg::cuda::complex_to_real | ( | viennacl::vector_base< NumericT > const & | in, |
viennacl::vector_base< NumericT > & | out, | ||
vcl_size_t | size | ||
) |
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 818 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_jumper, |
const unsigned int * | row_indices, | ||
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
unsigned int | nonzero_rows, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 791 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const NumericT * | sp_mat_elements, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 205 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const NumericT * | sp_mat_elements, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 359 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 521 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 118 of file sparse_matrix_operations.hpp.
|
inline |
Compute midpoint of interval [left, right] avoiding overflow if possible
left | left / lower limit of interval |
right | right / upper limit of interval |
Definition at line 89 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
g_d | diagonal elements stored in global memory |
g_s | superdiagonal elements stored in global memory |
n | size of matrix |
x | value for which the number of smaller eigenvalues is sought |
tid | thread identifier (e.g. threadIdx.x or gtid) |
num_intervals_active | number of active intervals / threads that currently process an interval |
s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
converged | flag indicating whether the current thread has already converged (in which case the count does not have to be computed) |
Definition at line 177 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
g_d | diagonal elements stored in global memory |
g_s | superdiagonal elements stored in global memory |
n | size of matrix |
x | value for which the number of smaller eigenvalues is sought |
tid | thread identifier (e.g. threadIdx.x or gtid) |
num_intervals_active | number of active intervals / threads that currently process an interval |
s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
converged | flag indicating whether the current thread has already converged (in which case the count does not have to be computed) |
Definition at line 237 of file bisect_util.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1108 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1303 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
Definition at line 1008 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::copy_col_column_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size, | ||
uint | stride | ||
) |
Definition at line 1477 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_col_row_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size, | ||
uint | stride | ||
) |
Definition at line 1459 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_row_column_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size, | ||
uint | stride | ||
) |
Definition at line 1514 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_row_row_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size, | ||
uint | stride | ||
) |
Definition at line 1495 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::copy_vec | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | V, | ||
vcl_size_t | row_start, | ||
vcl_size_t | col_start, | ||
bool | copy_col | ||
) |
This function copies a row or a column from a matrix to a vector.
A | The matrix where to copy from. |
V | The vector to fill with data. |
row_start | The number of the first row to copy. |
col_start | The number of the first column to copy. |
copy_col | Set to TRUE to copy a column, FALSE to copy a row. |
Definition at line 2527 of file matrix_operations.hpp.
__device__ void viennacl::linalg::cuda::createIndicesCompaction | ( | T * | s_compaction_list_exc, |
unsigned int | num_threads_compaction | ||
) |
Create indices for compaction: process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and compute for each non-zero element the index at which the new interval belongs in a compact representation of all generated second children.
s_compaction_list_exc | list containing the flags indicating which threads generated two children |
num_threads_compaction | number of threads to employ for compaction |
Definition at line 373 of file bisect_util.hpp.
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward | ( | const unsigned int * | row_jumper_U, |
const unsigned int * | column_indices_U, | ||
const NumericT * | elements_U, | ||
const NumericT * | diagonal_U, | ||
const unsigned int * | block_offsets, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 700 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward | ( | const unsigned int * | row_jumper_L, |
const unsigned int * | column_indices_L, | ||
const NumericT * | elements_L, | ||
const unsigned int * | block_offsets, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 668 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 257 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 110 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 597 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 563 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 429 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 342 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 497 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 367 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 180 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 42 of file sparse_matrix_operations_solve.hpp.
__device__ NumericT viennacl::linalg::cuda::cuda_abs | ( | NumericT | val | ) |
Definition at line 2893 of file vector_operations.hpp.
|
inline |
Definition at line 2894 of file vector_operations.hpp.
|
inline |
Definition at line 2895 of file vector_operations.hpp.
|
inline |
Definition at line 2896 of file vector_operations.hpp.
|
inline |
Definition at line 2897 of file vector_operations.hpp.
void viennacl::linalg::cuda::direct | ( | viennacl::vector< NumericT, AlignmentV > const & | in, |
viennacl::vector< NumericT, AlignmentV > & | out, | ||
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Direct 1D algorithm for computing Fourier transformation.
Works on data of any size. The serial implementation has O(n^2) complexity.
Definition at line 196 of file fft_operations.hpp.
void viennacl::linalg::cuda::direct | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | in, |
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | out, | ||
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Direct 2D algorithm for computing Fourier transformation.
Works on data of any size. The serial implementation has O(n^2) complexity.
Definition at line 221 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::el_wise_mul_div | ( | NumericT * | matrix1, |
NumericT const * | matrix2, | ||
NumericT const * | matrix3, | ||
unsigned int | size | ||
) |
Main CUDA kernel for nonnegative matrix factorization of dense matrices.
Definition at line 38 of file nmf_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT, SizeT > & | A, |
matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 511 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< float, SizeT > & | A, |
matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 571 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< double, SizeT > & | A, |
matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 631 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
) |
Definition at line 699 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
) |
Definition at line 741 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
) |
Definition at line 783 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
) |
Definition at line 825 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
) |
Definition at line 867 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
) |
Definition at line 909 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const & | proxy | ||
) |
Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)
vec1 | The result vector (or -range, or -slice) |
proxy | The proxy object holding v2, v3 and the operation |
Definition at line 933 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
) |
Definition at line 951 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< float > & | vec1, |
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 963 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< double > & | vec1, |
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 993 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
) |
Definition at line 993 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
) |
Definition at line 1035 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
) |
Definition at line 1038 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
) |
Definition at line 1065 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
) |
Definition at line 1077 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
) |
Definition at line 1093 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
) |
Definition at line 1119 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
) |
Definition at line 1121 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
) |
Definition at line 1149 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
) |
Definition at line 1161 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
) |
Definition at line 1177 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
) |
Definition at line 1203 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
) |
Definition at line 1205 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
) |
Definition at line 1233 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
) |
Definition at line 1245 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
) |
Definition at line 1260 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
Definition at line 1287 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
) |
Definition at line 1289 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
) |
Definition at line 1317 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
) |
Definition at line 1329 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
) |
Definition at line 1345 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
) |
Definition at line 1371 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
) |
Definition at line 1373 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
) |
Definition at line 1401 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
Definition at line 1429 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
) |
Definition at line 1457 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
) |
Definition at line 1485 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 755 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 804 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
Definition at line 891 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 865 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::element_op_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
Definition at line 845 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 816 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const NumericT * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1573 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const NumericT * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1746 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | col_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
Definition at line 1503 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::exclusive_scan | ( | vector_base< NumericT, F > & | vec1, |
vector_base< NumericT, F > & | vec2 | ||
) |
This function implements an exclusive scan.
vec1 | Input vector: Gets overwritten by the routine. |
vec2 | The output vector. |
Definition at line 2788 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::exclusive_scan_kernel_1 | ( | T * | X, |
unsigned int | startX, | ||
unsigned int | incX, | ||
unsigned int | InputSize, | ||
T * | Y, | ||
unsigned int | startY, | ||
unsigned int | incY, | ||
T * | S, | ||
unsigned int | startS, | ||
unsigned int | incS | ||
) |
Definition at line 1868 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::fft_direct | ( | const Numeric2T * | input, |
Numeric2T * | output, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 139 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_div_vec_scalar | ( | Numeric2T * | input1, |
unsigned int | size, | ||
NumericT | factor | ||
) |
Definition at line 689 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_mult_vec | ( | const NumericT * | input1, |
const NumericT * | input2, | ||
NumericT * | output, | ||
unsigned int | size | ||
) |
Definition at line 656 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_radix2 | ( | Numeric2T * | input, |
unsigned int | s, | ||
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 370 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_radix2_local | ( | Numeric2T * | input, |
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 297 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_reorder | ( | NumericT * | input, |
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
bool | is_row_major | ||
) |
Definition at line 240 of file fft_operations.hpp.
|
inline |
Compute the next lower power of two of n
n | number for which the next lower power of two is sought |
Definition at line 46 of file bisect_util.hpp.
|
inline |
Definition at line 127 of file fft_operations.hpp.
void viennacl::linalg::cuda::givens_next | ( | matrix_base< NumericT > & | Q, |
vector_base< NumericT > & | tmp1, | ||
vector_base< NumericT > & | tmp2, | ||
int | l, | ||
int | m | ||
) |
This function updates the matrix Q. It is part of the tql2 algorithm.
Q | The matrix to be updated. |
tmp1 | Vector with data from the tql2 algorithm. |
tmp2 | Vector with data from the tql2 algorithm. |
l | Data from the tql2 algorithm. |
m | Data from the tql2 algorithm. |
Definition at line 2695 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::givens_next_column_major_kernel | ( | T * | matr, |
T * | cs, | ||
T * | ss, | ||
uint | size, | ||
uint | stride, | ||
uint | start_i, | ||
uint | end_i | ||
) |
Definition at line 1771 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::givens_next_row_major_kernel | ( | T * | matr, |
T * | cs, | ||
T * | ss, | ||
uint | size, | ||
uint | stride, | ||
uint | start_i, | ||
uint | end_i | ||
) |
Definition at line 1724 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_A_left | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | D, | ||
vcl_size_t | start | ||
) |
This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P.
A | The matrix to be updated. |
D | The normalized Householder vector. |
start | The repetition counter. |
Definition at line 2588 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_left_column_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1560 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_left_row_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1535 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_A_right | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | D | ||
) |
This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P.
A | The matrix to be updated. |
D | The normalized Householder vector. |
Definition at line 2627 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_right_column_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1618 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_right_row_major_kernel | ( | T * | A, |
T * | V, | ||
uint | row_start, | ||
uint | col_start, | ||
uint | size1, | ||
uint | size2, | ||
uint | stride | ||
) |
Definition at line 1587 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_QL | ( | matrix_base< NumericT > & | Q, |
vector_base< NumericT > & | D, | ||
vcl_size_t | A_size1 | ||
) |
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
Q | The matrix to be updated. |
D | The Householder vector. |
A_size1 | size1 of matrix A |
Definition at line 2664 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_QL_column_major_kernel | ( | T * | QL, |
T * | V, | ||
uint | size1, | ||
uint | strideQ | ||
) |
Definition at line 1696 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_QL_row_major_kernel | ( | T * | QL, |
T * | V, | ||
uint | size1, | ||
uint | strideQ | ||
) |
Definition at line 1669 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 2086 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 2284 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
Definition at line 2002 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::inclusive_scan | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > & | vec2 | ||
) |
This function implements an inclusive scan.
vec1 | Input vector: Gets overwritten by the routine. |
vec2 | The output vector. |
Definition at line 2730 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::inclusive_scan_kernel_1 | ( | T * | X, |
unsigned int | startX, | ||
unsigned int | incX, | ||
unsigned int | InputSize, | ||
T * | Y, | ||
unsigned int | startY, | ||
unsigned int | incY, | ||
T * | S, | ||
unsigned int | startS, | ||
unsigned int | incS | ||
) |
Definition at line 1822 of file matrix_operations_col.hpp.
vcl_size_t viennacl::linalg::cuda::index_norm_inf | ( | vector_base< NumericT > const & | vec1 | ) |
Computes the index of the first entry that is equal to the supremum-norm in modulus.
vec1 | The vector |
Definition at line 2955 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int * | result | ||
) |
Definition at line 2900 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
NumericT * | group_results | ||
) |
Definition at line 1821 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
NumericT * | group_results | ||
) |
Definition at line 1860 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
NumericT * | group_results | ||
) |
Definition at line 1905 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
const NumericT * | y4, | ||
unsigned int | start4, | ||
unsigned int | stride4, | ||
const NumericT * | y5, | ||
unsigned int | start5, | ||
unsigned int | stride5, | ||
const NumericT * | y6, | ||
unsigned int | start6, | ||
unsigned int | stride6, | ||
const NumericT * | y7, | ||
unsigned int | start7, | ||
unsigned int | stride7, | ||
NumericT * | group_results | ||
) |
Definition at line 1956 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_cpu | ( | vector_base< NumericT > const & | vec1, |
vector_base< NumericT > const & | vec2, | ||
NumericT & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the host) |
Definition at line 1785 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the gpu) |
Definition at line 1753 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | x, |
vector_tuple< NumericT > const & | vec_tuple, | ||
vector_base< NumericT > & | result | ||
) |
Definition at line 2053 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_kernel | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
NumericT * | group_buffer | ||
) |
Definition at line 1507 of file vector_operations.hpp.
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_base< NumericT > & | A, |
bool | trans_A, | ||
matrix_base< NumericT > & | B, | ||
bool | trans_B, | ||
SolverTagT | tag | ||
) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). Both A and B can optionally be transposed.
A | The system matrix |
trans_A | Whether A is transposed |
B | The matrix of row vectors, to which the solution is directly written |
trans_B | Whether B is transposed |
tag | Solver tag for identifying the respective triangular solver |
Definition at line 299 of file direct_solve.hpp.
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_base< NumericT > & | mat, |
bool | trans_mat, | ||
vector_base< NumericT > & | vec, | ||
SolverTagT | |||
) |
Direct inplace solver for dense triangular systems (non-transposed version)
mat | The system matrix proxy |
trans_mat | Whether the matrix is to be transposed |
vec | The load vector, to which the solution is directly written |
Definition at line 448 of file direct_solve.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 555 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 576 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 598 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 619 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 643 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 664 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 695 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 716 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_assign | ( | matrix_base< NumericT > & | mat, |
NumericT | s, | ||
bool | clear = false |
||
) |
Definition at line 279 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 718 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 736 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 851 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 874 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 897 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 920 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 943 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 966 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 989 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1012 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1035 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1058 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1104 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1081 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1127 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1150 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1173 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1196 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1219 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::matrix_column | ( | const matrix_base< NumericT > & | mat, |
unsigned int | j, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 472 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_from_vector | ( | const vector_base< NumericT > & | vec, |
int | k, | ||
matrix_base< NumericT > & | mat | ||
) |
Definition at line 340 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_to_vector | ( | matrix_base< NumericT > const & | mat, |
int | k, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 392 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diagonal_assign | ( | matrix_base< NumericT > & | mat, |
NumericT | s | ||
) |
Definition at line 311 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 38 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 125 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 212 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 299 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 749 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 836 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 923 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1010 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1463 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1550 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1637 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1724 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2178 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2265 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2352 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2439 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel(
    const NumericT * A,
    unsigned int A_start1,
    unsigned int A_start2,
    unsigned int A_inc1,
    unsigned int A_inc2,
    unsigned int A_size1,
    unsigned int A_size2,
    unsigned int A_internal_size1,
    unsigned int A_internal_size2,
    bool row_major_A,
    bool transpose_A,
    NumericT * B,
    unsigned int B_start1,
    unsigned int B_start2,
    unsigned int B_inc1,
    unsigned int B_inc2,
    unsigned int B_size1,
    unsigned int B_size2,
    unsigned int B_internal_size1,
    unsigned int B_internal_size2,
    bool row_major_B,
    bool transpose_B,
    bool unit_diagonal
)
Definition at line 127 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 393 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 480 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 567 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 654 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1104 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1191 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1278 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1365 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1819 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1906 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1993 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2080 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2535 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2622 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2709 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2796 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel | ( | const NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
bool | row_major_A, | ||
bool | transpose_A, | ||
NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_size1, | ||
unsigned int | B_size2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
bool | row_major_B, | ||
bool | transpose_B, | ||
bool | unit_diagonal | ||
) |
Definition at line 41 of file direct_solve.hpp.
void viennacl::linalg::cuda::matrix_row | ( | matrix_base< NumericT > const & | mat, |
unsigned int | i, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 439 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 779 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 797 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 911 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 934 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 957 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 980 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1003 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1026 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1049 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1072 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1095 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1118 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1164 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1141 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1187 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1210 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1233 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1256 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1279 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::max_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU.
vec1 | The vector |
result | The result host scalar |
Definition at line 2810 of file vector_operations.hpp.
void viennacl::linalg::cuda::max_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the maximum of a vector, both reduction stages run on the GPU.
vec1 | The vector |
result | The result GPU scalar |
Definition at line 2782 of file vector_operations.hpp.
void viennacl::linalg::cuda::min_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU.
vec1 | The vector |
result | The result host scalar |
Definition at line 2864 of file vector_operations.hpp.
void viennacl::linalg::cuda::min_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the minimum of a vector, both reduction stages run on the GPU.
vec1 | The vector |
result | The result GPU scalar |
Definition at line 2836 of file vector_operations.hpp.
void viennacl::linalg::cuda::multiply_complex | ( | viennacl::vector< NumericT, AlignmentV > const & | input1, |
viennacl::vector< NumericT, AlignmentV > const & | input2, | ||
viennacl::vector< NumericT, AlignmentV > & | output | ||
) |
Multiply two complex vectors and store the result in output.
Definition at line 673 of file fft_operations.hpp.
void viennacl::linalg::cuda::nmf | ( | viennacl::matrix_base< NumericT > const & | V, |
viennacl::matrix_base< NumericT > & | W, | ||
viennacl::matrix_base< NumericT > & | H, | ||
viennacl::linalg::nmf_config const & | conf | ||
) |
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
V | Input matrix |
W | First factor |
H | Second factor |
conf | A configuration object holding tolerances and the like |
Definition at line 59 of file nmf_operations.hpp.
void viennacl::linalg::cuda::norm_1_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2622 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_1_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2604 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_2_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
Definition at line 2668 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_2_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
Definition at line 2649 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_inf_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2716 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_inf_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2696 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_floats | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2252 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_integers | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2345 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2429 of file vector_operations.hpp.
void viennacl::linalg::cuda::normalize | ( | viennacl::vector< NumericT, AlignmentV > & | input | ) |
Normalize the vector by its own size.
Definition at line 699 of file fft_operations.hpp.
|
inline |
Definition at line 97 of file fft_operations.hpp.
|
inline |
Definition at line 122 of file fft_operations.hpp.
|
inline |
Definition at line 79 of file fft_operations.hpp.
|
inline |
Definition at line 103 of file fft_operations.hpp.
|
inline |
Definition at line 85 of file fft_operations.hpp.
|
inline |
Definition at line 109 of file fft_operations.hpp.
|
inline |
Definition at line 91 of file fft_operations.hpp.
|
inline |
Definition at line 116 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 843 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 751 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 991 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1188 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | compressed_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 811 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | coordinate_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 957 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1054 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1155 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | hyb_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1264 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1086 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_update_s | ( | vector_base< NumericT > & | s, |
vector_base< NumericT > & | r, | ||
vector_base< NumericT > const & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 646 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel | ( | NumericT * | s, |
NumericT const * | residual, | ||
NumericT const * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | chunk_size, | ||
unsigned int | chunk_offset | ||
) |
Definition at line 589 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel | ( | NumericT * | result, |
NumericT | alpha, | ||
NumericT * | p, | ||
NumericT | omega, | ||
NumericT const * | s, | ||
NumericT * | residual, | ||
NumericT const * | As, | ||
NumericT | beta, | ||
NumericT const * | Ap, | ||
NumericT const * | r0star, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | size | ||
) |
Definition at line 668 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_vector_update | ( | vector_base< NumericT > & | result, |
NumericT | alpha, | ||
vector_base< NumericT > & | p, | ||
NumericT | omega, | ||
vector_base< NumericT > const & | s, | ||
vector_base< NumericT > & | residual, | ||
vector_base< NumericT > const & | As, | ||
NumericT | beta, | ||
vector_base< NumericT > const & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 719 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 192 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 114 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 324 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 493 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | compressed_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 166 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | coordinate_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 296 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 379 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 466 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | hyb_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 561 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 405 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel | ( | NumericT * | result, |
NumericT | alpha, | ||
NumericT * | p, | ||
NumericT * | r, | ||
NumericT const * | Ap, | ||
NumericT | beta, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | size | ||
) |
Definition at line 44 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_vector_update | ( | vector_base< NumericT > & | result, |
NumericT | alpha, | ||
vector_base< NumericT > & | p, | ||
vector_base< NumericT > & | r, | ||
vector_base< NumericT > const & | Ap, | ||
NumericT | beta, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 85 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 | ( | vector_base< T > const & | device_krylov_basis, |
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vcl_size_t | param_k, | ||
vector_base< T > & | vi_in_vk_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 1441 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel | ( | T const * | krylov_basis, |
unsigned int | size, | ||
unsigned int | internal_size, | ||
unsigned int | k, | ||
T * | vi_in_vk_buffer, | ||
unsigned int | chunk_size | ||
) |
Definition at line 1389 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 | ( | vector_base< T > & | device_krylov_basis, |
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vcl_size_t | param_k, | ||
vector_base< T > const & | vi_in_vk_buffer, | ||
vector_base< T > & | R_buffer, | ||
vcl_size_t | krylov_dim, | ||
vector_base< T > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 1533 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel | ( | T * | krylov_basis, |
unsigned int | size, | ||
unsigned int | internal_size, | ||
unsigned int | k, | ||
T const * | vi_in_vk_buffer, | ||
unsigned int | chunk_size, | ||
T * | R_buffer, | ||
unsigned int | krylov_dim, | ||
T * | inner_prod_buffer | ||
) |
Definition at line 1466 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_normalize_vk | ( | vector_base< T > & | v_k, |
vector_base< T > const & | residual, | ||
vector_base< T > & | R_buffer, | ||
vcl_size_t | offset_in_R, | ||
vector_base< T > const & | inner_prod_buffer, | ||
vector_base< T > & | r_dot_vk_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
This routine computes, for vectors 'r' and 'v_k': (1) the second reduction step for ||v_k||, (2) the normalization v_k /= ||v_k||, and (3) the first reduction step for <r, v_k>.
Definition at line 1358 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel | ( | T * | vk, |
unsigned int | vk_offset, | ||
T const * | residual, | ||
T * | R_buffer, | ||
unsigned int | R_offset, | ||
T const * | inner_prod_buffer, | ||
unsigned int | chunk_size, | ||
T * | r_dot_vk_buffer, | ||
unsigned int | chunk_offset, | ||
unsigned int | size | ||
) |
Definition at line 1296 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | compressed_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1610 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | coordinate_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1630 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | ell_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1652 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | sliced_ell_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1673 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | hyb_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1695 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_update_result | ( | vector_base< T > & | result, |
vector_base< T > const & | residual, | ||
vector_base< T > const & | krylov_basis, | ||
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vector_base< T > const & | coefficients, | ||
vcl_size_t | param_k | ||
) |
Definition at line 1585 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel | ( | T * | result, |
T const * | residual, | ||
T const * | krylov_basis, | ||
unsigned int | size, | ||
unsigned int | internal_size, | ||
T const * | coefficients, | ||
unsigned int | k | ||
) |
Definition at line 1565 of file iterative_operations.hpp.
void viennacl::linalg::cuda::plane_rotation | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > & | vec2, | ||
NumericT | alpha, | ||
NumericT | beta | ||
) |
Computes a plane rotation of two vectors.
Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
vec1 | The first vector |
vec2 | The second vector |
alpha | The first transformation coefficient |
beta | The second transformation coefficient |
Definition at line 3015 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::plane_rotation_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
NumericT | alpha, | ||
NumericT | beta | ||
) |
Definition at line 2979 of file vector_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 155 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse_matrix-dense_matrix multiplication, the first matrix being compressed.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The sparse matrix |
d_mat | The dense matrix |
result | The result matrix |
Definition at line 267 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out matrix-trans(matrix) multiplication, the first matrix being compressed and the second transposed.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
sp_mat | The sparse matrix |
d_mat | The transposed dense matrix proxy |
result | The result matrix |
Definition at line 422 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_compressed_matrix< NumericT > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 834 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a coordinate_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 1085 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(COO)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Matrix |
result | The Result Matrix |
Definition at line 1211 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Transposed Matrix |
result | The Result Matrix |
Definition at line 1405 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | mat, |
bool | mat_transpose, | ||
const vector_base< NumericT > & | vec, | ||
vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication.
Implementation of the convenience expressions result = prod(mat, vec); and result = prod(trans(mat), vec);
mat | The matrix |
mat_transpose | Whether the matrix is to be transposed. |
vec | The vector |
result | The result vector |
Definition at line 1427 of file matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with an ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 1551 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The dense matrix |
result | The result matrix |
Definition at line 1643 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The dense matrix |
result | The result matrix |
Definition at line 1816 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::sliced_ell_matrix< NumericT, IndexT > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a sliced_ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 1975 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 2060 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, d_mat);
mat | The sparse matrix |
d_mat | The dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
Definition at line 2167 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, trans(d_mat));
mat | The sparse matrix |
d_mat | Transposed matrix proxy object for the rhs dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
Definition at line 2365 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | A, |
bool | trans_A, | ||
const matrix_base< NumericT > & | B, | ||
bool | trans_B, | ||
matrix_base< NumericT > & | C, | ||
ScalarT | alpha, | ||
ScalarT | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(A, B);
Definition at line 2385 of file matrix_operations.hpp.
void viennacl::linalg::cuda::radix2 | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Radix-2 1D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 441 of file fft_operations.hpp.
void viennacl::linalg::cuda::radix2 | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Radix-2 2D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 493 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::real_to_complex | ( | const RealT * | in, |
ComplexT * | out, | ||
unsigned int | size | ||
) |
Definition at line 781 of file fft_operations.hpp.
void viennacl::linalg::cuda::real_to_complex | ( | viennacl::vector_base< NumericT > const & | in, |
viennacl::vector_base< NumericT > & | out, | ||
vcl_size_t | size | ||
) |
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 796 of file fft_operations.hpp.
void viennacl::linalg::cuda::reorder | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | bits_datasize, | ||
vcl_size_t | batch_num, | ||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Definition at line 281 of file fft_operations.hpp.
void viennacl::linalg::cuda::reverse | ( | viennacl::vector_base< NumericT > & | in | ) |
Reverse vector to opposite order and save it in input vector.
Definition at line 846 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::reverse_inplace | ( | NumericT * | vec, |
uint | size | ||
) |
Definition at line 831 of file fft_operations.hpp.
|
inline |
Definition at line 33 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::scalar_swap_kernel | ( | NumericT * | s1, |
NumericT * | s2 | ||
) |
Definition at line 345 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1334 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1374 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1393 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1433 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::scaled_rank_1_update | ( | matrix_base< NumericT > & | mat1, |
ScalarT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
const vector_base< NumericT > & | vec1, | ||
const vector_base< NumericT > & | vec2 | ||
) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.
Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
mat1 | The matrix to be updated |
alpha | The scaling factor (either a viennacl::scalar<>, float, or double) |
len_alpha | Length of the buffer for a possible final reduction step (currently always '1') |
reciprocal_alpha | Use 1/alpha instead of alpha |
flip_sign_alpha | Use -alpha instead of alpha |
vec1 | The first vector |
vec2 | The second vector |
Definition at line 2417 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_2 | ( | T * | S_ref, |
unsigned int | startS_ref, | ||
unsigned int | incS_ref, | ||
T * | S, | ||
unsigned int | startS, | ||
unsigned int | incS, | ||
unsigned int | InputSize | ||
) |
Definition at line 1918 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_3 | ( | T * | S_ref, |
unsigned int | startS_ref, | ||
unsigned int | incS_ref, | ||
T * | S, | ||
unsigned int | startS, | ||
unsigned int | incS | ||
) |
Definition at line 1960 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_4 | ( | T * | S, |
unsigned int | startS, | ||
unsigned int | incS, | ||
T * | Y, | ||
unsigned int | startY, | ||
unsigned int | incY, | ||
unsigned int | OutputSize | ||
) |
Definition at line 1979 of file matrix_operations_col.hpp.
|
inline |
Compute addresses to obtain compact list of block start addresses.
Definition at line 238 of file bisect_kernel_large.hpp.
|
inline |
Perform initial scan for compaction of intervals containing one and multiple eigenvalues; also do initial scan to build blocks
Definition at line 369 of file bisect_kernel_large.hpp.
|
inline |
Perform scan to obtain number of eigenvalues before a specific block.
Definition at line 303 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::sliced_ell_matrix_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
unsigned int | size_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 1929 of file sparse_matrix_operations.hpp.
__device__ void viennacl::linalg::cuda::storeInterval | ( | unsigned int | addr, |
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | left, | ||
NumericT | right, | ||
S | left_count, | ||
S | right_count, | ||
NumericT | precision | ||
) |
Check if interval converged and store appropriately
addr | address where to store the information of the interval |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
left | lower limit of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
right_count | eigenvalues less than right |
precision | desired precision for eigenvalues |
Definition at line 124 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeIntervalConverged | ( | NumericT * | s_left, |
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT & | left, | ||
NumericT & | mid, | ||
NumericT & | right, | ||
S & | left_count, | ||
S & | mid_count, | ||
S & | right_count, | ||
T * | s_compaction_list_exc, | ||
unsigned int & | compact_second_chunk, | ||
const unsigned int | num_threads_active, | ||
unsigned int & | is_active_second | ||
) |
Definition at line 465 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervals | ( | unsigned int | addr, |
const unsigned int | num_threads_active, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | left, | ||
NumericT | mid, | ||
NumericT | right, | ||
const S | left_count, | ||
const S | mid_count, | ||
const S | right_count, | ||
NumericT | precision, | ||
unsigned int & | compact_second_chunk, | ||
T * | s_compaction_list_exc, | ||
unsigned int & | is_active_second | ||
) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.
addr | base address for storing intervals |
num_threads_active | number of threads / intervals in current sweep |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
left | lower limit of interval |
mid | midpoint of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
mid_count | eigenvalues less than mid |
right_count | eigenvalues less than right |
precision | desired precision for eigenvalues |
compact_second_chunk | shared mem flag if second chunk is used and ergo requires compaction |
s_compaction_list_exc | helper array for stream compaction, s_compaction_list_exc[tid] = 1 when the thread generated two child intervals |
is_active_second | mark if thread has a second non-empty child interval |
Definition at line 309 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervalsLarge | ( | unsigned int | addr, |
const unsigned int | num_threads_active, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
NumericT | left, | ||
NumericT | mid, | ||
NumericT | right, | ||
const unsigned short | left_count, | ||
const unsigned short | mid_count, | ||
const unsigned short | right_count, | ||
NumericT | epsilon, | ||
unsigned int & | compact_second_chunk, | ||
unsigned short * | s_compaction_list, | ||
unsigned int & | is_active_second | ||
) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread
Definition at line 475 of file bisect_kernel_large.hpp.
__device__ void viennacl::linalg::cuda::subdivideActiveInterval | ( | const unsigned int | tid, |
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
const unsigned int | num_threads_active, | ||
NumericT & | left, | ||
NumericT & | right, | ||
unsigned int & | left_count, | ||
unsigned int & | right_count, | ||
NumericT & | mid, | ||
unsigned int & | all_threads_converged | ||
) |
Subdivide interval if active and not already converged.
tid | id of thread |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
num_threads_active | number of active threads in warp |
left | lower limit of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
right_count | eigenvalues less than right |
mid | midpoint of interval |
all_threads_converged | shared memory flag if all threads are converged |
Definition at line 529 of file bisect_util.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value >::type viennacl::linalg::cuda::swap | ( | ScalarT1 & | s1, |
ScalarT2 & | s2 | ||
) |
Swaps the contents of two scalars, data is copied.
s1 | The first scalar |
s2 | The second scalar |
Definition at line 361 of file scalar_operations.hpp.
void viennacl::linalg::cuda::trans | ( | matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const & | proxy, |
matrix_base< NumericT > & | temp_trans | ||
) |
Definition at line 57 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::trans_kernel | ( | const NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_stride1, | ||
unsigned int | A_stride2, | ||
NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
unsigned int | B_stride1, | ||
unsigned int | B_stride2, | ||
bool | data_major | ||
) |
Definition at line 43 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1277 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1351 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::transpose | ( | const NumericT * | input, |
NumericT * | output, | ||
unsigned int | row_num, | ||
unsigned int | col_num | ||
) |
Definition at line 712 of file fft_operations.hpp.
void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | input, |
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | output | ||
) |
Transpose matrix.
Definition at line 731 of file fft_operations.hpp.
void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | input | ) |
In-place transpose of a matrix.
Definition at line 769 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::transpose_inplace | ( | NumericT * | input, |
unsigned int | row_num, | ||
unsigned int | col_num | ||
) |
Definition at line 745 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel | ( | NumericT const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
Definition at line 355 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel | ( | NumericT const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
Definition at line 313 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1251 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1029 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1056 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1084 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1112 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1140 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1168 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1196 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1224 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1280 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1336 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_log_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1308 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1364 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1392 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1420 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1448 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1476 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1246 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1306 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::vector_assign | ( | vector_base< NumericT > & | vec1, |
ScalarT1 const & | alpha, | ||
bool | up_to_internal_size = false | ||
) |
Assigns a constant value to a vector (or to a vector range or slice).
vec1 | The vector to which the value should be assigned |
alpha | The value to be assigned |
up_to_internal_size | Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). |
Definition at line 777 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_assign_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | internal_size1, | ||
NumericT | alpha | ||
) |
Definition at line 756 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_maxmin_kernel | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 2739 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel | ( | NumericT const * | vec1, |
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
Definition at line 2031 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1547 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1589 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1626 of file vector_operations.hpp.
void viennacl::linalg::cuda::vector_swap | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > & | vec2 | ||
) |
Swaps the contents of two vectors; the data is copied.
vec1 | The first vector (or -range, or -slice) |
vec2 | The second vector (or -range, or -slice) |
Definition at line 827 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_swap_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 800 of file vector_operations.hpp.
__device__ void viennacl::linalg::cuda::writeToGmem | ( | const unsigned int | tid, |
const unsigned int | tid_2, | ||
const unsigned int | num_threads_active, | ||
const unsigned int | num_blocks_mult, | ||
NumericT * | g_left_one, | ||
NumericT * | g_right_one, | ||
unsigned int * | g_pos_one, | ||
NumericT * | g_left_mult, | ||
NumericT * | g_right_mult, | ||
unsigned int * | g_left_count_mult, | ||
unsigned int * | g_right_count_mult, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
unsigned int * | g_blocks_mult, | ||
unsigned int * | g_blocks_mult_sum, | ||
unsigned short * | s_compaction_list, | ||
unsigned short * | s_cl_helper, | ||
unsigned int | offset_mult_lambda | ||
) |
Write data to global memory.
Definition at line 53 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::zero2 | ( | NumericT * | input1, |
NumericT * | input2, | ||
unsigned int | size | ||
) |
Definition at line 601 of file fft_operations.hpp.