ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
bisect_kernel_calls.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_
2 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_
3 /* =========================================================================
4  Copyright (c) 2010-2014, Institute for Microelectronics,
5  Institute for Analysis and Scientific Computing,
6  TU Wien.
7  Portions of this software are copyright by UChicago Argonne, LLC.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
20 
29 // includes, kernels
34 
35 
36 namespace viennacl
37 {
38 namespace linalg
39 {
40 namespace cuda
41 {
42 template<typename NumericT>
44  const unsigned int mat_size,
45  const NumericT lg, const NumericT ug,
46  const NumericT precision)
47 {
48 
49 
50  dim3 blocks(1, 1, 1);
52 
53  bisectKernelSmall<<< blocks, threads >>>(
54  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_a),
55  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_b) + 1,
56  mat_size,
57  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.vcl_g_left),
58  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.vcl_g_right),
59  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.vcl_g_left_count),
60  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.vcl_g_right_count),
61  lg, ug, 0, mat_size,
62  precision
63  );
65 }
66 
67 
68 template<typename NumericT>
70  const unsigned int mat_size,
71  const NumericT lg, const NumericT ug,
72  const NumericT precision)
73  {
74 
75  dim3 blocks(1, 1, 1);
76  dim3 threads(VIENNACL_BISECT_MAX_THREADS_BLOCK, 1, 1);
77  bisectKernelLarge<<< blocks, threads >>>
78  (viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_a),
79  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_b) + 1,
80  mat_size,
81  lg, ug, 0, mat_size, precision,
82  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_num_one),
83  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_num_blocks_mult),
84  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_left_one),
85  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_right_one),
86  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_pos_one),
87  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_left_mult),
88  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_right_mult),
89  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_left_count_mult),
90  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_right_count_mult),
91  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_blocks_mult),
92  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_blocks_mult_sum)
93  );
95 }
96 
97 
98 // compute eigenvalues for intervals that contained only one eigenvalue
99 // after the first processing step
100 template<typename NumericT>
102  const unsigned int mat_size,
103  const NumericT precision)
104  {
105 
106  unsigned int num_one_intervals = result.g_num_one;
107  unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals, VIENNACL_BISECT_MAX_THREADS_BLOCK);
108  dim3 grid_onei;
109  grid_onei.x = num_blocks;
110  grid_onei.y = 1, grid_onei.z = 1;
111  dim3 threads_onei(VIENNACL_BISECT_MAX_THREADS_BLOCK, 1, 1);
112 
113 
114  bisectKernelLarge_OneIntervals<<< grid_onei , threads_onei >>>
115  (viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_a),
116  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_b) + 1,
117  mat_size, num_one_intervals,
118  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_left_one),
119  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_right_one),
120  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_pos_one),
121  precision
122  );
123  viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_OneIntervals() FAILED.");
124 }
125 
126 
127 // process intervals that contained more than one eigenvalue after
128 // the first processing step
129 template<typename NumericT>
131  const unsigned int mat_size,
132  const NumericT precision)
133  {
134  // get the number of blocks of intervals that contain, in total when
135  // each interval contains only one eigenvalue, not more than
136  // MAX_THREADS_BLOCK threads
137  unsigned int num_blocks_mult = result.g_num_blocks_mult;
138 
139  // setup the execution environment
140  dim3 grid_mult(num_blocks_mult, 1, 1);
141  dim3 threads_mult(VIENNACL_BISECT_MAX_THREADS_BLOCK, 1, 1);
142 
143  bisectKernelLarge_MultIntervals<<< grid_mult, threads_mult >>>
144  (viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_a),
145  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(input.g_b) + 1,
146  mat_size,
147  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_blocks_mult),
148  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_blocks_mult_sum),
149  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_left_mult),
150  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_right_mult),
151  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_left_count_mult),
152  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_right_count_mult),
153  viennacl::linalg::cuda::detail::cuda_arg<NumericT>(result.g_lambda_mult),
154  viennacl::linalg::cuda::detail::cuda_arg<unsigned int>(result.g_pos_mult),
155  precision
156  );
157  viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_MultIntervals() FAILED.");
158 }
159 }
160 }
161 }
162 
163 #endif
viennacl::vector< NumericT > g_b
device side representation of superdiagonal
Definition: structs.hpp:62
void bisectLarge(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
viennacl::vector< NumericT > g_left_mult
Definition: structs.hpp:155
#define VIENNACL_BISECT_MAX_THREADS_BLOCK
Definition: config.hpp:31
Determine eigenvalues for small symmetric, tridiagonal matrix.
viennacl::vector< NumericT > g_left_one
Definition: structs.hpp:143
viennacl::scalar< unsigned int > g_num_blocks_mult
Definition: structs.hpp:139
viennacl::vector< unsigned int > g_blocks_mult
Definition: structs.hpp:169
viennacl::vector< unsigned int > g_left_count_mult
Definition: structs.hpp:162
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
viennacl::vector< unsigned int > g_right_count_mult
Definition: structs.hpp:166
In this class the input matrix is stored.
Definition: structs.hpp:53
unsigned int getNumBlocksLinear(const unsigned int num_threads, const unsigned int num_threads_block)
Definition: util.hpp:96
viennacl::vector< NumericT > vcl_g_right
right interval limits at the end of the computation
Definition: structs.hpp:105
viennacl::vector< NumericT > g_lambda_mult
Definition: structs.hpp:177
viennacl::vector< NumericT > vcl_g_left
left interval limits at the end of the computation
Definition: structs.hpp:103
viennacl::vector< unsigned int > vcl_g_left_count
number of eigenvalues smaller than the left interval limit
Definition: structs.hpp:107
viennacl::vector< unsigned int > g_pos_one
Definition: structs.hpp:151
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
Definition: config.hpp:34
First step of the bisection algorithm for the computation of eigenvalues.
Determine eigenvalues for large matrices for intervals that contained after the first step one eigenv...
viennacl::vector< NumericT > g_right_one
Definition: structs.hpp:147
viennacl::vector< unsigned int > g_pos_mult
Definition: structs.hpp:181
void bisectLarge_OneIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
viennacl::vector< unsigned int > vcl_g_right_count
number of eigenvalues bigger than the right interval limit
Definition: structs.hpp:109
In this class the data of the result for large matrices is stored.
Definition: structs.hpp:129
void bisectSmall(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
viennacl::vector< unsigned int > g_blocks_mult_sum
Definition: structs.hpp:173
viennacl::scalar< unsigned int > g_num_one
number of intervals containing one eigenvalue after the first step
Definition: structs.hpp:135
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:27
void bisectLarge_MultIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
In this class the data of the result for small matrices is stored.
Definition: structs.hpp:98
Second step of the bisection algorithm for the computation of eigenvalues for large matrices...
viennacl::vector< NumericT > g_a
device side representation of diagonal
Definition: structs.hpp:60
viennacl::vector< NumericT > g_right_mult
Definition: structs.hpp:158