ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
bisect_kernel_calls.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_
2 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_CALLS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
20 
30 
31 // includes, kernels
36 
37 
38 namespace viennacl
39 {
40 namespace linalg
41 {
42 namespace cuda
43 {
44 template<typename NumericT>
46  const unsigned int mat_size,
47  const NumericT lg, const NumericT ug,
48  const NumericT precision)
49 {
50 
51 
52  dim3 blocks(1, 1, 1);
54 
55  bisectKernelSmall<<< blocks, threads >>>(
56  viennacl::cuda_arg(input.g_a),
57  viennacl::cuda_arg(input.g_b) + 1,
58  mat_size,
63  lg, ug, 0, mat_size,
64  precision
65  );
67 }
68 
69 
70 template<typename NumericT>
72  const unsigned int mat_size,
73  const NumericT lg, const NumericT ug,
74  const NumericT precision)
75  {
76 
77  dim3 blocks(1, 1, 1);
78  dim3 threads(mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2 , 1, 1);
79  bisectKernelLarge<<< blocks, threads >>>
80  (viennacl::cuda_arg(input.g_a),
81  viennacl::cuda_arg(input.g_b) + 1,
82  mat_size,
83  lg, ug, static_cast<unsigned int>(0), mat_size, precision,
95  );
97 }
98 
99 
100 // compute eigenvalues for intervals that contained only one eigenvalue
101 // after the first processing step
102 template<typename NumericT>
104  const unsigned int mat_size,
105  const NumericT precision)
106  {
107 
108  unsigned int num_one_intervals = result.g_num_one;
109  unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals,
111  dim3 grid_onei;
112  grid_onei.x = num_blocks;
113  grid_onei.y = 1, grid_onei.z = 1;
114  dim3 threads_onei(mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2, 1, 1);
115 
116 
117  bisectKernelLarge_OneIntervals<<< grid_onei , threads_onei >>>
118  (viennacl::cuda_arg(input.g_a),
119  viennacl::cuda_arg(input.g_b) + 1,
120  mat_size, num_one_intervals,
124  precision
125  );
126  viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_OneIntervals() FAILED.");
127 }
128 
129 
130 // process intervals that contained more than one eigenvalue after
131 // the first processing step
132 template<typename NumericT>
134  const unsigned int mat_size,
135  const NumericT precision)
136  {
137  // get the number of blocks of intervals that contain, in total when
138  // each interval contains only one eigenvalue, not more than
139  // MAX_THREADS_BLOCK threads
140  unsigned int num_blocks_mult = result.g_num_blocks_mult;
141 
142  // setup the execution environment
143  dim3 grid_mult(num_blocks_mult, 1, 1);
144  dim3 threads_mult(mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2, 1, 1);
145 
146  bisectKernelLarge_MultIntervals<<< grid_mult, threads_mult >>>
147  (viennacl::cuda_arg(input.g_a),
148  viennacl::cuda_arg(input.g_b) + 1,
149  mat_size,
158  precision
159  );
160  viennacl::linalg::cuda::VIENNACL_CUDA_LAST_ERROR_CHECK("bisectKernelLarge_MultIntervals() FAILED.");
161 }
162 }
163 }
164 }
165 
166 #endif
viennacl::vector< NumericT > g_b
device side representation of superdiagonal
Definition: structs.hpp:62
void bisectLarge(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
viennacl::vector< NumericT > g_left_mult
left interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:146
#define VIENNACL_BISECT_MAX_THREADS_BLOCK
Definition: config.hpp:32
Helper structures to simplify variable handling.
Determine eigenvalues for small symmetric, tridiagonal matrix.
viennacl::vector< NumericT > g_left_one
left interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:137
viennacl::scalar< unsigned int > g_num_blocks_mult
number of (thread) blocks of intervals containing multiple eigenvalues after the first steo ...
Definition: structs.hpp:134
viennacl::vector< unsigned int > g_blocks_mult
start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after ...
Definition: structs.hpp:156
viennacl::vector< unsigned int > g_left_count_mult
number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenv...
Definition: structs.hpp:151
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
viennacl::vector< unsigned int > g_right_count_mult
number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigen...
Definition: structs.hpp:154
In this class the input matrix is stored.
Definition: structs.hpp:53
unsigned int getNumBlocksLinear(const unsigned int num_threads, const unsigned int num_threads_block)
Definition: util.hpp:96
viennacl::vector< NumericT > vcl_g_right
right interval limits at the end of the computation
Definition: structs.hpp:103
viennacl::vector< NumericT > g_lambda_mult
eigenvalues that have been generated in the second step from intervals that still contained multiple ...
Definition: structs.hpp:162
viennacl::vector< NumericT > vcl_g_left
left interval limits at the end of the computation
Definition: structs.hpp:101
viennacl::vector< unsigned int > vcl_g_left_count
number of eigenvalues smaller than the left interval limit
Definition: structs.hpp:105
viennacl::vector< unsigned int > g_pos_one
interval indices (position in sorted listed of eigenvalues) of intervals containing one eigenvalue af...
Definition: structs.hpp:143
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
Definition: config.hpp:38
First step of the bisection algorithm for the computation of eigenvalues.
Determine eigenvalues for large matrices for intervals that contained after the first step one eigenv...
viennacl::vector< NumericT > g_right_one
right interval limits of intervals containing one eigenvalue after the first iteration step ...
Definition: structs.hpp:140
viennacl::vector< unsigned int > g_pos_mult
eigenvalue index of intervals that have been generated in the second processing step ...
Definition: structs.hpp:165
void bisectLarge_OneIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
viennacl::vector< unsigned int > vcl_g_right_count
number of eigenvalues bigger than the right interval limit
Definition: structs.hpp:107
In this class the data of the result for large matrices is stored.
Definition: structs.hpp:125
void bisectSmall(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
viennacl::vector< unsigned int > g_blocks_mult_sum
accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one e...
Definition: structs.hpp:159
viennacl::scalar< unsigned int > g_num_one
number of intervals containing one eigenvalue after the first step
Definition: structs.hpp:131
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:30
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39
void bisectLarge_MultIntervals(const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
In this class the data of the result for small matrices is stored.
Definition: structs.hpp:96
Second step of the bisection algorithm for the computation of eigenvalues for large matrices...
viennacl::vector< NumericT > g_a
device side representation of diagonal
Definition: structs.hpp:60
viennacl::vector< NumericT > g_right_mult
right interval limits of intervals containing multiple eigenvalues after the first iteration step ...
Definition: structs.hpp:148