ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
amg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
10 
13 namespace viennacl
14 {
15 namespace linalg
16 {
17 namespace opencl
18 {
19 namespace kernels
20 {
21 
22 
23 template<typename StringT>
24 void generate_amg_influence_trivial(StringT & source)
25 {
26 
27  source.append("__kernel void amg_influence_trivial( \n");
28  source.append(" __global const unsigned int * A_row_indices, \n");
29  source.append(" __global const unsigned int * A_col_indices, \n");
30  source.append(" unsigned int A_size1, \n");
31  source.append(" unsigned int A_nnz, \n");
32  source.append(" __global unsigned int * influences_row, \n");
33  source.append(" __global unsigned int * influences_id, \n");
34  source.append(" __global unsigned int * influences_values) { \n");
35 
36  source.append(" for (unsigned int i = get_global_id(0); i < A_size1; i += get_global_size(0)) \n");
37  source.append(" { \n");
38  source.append(" unsigned int tmp = A_row_indices[i]; \n");
39  source.append(" influences_row[i] = tmp; \n");
40  source.append(" influences_values[i] = A_row_indices[i+1] - tmp; \n");
41  source.append(" } \n");
42 
43  source.append(" for (unsigned int i = get_global_id(0); i < A_nnz; i += get_global_size(0)) \n");
44  source.append(" influences_id[i] = A_col_indices[i]; \n");
45 
46  source.append(" if (get_global_id(0) == 0) \n");
47  source.append(" influences_row[A_size1] = A_row_indices[A_size1]; \n");
48  source.append("} \n");
49 
50 }
51 
52 
53 template<typename StringT>
54 void generate_amg_pmis2_init_workdata(StringT & source)
55 {
56 
57  source.append("__kernel void amg_pmis2_init_workdata( \n");
58  source.append(" __global unsigned int *work_state, \n");
59  source.append(" __global unsigned int *work_random, \n");
60  source.append(" __global unsigned int *work_index, \n");
61  source.append(" __global unsigned int const *point_types, \n");
62  source.append(" __global unsigned int const *random_weights, \n");
63  source.append(" unsigned int size) { \n");
64 
65  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
66  source.append(" switch (point_types[i]) { \n");
67  source.append(" case 0: work_state[i] = 1; break; \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED
68  source.append(" case 1: work_state[i] = 2; break; \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE
69  source.append(" case 2: work_state[i] = 0; break; \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE
70 
71  source.append(" default: break; // do nothing \n");
72  source.append(" } \n");
73 
74  source.append(" work_random[i] = random_weights[i]; \n");
75  source.append(" work_index[i] = i; \n");
76  source.append(" } \n");
77  source.append("} \n");
78 }
79 
80 
81 
82 template<typename StringT>
84 {
85 
86  source.append("__kernel void amg_pmis2_max_neighborhood( \n");
87  source.append(" __global unsigned int *work_state, \n");
88  source.append(" __global unsigned int *work_random, \n");
89  source.append(" __global unsigned int *work_index, \n");
90  source.append(" __global unsigned int *work_state2, \n");
91  source.append(" __global unsigned int *work_random2, \n");
92  source.append(" __global unsigned int *work_index2, \n");
93  source.append(" __global unsigned int const *influences_row, \n");
94  source.append(" __global unsigned int const *influences_id, \n");
95  source.append(" unsigned int size) { \n");
96 
97  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
98 
99  // load
100  source.append(" unsigned int state = work_state[i]; \n");
101  source.append(" unsigned int random = work_random[i]; \n");
102  source.append(" unsigned int index = work_index[i]; \n");
103 
104  // max
105  source.append(" unsigned int j_stop = influences_row[i + 1]; \n");
106  source.append(" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
107  source.append(" unsigned int influenced_point_id = influences_id[j]; \n");
108 
109  // lexigraphical triple-max (not particularly pretty, but does the job):
110  source.append(" if (state < work_state[influenced_point_id]) { \n");
111  source.append(" state = work_state[influenced_point_id]; \n");
112  source.append(" random = work_random[influenced_point_id]; \n");
113  source.append(" index = work_index[influenced_point_id]; \n");
114  source.append(" } else if (state == work_state[influenced_point_id]) { \n");
115  source.append(" if (random < work_random[influenced_point_id]) { \n");
116  source.append(" state = work_state[influenced_point_id]; \n");
117  source.append(" random = work_random[influenced_point_id]; \n");
118  source.append(" index = work_index[influenced_point_id]; \n");
119  source.append(" } else if (random == work_random[influenced_point_id]) { \n");
120  source.append(" if (index < work_index[influenced_point_id]) { \n");
121  source.append(" state = work_state[influenced_point_id]; \n");
122  source.append(" random = work_random[influenced_point_id]; \n");
123  source.append(" index = work_index[influenced_point_id]; \n");
124  source.append(" } \n");
125  source.append(" } \n");
126  source.append(" } \n");
127 
128  source.append(" }\n"); //for
129 
130  // store
131  source.append(" work_state2[i] = state; \n");
132  source.append(" work_random2[i] = random; \n");
133  source.append(" work_index2[i] = index; \n");
134  source.append(" } \n");
135  source.append("} \n");
136 }
137 
138 
139 
140 template<typename StringT>
141 void generate_amg_pmis2_mark_mis_nodes(StringT & source)
142 {
143 
144  source.append("__kernel void amg_pmis2_mark_mis_nodes( \n");
145  source.append(" __global unsigned int const *work_state, \n");
146  source.append(" __global unsigned int const *work_index, \n");
147  source.append(" __global unsigned int *point_types, \n");
148  source.append(" __global unsigned int *undecided_buffer, \n");
149  source.append(" unsigned int size) { \n");
150 
151  source.append(" unsigned int num_undecided = 0; \n");
152  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
153  source.append(" unsigned int max_state = work_state[i]; \n");
154  source.append(" unsigned int max_index = work_index[i]; \n");
155 
156  source.append(" if (point_types[i] == 0) { \n"); // viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED
157  source.append(" if (i == max_index) point_types[i] = 1; \n"); // viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE
158  source.append(" else if (max_state == 2) point_types[i] = 2; \n"); // viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE
159  source.append(" else num_undecided += 1; \n");
160  source.append(" } \n");
161  source.append(" } \n");
162 
163  // reduction in shared memory:
164  source.append(" __local unsigned int shared_buffer[256]; \n");
165  source.append(" shared_buffer[get_local_id(0)] = num_undecided; \n");
166  source.append(" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) { \n");
167  source.append(" barrier(CLK_LOCAL_MEM_FENCE); \n");
168  source.append(" if (get_local_id(0) < stride) shared_buffer[get_local_id(0)] += shared_buffer[get_local_id(0)+stride]; \n");
169  source.append(" } \n");
170 
171  source.append(" if (get_local_id(0) == 0) \n");
172  source.append(" undecided_buffer[get_group_id(0)] = shared_buffer[0]; \n");
173 
174  source.append("} \n");
175 }
176 
177 
178 template<typename StringT>
179 void generate_amg_pmis2_reset_state(StringT & source)
180 {
181 
182  source.append("__kernel void amg_pmis2_reset_state( \n");
183  source.append(" __global unsigned int *point_types, \n");
184  source.append(" unsigned int size) { \n");
185 
186  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
187  source.append(" if (point_types[i] != 1) point_types[i] = 0;\n"); // mind mapping of POINT_TYPE_COARSE and POINT_TYPE_UNDECIDED
188  source.append(" } \n");
189 
190  source.append("} \n");
191 }
192 
193 
194 
196 
197 
198 
199 template<typename StringT>
201 {
202 
203  source.append(" __kernel void amg_agg_propagate_coarse_indices( \n");
204  source.append(" __global unsigned int *point_types, \n");
205  source.append(" __global unsigned int *coarse_ids, \n");
206  source.append(" __global unsigned int const *influences_row, \n");
207  source.append(" __global unsigned int const *influences_id, \n");
208  source.append(" unsigned int size) { \n");
209 
210  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
211  source.append(" { \n");
212  source.append(" if (point_types[i] == 1) { \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE
213  source.append(" unsigned int coarse_index = coarse_ids[i]; \n");
214 
215  source.append(" unsigned int j_stop = influences_row[i + 1]; \n");
216  source.append(" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
217  source.append(" unsigned int influenced_point_id = influences_id[j]; \n");
218  source.append(" coarse_ids[influenced_point_id] = coarse_index; \n");
219  source.append(" if (influenced_point_id != i) point_types[influenced_point_id] = 2; \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE
220  source.append(" } \n");
221  source.append(" } \n");
222  source.append(" } \n");
223  source.append("} \n");
224 
225 }
226 
227 
228 
229 template<typename StringT>
230 void generate_amg_agg_merge_undecided(StringT & source)
231 {
232 
233  source.append(" __kernel void amg_agg_merge_undecided( \n");
234  source.append(" __global unsigned int *point_types, \n");
235  source.append(" __global unsigned int *coarse_ids, \n");
236  source.append(" __global unsigned int const *influences_row, \n");
237  source.append(" __global unsigned int const *influences_id, \n");
238  source.append(" unsigned int size) { \n");
239 
240  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
241  source.append(" { \n");
242  source.append(" if (point_types[i] == 0) { \n"); //viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED
243 
244  source.append(" unsigned int j_stop = influences_row[i + 1]; \n");
245  source.append(" for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
246  source.append(" unsigned int influenced_point_id = influences_id[j]; \n");
247  source.append(" if (point_types[influenced_point_id] != 0) { \n"); // viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED
248  source.append(" coarse_ids[i] = coarse_ids[influenced_point_id]; \n");
249  source.append(" break; \n");
250  source.append(" } \n");
251  source.append(" } \n");
252 
253  source.append(" } \n");
254  source.append(" } \n");
255  source.append("} \n");
256 
257 }
258 
259 
260 template<typename StringT>
262 {
263 
264  source.append(" __kernel void amg_agg_merge_undecided_2( \n");
265  source.append(" __global unsigned int *point_types, \n");
266  source.append(" unsigned int size) { \n");
267 
268  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
269  source.append(" if (point_types[i] == 0) point_types[i] = 2; \n"); // POINT_TYPE_UNDECIDED to POINT_TYPE_FINE
270 
271  source.append("} \n");
272 }
273 
275 
276 template<typename StringT>
277 void generate_amg_interpol_ag(StringT & source, std::string const & numeric_string)
278 {
279 
280  source.append(" __kernel void amg_interpol_ag( \n");
281  source.append(" __global unsigned int * P_row_indices, \n");
282  source.append(" __global unsigned int * P_column_indices, \n");
283  source.append(" __global "); source.append(numeric_string); source.append(" * P_elements, \n");
284  source.append(" __global const unsigned int * coarse_agg_ids, \n");
285  source.append(" unsigned int size) { \n");
286 
287  source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
288  source.append(" { \n");
289  source.append(" P_row_indices[i] = i; \n");
290  source.append(" P_column_indices[i] = coarse_agg_ids[i]; \n");
291  source.append(" P_elements[i] = 1; \n");
292  source.append(" } \n");
293  source.append(" if (get_global_id(0) == 0) P_row_indices[size] = size; \n");
294  source.append(" } \n");
295 
296 }
297 
298 template<typename StringT>
299 void generate_amg_interpol_sa(StringT & source, std::string const & numeric_string)
300 {
301 
302  source.append("__kernel void amg_interpol_sa( \n");
303  source.append(" __global unsigned int const *A_row_indices, \n");
304  source.append(" __global unsigned int const *A_col_indices, \n");
305  source.append(" __global "); source.append(numeric_string); source.append(" const *A_elements, \n");
306  source.append(" unsigned int A_size1, \n");
307  source.append(" unsigned int A_nnz, \n");
308  source.append(" __global unsigned int *Jacobi_row_indices, \n");
309  source.append(" __global unsigned int *Jacobi_col_indices, \n");
310  source.append(" __global "); source.append(numeric_string); source.append(" *Jacobi_elements, \n");
311  source.append(" "); source.append(numeric_string); source.append(" omega) { \n");
312 
313  source.append(" for (unsigned int row = get_global_id(0); row < A_size1; row += get_global_size(0)) \n");
314  source.append(" { \n");
315  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
316  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
317 
318  source.append(" Jacobi_row_indices[row] = row_begin; \n");
319 
320  // Step 1: Extract diagonal:
321  source.append(" "); source.append(numeric_string); source.append(" diag = 0; \n");
322  source.append(" for (unsigned int j = row_begin; j < row_end; ++j) { \n");
323  source.append(" if (A_col_indices[j] == row) { \n");
324  source.append(" diag = A_elements[j]; \n");
325  source.append(" break; \n");
326  source.append(" } \n");
327  source.append(" } \n");
328 
329  // Step 2: Write entries:
330  source.append(" for (unsigned int j = row_begin; j < row_end; ++j) { \n");
331  source.append(" unsigned int col_index = A_col_indices[j]; \n");
332  source.append(" Jacobi_col_indices[j] = col_index; \n");
333  source.append(" Jacobi_elements[j] = (col_index == row) ? (1 - omega) : (-omega * A_elements[j] / diag); \n");
334  source.append(" } \n");
335 
336  source.append(" } \n");
337  source.append(" if (get_global_id(0) == 0) Jacobi_row_indices[A_size1] = A_nnz; \n");
338  source.append("} \n");
339 
340 }
342 
343 // main kernel class
345 template<typename NumericT>
346 struct amg
347 {
348  static std::string program_name()
349  {
351  }
352 
353  static void init(viennacl::ocl::context & ctx)
354  {
355  static std::map<cl_context, bool> init_done;
356  if (!init_done[ctx.handle().get()])
357  {
359  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
360 
361  std::string source;
362  source.reserve(2048);
363 
364  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
365 
374 
375  generate_amg_interpol_ag(source, numeric_string);
376  generate_amg_interpol_sa(source, numeric_string);
377 
378  std::string prog_name = program_name();
379  #ifdef VIENNACL_BUILD_INFO
380  std::cout << "Creating program " << prog_name << std::endl;
381  #endif
382  ctx.add_program(source, prog_name);
383  init_done[ctx.handle().get()] = true;
384  } //if
385  } //init
386 };
387 
388 } // namespace kernels
389 } // namespace opencl
390 } // namespace linalg
391 } // namespace viennacl
392 #endif
393 
Main kernel class for generating OpenCL kernels for compressed_matrix.
Definition: amg.hpp:346
void generate_amg_pmis2_reset_state(StringT &source)
Definition: amg.hpp:179
Implements a OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
void generate_amg_pmis2_init_workdata(StringT &source)
Definition: amg.hpp:54
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
Common implementations shared by OpenCL-based operations.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_amg_pmis2_mark_mis_nodes(StringT &source)
Definition: amg.hpp:141
void generate_amg_agg_merge_undecided_2(StringT &source)
Definition: amg.hpp:261
static std::string program_name()
Definition: amg.hpp:348
void generate_amg_influence_trivial(StringT &source)
Definition: amg.hpp:24
static void init(viennacl::ocl::context &ctx)
Definition: amg.hpp:353
Representation of an OpenCL kernel in ViennaCL.
void generate_amg_pmis2_max_neighborhood(StringT &source)
Definition: amg.hpp:83
void generate_amg_agg_merge_undecided(StringT &source)
Definition: amg.hpp:230
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_amg_interpol_ag(StringT &source, std::string const &numeric_string)
Definition: amg.hpp:277
void generate_amg_interpol_sa(StringT &source, std::string const &numeric_string)
Definition: amg.hpp:299
void generate_amg_agg_propagate_coarse_indices(StringT &source)
Definition: amg.hpp:200