ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
ilu.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_ILU_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_ILU_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
28 namespace viennacl
29 {
30 namespace linalg
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36 template<typename StringT>
37 void generate_ilu_level_scheduling_substitute(StringT & source, std::string const & numeric_string)
38 {
39  source.append("__kernel void level_scheduling_substitute( \n");
40  source.append(" __global const unsigned int * row_index_array, \n");
41  source.append(" __global const unsigned int * row_indices, \n");
42  source.append(" __global const unsigned int * column_indices, \n");
43  source.append(" __global const "); source.append(numeric_string); source.append(" * elements, \n");
44  source.append(" __global "); source.append(numeric_string); source.append(" * vec, \n");
45  source.append(" unsigned int size) \n");
46  source.append("{ \n");
47  source.append(" for (unsigned int row = get_global_id(0); \n");
48  source.append(" row < size; \n");
49  source.append(" row += get_global_size(0)) \n");
50  source.append(" { \n");
51  source.append(" unsigned int eq_row = row_index_array[row]; \n");
52  source.append(" "); source.append(numeric_string); source.append(" vec_entry = vec[eq_row]; \n");
53  source.append(" unsigned int row_end = row_indices[row+1]; \n");
54 
55  source.append(" for (unsigned int j = row_indices[row]; j < row_end; ++j) \n");
56  source.append(" vec_entry -= vec[column_indices[j]] * elements[j]; \n");
57 
58  source.append(" vec[eq_row] = vec_entry; \n");
59  source.append(" } \n");
60  source.append("} \n");
61 }
62 
64 
65 
66 template<typename StringT>
67 void generate_icc_extract_L_1(StringT & source)
68 {
69  source.append("__kernel void extract_L_1( \n");
70  source.append(" __global unsigned int const *A_row_indices, \n");
71  source.append(" __global unsigned int const *A_col_indices, \n");
72  source.append(" unsigned int A_size1, \n");
73  source.append(" __global unsigned int *L_row_indices) { \n");
74 
75  source.append(" for (unsigned int row = get_global_id(0); \n");
76  source.append(" row < A_size1; \n");
77  source.append(" row += get_global_size(0)) \n");
78  source.append(" { \n");
79  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
80  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
81 
82  source.append(" unsigned int num_entries_L = 0; \n");
83  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
84  source.append(" unsigned int col = A_col_indices[j]; \n");
85  source.append(" if (col <= row) ++num_entries_L; \n");
86  source.append(" } \n");
87 
88  source.append(" L_row_indices[row] = num_entries_L; \n");
89  source.append(" } \n");
90  source.append("} \n");
91 }
92 
93 template<typename StringT>
94 void generate_icc_extract_L_2(StringT & source, std::string const & numeric_string)
95 {
96  source.append("__kernel void extract_L_2( \n");
97  source.append(" __global unsigned int const *A_row_indices, \n");
98  source.append(" __global unsigned int const *A_col_indices, \n");
99  source.append(" __global "); source.append(numeric_string); source.append(" const *A_elements, \n");
100  source.append(" unsigned int A_size1, \n");
101  source.append(" __global unsigned int const *L_row_indices, \n");
102  source.append(" __global unsigned int *L_col_indices, \n");
103  source.append(" __global "); source.append(numeric_string); source.append(" *L_elements) { \n");
104 
105  source.append(" for (unsigned int row = get_global_id(0); \n");
106  source.append(" row < A_size1; \n");
107  source.append(" row += get_global_size(0)) \n");
108  source.append(" { \n");
109  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
110  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
111 
112  source.append(" unsigned int index_L = L_row_indices[row]; \n");
113  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
114  source.append(" unsigned int col = A_col_indices[j]; \n");
115  source.append(" "); source.append(numeric_string); source.append(" value = A_elements[j]; \n");
116 
117  source.append(" if (col <= row) { \n");
118  source.append(" L_col_indices[index_L] = col; \n");
119  source.append(" L_elements[index_L] = value; \n");
120  source.append(" ++index_L; \n");
121  source.append(" } \n");
122  source.append(" } \n");
123 
124  source.append(" } \n");
125  source.append("} \n");
126 }
127 
128 
129 template<typename StringT>
130 void generate_icc_chow_patel_sweep_kernel(StringT & source, std::string const & numeric_string)
131 {
132  source.append("__kernel void icc_chow_patel_sweep_kernel( \n");
133  source.append(" __global unsigned int const *L_row_indices, \n");
134  source.append(" __global unsigned int const *L_col_indices, \n");
135  source.append(" __global "); source.append(numeric_string); source.append(" *L_elements, \n");
136  source.append(" __global "); source.append(numeric_string); source.append(" const *L_backup, \n");
137  source.append(" unsigned int L_size1, \n");
138 
139  source.append(" __global "); source.append(numeric_string); source.append(" const *aij_L) { \n");
140 
141  source.append(" for (unsigned int row = get_global_id(0); \n");
142  source.append(" row < L_size1; \n");
143  source.append(" row += get_global_size(0)) \n");
144  source.append(" { \n");
145 
146  //
147  // Update L:
148  //
149  source.append(" unsigned int row_Li_start = L_row_indices[row]; \n");
150  source.append(" unsigned int row_Li_end = L_row_indices[row + 1]; \n");
151 
152  source.append(" for (unsigned int i = row_Li_start; i < row_Li_end; ++i) { \n");
153  source.append(" unsigned int col = L_col_indices[i]; \n");
154 
155  source.append(" unsigned int row_Lj_start = L_row_indices[col]; \n");
156  source.append(" unsigned int row_Lj_end = L_row_indices[col + 1]; \n");
157 
158  source.append(" unsigned int index_Lj = row_Lj_start; \n");
159  source.append(" unsigned int col_Lj = L_col_indices[index_Lj]; \n");
160 
161  source.append(" "); source.append(numeric_string); source.append(" s = aij_L[i]; \n");
162  source.append(" for (unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li) { \n");
163  source.append(" unsigned int col_Li = L_col_indices[index_Li]; \n");
164 
165  source.append(" while (col_Lj < col_Li) { \n");
166  source.append(" ++index_Lj; \n");
167  source.append(" col_Lj = L_col_indices[index_Lj]; \n");
168  source.append(" } \n");
169 
170  source.append(" if (col_Lj == col_Li) \n");
171  source.append(" s -= L_backup[index_Li] * L_backup[index_Lj]; \n");
172  source.append(" } \n");
173 
174  // update l_ij:
175  source.append(" L_elements[i] = (row == col) ? sqrt(s) : (s / L_backup[row_Lj_end - 1]); \n");
176  source.append(" } \n");
177 
178  source.append(" } \n");
179  source.append("} \n");
180 }
181 
182 
184 
185 template<typename StringT>
186 void generate_ilu_extract_LU_1(StringT & source)
187 {
188  source.append("__kernel void extract_LU_1( \n");
189  source.append(" __global unsigned int const *A_row_indices, \n");
190  source.append(" __global unsigned int const *A_col_indices, \n");
191  source.append(" unsigned int A_size1, \n");
192  source.append(" __global unsigned int *L_row_indices, \n");
193  source.append(" __global unsigned int *U_row_indices) { \n");
194 
195  source.append(" for (unsigned int row = get_global_id(0); \n");
196  source.append(" row < A_size1; \n");
197  source.append(" row += get_global_size(0)) \n");
198  source.append(" { \n");
199  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
200  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
201 
202  source.append(" unsigned int num_entries_L = 0; \n");
203  source.append(" unsigned int num_entries_U = 0; \n");
204  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
205  source.append(" unsigned int col = A_col_indices[j]; \n");
206  source.append(" if (col <= row) ++num_entries_L; \n");
207  source.append(" if (col >= row) ++num_entries_U; \n");
208  source.append(" } \n");
209 
210  source.append(" L_row_indices[row] = num_entries_L; \n");
211  source.append(" U_row_indices[row] = num_entries_U; \n");
212  source.append(" } \n");
213  source.append("} \n");
214 }
215 
216 template<typename StringT>
217 void generate_ilu_extract_LU_2(StringT & source, std::string const & numeric_string)
218 {
219  source.append("__kernel void extract_LU_2( \n");
220  source.append(" __global unsigned int const *A_row_indices, \n");
221  source.append(" __global unsigned int const *A_col_indices, \n");
222  source.append(" __global "); source.append(numeric_string); source.append(" const *A_elements, \n");
223  source.append(" unsigned int A_size1, \n");
224  source.append(" __global unsigned int const *L_row_indices, \n");
225  source.append(" __global unsigned int *L_col_indices, \n");
226  source.append(" __global "); source.append(numeric_string); source.append(" *L_elements, \n");
227  source.append(" __global unsigned int const *U_row_indices, \n");
228  source.append(" __global unsigned int *U_col_indices, \n");
229  source.append(" __global "); source.append(numeric_string); source.append(" *U_elements) { \n");
230 
231  source.append(" for (unsigned int row = get_global_id(0); \n");
232  source.append(" row < A_size1; \n");
233  source.append(" row += get_global_size(0)) \n");
234  source.append(" { \n");
235  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
236  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
237 
238  source.append(" unsigned int index_L = L_row_indices[row]; \n");
239  source.append(" unsigned int index_U = U_row_indices[row]; \n");
240  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
241  source.append(" unsigned int col = A_col_indices[j]; \n");
242  source.append(" "); source.append(numeric_string); source.append(" value = A_elements[j]; \n");
243 
244  source.append(" if (col <= row) { \n");
245  source.append(" L_col_indices[index_L] = col; \n");
246  source.append(" L_elements[index_L] = value; \n");
247  source.append(" ++index_L; \n");
248  source.append(" } \n");
249  source.append(" if (col >= row) { \n");
250  source.append(" U_col_indices[index_U] = col; \n");
251  source.append(" U_elements[index_U] = value; \n");
252  source.append(" ++index_U; \n");
253  source.append(" } \n");
254  source.append(" } \n");
255 
256  source.append(" } \n");
257  source.append("} \n");
258 }
259 
260 template<typename StringT>
261 void generate_ilu_scale_kernel_1(StringT & source, std::string const & numeric_string)
262 {
263  source.append("__kernel void ilu_scale_kernel_1( \n");
264  source.append(" __global unsigned int const *A_row_indices, \n");
265  source.append(" __global unsigned int const *A_col_indices, \n");
266  source.append(" __global "); source.append(numeric_string); source.append(" const *A_elements, \n");
267  source.append(" unsigned int A_size1, \n");
268  source.append(" __global "); source.append(numeric_string); source.append(" *D_elements) { \n");
269 
270  source.append(" for (unsigned int row = get_global_id(0); \n");
271  source.append(" row < A_size1; \n");
272  source.append(" row += get_global_size(0)) \n");
273  source.append(" { \n");
274  source.append(" unsigned int row_begin = A_row_indices[row]; \n");
275  source.append(" unsigned int row_end = A_row_indices[row+1]; \n");
276 
277  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
278  source.append(" unsigned int col = A_col_indices[j]; \n");
279 
280  source.append(" if (col == row) { \n");
281  source.append(" D_elements[row] = 1 / sqrt(fabs(A_elements[j])); \n");
282  source.append(" break; \n");
283  source.append(" } \n");
284  source.append(" } \n");
285 
286  source.append(" } \n");
287  source.append("} \n");
288 }
289 
290 template<typename StringT>
291 void generate_ilu_scale_kernel_2(StringT & source, std::string const & numeric_string)
292 {
293  source.append("__kernel void ilu_scale_kernel_2( \n");
294  source.append(" __global unsigned int const *R_row_indices, \n");
295  source.append(" __global unsigned int const *R_col_indices, \n");
296  source.append(" __global "); source.append(numeric_string); source.append(" *R_elements, \n");
297  source.append(" unsigned int R_size1, \n");
298  source.append(" __global "); source.append(numeric_string); source.append(" const *D_elements) { \n");
299 
300  source.append(" for (unsigned int row = get_global_id(0); \n");
301  source.append(" row < R_size1; \n");
302  source.append(" row += get_global_size(0)) \n");
303  source.append(" { \n");
304  source.append(" unsigned int row_begin = R_row_indices[row]; \n");
305  source.append(" unsigned int row_end = R_row_indices[row+1]; \n");
306 
307  source.append(" "); source.append(numeric_string); source.append(" D_row = D_elements[row]; \n");
308  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) \n");
309  source.append(" R_elements[j] *= D_row * D_elements[R_col_indices[j]]; \n");
310 
311  source.append(" } \n");
312  source.append("} \n");
313 }
314 
315 template<typename StringT>
316 void generate_ilu_chow_patel_sweep_kernel(StringT & source, std::string const & numeric_string)
317 {
318  source.append("__kernel void ilu_chow_patel_sweep_kernel( \n");
319  source.append(" __global unsigned int const *L_row_indices, \n");
320  source.append(" __global unsigned int const *L_col_indices, \n");
321  source.append(" __global "); source.append(numeric_string); source.append(" *L_elements, \n");
322  source.append(" __global "); source.append(numeric_string); source.append(" const *L_backup, \n");
323  source.append(" unsigned int L_size1, \n");
324 
325  source.append(" __global "); source.append(numeric_string); source.append(" const *aij_L, \n");
326 
327  source.append(" __global unsigned int const *U_trans_row_indices, \n");
328  source.append(" __global unsigned int const *U_trans_col_indices, \n");
329  source.append(" __global "); source.append(numeric_string); source.append(" *U_trans_elements, \n");
330  source.append(" __global "); source.append(numeric_string); source.append(" const *U_trans_backup, \n");
331 
332  source.append(" __global "); source.append(numeric_string); source.append(" const *aij_U_trans) { \n");
333 
334  source.append(" for (unsigned int row = get_global_id(0); \n");
335  source.append(" row < L_size1; \n");
336  source.append(" row += get_global_size(0)) \n");
337  source.append(" { \n");
338 
339  //
340  // Update L:
341  //
342  source.append(" unsigned int row_L_start = L_row_indices[row]; \n");
343  source.append(" unsigned int row_L_end = L_row_indices[row + 1]; \n");
344 
345  source.append(" for (unsigned int j = row_L_start; j < row_L_end; ++j) { \n");
346  source.append(" unsigned int col = L_col_indices[j]; \n");
347 
348  source.append(" if (col == row) continue; \n");
349 
350  source.append(" unsigned int row_U_start = U_trans_row_indices[col]; \n");
351  source.append(" unsigned int row_U_end = U_trans_row_indices[col + 1]; \n");
352 
353  source.append(" unsigned int index_U = row_U_start; \n");
354  source.append(" unsigned int col_U = (index_U < row_U_end) ? U_trans_col_indices[index_U] : L_size1; \n");
355 
356  source.append(" "); source.append(numeric_string); source.append(" sum = 0; \n");
357  source.append(" for (unsigned int k = row_L_start; k < j; ++k) { \n");
358  source.append(" unsigned int col_L = L_col_indices[k]; \n");
359 
360  source.append(" while (col_U < col_L) { \n");
361  source.append(" ++index_U; \n");
362  source.append(" col_U = U_trans_col_indices[index_U]; \n");
363  source.append(" } \n");
364 
365  source.append(" if (col_U == col_L) \n");
366  source.append(" sum += L_backup[k] * U_trans_backup[index_U]; \n");
367  source.append(" } \n");
368 
369  // update l_ij:
370  source.append(" L_elements[j] = (aij_L[j] - sum) / U_trans_backup[row_U_end - 1]; \n");
371  source.append(" } \n");
372 
373  //
374  // Update U:
375  //
376  source.append(" unsigned int row_U_start = U_trans_row_indices[row]; \n");
377  source.append(" unsigned int row_U_end = U_trans_row_indices[row + 1]; \n");
378 
379  source.append(" for (unsigned int j = row_U_start; j < row_U_end; ++j) { \n");
380  source.append(" unsigned int col = U_trans_col_indices[j]; \n");
381 
382  source.append(" row_L_start = L_row_indices[col]; \n");
383  source.append(" row_L_end = L_row_indices[col + 1]; \n");
384 
385  // compute \sum_{k=1}^{j-1} l_ik u_kj
386  source.append(" unsigned int index_L = row_L_start; \n");
387  source.append(" unsigned int col_L = (index_L < row_L_end) ? L_col_indices[index_L] : L_size1; \n");
388  source.append(" "); source.append(numeric_string); source.append(" sum = 0; \n");
389  source.append(" for (unsigned int k = row_U_start; k < j; ++k) { \n");
390  source.append(" unsigned int col_U = U_trans_col_indices[k]; \n");
391 
392  // find element in L:
393  source.append(" while (col_L < col_U) { \n");
394  source.append(" ++index_L; \n");
395  source.append(" col_L = L_col_indices[index_L]; \n");
396  source.append(" } \n");
397 
398  source.append(" if (col_U == col_L) \n");
399  source.append(" sum += L_backup[index_L] * U_trans_backup[k]; \n");
400  source.append(" } \n");
401 
402  // update U_ij:
403  source.append(" U_trans_elements[j] = aij_U_trans[j] - sum; \n");
404  source.append(" } \n");
405 
406  source.append(" } \n");
407  source.append("} \n");
408 }
409 
410 
411 template<typename StringT>
412 void generate_ilu_form_neumann_matrix_kernel(StringT & source, std::string const & numeric_string)
413 {
414  source.append("__kernel void ilu_form_neumann_matrix_kernel( \n");
415  source.append(" __global unsigned int const *R_row_indices, \n");
416  source.append(" __global unsigned int const *R_col_indices, \n");
417  source.append(" __global "); source.append(numeric_string); source.append(" *R_elements, \n");
418  source.append(" unsigned int R_size1, \n");
419  source.append(" __global "); source.append(numeric_string); source.append(" *D_elements) { \n");
420 
421  source.append(" for (unsigned int row = get_global_id(0); \n");
422  source.append(" row < R_size1; \n");
423  source.append(" row += get_global_size(0)) \n");
424  source.append(" { \n");
425  source.append(" unsigned int row_begin = R_row_indices[row]; \n");
426  source.append(" unsigned int row_end = R_row_indices[row+1]; \n");
427 
428  // Part 1: Extract and set diagonal entry
429  source.append(" "); source.append(numeric_string); source.append(" diag = D_elements[row]; \n");
430  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) { \n");
431  source.append(" unsigned int col = R_col_indices[j]; \n");
432  source.append(" if (col == row) { \n");
433  source.append(" diag = R_elements[j]; \n");
434  source.append(" R_elements[j] = 0; \n");
435  source.append(" break; \n");
436  source.append(" } \n");
437  source.append(" } \n");
438  source.append(" D_elements[row] = diag; \n");
439 
440  // Part 2: Scale
441  source.append(" for (unsigned int j=row_begin; j<row_end; ++j) \n");
442  source.append(" R_elements[j] /= -diag; \n");
443 
444  source.append(" } \n");
445  source.append("} \n");
446 }
447 
448 
449 
450 // main kernel class
452 template<class NumericT>
453 struct ilu
454 {
455  static std::string program_name()
456  {
458  }
459 
460  static void init(viennacl::ocl::context & ctx)
461  {
462  static std::map<cl_context, bool> init_done;
463  if (!init_done[ctx.handle().get()])
464  {
466  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
467 
468  std::string source;
469  source.reserve(1024);
470 
471  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
472 
473  // only generate for floating points (forces error for integers)
474  if (numeric_string == "float" || numeric_string == "double")
475  {
476  generate_ilu_level_scheduling_substitute(source, numeric_string);
477 
478  generate_icc_extract_L_1(source);
479  generate_icc_extract_L_2(source, numeric_string);
480  generate_icc_chow_patel_sweep_kernel(source, numeric_string);
481 
483  generate_ilu_extract_LU_2(source, numeric_string);
484  generate_ilu_scale_kernel_1(source, numeric_string);
485  generate_ilu_scale_kernel_2(source, numeric_string);
486  generate_ilu_chow_patel_sweep_kernel(source, numeric_string);
487  generate_ilu_form_neumann_matrix_kernel(source, numeric_string);
488  }
489 
490  std::string prog_name = program_name();
491  #ifdef VIENNACL_BUILD_INFO
492  std::cout << "Creating program " << prog_name << std::endl;
493  #endif
494  ctx.add_program(source, prog_name);
495  init_done[ctx.handle().get()] = true;
496  } //if
497  } //init
498 };
499 
500 } // namespace kernels
501 } // namespace opencl
502 } // namespace linalg
503 } // namespace viennacl
504 #endif
505 
Implements a OpenCL platform within ViennaCL.
void generate_ilu_level_scheduling_substitute(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:37
void generate_ilu_extract_LU_2(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:217
Various little tools used here and there in ViennaCL.
void generate_icc_extract_L_2(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:94
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
Provides OpenCL-related utilities.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
void generate_ilu_extract_LU_1(StringT &source)
Definition: ilu.hpp:186
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
void generate_icc_extract_L_1(StringT &source)
Definition: ilu.hpp:67
const OCL_TYPE & get() const
Definition: handle.hpp:189
Main kernel class for generating OpenCL kernels for incomplete LU factorization preconditioners.
Definition: ilu.hpp:453
void generate_ilu_chow_patel_sweep_kernel(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:316
static void init(viennacl::ocl::context &ctx)
Definition: ilu.hpp:460
void generate_ilu_scale_kernel_1(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:261
void generate_ilu_scale_kernel_2(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:291
Representation of an OpenCL kernel in ViennaCL.
void generate_icc_chow_patel_sweep_kernel(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:130
void generate_ilu_form_neumann_matrix_kernel(StringT &source, std::string const &numeric_string)
Definition: ilu.hpp:412
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
static std::string program_name()
Definition: ilu.hpp:455