ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
ilu_operations.hpp
#ifndef VIENNACL_LINALG_CUDA_ILU_OPERATIONS_HPP_
#define VIENNACL_LINALG_CUDA_ILU_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2015, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file viennacl/linalg/cuda/ilu_operations.hpp
    @brief Implementations of specialized routines for the Chow-Patel parallel ILU preconditioner using CUDA.
*/

#include <cmath>
#include <algorithm> // for std::max and std::min

#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
#include "viennacl/traits/context.hpp"
#include "viennacl/backend/memory.hpp"
#include "viennacl/linalg/cuda/common.hpp"
#include "viennacl/linalg/vector_operations.hpp"
namespace viennacl
{
namespace linalg
{
namespace cuda
{

template<typename IndexT> // to control external linkage
__global__ void extract_L_kernel_1(
          const IndexT * A_row_indices,
          const IndexT * A_col_indices,
          unsigned int   A_size1,
          unsigned int * L_row_indices)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < A_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = A_row_indices[row];
    unsigned int row_end   = A_row_indices[row+1];

    unsigned int num_entries_L = 0;
    for (unsigned int j=row_begin; j<row_end; ++j)
    {
      unsigned int col = A_col_indices[j];
      if (col <= row)
        ++num_entries_L;
    }

    L_row_indices[row] = num_entries_L;
  }
}

template<typename NumericT>
__global__ void extract_L_kernel_2(
          unsigned int const *A_row_indices,
          unsigned int const *A_col_indices,
          NumericT     const *A_elements,
          unsigned int        A_size1,

          unsigned int const *L_row_indices,
          unsigned int       *L_col_indices,
          NumericT           *L_elements)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < A_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = A_row_indices[row];
    unsigned int row_end   = A_row_indices[row+1];

    unsigned int index_L = L_row_indices[row];
    for (unsigned int j = row_begin; j < row_end; ++j)
    {
      unsigned int col = A_col_indices[j];
      NumericT value   = A_elements[j];

      if (col <= row)
      {
        L_col_indices[index_L] = col;
        L_elements[index_L]    = value;
        ++index_L;
      }
    }
  }
}

/** @brief Extracts the lower triangular part L from A (including the diagonal). */
template<typename NumericT>
void extract_L(compressed_matrix<NumericT> const & A,
               compressed_matrix<NumericT>       & L)
{
  //
  // Step 1: Count elements in L:
  //
  extract_L_kernel_1<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                   viennacl::cuda_arg<unsigned int>(A.handle2()),
                                   static_cast<unsigned int>(A.size1()),
                                   viennacl::cuda_arg<unsigned int>(L.handle1())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("extract_L_kernel_1");

  //
  // Step 2: Exclusive scan on row buffer:
  //
  viennacl::vector<unsigned int> wrapped_L_row_buffer(viennacl::cuda_arg<unsigned int>(L.handle1()), viennacl::CUDA_MEMORY, A.size1() + 1);
  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
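  // (The exclusive scan converts the per-row counts written by the kernel
  //  above into CSR row start offsets in place: e.g. counts [2, 1, 3, *]
  //  become [0, 2, 3, 6], so the final entry of the buffer holds the total
  //  number of entries in L and feeds the reserve() call below.)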
  L.reserve(wrapped_L_row_buffer[L.size1()], false);

  //
  // Step 3: Write entries
  //
  extract_L_kernel_2<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                   viennacl::cuda_arg<unsigned int>(A.handle2()),
                                   viennacl::cuda_arg<NumericT>(A.handle()),
                                   static_cast<unsigned int>(A.size1()),
                                   viennacl::cuda_arg<unsigned int>(L.handle1()),
                                   viennacl::cuda_arg<unsigned int>(L.handle2()),
                                   viennacl::cuda_arg<NumericT>(L.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("extract_L_kernel_2");

  L.generate_row_block_information();

} // extract_L
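
// A minimal usage sketch for extract_L (illustrative only; the dimension n
// and the code filling A are placeholders, not part of this header):
//
//   viennacl::compressed_matrix<double> A(n, n), L(n, n);
//   // ... fill A on the CUDA backend ...
//   viennacl::linalg::cuda::extract_L(A, L);
//   // L now holds the lower triangular part of A, including the diagonal.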


/** @brief CUDA kernel computing D_elements[i] = 1 / sqrt(|a_ii|), i.e. the diagonal scaling vector used by icc_scale() and ilu_scale(). */
template<typename NumericT>
__global__ void ilu_scale_kernel_1(
          unsigned int const *A_row_indices,
          unsigned int const *A_col_indices,
          NumericT     const *A_elements,
          unsigned int        A_size1,

          NumericT           *D_elements)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < A_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = A_row_indices[row];
    unsigned int row_end   = A_row_indices[row+1];

    for (unsigned int j = row_begin; j < row_end; ++j)
    {
      unsigned int col = A_col_indices[j];
      if (row == col)
      {
        D_elements[row] = NumericT(1) / sqrt(fabs(A_elements[j]));
        break;
      }
    }
  }
}

/** @brief CUDA kernel for scaling a sparse matrix such that the output is D * input * D, where D is a diagonal matrix. */
template<typename NumericT>
__global__ void ilu_scale_kernel_2(
          unsigned int const *R_row_indices,
          unsigned int const *R_col_indices,
          NumericT           *R_elements,
          unsigned int        R_size1,

          NumericT           *D_elements)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < R_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = R_row_indices[row];
    unsigned int row_end   = R_row_indices[row+1];

    NumericT D_row = D_elements[row];

    for (unsigned int j = row_begin; j < row_end; ++j)
      R_elements[j] *= D_row * D_elements[R_col_indices[j]];
  }
}
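
// Taken together, the two kernels above implement a symmetric diagonal
// scaling: ilu_scale_kernel_1 extracts d_i = 1 / sqrt(|a_ii|), and
// ilu_scale_kernel_2 replaces each entry r_ij by d_i * r_ij * d_j. On the
// diagonal this yields a_ii / |a_ii| = +/-1, so A' = D A D has unit modulus
// diagonal.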



/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly. */
template<typename NumericT>
void icc_scale(compressed_matrix<NumericT> const & A,
               compressed_matrix<NumericT>       & L)
{
  viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));

  // fill D:
  ilu_scale_kernel_1<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                   viennacl::cuda_arg<unsigned int>(A.handle2()),
                                   viennacl::cuda_arg<NumericT>(A.handle()),
                                   static_cast<unsigned int>(A.size1()),
                                   viennacl::cuda_arg<NumericT>(D.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_scale_kernel_1");

  // scale L:
  ilu_scale_kernel_2<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(L.handle1()),
                                   viennacl::cuda_arg<unsigned int>(L.handle2()),
                                   viennacl::cuda_arg<NumericT>(L.handle()),
                                   static_cast<unsigned int>(L.size1()),
                                   viennacl::cuda_arg<NumericT>(D.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_scale_kernel_2");
}


/** @brief CUDA kernel for one Chow-Patel-ICC sweep. */
template<typename NumericT>
__global__ void icc_chow_patel_sweep_kernel(
          unsigned int const *L_row_indices,
          unsigned int const *L_col_indices,
          NumericT           *L_elements,
          NumericT     const *L_backup,
          unsigned int        L_size1,
          NumericT     const *aij_L)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < L_size1;
                    row += gridDim.x * blockDim.x)
  {
    //
    // update L:
    //
    unsigned int row_Li_start = L_row_indices[row];
    unsigned int row_Li_end   = L_row_indices[row + 1];

    for (unsigned int i = row_Li_start; i < row_Li_end; ++i)
    {
      unsigned int col = L_col_indices[i];

      unsigned int row_Lj_start = L_row_indices[col];
      unsigned int row_Lj_end   = L_row_indices[col + 1];

      // compute \sum_{k=1}^{j-1} l_ik l_jk
      unsigned int index_Lj = row_Lj_start;
      unsigned int col_Lj   = L_col_indices[index_Lj];
      NumericT s = aij_L[i];
      for (unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li)
      {
        unsigned int col_Li = L_col_indices[index_Li];

        // find matching element in row j of L (the transposed factor plays the role of U)
        while (col_Lj < col_Li)
        {
          ++index_Lj;
          col_Lj = L_col_indices[index_Lj];
        }

        if (col_Lj == col_Li)
          s -= L_backup[index_Li] * L_backup[index_Lj];
      }

      // update l_ij:
      L_elements[i] = (row == col) ? sqrt(s) : (s / L_backup[row_Lj_end - 1]); // diagonal element l_jj is the last entry in row j of L
    }

  }
}
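
// In exact arithmetic, the fixed point of the update above satisfies
//   l_ij = (a_ij - \sum_{k<j} l_ik * l_jk) / l_jj   for i > j,
//   l_jj = sqrt(a_jj - \sum_{k<j} l_jk^2),
// i.e. exactly the equations an incomplete Cholesky factor fulfills on the
// sparsity pattern of L.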

/** @brief Performs one nonlinear relaxation step of the Chow-Patel-ICC (cf. Algorithm 2 in the paper) using CUDA. */
template<typename NumericT>
void icc_chow_patel_sweep(compressed_matrix<NumericT>       & L,
                          vector<NumericT>            const & aij_L)
{
  viennacl::backend::mem_handle L_backup;
  viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
  viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());

  icc_chow_patel_sweep_kernel<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(L.handle1()),
                                            viennacl::cuda_arg<unsigned int>(L.handle2()),
                                            viennacl::cuda_arg<NumericT>(L.handle()),
                                            viennacl::cuda_arg<NumericT>(L_backup),
                                            static_cast<unsigned int>(L.size1()),

                                            viennacl::cuda_arg<NumericT>(aij_L.handle())
                                           );
  VIENNACL_CUDA_LAST_ERROR_CHECK("icc_chow_patel_sweep_kernel");
}
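
// Usage sketch: the sweep is one relaxation step, so callers apply a small,
// fixed number of sweeps (the count below is illustrative, not prescriptive):
//
//   for (unsigned int sweep = 0; sweep < 3; ++sweep)
//     viennacl::linalg::cuda::icc_chow_patel_sweep(L, aij_L);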


template<typename IndexT> // to control external linkage
__global__ void extract_LU_kernel_1(
          const IndexT * A_row_indices,
          const IndexT * A_col_indices,
          unsigned int   A_size1,

          unsigned int * L_row_indices,

          unsigned int * U_row_indices)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < A_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = A_row_indices[row];
    unsigned int row_end   = A_row_indices[row+1];

    unsigned int num_entries_L = 0;
    unsigned int num_entries_U = 0;
    for (unsigned int j=row_begin; j<row_end; ++j)
    {
      unsigned int col = A_col_indices[j];
      if (col <= row)
        ++num_entries_L;
      if (col >= row)
        ++num_entries_U;
    }

    L_row_indices[row] = num_entries_L;
    U_row_indices[row] = num_entries_U;
  }
}

template<typename NumericT>
__global__ void extract_LU_kernel_2(
          unsigned int const *A_row_indices,
          unsigned int const *A_col_indices,
          NumericT     const *A_elements,
          unsigned int        A_size1,

          unsigned int const *L_row_indices,
          unsigned int       *L_col_indices,
          NumericT           *L_elements,

          unsigned int const *U_row_indices,
          unsigned int       *U_col_indices,
          NumericT           *U_elements)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < A_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = A_row_indices[row];
    unsigned int row_end   = A_row_indices[row+1];

    unsigned int index_L = L_row_indices[row];
    unsigned int index_U = U_row_indices[row];
    for (unsigned int j = row_begin; j < row_end; ++j)
    {
      unsigned int col = A_col_indices[j];
      NumericT value   = A_elements[j];

      if (col <= row)
      {
        L_col_indices[index_L] = col;
        L_elements[index_L]    = value;
        ++index_L;
      }

      if (col >= row)
      {
        U_col_indices[index_U] = col;
        U_elements[index_U]    = value;
        ++index_U;
      }
    }
  }
}

/** @brief Extracts the lower triangular part L and the upper triangular part U from A, each including the diagonal. */
template<typename NumericT>
void extract_LU(compressed_matrix<NumericT> const & A,
                compressed_matrix<NumericT>       & L,
                compressed_matrix<NumericT>       & U)
{
  //
  // Step 1: Count elements in L and U:
  //
  extract_LU_kernel_1<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                    viennacl::cuda_arg<unsigned int>(A.handle2()),
                                    static_cast<unsigned int>(A.size1()),
                                    viennacl::cuda_arg<unsigned int>(L.handle1()),
                                    viennacl::cuda_arg<unsigned int>(U.handle1())
                                   );
  VIENNACL_CUDA_LAST_ERROR_CHECK("extract_LU_kernel_1");

  //
  // Step 2: Exclusive scan on row buffers:
  //
  viennacl::vector<unsigned int> wrapped_L_row_buffer(viennacl::cuda_arg<unsigned int>(L.handle1()), viennacl::CUDA_MEMORY, A.size1() + 1);
  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
  L.reserve(wrapped_L_row_buffer[L.size1()], false);

  viennacl::vector<unsigned int> wrapped_U_row_buffer(viennacl::cuda_arg<unsigned int>(U.handle1()), viennacl::CUDA_MEMORY, A.size1() + 1);
  viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer);
  U.reserve(wrapped_U_row_buffer[U.size1()], false);

  //
  // Step 3: Write entries
  //
  extract_LU_kernel_2<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                    viennacl::cuda_arg<unsigned int>(A.handle2()),
                                    viennacl::cuda_arg<NumericT>(A.handle()),
                                    static_cast<unsigned int>(A.size1()),
                                    viennacl::cuda_arg<unsigned int>(L.handle1()),
                                    viennacl::cuda_arg<unsigned int>(L.handle2()),
                                    viennacl::cuda_arg<NumericT>(L.handle()),
                                    viennacl::cuda_arg<unsigned int>(U.handle1()),
                                    viennacl::cuda_arg<unsigned int>(U.handle2()),
                                    viennacl::cuda_arg<NumericT>(U.handle())
                                   );
  VIENNACL_CUDA_LAST_ERROR_CHECK("extract_LU_kernel_2");

  L.generate_row_block_information();
  // Note: block information for U will be generated after transposition

} // extract_LU
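
// Usage is analogous to extract_L, but fills both factors at once (a sketch;
// the names are illustrative):
//
//   viennacl::compressed_matrix<double> A(n, n), L(n, n), U(n, n);
//   // ... fill A ...
//   viennacl::linalg::cuda::extract_LU(A, L, U);
//   // L and U hold the lower/upper triangular parts, each with diagonal.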


/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
template<typename NumericT>
void ilu_scale(compressed_matrix<NumericT> const & A,
               compressed_matrix<NumericT>       & L,
               compressed_matrix<NumericT>       & U)
{
  viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));

  // fill D:
  ilu_scale_kernel_1<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(A.handle1()),
                                   viennacl::cuda_arg<unsigned int>(A.handle2()),
                                   viennacl::cuda_arg<NumericT>(A.handle()),
                                   static_cast<unsigned int>(A.size1()),
                                   viennacl::cuda_arg<NumericT>(D.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_scale_kernel_1");

  // scale L:
  ilu_scale_kernel_2<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(L.handle1()),
                                   viennacl::cuda_arg<unsigned int>(L.handle2()),
                                   viennacl::cuda_arg<NumericT>(L.handle()),
                                   static_cast<unsigned int>(L.size1()),
                                   viennacl::cuda_arg<NumericT>(D.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_scale_kernel_2");

  // scale U:
  ilu_scale_kernel_2<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(U.handle1()),
                                   viennacl::cuda_arg<unsigned int>(U.handle2()),
                                   viennacl::cuda_arg<NumericT>(U.handle()),
                                   static_cast<unsigned int>(U.size1()),
                                   viennacl::cuda_arg<NumericT>(D.handle())
                                  );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_scale_kernel_2");
}


/** @brief CUDA kernel for one Chow-Patel-ILU sweep. */
template<typename NumericT>
__global__ void ilu_chow_patel_sweep_kernel(
          unsigned int const *L_row_indices,
          unsigned int const *L_col_indices,
          NumericT           *L_elements,
          NumericT     const *L_backup,
          unsigned int        L_size1,

          NumericT     const *aij_L,

          unsigned int const *U_trans_row_indices,
          unsigned int const *U_trans_col_indices,
          NumericT           *U_trans_elements,
          NumericT     const *U_trans_backup,

          NumericT     const *aij_U_trans)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < L_size1;
                    row += gridDim.x * blockDim.x)
  {
    //
    // update L:
    //
    unsigned int row_L_start = L_row_indices[row];
    unsigned int row_L_end   = L_row_indices[row + 1];

    for (unsigned int j = row_L_start; j < row_L_end; ++j)
    {
      unsigned int col = L_col_indices[j];

      if (col == row)
        continue;

      unsigned int row_U_start = U_trans_row_indices[col];
      unsigned int row_U_end   = U_trans_row_indices[col + 1];

      // compute \sum_{k=1}^{j-1} l_ik u_kj
      unsigned int index_U = row_U_start;
      unsigned int col_U   = (index_U < row_U_end) ? U_trans_col_indices[index_U] : L_size1;
      NumericT sum = 0;
      for (unsigned int k = row_L_start; k < j; ++k)
      {
        unsigned int col_L = L_col_indices[k];

        // find element in U
        while (col_U < col_L)
        {
          ++index_U;
          col_U = U_trans_col_indices[index_U];
        }

        if (col_U == col_L)
          sum += L_backup[k] * U_trans_backup[index_U];
      }

      // update l_ij:
      L_elements[j] = (aij_L[j] - sum) / U_trans_backup[row_U_end - 1]; // diagonal element is last entry in U
    }


    //
    // update U:
    //
    unsigned int row_U_start = U_trans_row_indices[row];
    unsigned int row_U_end   = U_trans_row_indices[row + 1];
    for (unsigned int j = row_U_start; j < row_U_end; ++j)
    {
      unsigned int col = U_trans_col_indices[j];

      row_L_start = L_row_indices[col];
      row_L_end   = L_row_indices[col + 1];

      // compute \sum_{k=1}^{j-1} l_ik u_kj
      unsigned int index_L = row_L_start;
      unsigned int col_L   = (index_L < row_L_end) ? L_col_indices[index_L] : L_size1;
      NumericT sum = 0;
      for (unsigned int k = row_U_start; k < j; ++k)
      {
        unsigned int col_U = U_trans_col_indices[k];

        // find element in L
        while (col_L < col_U)
        {
          ++index_L;
          col_L = L_col_indices[index_L];
        }

        if (col_U == col_L)
          sum += L_backup[index_L] * U_trans_backup[k];
      }

      // update u_ij:
      U_trans_elements[j] = aij_U_trans[j] - sum;
    }
  }
}
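
// Note on the data layout: U is passed in transposed form, so "rows" of
// U_trans correspond to columns of U. Both updates above can therefore walk
// two sorted index ranges in a simple merge fashion instead of searching for
// the u_kj entries column by column.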

/** @brief Performs one nonlinear relaxation step of the Chow-Patel-ILU (cf. Algorithm 2 in the paper) using CUDA. */
template<typename NumericT>
void ilu_chow_patel_sweep(compressed_matrix<NumericT>       & L,
                          vector<NumericT>            const & aij_L,
                          compressed_matrix<NumericT>       & U_trans,
                          vector<NumericT>            const & aij_U_trans)
{
  viennacl::backend::mem_handle L_backup;
  viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
  viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());

  viennacl::backend::mem_handle U_backup;
  viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans));
  viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size());

  ilu_chow_patel_sweep_kernel<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(L.handle1()),
                                            viennacl::cuda_arg<unsigned int>(L.handle2()),
                                            viennacl::cuda_arg<NumericT>(L.handle()),
                                            viennacl::cuda_arg<NumericT>(L_backup),
                                            static_cast<unsigned int>(L.size1()),

                                            viennacl::cuda_arg<NumericT>(aij_L.handle()),

                                            viennacl::cuda_arg<unsigned int>(U_trans.handle1()),
                                            viennacl::cuda_arg<unsigned int>(U_trans.handle2()),
                                            viennacl::cuda_arg<NumericT>(U_trans.handle()),
                                            viennacl::cuda_arg<NumericT>(U_backup),

                                            viennacl::cuda_arg<NumericT>(aij_U_trans.handle())
                                           );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_chow_patel_sweep_kernel");
}
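
// Sketch of the relaxation loop as driven by a caller (illustrative only;
// setting up U_trans and the initial values aij_L, aij_U_trans is the job of
// the surrounding preconditioner code and is not shown here):
//
//   for (unsigned int sweep = 0; sweep < 3; ++sweep)
//     viennacl::linalg::cuda::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans);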


template<typename NumericT>
__global__ void ilu_form_neumann_matrix_kernel(
          unsigned int const *R_row_indices,
          unsigned int const *R_col_indices,
          NumericT           *R_elements,
          unsigned int        R_size1,

          NumericT           *D_elements)
{
  for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
                    row  < R_size1;
                    row += gridDim.x * blockDim.x)
  {
    unsigned int row_begin = R_row_indices[row];
    unsigned int row_end   = R_row_indices[row+1];

    // part 1: extract diagonal entry
    NumericT diag = 0;
    for (unsigned int j = row_begin; j < row_end; ++j)
    {
      unsigned int col = R_col_indices[j];
      if (col == row)
      {
        diag = R_elements[j];
        R_elements[j] = 0; // (I - D^{-1}R)
        break;
      }
    }
    D_elements[row] = diag;

    // part 2: scale
    for (unsigned int j = row_begin; j < row_end; ++j)
      R_elements[j] /= -diag;
  }
}
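
// After this kernel, R has been overwritten by M = I - D^{-1}R (whose
// diagonal is zero and stored explicitly as 0), while D holds the original
// diagonal of R. A triangular solve R x = b can then be approximated by the
// truncated Neumann series x = (I + M + M^2 + ...) D^{-1} b.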


/** @brief Transforms the triangular factor R in place into the Neumann series representation I - D^{-1}R and stores the diagonal of R in diag_R. */
template<typename NumericT>
void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,
                             vector<NumericT>            & diag_R)
{
  ilu_form_neumann_matrix_kernel<<<128, 128>>>(viennacl::cuda_arg<unsigned int>(R.handle1()),
                                               viennacl::cuda_arg<unsigned int>(R.handle2()),
                                               viennacl::cuda_arg<NumericT>(R.handle()),
                                               static_cast<unsigned int>(R.size1()),
                                               viennacl::cuda_arg<NumericT>(diag_R.handle())
                                              );
  VIENNACL_CUDA_LAST_ERROR_CHECK("ilu_form_neumann_matrix_kernel");
}
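
// Putting the pieces together: a Chow-Patel ILU setup on the CUDA backend
// roughly follows the sequence below (a sketch; the transposition of U and
// the extraction of the initial a_ij vectors happen in the generic
// preconditioner layer and are only indicated as comments):
//
//   viennacl::linalg::cuda::extract_LU(A, L, U);   // split A into L and U
//   viennacl::linalg::cuda::ilu_scale(A, L, U);    // scale towards unit diagonal
//   // ... transpose U into U_trans, record initial values aij_L, aij_U_trans ...
//   for (unsigned int sweep = 0; sweep < num_sweeps; ++sweep)
//     viennacl::linalg::cuda::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans);
//   // finally convert the factors for Jacobi-style triangular solves:
//   viennacl::linalg::cuda::ilu_form_neumann_matrix(L, diag_L);
//   viennacl::linalg::cuda::ilu_form_neumann_matrix(U_trans, diag_U);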

} //namespace cuda
} //namespace linalg
} //namespace viennacl


#endif