#ifndef VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_

#ifndef VIENNACL_OPENMP_ILU_MIN_SIZE
  #define VIENNACL_OPENMP_ILU_MIN_SIZE  5000
#endif
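// Problem sizes (rows or nonzeros, see the 'if' clauses of the OpenMP pragmas below) smaller than
// this threshold are processed sequentially, since the OpenMP fork/join overhead would dominate.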

template<typename NumericT>
void extract_L(compressed_matrix<NumericT> const & A, compressed_matrix<NumericT> & L)
{
  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
  NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());

  unsigned int       *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
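
  //
  // Step 1: Count the number of entries per row going into L (entries with column index <= row index):
  //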
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    unsigned int num_entries_L = 0;
    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col = A_col_buffer[j];
      if (long(col) <= row)
        ++num_entries_L;
    }

    L_row_buffer[row] = num_entries_L;
  }
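
  //
  // Step 2: Exclusive scan on the row counts in L_row_buffer yields the row start offsets of L,
  //         and L.reserve() allocates the nonzero storage before the entries are written:
  //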
  unsigned int       *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());
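
  //
  // Step 3: Write the column indices and values of the entries on or below the diagonal into L:
  //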
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    unsigned int index_L = L_row_buffer[row];
    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col   = A_col_buffer[j];
      NumericT     value = A_elements[j];

      if (long(col) <= row)
      {
        L_col_buffer[index_L] = col;
        L_elements[index_L]   = value;
        ++index_L;
      }
    }
  }
} // extract_L

/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. */
template<typename NumericT>
void icc_scale(compressed_matrix<NumericT> const & A, compressed_matrix<NumericT> & L)
{
  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
  NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());

  // D holds the scaling factors 1/sqrt(|a_ii|):
  viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
  NumericT *D_elements = detail::extract_raw_pointer<NumericT>(D.handle());
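
  //
  // Step 1: Determine the diagonal scaling factors D[i] = 1 / sqrt(|a_ii|):
  //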
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col = A_col_buffer[j];
      if (col == static_cast<unsigned int>(row))
        D_elements[row] = NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
    }
  }
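
  //
  // Step 2: Scale the entries of L symmetrically (L <- D * L * D), so that the scaled system has unit diagonal:
  //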
  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = L_row_buffer[row];
    unsigned int col_end   = L_row_buffer[row+1];

    NumericT D_row = D_elements[row];

    for (unsigned int j = col_begin; j < col_end; ++j)
      L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
  }
} // icc_scale

/** @brief Performs one nonlinear relaxation step in the Chow-Patel ICC using OpenMP (cf. Algorithm 3 in the paper). */
template<typename NumericT>
void icc_chow_patel_sweep(compressed_matrix<NumericT> & L, vector<NumericT> & aij_L)
{
  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());

  NumericT           *aij_ptr      = detail::extract_raw_pointer<NumericT>(aij_L.handle());

  // Work on a backup of the values of L so that the sweep reads the values of the previous iterate:
  std::vector<NumericT> L_backup(L.nnz());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
    L_backup[i] = L_elements[i];

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(L.size1()); ++row)
  {
    unsigned int row_Li_start = L_row_buffer[row];
    unsigned int row_Li_end   = L_row_buffer[row + 1];

    for (unsigned int i = row_Li_start; i < row_Li_end; ++i)
    {
      unsigned int col = L_col_buffer[i];

      unsigned int row_Lj_start = L_row_buffer[col];
      unsigned int row_Lj_end   = L_row_buffer[col+1];

      // compute a_ij - \sum_{k<j} l_ik l_jk by merging the sparse rows 'row' and 'col' of L:
      unsigned int index_Lj = row_Lj_start;
      unsigned int col_Lj   = L_col_buffer[index_Lj];
      NumericT s = aij_ptr[i];
      for (unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li)
      {
        unsigned int col_Li = L_col_buffer[index_Li];

        // find the matching entry in row 'col' of L:
        while (col_Lj < col_Li)
        {
          ++index_Lj;
          col_Lj = L_col_buffer[index_Lj];
        }

        if (col_Lj == col_Li)
          s -= L_backup[index_Li] * L_backup[index_Lj];
      }

      // off-diagonal entries are divided by the diagonal of row 'col' (its last entry),
      // the diagonal entry itself receives the square root:
      if (long(col) != row)
        L_elements[i] = s / L_backup[row_Lj_end - 1];
      else
        L_elements[i] = std::sqrt(s);
    }
  }
} // icc_chow_patel_sweep
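
// A minimal usage sketch for the ICC routines above (illustrative only; 'A' is assumed to be a
// host-resident viennacl::compressed_matrix<double> and 'sweeps' a caller-chosen iteration count):
//
//   viennacl::compressed_matrix<double> L(A.size1(), A.size2(), viennacl::traits::context(A));
//   viennacl::linalg::host_based::extract_L(A, L);   // lower triangular part of A (incl. diagonal)
//   viennacl::linalg::host_based::icc_scale(A, L);   // symmetric scaling to unit diagonal
//
//   viennacl::vector<double> aij_L(L.nnz());         // initial values a_ij, one per nonzero of L
//   // (initialize aij_L with the current values of L, e.g. via viennacl::backend::memory_copy)
//
//   for (unsigned int i = 0; i < sweeps; ++i)
//     viennacl::linalg::host_based::icc_chow_patel_sweep(L, aij_L);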

template<typename NumericT>
void extract_LU(compressed_matrix<NumericT> const & A, compressed_matrix<NumericT> & L, compressed_matrix<NumericT> & U)
{
  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
  NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());

  unsigned int       *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int       *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
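
  //
  // Step 1: Count the number of entries per row going into L (col <= row) and into U (col >= row):
  //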
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    unsigned int num_entries_L = 0;
    unsigned int num_entries_U = 0;
    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col = A_col_buffer[j];
      if (long(col) <= row)
        ++num_entries_L;
      if (long(col) >= row)
        ++num_entries_U;
    }

    L_row_buffer[row] = num_entries_L;
    U_row_buffer[row] = num_entries_U;
  }
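
  //
  // Step 2: Exclusive scans on L_row_buffer and U_row_buffer yield the row start offsets,
  //         and L.reserve()/U.reserve() allocate the nonzero storage:
  //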
  unsigned int       *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());

  unsigned int       *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());
  NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U.handle());
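
  //
  // Step 3: Write the entries of L and U:
  //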
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    unsigned int index_L = L_row_buffer[row];
    unsigned int index_U = U_row_buffer[row];
    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col   = A_col_buffer[j];
      NumericT     value = A_elements[j];

      if (long(col) <= row)
      {
        L_col_buffer[index_L] = col;
        L_elements[index_L]   = value;
        ++index_L;
      }

      if (long(col) >= row)
      {
        U_col_buffer[index_U] = col;
        U_elements[index_U]   = value;
        ++index_U;
      }
    }
  }
} // extract_LU

/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. */
template<typename NumericT>
void ilu_scale(compressed_matrix<NumericT> const & A, compressed_matrix<NumericT> & L, compressed_matrix<NumericT> & U)
{
  unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
  NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());

  // D holds the scaling factors 1/sqrt(|a_ii|):
  viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
  NumericT *D_elements = detail::extract_raw_pointer<NumericT>(D.handle());
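
  //
  // Step 1: Determine the diagonal scaling factors D[i] = 1 / sqrt(|a_ii|):
  //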
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = A_row_buffer[row];
    unsigned int col_end   = A_row_buffer[row+1];

    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col = A_col_buffer[j];
      if (col == static_cast<unsigned int>(row))
        D_elements[row] = NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
    }
  }
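
  //
  // Step 2: Scale the entries of L: L <- D * L * D:
  //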
  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = L_row_buffer[row];
    unsigned int col_end   = L_row_buffer[row+1];

    NumericT D_row = D_elements[row];

    for (unsigned int j = col_begin; j < col_end; ++j)
      L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
  }
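
  //
  // Step 3: Scale the entries of U likewise: U <- D * U * D:
  //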
  unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());
  NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U.handle());
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
  {
    unsigned int col_begin = U_row_buffer[row];
    unsigned int col_end   = U_row_buffer[row+1];

    NumericT D_row = D_elements[row];

    for (unsigned int j = col_begin; j < col_end; ++j)
      U_elements[j] *= D_row * D_elements[U_col_buffer[j]];
  }
} // ilu_scale

/** @brief Computes B = A^T for compressed (CSR) matrices A and B. */
template<typename NumericT>
void ilu_transpose(compressed_matrix<NumericT> const & A, compressed_matrix<NumericT> & B)
{
  NumericT     const * A_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());

  // (B is assumed to already provide storage for A.nnz() nonzeros and A.size2() rows at this point)
  NumericT     * B_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(B.handle());
  unsigned int * B_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle1());
  unsigned int * B_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle2());

  // Zero-initialize the row counts of B:
  for (std::size_t i = 0; i < B.size1(); ++i)
    B_row_buffer[i] = 0;

  // Stage 1: Count the number of entries per column of A (= per row of B):
  for (std::size_t row = 0; row < A.size1(); ++row)
  {
    unsigned int row_start = A_row_buffer[row];
    unsigned int row_stop  = A_row_buffer[row+1];

    for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
      B_row_buffer[A_col_buffer[nnz_index]] += 1;
  }

  // Stage 2: Exclusive scan over the counts to obtain the row start offsets of B:
  unsigned int offset = B_row_buffer[0];
  B_row_buffer[0]     = 0;
  for (std::size_t row = 1; row < B.size1(); ++row)
  {
    unsigned int tmp  = B_row_buffer[row];
    B_row_buffer[row] = offset;
    offset += tmp;
  }
  B_row_buffer[B.size1()] = offset;

  // Stage 3: Write the entries of B = A^T (B_row_offsets tracks the current fill position within each row of B):
  std::vector<unsigned int> B_row_offsets(B.size1());

  for (unsigned int row = 0; row < static_cast<unsigned int>(A.size1()); ++row)
  {
    unsigned int row_start = A_row_buffer[row];
    unsigned int row_stop  = A_row_buffer[row+1];

    for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
    {
      unsigned int col_in_A    = A_col_buffer[nnz_index];
      unsigned int B_nnz_index = B_row_buffer[col_in_A] + B_row_offsets[col_in_A];
      B_col_buffer[B_nnz_index] = row;
      B_elements[B_nnz_index]   = A_elements[nnz_index];
      ++B_row_offsets[col_in_A];
    }
  }
} // ilu_transpose
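
// In the Chow-Patel ILU setup, ilu_transpose() is used to obtain U_trans (the transpose of the upper
// factor U), which ilu_chow_patel_sweep() below expects so that columns of U can be traversed as rows.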

/** @brief Performs one nonlinear relaxation step in the Chow-Patel ILU using OpenMP (cf. Algorithm 2 in the paper). */
template<typename NumericT>
void ilu_chow_patel_sweep(compressed_matrix<NumericT> & L,       vector<NumericT> const & aij_L,
                          compressed_matrix<NumericT> & U_trans, vector<NumericT> const & aij_U_trans)
{
  unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());
  NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.handle());

  NumericT const     *aij_L_ptr    = detail::extract_raw_pointer<NumericT>(aij_L.handle());

  unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle1());
  unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.handle2());
  NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U_trans.handle());

  NumericT const     *aij_U_trans_ptr = detail::extract_raw_pointer<NumericT>(aij_U_trans.handle());

  // Work on backup copies of the values of L and U so that the sweep reads the previous iterate:
  std::vector<NumericT> L_backup(L.nnz());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(L.nnz()); ++i)
    L_backup[i] = L_elements[i];

  std::vector<NumericT> U_backup(U_trans.nnz());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (U_trans.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long i = 0; i < static_cast<long>(U_trans.nnz()); ++i)
    U_backup[i] = U_elements[i];

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(L.size1()); ++row)
  {
    //
    // update the entries of L in the current row:
    //
    unsigned int row_L_start = L_row_buffer[row];
    unsigned int row_L_end   = L_row_buffer[row + 1];

    for (unsigned int j = row_L_start; j < row_L_end; ++j)
    {
      unsigned int col = L_col_buffer[j];

      if (long(col) == row) // unit diagonal of L is not updated
        continue;

      unsigned int row_U_start = U_row_buffer[col];
      unsigned int row_U_end   = U_row_buffer[col + 1];

      // compute \sum_{k<col} l_{row,k} u_{k,col} by merging row 'row' of L with row 'col' of U_trans:
      unsigned int index_U = row_U_start;
      unsigned int col_U   = (index_U < row_U_end) ? U_col_buffer[index_U] : static_cast<unsigned int>(U_trans.size2());
      NumericT sum = 0;
      for (unsigned int k = row_L_start; k < j; ++k)
      {
        unsigned int col_L = L_col_buffer[k];

        // find the matching entry in U:
        while (col_U < col_L)
        {
          ++index_U;
          col_U = U_col_buffer[index_U];
        }

        if (col_U == col_L)
          sum += L_backup[k] * U_backup[index_U];
      }

      // update l_ij (the diagonal of U is the last entry in row 'col' of U_trans):
      assert(U_col_buffer[row_U_end - 1] == col && bool("Accessing U element which is not a diagonal element!"));
      L_elements[j] = (aij_L_ptr[j] - sum) / U_backup[row_U_end - 1];
    }

    //
    // update the entries of U (traversed via U_trans) in the current row:
    //
    unsigned int row_U_start = U_row_buffer[row];
    unsigned int row_U_end   = U_row_buffer[row + 1];
    for (unsigned int j = row_U_start; j < row_U_end; ++j)
    {
      unsigned int col = U_col_buffer[j];

      row_L_start = L_row_buffer[col];
      row_L_end   = L_row_buffer[col + 1];

      // compute \sum_{k<col} l_{col,k} u_{k,row} by merging row 'col' of L with row 'row' of U_trans:
      unsigned int index_L = row_L_start;
      unsigned int col_L   = (index_L < row_L_end) ? L_col_buffer[index_L] : static_cast<unsigned int>(L.size1());
      NumericT sum = 0;
      for (unsigned int k = row_U_start; k < j; ++k)
      {
        unsigned int col_U = U_col_buffer[k];

        // find the matching entry in L:
        while (col_L < col_U)
        {
          ++index_L;
          col_L = L_col_buffer[index_L];
        }

        if (col_L == col_U)
          sum += L_backup[index_L] * U_backup[k];
      }

      // update u_ij:
      U_elements[j] = aij_U_trans_ptr[j] - sum;
    }
  }
} // ilu_chow_patel_sweep
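
// A minimal usage sketch for the ILU routines above (illustrative only; 'A' is assumed to be a
// host-resident viennacl::compressed_matrix<double> and 'sweeps' a caller-chosen iteration count):
//
//   viennacl::compressed_matrix<double> L(A.size1(), A.size2(), viennacl::traits::context(A));
//   viennacl::compressed_matrix<double> U(A.size1(), A.size2(), viennacl::traits::context(A));
//   viennacl::linalg::host_based::extract_LU(A, L, U);   // split A into lower and upper triangular parts
//   viennacl::linalg::host_based::ilu_scale(A, L, U);    // symmetric scaling to unit diagonal
//
//   viennacl::compressed_matrix<double> U_trans;
//   viennacl::linalg::host_based::ilu_transpose(U, U_trans);
//
//   // aij_L and aij_U_trans hold the initial (scaled) values, one entry per nonzero:
//   viennacl::vector<double> aij_L(L.nnz()), aij_U_trans(U_trans.nnz());
//   // (initialize them with the current values of L and U_trans, e.g. via viennacl::backend::memory_copy)
//
//   for (unsigned int i = 0; i < sweeps; ++i)
//     viennacl::linalg::host_based::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans);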

template<typename NumericT>
void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R, vector<NumericT> & diag_R)
{
  unsigned int *R_row_buffer = detail::extract_raw_pointer<unsigned int>(R.handle1());
  unsigned int *R_col_buffer = detail::extract_raw_pointer<unsigned int>(R.handle2());
  NumericT     *R_elements   = detail::extract_raw_pointer<NumericT>(R.handle());

  NumericT     *diag_R_ptr   = detail::extract_raw_pointer<NumericT>(diag_R.handle());
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (R.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE)
#endif
  for (long row = 0; row < static_cast<long>(R.size1()); ++row)
  {
    unsigned int col_begin = R_row_buffer[row];
    unsigned int col_end   = R_row_buffer[row+1];

    // Part 1: Extract the diagonal entry of the current row and store it in diag_R:
    NumericT diag = NumericT(0);
    for (unsigned int j = col_begin; j < col_end; ++j)
    {
      unsigned int col = R_col_buffer[j];
      if (col == static_cast<unsigned int>(row))
      {
        diag = R_elements[j];
        R_elements[j] = 0; // remove the diagonal from R; only the strictly triangular part remains
        break;
      }
    }
    diag_R_ptr[row] = diag;

    assert((diag > 0 || diag < 0) && bool("Zero diagonal detected!"));

    // Part 2: Scale the remaining entries by -1/diag to obtain the Neumann-series matrix:
    for (unsigned int j = col_begin; j < col_end; ++j)
      R_elements[j] /= -diag;
  }
} // ilu_form_neumann_matrix
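
// With D = diag_R and E the strictly triangular remainder of the original factor, R now holds -D^{-1} * E.
// The (approximate) triangular solve in the preconditioner application can then be carried out via a
// truncated Neumann series,
//   (D + E)^{-1} b  ~=  (I + R + R^2 + ... + R^k) * D^{-1} * b,
// which requires only sparse matrix-vector products and therefore parallelizes well.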