#ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
#define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_

// Implementations of incomplete block factorization (block ILU) preconditioners.
// Headers required by the code excerpted below:
#include <vector>
#include <utility>
#include <algorithm>
#include <cassert>

#include "viennacl/forwards.h"
#include "viennacl/linalg/detail/ilu/common.hpp"
#include "viennacl/linalg/detail/ilu/ilu0.hpp"
#include "viennacl/linalg/detail/ilu/ilut.hpp"

namespace viennacl
{
namespace linalg
{
namespace detail
{
/** @brief Helper range class for representing a subvector of a larger buffer. */
template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t>
class ilu_vector_range
{
public:
  ilu_vector_range(VectorT & v, SizeT start_index, SizeT vec_size
                  ) : vec_(v), start_(start_index), size_(vec_size) {}

  NumericT & operator()(SizeT index)
  {
    assert(index < size_ && bool("Index out of bounds!"));
    return vec_[start_ + index];
  }

  NumericT & operator[](SizeT index)
  {
    assert(index < size_ && bool("Index out of bounds!"));
    return vec_[start_ + index];
  }

  SizeT size() const { return size_; }

private:
  VectorT & vec_;
  SizeT start_;
  SizeT size_;
};
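
// Illustration (not part of the original header): with start_index = 100 and vec_size = 50,
// vec_range[k] aliases vec[100 + k] for k in [0, 50). This lets the block-wise triangular
// solves further below operate on a sub-range of a full-length vector without copying:
//
//   viennacl::linalg::detail::ilu_vector_range<std::vector<double>, double> vec_range(vec, 100, 50);
//   vec_range[0] = 1.0;   // writes vec[100]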
/** @brief Extracts a diagonal block from a larger system matrix
  *
  * @param A                 The full matrix
  * @param diagonal_block_A  The output matrix, to which the extracted block is written
  * @param start_index       First row- and column-index of the block
  * @param stop_index        First row- and column-index beyond the block
  */
template<typename NumericT>
void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A,
                          viennacl::compressed_matrix<NumericT>       & diagonal_block_A,
                          vcl_size_t start_index,
                          vcl_size_t stop_index)
{
  NumericT     const * A_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());

  NumericT     * output_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle());
  unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
  unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());

  // Copy the CSR entries of rows [start_index, stop_index), keeping only columns inside the
  // block and shifting all indices by start_index:
  vcl_size_t output_counter = 0;
  for (vcl_size_t row = start_index; row < stop_index; ++row)
  {
    unsigned int buffer_col_start = A_row_buffer[row];
    unsigned int buffer_col_end   = A_row_buffer[row+1];

    output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);

    for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
    {
      unsigned int col = A_col_buffer[buf_index];
      if (col < start_index)
        continue;
      if (col >= static_cast<unsigned int>(stop_index))
        continue;

      output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
      output_elements[output_counter]   = A_elements[buf_index];
      ++output_counter;
    }
    output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
  }
}
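
// Usage sketch (illustrative, not part of the original header): extracting the trailing
// 2x2 diagonal block of a 4x4 host-resident CSR matrix A. Entries A(i,j) with 2 <= i,j < 4
// are kept and re-indexed to (i-2, j-2). `A` and `nnz_bound` are assumed to be set up;
// nnz_bound must be at least the number of nonzeros of A in rows [2, 4):
//
//   viennacl::compressed_matrix<double> block(2, 2, nnz_bound, viennacl::context(viennacl::MAIN_MEMORY));
//   viennacl::linalg::detail::extract_block_matrix(A, block, 2, 4);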

} // namespace detail

/** @brief A block ILU preconditioner class, can be supplied to solve()-routines. */
template<typename MatrixT, typename ILUTag>
class block_ilu_precond
{
  typedef typename MatrixT::value_type      ScalarType;

public:
  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >   index_vector_type;   // index range [a, b) of each block

  block_ilu_precond(MatrixT const & mat, ILUTag const & tag, vcl_size_t num_blocks = 8)
    : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks)
  {
    // Split the row range of mat into num_blocks roughly equal blocks:
    block_indices_.resize(num_blocks);
    for (vcl_size_t i=0; i<num_blocks; ++i)
    {
      vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
      vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
      block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
    }
    init(mat);
  }

  block_ilu_precond(MatrixT const & mat, ILUTag const & tag, index_vector_type const & block_boundaries)
    : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size())
  {
    init(mat);
  }

  /** @brief Applies the preconditioner to a vector: one block-wise forward and backward solve. */
  template<typename VectorT>
  void apply(VectorT & vec) const
  {
    for (vcl_size_t i=0; i<block_indices_.size(); ++i)
      apply_dispatch(vec, i, ILUTag());
  }
private:
  void init(MatrixT const & A)
  {
    // Work on a host-resident CSR copy of the system matrix:
    viennacl::context host_context(viennacl::MAIN_MEMORY);
    viennacl::compressed_matrix<ScalarType> mat(host_context);
    viennacl::copy(A, mat);

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);   // OpenMP requires a signed loop index

      // Extract the diagonal block for the index range [first, second):
      vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
      vcl_size_t block_nnz  = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
      viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
      detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);

      // Factor the block with the selected ILU variant (ILU0 or ILUT):
      init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_);
    }
  }
  // Block solve for ILU0: the factorization keeps the static sparsity pattern, so
  // L_blocks[i] holds both the unit-lower and the upper factor in one CSR matrix.
  template<typename VectorT>
  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) const
  {
    detail::ilu_vector_range<VectorT, ScalarType>  vec_range(vec, block_indices_[i].first, L_blocks[i].size2());

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
    ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());

    viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
    viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag());
  }
  // Block solve for ILUT: L and U are stored as separate CSR matrices.
  template<typename VectorT>
  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) const
  {
    detail::ilu_vector_range<VectorT, ScalarType>  vec_range(vec, block_indices_[i].first, L_blocks[i].size2());

    {
      unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
      unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
      ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());

      viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
    }

    {
      unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1());
      unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2());
      ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle());

      viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag());
    }
  }
  ILUTag            tag_;
  index_vector_type block_indices_;
  std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks;
  std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks;
};
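
// Usage sketch (illustrative, not part of the original header). The preconditioner is meant
// to be supplied to the iterative solve()-routines; `A`, `rhs` and the user's matrix type
// `MatrixType` are assumed to be set up elsewhere, and viennacl::linalg::bicgstab_tag is one
// possible solver tag. The second constructor takes explicit half-open block ranges:
//
//   typedef viennacl::linalg::block_ilu_precond<MatrixType, viennacl::linalg::ilut_tag>  BlockILUT;
//
//   BlockILUT::index_vector_type bounds;
//   bounds.push_back(std::make_pair(vcl_size_t(0),   vcl_size_t(500)));    // block 0: rows [0, 500)
//   bounds.push_back(std::make_pair(vcl_size_t(500), vcl_size_t(1000)));   // block 1: rows [500, 1000)
//
//   BlockILUT precond_custom(A, viennacl::linalg::ilut_tag(), bounds);
//   BlockILUT precond_default(A, viennacl::linalg::ilut_tag());            // 8 uniform blocks by default
//
//   viennacl::vector<double> x = viennacl::linalg::solve(A, rhs, viennacl::linalg::bicgstab_tag(), precond_default);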

/** @brief Block ILU preconditioner; specialization for viennacl::compressed_matrix, can be supplied to solve()-routines. */
template<typename NumericT, unsigned int AlignmentV, typename ILUTagT>
class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT>
{
  typedef compressed_matrix<NumericT, AlignmentV>   MatrixType;

public:
  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >   index_vector_type;   // index range [a, b) of each block

  block_ilu_precond(MatrixType const & mat, ILUTagT const & tag, vcl_size_t num_blocks = 8)
    : tag_(tag), block_indices_(num_blocks), gpu_block_indices_(),
      // (initializers for gpu_L_trans_, gpu_U_trans_ and gpu_D_ elided in this excerpt)
      L_blocks_(num_blocks), U_blocks_(num_blocks)
  {
    // Uniform splitting of the row range into num_blocks blocks:
    block_indices_.resize(num_blocks);
    for (vcl_size_t i=0; i<num_blocks; ++i)
    {
      vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
      vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
      block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
    }
    init(mat);
  }

  block_ilu_precond(MatrixType const & mat, ILUTagT const & tag, index_vector_type const & block_boundaries)
    : tag_(tag), block_indices_(block_boundaries), gpu_block_indices_(),
      L_blocks_(block_boundaries.size()), U_blocks_(block_boundaries.size())
  {
    init(mat);
  }
  // (the public apply(viennacl::vector<NumericT> &) const member is elided in this excerpt)

private:
  void init(MatrixType const & A)
  {
    // Work on a host-resident CSR copy of the system matrix:
    viennacl::context host_context(viennacl::MAIN_MEMORY);
    viennacl::compressed_matrix<NumericT> mat(host_context);
    mat = A;

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
    {
      // Step 1: Extract the diagonal block for the index range [first, second):
      vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first;
      vcl_size_t block_nnz  = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
      viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context);

      detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second);

      // Step 2: Factor the block with the selected ILU variant (ILU0 or ILUT):
      init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_);
    }

    // Step 3: Upload the block boundaries as a flat array [start_0, stop_0, start_1, stop_1, ...]:
    viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size());
    for (vcl_size_t i=0; i<block_indices_.size(); ++i)
    {
      block_indices_uint.set(2*i,     block_indices_[i].first);
      block_indices_uint.set(2*i + 1, block_indices_[i].second);
    }
    viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());

    blocks_to_device(mat);
  }
  // Assembles the transposed block factors into gpu_L_trans_ / gpu_U_trans_ and collects
  // the diagonal of U in gpu_D_.
  void blocks_to_device(MatrixType const & A)
  {
    gpu_L_trans_.resize(A.size1(), A.size2());
    gpu_U_trans_.resize(A.size1(), A.size2());
    gpu_D_.resize(A.size1());

    unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1());
    unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1());

    //
    // Pass 1: count the number of entries per transposed row
    //
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
    {
      vcl_size_t block_index = static_cast<vcl_size_t>(block_index2);

      unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);
      unsigned int block_stop  = static_cast<unsigned int>(block_indices_[block_index].second);

      unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
      unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());

      // zero the row array of the transposed L for this block:
      std::fill(L_trans_row_buffer + block_start,
                L_trans_row_buffer + block_stop,
                static_cast<unsigned int>(0));

      // count strictly-lower entries of L per column (i.e. per row of the transpose):
      for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
      {
        unsigned int col_start = L_row_buffer[row];
        unsigned int col_end   = L_row_buffer[row+1];

        for (unsigned int j = col_start; j < col_end; ++j)
        {
          unsigned int col = L_col_buffer[j];
          if (col < static_cast<unsigned int>(row))
            L_trans_row_buffer[col + block_start] += 1;
        }
      }

      // same for U (strictly-upper entries only; the diagonal goes to gpu_D_ in pass 2):
      unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
      unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());

      std::fill(U_trans_row_buffer + block_start,
                U_trans_row_buffer + block_stop,
                static_cast<unsigned int>(0));

      for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
      {
        unsigned int col_start = U_row_buffer[row];
        unsigned int col_end   = U_row_buffer[row+1];

        for (unsigned int j = col_start; j < col_end; ++j)
        {
          unsigned int col = U_col_buffer[j];
          if (col > static_cast<unsigned int>(row))
            U_trans_row_buffer[col + block_start] += 1;
        }
      }
    }

    //
    // Exclusive scan turns the per-row counts into CSR row pointers; the running sum is the nnz to reserve
    //
    unsigned int current_value = 0;
    for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i)
    {
      unsigned int tmp = L_trans_row_buffer[i];
      L_trans_row_buffer[i] = current_value;
      current_value += tmp;
    }
    gpu_L_trans_.reserve(current_value);

    current_value = 0;
    for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i)
    {
      unsigned int tmp = U_trans_row_buffer[i];
      U_trans_row_buffer[i] = current_value;
      current_value += tmp;
    }
    gpu_U_trans_.reserve(current_value);
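
    // Illustration (not part of the original header): if the counting pass produced per-row
    // counts {2, 0, 3, 1}, the exclusive scan rewrites them to row pointers {0, 2, 2, 5} and
    // current_value ends up as 6, the total number of nonzeros reserved above.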

    //
    // Pass 2: fill the transposed factors with data
    //
    unsigned int * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2());
    NumericT     * L_trans_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle());

    unsigned int * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2());
    NumericT     * U_trans_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle());

    NumericT     * D_elements         = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle());

    // per-row write offsets into the transposed factors:
    std::vector<unsigned int> offset_L(gpu_L_trans_.size1());
    std::vector<unsigned int> offset_U(gpu_U_trans_.size1());

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
    {
      vcl_size_t block_index = static_cast<vcl_size_t>(block_index2);

      unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);

      unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
      unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
      NumericT     const * L_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(L_blocks_[block_index].handle());

      // write the strictly-lower part of L, transposed:
      for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
      {
        unsigned int col_start = L_row_buffer[row];
        unsigned int col_end   = L_row_buffer[row+1];

        for (unsigned int j = col_start; j < col_end; ++j)
        {
          unsigned int col = L_col_buffer[j];
          if (col < static_cast<unsigned int>(row))
          {
            unsigned int row_trans = col + block_start;
            unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans];
            offset_L[row_trans] += 1;

            L_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
            L_trans_elements[k]   = L_elements[j];
          }
        }
      }

      unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
      unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
      NumericT     const * U_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(U_blocks_[block_index].handle());

      // write the strictly-upper part of U transposed; diagonal entries go to gpu_D_:
      for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
      {
        unsigned int col_start = U_row_buffer[row];
        unsigned int col_end   = U_row_buffer[row+1];

        for (unsigned int j = col_start; j < col_end; ++j)
        {
          unsigned int row_trans = U_col_buffer[j] + block_start;
          unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans];

          if (row_trans == row + block_start)           // diagonal entry of U
          {
            D_elements[row_trans] = U_elements[j];
          }
          else if (row_trans > row + block_start)       // entry of the transposed U
          {
            offset_U[row_trans] += 1;

            U_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
            U_trans_elements[k]   = U_elements[j];
          }
        }
      }
    }

    // (elided in this excerpt: the original routine finally moves gpu_L_trans_, gpu_U_trans_
    //  and gpu_D_ from main memory to the memory domain of A)
  }
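
  // Summary (illustrative, not part of the original header): after the two passes above,
  // gpu_L_trans_ holds the strictly-lower parts of all block factors in transposed CSR form,
  // gpu_U_trans_ the strictly-upper parts, and gpu_D_ the diagonal of U. Together with the
  // flat block-boundary array gpu_block_indices_, this is the layout consumed by the
  // device-side block solves (viennacl::linalg::detail::block_inplace_solve).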
  ILUTagT                               tag_;
  index_vector_type                     block_indices_;
  viennacl::backend::mem_handle         gpu_block_indices_;
  viennacl::compressed_matrix<NumericT> gpu_L_trans_;
  viennacl::compressed_matrix<NumericT> gpu_U_trans_;
  viennacl::vector<NumericT>            gpu_D_;

  std::vector<MatrixType> L_blocks_;
  std::vector<MatrixType> U_blocks_;
};

} // namespace linalg
} // namespace viennacl

#endif