ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
block_ilu.hpp
1 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
2 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file block_ilu.hpp
22  @brief Implementations of incomplete block factorization preconditioners
23 */
24 
25 #include <vector>
26 #include <cmath>
27 #include "viennacl/forwards.h"
28 #include "viennacl/tools/tools.hpp"
29 #include "viennacl/linalg/detail/ilu/common.hpp"
30 #include "viennacl/linalg/detail/ilu/ilu0.hpp"
31 #include "viennacl/linalg/detail/ilu/ilut.hpp"
32 
33 #include <map>
34 
35 namespace viennacl
36 {
37 namespace linalg
38 {
39 namespace detail
40 {
41 /** @brief Helper range class for representing a subvector of a larger buffer. */
42  template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t>
43  class ilu_vector_range
44  {
45  public:
46  ilu_vector_range(VectorT & v,
47  SizeT start_index,
48  SizeT vec_size
49  ) : vec_(v), start_(start_index), size_(vec_size) {}
50 
51  NumericT & operator()(SizeT index)
52  {
53  assert(index < size_ && bool("Index out of bounds!"));
54  return vec_[start_ + index];
55  }
56 
57  NumericT & operator[](SizeT index)
58  {
59  assert(index < size_ && bool("Index out of bounds!"));
60  return vec_[start_ + index];
61  }
62 
63  SizeT size() const { return size_; }
64 
65  private:
66  VectorT & vec_;
67  SizeT start_;
68  SizeT size_;
69  };
70 
71 /** @brief Extracts a diagonal block from a larger system matrix
72  *
73  * @param A                 The full system matrix
74  * @param diagonal_block_A  The output matrix to which the extracted block is written
75  * @param start_index       First row- and column-index of the block
76  * @param stop_index        First row- and column-index beyond the block
77  */
78  template<typename NumericT>
79  void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A,
80  viennacl::compressed_matrix<NumericT> & diagonal_block_A,
81  vcl_size_t start_index,
82  vcl_size_t stop_index
83  )
84  {
85  assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
86  assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
87  assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
88 
89  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
90  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
91  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
92 
93  NumericT * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle());
94  unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
95  unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
96 
97  vcl_size_t output_counter = 0;
98  for (vcl_size_t row = start_index; row < stop_index; ++row)
99  {
100  unsigned int buffer_col_start = A_row_buffer[row];
101  unsigned int buffer_col_end = A_row_buffer[row+1];
102 
103  output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);
104 
105  for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
106  {
107  unsigned int col = A_col_buffer[buf_index];
108  if (col < start_index)
109  continue;
110 
111  if (col >= static_cast<unsigned int>(stop_index))
112  continue;
113 
114  output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
115  output_elements[output_counter] = A_elements[buf_index];
116  ++output_counter;
117  }
118  output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
119  }
120  }
121 
122 } // namespace detail
123 
124 
125 
126 /** @brief A block ILU preconditioner class, can be supplied to solve()-routines
127  *
128  * @tparam MatrixT  Type of the system matrix
129  * @tparam ILUTag   Tag identifying the ILU variant used per block (e.g. ilu0_tag, ilut_tag)
130 */
131 template<typename MatrixT, typename ILUTag>
132 class block_ilu_precond
133 {
134 typedef typename MatrixT::value_type ScalarType;
135 
136 public:
137  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
138 
139 
140  block_ilu_precond(MatrixT const & mat,
141  ILUTag const & tag,
142  vcl_size_t num_blocks = 8
143  ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks)
144  {
145  // Set up vector of block indices:
146  block_indices_.resize(num_blocks);
147  for (vcl_size_t i=0; i<num_blocks; ++i)
148  {
149  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
150  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
151 
152  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
153  }
154 
155  //initialize preconditioner:
156  //std::cout << "Start CPU precond" << std::endl;
157  init(mat);
158  //std::cout << "End CPU precond" << std::endl;
159  }
160 
161  block_ilu_precond(MatrixT const & mat,
162  ILUTag const & tag,
163  index_vector_type const & block_boundaries
164  ) : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size())
165  {
166  //initialize preconditioner:
167  //std::cout << "Start CPU precond" << std::endl;
168  init(mat);
169  //std::cout << "End CPU precond" << std::endl;
170  }
171 
172 
173  template<typename VectorT>
174  void apply(VectorT & vec) const
175  {
176  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
177  apply_dispatch(vec, i, ILUTag());
178  }
179 
180 private:
181  void init(MatrixT const & A)
182  {
183  viennacl::context host_context(viennacl::MAIN_MEMORY);
184  viennacl::compressed_matrix<ScalarType> mat(host_context);
185 
186  viennacl::copy(A, mat);
187 
188  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
189 
190 #ifdef VIENNACL_WITH_OPENMP
191  #pragma omp parallel for
192 #endif
193  for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
194  {
195  vcl_size_t i = static_cast<vcl_size_t>(i2);
196  // Step 1: Extract blocks
197  vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
198  vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
199  viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
200 
201  detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
202 
203  // Step 2: Precondition blocks:
204  viennacl::switch_memory_context(L_blocks[i], host_context);
205  viennacl::switch_memory_context(U_blocks[i], host_context);
206  init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_);
207  }
208 
209  }
210 
211  void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
212  viennacl::compressed_matrix<ScalarType> & L,
213  viennacl::compressed_matrix<ScalarType> & U,
214  viennacl::linalg::ilu0_tag)
215  {
216  (void)U;
217  L = mat_block;
218  viennacl::linalg::precondition(L, tag_);
219  }
220 
221  void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
222  viennacl::compressed_matrix<ScalarType> & L,
223  viennacl::compressed_matrix<ScalarType> & U,
224  viennacl::linalg::ilut_tag)
225  {
226  L.resize(mat_block.size1(), mat_block.size2());
227  U.resize(mat_block.size1(), mat_block.size2());
228  viennacl::linalg::precondition(mat_block, L, U, tag_);
229  }
230 
231  template<typename VectorT>
232  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) const
233  {
234  detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2());
235 
236  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
237  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
238  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
239 
240  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
241  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag());
242  }
243 
244  template<typename VectorT>
245  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) const
246  {
247  detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2());
248 
249  {
250  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
251  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
252  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
253 
254  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
255  }
256 
257  {
258  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1());
259  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2());
260  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle());
261 
262  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag());
263  }
264  }
265 
266  ILUTag tag_;
267  index_vector_type block_indices_;
268  std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks;
269  std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks;
270 };
271 
272 
273 
274 
275 
280 template<typename NumericT, unsigned int AlignmentV, typename ILUTagT>
281 class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT>
282 {
283  typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType;
284 
285 public:
286  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
287 
288 
289  block_ilu_precond(MatrixType const & mat,
290  ILUTagT const & tag,
291  vcl_size_t num_blocks = 8
292  ) : tag_(tag),
293  block_indices_(num_blocks),
294  gpu_block_indices_(),
295  gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
296  gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
297  gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)),
298  L_blocks_(num_blocks),
299  U_blocks_(num_blocks)
300  {
301  // Set up vector of block indices:
302  block_indices_.resize(num_blocks);
303  for (vcl_size_t i=0; i<num_blocks; ++i)
304  {
305  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
306  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
307 
308  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
309  }
310 
311  //initialize preconditioner:
312  //std::cout << "Start CPU precond" << std::endl;
313  init(mat);
314  //std::cout << "End CPU precond" << std::endl;
315  }
316 
317  block_ilu_precond(MatrixType const & mat,
318  ILUTagT const & tag,
319  index_vector_type const & block_boundaries
320  ) : tag_(tag),
321  block_indices_(block_boundaries),
322  gpu_block_indices_(),
323  gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
324  gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
325  gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)),
326  L_blocks_(block_boundaries.size()),
327  U_blocks_(block_boundaries.size())
328  {
329  //initialize preconditioner:
330  //std::cout << "Start CPU precond" << std::endl;
331  init(mat);
332  //std::cout << "End CPU precond" << std::endl;
333  }
334 
335 
336  void apply(vector<NumericT> & vec) const
337  {
338  viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
339  vec,
340  viennacl::linalg::unit_lower_tag());
341 
342  viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
343  vec,
344  viennacl::linalg::upper_tag());
345 
346  //apply_cpu(vec);
347  }
348 
349 
350 private:
351 
352  void init(MatrixType const & A)
353  {
354  viennacl::context host_context(viennacl::MAIN_MEMORY);
355  viennacl::compressed_matrix<NumericT> mat(host_context);
356 
357  mat = A;
358 
359  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
360 
361 #ifdef VIENNACL_WITH_OPENMP
362  #pragma omp parallel for
363 #endif
364  for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
365  {
366  // Step 1: Extract blocks
367  vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first;
368  vcl_size_t block_nnz = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
369  viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context);
370 
371  detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second);
372 
373  // Step 2: Precondition blocks:
374  viennacl::switch_memory_context(L_blocks_[static_cast<vcl_size_t>(i)], host_context);
375  viennacl::switch_memory_context(U_blocks_[static_cast<vcl_size_t>(i)], host_context);
376  init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_);
377  }
378 
379  /*
380  * copy resulting preconditioner back to GPU:
381  */
382  viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size());
383  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
384  {
385  block_indices_uint.set(2*i, block_indices_[i].first);
386  block_indices_uint.set(2*i + 1, block_indices_[i].second);
387  }
388 
389  viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());
390 
391  blocks_to_device(A);
392 
393  }
394 
395  // Copy computed preconditioned blocks to OpenCL device
396  void blocks_to_device(MatrixType const & A)
397  {
398  gpu_L_trans_.resize(A.size1(), A.size2());
399  gpu_U_trans_.resize(A.size1(), A.size2());
400  gpu_D_.resize(A.size1());
401 
402  unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1());
403  unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1());
404 
405  //
406  // Count elements per row
407  //
408 #ifdef VIENNACL_WITH_OPENMP
409  #pragma omp parallel for
410 #endif
411  for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
412  {
413  vcl_size_t block_index = vcl_size_t(block_index2);
414 
415  unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);
416  unsigned int block_stop = static_cast<unsigned int>(block_indices_[block_index].second);
417 
418  unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
419  unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
420 
421  // zero row array of L:
422  std::fill(L_trans_row_buffer + block_start,
423  L_trans_row_buffer + block_stop,
424  static_cast<unsigned int>(0));
425 
426  // count number of elements per row:
427  for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
428  {
429  unsigned int col_start = L_row_buffer[row];
430  unsigned int col_end = L_row_buffer[row+1];
431 
432  for (unsigned int j = col_start; j < col_end; ++j)
433  {
434  unsigned int col = L_col_buffer[j];
435  if (col < static_cast<unsigned int>(row))
436  L_trans_row_buffer[col + block_start] += 1;
437  }
438  }
439 
441 
442  unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
443  unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
444 
445  // zero row array of U:
446  std::fill(U_trans_row_buffer + block_start,
447  U_trans_row_buffer + block_stop,
448  static_cast<unsigned int>(0));
449 
450  // count number of elements per row:
451  for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
452  {
453  unsigned int col_start = U_row_buffer[row];
454  unsigned int col_end = U_row_buffer[row+1];
455 
456  for (unsigned int j = col_start; j < col_end; ++j)
457  {
458  unsigned int col = U_col_buffer[j];
459  if (col > row)
460  U_trans_row_buffer[col + block_start] += 1;
461  }
462  }
463  }
464 
465 
466  //
467  // Exclusive scan on row buffer (feel free to add parallelization here)
468  //
469  unsigned int current_value = 0;
470  for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i)
471  {
472  unsigned int tmp = L_trans_row_buffer[i];
473  L_trans_row_buffer[i] = current_value;
474  current_value += tmp;
475  }
476  gpu_L_trans_.reserve(current_value);
477 
478  current_value = 0;
479  for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i)
480  {
481  unsigned int tmp = U_trans_row_buffer[i];
482  U_trans_row_buffer[i] = current_value;
483  current_value += tmp;
484  }
485  gpu_U_trans_.reserve(current_value);
486 
487 
488  //
489  // Fill with data
490  //
491  unsigned int * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2());
492  NumericT * L_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle());
493 
494  unsigned int * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2());
495  NumericT * U_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle());
496 
497  NumericT * D_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle());
498 
499  std::vector<unsigned int> offset_L(gpu_L_trans_.size1());
500  std::vector<unsigned int> offset_U(gpu_U_trans_.size1());
501 
502 #ifdef VIENNACL_WITH_OPENMP
503  #pragma omp parallel for
504 #endif
505  for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
506  {
507  vcl_size_t block_index = vcl_size_t(block_index2);
508  unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);
509 
510  unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
511  unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
512  NumericT const * L_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(L_blocks_[block_index].handle());
513 
514 
515  // write L_trans:
516  for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
517  {
518  unsigned int col_start = L_row_buffer[row];
519  unsigned int col_end = L_row_buffer[row+1];
520 
521  for (unsigned int j = col_start; j < col_end; ++j)
522  {
523  unsigned int col = L_col_buffer[j];
524  if (col < row)
525  {
526  unsigned int row_trans = col + block_start;
527  unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans];
528  offset_L[row_trans] += 1;
529 
530  L_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
531  L_trans_elements[k] = L_elements[j];
532  }
533  }
534  }
535 
536  unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
537  unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
538  NumericT const * U_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(U_blocks_[block_index].handle());
539 
540  // write U_trans and D:
541  for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
542  {
543  unsigned int col_start = U_row_buffer[row];
544  unsigned int col_end = U_row_buffer[row+1];
545 
546  for (unsigned int j = col_start; j < col_end; ++j)
547  {
548  unsigned int row_trans = U_col_buffer[j] + block_start;
549  unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans];
550 
551  if (row_trans == row + block_start) // entry for D
552  {
553  D_elements[row_trans] = U_elements[j];
554  }
555  else if (row_trans > row + block_start) //entry for U
556  {
557  offset_U[row_trans] += 1;
558 
559  U_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
560  U_trans_elements[k] = U_elements[j];
561  }
562  }
563  }
564 
565  }
566 
567  //
568  // Send to destination device:
569  //
570  viennacl::switch_memory_context(gpu_L_trans_, viennacl::traits::context(A));
571  viennacl::switch_memory_context(gpu_U_trans_, viennacl::traits::context(A));
572  viennacl::switch_memory_context(gpu_D_, viennacl::traits::context(A));
573  }
574 
575  void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
576  viennacl::compressed_matrix<NumericT> & L,
577  viennacl::compressed_matrix<NumericT> & U,
578  viennacl::linalg::ilu0_tag)
579  {
580  L = mat_block;
581  viennacl::linalg::precondition(L, tag_);
582  U = L; // fairly poor workaround...
583  }
584 
585  void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
586  viennacl::compressed_matrix<NumericT> & L,
587  viennacl::compressed_matrix<NumericT> & U,
588  viennacl::linalg::ilut_tag)
589  {
590  L.resize(mat_block.size1(), mat_block.size2());
591  U.resize(mat_block.size1(), mat_block.size2());
592  viennacl::linalg::precondition(mat_block, L, U, tag_);
593  }
594 
595 
596  ILUTagT tag_;
597  index_vector_type block_indices_;
598  viennacl::backend::mem_handle gpu_block_indices_;
599  viennacl::compressed_matrix<NumericT> gpu_L_trans_;
600  viennacl::compressed_matrix<NumericT> gpu_U_trans_;
601  viennacl::vector<NumericT> gpu_D_;
602 
603  std::vector<MatrixType> L_blocks_;
604  std::vector<MatrixType> U_blocks_;
605 };
606 
607 
608 } // namespace linalg
609 } // namespace viennacl
610 
611 
612 
613 
614 #endif
615 
616 
617 
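Usage sketch (not part of block_ilu.hpp): the snippet below shows how the block ILU0 preconditioner defined above can be combined with a preconditioned GMRES solve. The assembled system is a placeholder 1D Poisson-like matrix, the convenience header viennacl/linalg/ilu.hpp is assumed to pull in block_ilu.hpp, and the solver parameters are illustrative only.

#include <map>
#include <vector>

#include "viennacl/compressed_matrix.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/ilu.hpp"    // assumed convenience header providing block_ilu_precond
#include "viennacl/linalg/gmres.hpp"

int main()
{
  unsigned int const N = 1000;

  // Assemble a sparse test system on the host (placeholder values):
  std::vector<std::map<unsigned int, double> > cpu_A(N);
  std::vector<double> cpu_rhs(N, 1.0);
  for (unsigned int i = 0; i < N; ++i)
  {
    cpu_A[i][i] = 4.0;
    if (i > 0)     cpu_A[i][i-1] = -1.0;
    if (i + 1 < N) cpu_A[i][i+1] = -1.0;
  }

  // Copy to ViennaCL objects (host or device backend, depending on the build):
  viennacl::compressed_matrix<double> A(N, N);
  viennacl::vector<double> rhs(N);
  viennacl::copy(cpu_A, A);
  viennacl::copy(cpu_rhs.begin(), cpu_rhs.end(), rhs.begin());

  // Block ILU0 preconditioner with the default of 8 diagonal blocks:
  viennacl::linalg::block_ilu_precond<viennacl::compressed_matrix<double>,
                                      viennacl::linalg::ilu0_tag> precond(A, viennacl::linalg::ilu0_tag(), 8);

  // Preconditioned GMRES solve (tolerance, max. iterations, Krylov dimension):
  viennacl::vector<double> x = viennacl::linalg::solve(A, rhs, viennacl::linalg::gmres_tag(1e-8, 200, 30), precond);

  (void)x;
  return 0;
}

The second constructor, which takes an index_vector_type of [start, stop) pairs, allows user-defined block boundaries instead of the uniform splitting used by the constructor shown here.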