00001 #ifndef VIENNACL_LINALG_AMG_HPP_
00002 #define VIENNACL_LINALG_AMG_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00026 #include <boost/numeric/ublas/matrix.hpp>
00027 #include <boost/numeric/ublas/lu.hpp>
00028 #include <boost/numeric/ublas/operation.hpp>
00029 #include <boost/numeric/ublas/vector_proxy.hpp>
00030 #include <boost/numeric/ublas/matrix_proxy.hpp>
00031 #include <boost/numeric/ublas/vector.hpp>
00032 #include <boost/numeric/ublas/triangular.hpp>
00033 #include <vector>
00034 #include <cmath>
00035 #include "viennacl/forwards.h"
00036 #include "viennacl/tools/tools.hpp"
00037 #include "viennacl/linalg/prod.hpp"
00038 #include "viennacl/linalg/direct_solve.hpp"
00039
00040 #include "viennacl/linalg/detail/amg/amg_base.hpp"
00041 #include "viennacl/linalg/detail/amg/amg_coarse.hpp"
00042 #include "viennacl/linalg/detail/amg/amg_interpol.hpp"
00043
00044 #include <map>
00045
00046 #ifdef _OPENMP
00047 #include <omp.h>
00048 #endif
00049
00050 #include "viennacl/linalg/detail/amg/amg_debug.hpp"
00051
00052 #define VIENNACL_AMG_COARSE_LIMIT 50
00053 #define VIENNACL_AMG_MAX_LEVELS 100
00054
00055 namespace viennacl
00056 {
00057 namespace linalg
00058 {
00059 typedef detail::amg::amg_tag amg_tag;
00060
00061
00062
00070 template <typename InternalType1, typename InternalType2>
00071 void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00072 {
00073 typedef typename InternalType1::value_type SparseMatrixType;
00074 typedef typename InternalType2::value_type PointVectorType;
00075 typedef typename SparseMatrixType::value_type ScalarType;
00076 typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00077 typedef typename SparseMatrixType::iterator2 InternalColIterator;
00078
00079 unsigned int i, iterations, c_points, f_points;
00080 detail::amg::amg_slicing<InternalType1,InternalType2> Slicing;
00081
00082
00083 iterations = tag.get_coarselevels();
00084 if (iterations == 0)
00085 iterations = VIENNACL_AMG_MAX_LEVELS;
00086
00087
00088 if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3)
00089 Slicing.init(iterations);
00090
00091 for (i=0; i<iterations; ++i)
00092 {
00093
00094 Pointvector[i] = PointVectorType(A[i].size1());
00095 Pointvector[i].init_points();
00096
00097
00098 detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag);
00099
00100
00101 c_points = Pointvector[i].get_cpoints();
00102 f_points = Pointvector[i].get_fpoints();
00103
00104 #if defined (DEBUG) //or defined(DEBUGBENCH)
00105 std::cout << "Level " << i << ": ";
00106 std::cout << "No of C points = " << c_points << ", ";
00107 std::cout << "No of F points = " << f_points << std::endl;
00108 #endif
00109
00110
00111 if (c_points == 0 || f_points == 0)
00112 break;
00113
00114
00115 detail::amg::amg_interpol (i, A, P, Pointvector, tag);
00116
00117
00118 detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);
00119
00120
00121
00122
00123 Pointvector[i].delete_points();
00124
00125 #ifdef DEBUG
00126 std::cout << "Coarse Grid Operator Matrix:" << std::endl;
00127 printmatrix (A[i+1]);
00128 #endif
00129
00130
00131 if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
00132 {
00133 tag.set_coarselevels(i+1);
00134 return;
00135 }
00136 }
00137 tag.set_coarselevels(i);
00138 }
00139
00148 template <typename MatrixType, typename InternalType1, typename InternalType2>
00149 void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00150 {
00151 typedef typename MatrixType::value_type ScalarType;
00152 typedef typename InternalType1::value_type SparseMatrixType;
00153
00154 if (tag.get_coarselevels() > 0)
00155 {
00156 A.resize(tag.get_coarselevels()+1);
00157 P.resize(tag.get_coarselevels());
00158 Pointvector.resize(tag.get_coarselevels());
00159 }
00160 else
00161 {
00162 A.resize(VIENNACL_AMG_MAX_LEVELS+1);
00163 P.resize(VIENNACL_AMG_MAX_LEVELS);
00164 Pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
00165 }
00166
00167
00168 SparseMatrixType A0 (mat);
00169 A.insert_element (0, A0);
00170 }
00171
00181 template <typename InternalType1, typename InternalType2>
00182 void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00183 {
00184 typedef typename InternalType1::value_type MatrixType;
00185
00186
00187 A.resize(tag.get_coarselevels()+1);
00188 P.resize(tag.get_coarselevels());
00189 R.resize(tag.get_coarselevels());
00190
00191
00192 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00193 {
00194 A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00195 A[i] = A_setup[i];
00196 }
00197 for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00198 {
00199 P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00200 P[i] = P_setup[i];
00201 }
00202 for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00203 {
00204 R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00205 P_setup[i].set_trans(true);
00206 R[i] = P_setup[i];
00207 P_setup[i].set_trans(false);
00208 }
00209 }
00210
00220 template <typename InternalType1, typename InternalType2>
00221 void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00222 {
00223 typedef typename InternalType1::value_type MatrixType;
00224 typedef typename InternalType2::value_type::value_type ScalarType;
00225
00226
00227 A.resize(tag.get_coarselevels()+1);
00228 P.resize(tag.get_coarselevels());
00229 R.resize(tag.get_coarselevels());
00230
00231
00232 for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00233 {
00234 A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00235 viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
00236 }
00237 for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00238 {
00239 P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00240 viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]);
00241
00242 }
00243 for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00244 {
00245 R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00246 P_setup[i].set_trans(true);
00247 viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]);
00248 P_setup[i].set_trans(false);
00249 }
00250 }
00251
00260 template <typename InternalVectorType, typename SparseMatrixType>
00261 void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag)
00262 {
00263 typedef typename InternalVectorType::value_type VectorType;
00264
00265 result.resize(tag.get_coarselevels()+1);
00266 rhs.resize(tag.get_coarselevels()+1);
00267 residual.resize(tag.get_coarselevels());
00268
00269 for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
00270 {
00271 result[level] = VectorType(A[level].size1());
00272 result[level].clear();
00273 rhs[level] = VectorType(A[level].size1());
00274 rhs[level].clear();
00275 }
00276 for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
00277 {
00278 residual[level] = VectorType(A[level].size1());
00279 residual[level].clear();
00280 }
00281 }
00289 template <typename ScalarType, typename SparseMatrixType>
00290 void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<ScalarType> & Permutation, SparseMatrixType const & A)
00291 {
00292 typedef typename SparseMatrixType::const_iterator1 ConstRowIterator;
00293 typedef typename SparseMatrixType::const_iterator2 ConstColIterator;
00294
00295
00296 op.resize(A.size1(),A.size2(),false);
00297 for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
00298 for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00299 op (col_iter.index1(), col_iter.index2()) = *col_iter;
00300
00301
00302 Permutation = boost::numeric::ublas::permutation_matrix<ScalarType> (op.size1());
00303 boost::numeric::ublas::lu_factorize(op,Permutation);
00304 }
00305
00308 template <typename MatrixType>
00309 class amg_precond
00310 {
00311 typedef typename MatrixType::value_type ScalarType;
00312 typedef boost::numeric::ublas::vector<ScalarType> VectorType;
00313 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00314 typedef detail::amg::amg_pointvector PointVectorType;
00315
00316 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00317 typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00318 typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00319 typedef typename SparseMatrixType::iterator2 InternalColIterator;
00320
00321 boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00322 boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00323 boost::numeric::ublas::vector <MatrixType> A;
00324 boost::numeric::ublas::vector <MatrixType> P;
00325 boost::numeric::ublas::vector <MatrixType> R;
00326 boost::numeric::ublas::vector <PointVectorType> Pointvector;
00327
00328 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00329 mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;
00330
00331 mutable boost::numeric::ublas::vector <VectorType> result;
00332 mutable boost::numeric::ublas::vector <VectorType> rhs;
00333 mutable boost::numeric::ublas::vector <VectorType> residual;
00334
00335 mutable bool done_init_apply;
00336
00337 amg_tag _tag;
00338 public:
00339
00340 amg_precond(): Permutation(0) {}
00346 amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0)
00347 {
00348 _tag = tag;
00349
00350 amg_init (mat,A_setup,P_setup,Pointvector,_tag);
00351
00352 done_init_apply = false;
00353 }
00354
00357 void setup()
00358 {
00359
00360 amg_setup(A_setup,P_setup,Pointvector,_tag);
00361
00362 amg_transform_cpu(A,P,R,A_setup,P_setup,_tag);
00363
00364 done_init_apply = false;
00365 }
00366
00371 void init_apply() const
00372 {
00373
00374 amg_setup_apply(result,rhs,residual,A_setup,_tag);
00375
00376 amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);
00377
00378 done_init_apply = true;
00379 }
00380
00386 template <typename VectorType>
00387 ScalarType calc_complexity(VectorType & avgstencil)
00388 {
00389 avgstencil = VectorType (_tag.get_coarselevels()+1);
00390 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00391
00392 for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
00393 {
00394 level_coefficients = 0;
00395 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00396 {
00397 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00398 {
00399 if (level == 0)
00400 systemmat_nonzero++;
00401 nonzero++;
00402 level_coefficients++;
00403 }
00404 }
00405 avgstencil[level] = level_coefficients/(double)A_setup[level].size1();
00406 }
00407 return nonzero/static_cast<double>(systemmat_nonzero);
00408 }
00409
00414 template <typename VectorType>
00415 void apply(VectorType & vec) const
00416 {
00417
00418 if (!done_init_apply)
00419 init_apply();
00420
00421 int level;
00422
00423
00424 rhs[0] = vec;
00425 for (level=0; level <(signed)_tag.get_coarselevels(); level++)
00426 {
00427 result[level].clear();
00428
00429
00430 smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);
00431
00432 #ifdef DEBUG
00433 std::cout << "After presmooth:" << std::endl;
00434 printvector(result[level]);
00435 #endif
00436
00437
00438 residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]);
00439
00440 #ifdef DEBUG
00441 std::cout << "Residual:" << std::endl;
00442 printvector(residual[level]);
00443 #endif
00444
00445
00446 rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]);
00447
00448 #ifdef DEBUG
00449 std::cout << "Restricted Residual: " << std::endl;
00450 printvector(rhs[level+1]);
00451 #endif
00452 }
00453
00454
00455 result[level] = rhs[level];
00456 boost::numeric::ublas::lu_substitute(op,Permutation,result[level]);
00457
00458 #ifdef DEBUG
00459 std::cout << "After direct solve: " << std::endl;
00460 printvector (result[level]);
00461 #endif
00462
00463 for (level=_tag.get_coarselevels()-1; level >= 0; level--)
00464 {
00465 #ifdef DEBUG
00466 std::cout << "Coarse Error: " << std::endl;
00467 printvector(result[level+1]);
00468 #endif
00469
00470
00471 result[level] += boost::numeric::ublas::prod (P[level], result[level+1]);
00472
00473 #ifdef DEBUG
00474 std::cout << "Corrected Result: " << std::endl;
00475 printvector (result[level]);
00476 #endif
00477
00478
00479 smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);
00480
00481 #ifdef DEBUG
00482 std::cout << "After postsmooth: " << std::endl;
00483 printvector (result[level]);
00484 #endif
00485 }
00486 vec = result[0];
00487 }
00488
00495 template <typename VectorType>
00496 void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const
00497 {
00498 VectorType old_result (x.size());
00499 unsigned int index;
00500 ScalarType sum = 0, diag = 1;
00501
00502 for (int i=0; i<iterations; ++i)
00503 {
00504 old_result = x;
00505 x.clear();
00506 #ifdef _OPENMP
00507 #pragma omp parallel for private (sum,diag) shared (rhs,x)
00508 #endif
00509 for (index=0; index<A_setup[level].size1(); ++index)
00510 {
00511 InternalConstRowIterator row_iter = A_setup[level].begin1();
00512 row_iter += index;
00513 sum = 0;
00514 diag = 1;
00515 for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00516 {
00517 if (col_iter.index1() == col_iter.index2())
00518 diag = *col_iter;
00519 else
00520 sum += *col_iter * old_result[col_iter.index2()];
00521 }
00522 x[index]= _tag.get_jacobiweight() * (rhs[index] - sum) / diag + (1-_tag.get_jacobiweight()) * old_result[index];
00523 }
00524 }
00525 }
00526
00527 amg_tag & tag() { return _tag; }
00528 };
00529
00534 template <typename ScalarType, unsigned int MAT_ALIGNMENT>
00535 class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> >
00536 {
00537 typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
00538 typedef viennacl::vector<ScalarType> VectorType;
00539 typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00540 typedef detail::amg::amg_pointvector PointVectorType;
00541
00542 typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00543 typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00544 typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00545 typedef typename SparseMatrixType::iterator2 InternalColIterator;
00546
00547 boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00548 boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00549 boost::numeric::ublas::vector <MatrixType> A;
00550 boost::numeric::ublas::vector <MatrixType> P;
00551 boost::numeric::ublas::vector <MatrixType> R;
00552 boost::numeric::ublas::vector <PointVectorType> Pointvector;
00553
00554 mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00555 mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;
00556
00557 mutable boost::numeric::ublas::vector <VectorType> result;
00558 mutable boost::numeric::ublas::vector <VectorType> rhs;
00559 mutable boost::numeric::ublas::vector <VectorType> residual;
00560
00561 mutable bool done_init_apply;
00562
00563 amg_tag _tag;
00564
00565 public:
00566
00567 amg_precond(): Permutation(0) {}
00568
00574 amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0)
00575 {
00576 _tag = tag;
00577
00578
00579 std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1());
00580 viennacl::copy(mat, mat2);
00581
00582
00583 amg_init (mat2,A_setup,P_setup,Pointvector,_tag);
00584
00585 done_init_apply = false;
00586 }
00587
00590 void setup()
00591 {
00592
00593 amg_setup(A_setup,P_setup,Pointvector,_tag);
00594
00595 amg_transform_gpu(A,P,R,A_setup,P_setup,_tag);
00596
00597 done_init_apply = false;
00598 }
00599
00604 void init_apply() const
00605 {
00606
00607 amg_setup_apply(result,rhs,residual,A_setup,_tag);
00608
00609 amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);
00610
00611 done_init_apply = true;
00612 }
00613
00619 template <typename VectorType>
00620 ScalarType calc_complexity(VectorType & avgstencil)
00621 {
00622 avgstencil = VectorType (_tag.get_coarselevels()+1);
00623 unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00624
00625 for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
00626 {
00627 level_coefficients = 0;
00628 for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00629 {
00630 for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00631 {
00632 if (level == 0)
00633 systemmat_nonzero++;
00634 nonzero++;
00635 level_coefficients++;
00636 }
00637 }
00638 avgstencil[level] = level_coefficients/(double)A[level].size1();
00639 }
00640 return nonzero/static_cast<double>(systemmat_nonzero);
00641 }
00642
00647 template <typename VectorType>
00648 void apply(VectorType & vec) const
00649 {
00650 if (!done_init_apply)
00651 init_apply();
00652
00653 int level;
00654
00655
00656 rhs[0] = vec;
00657 for (level=0; level <(signed)_tag.get_coarselevels(); level++)
00658 {
00659 result[level].clear();
00660
00661
00662 smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);
00663
00664 #ifdef DEBUG
00665 std::cout << "After presmooth: " << std::endl;
00666 printvector(result[level]);
00667 #endif
00668
00669
00670 residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]);
00671
00672 #ifdef DEBUG
00673 std::cout << "Residual: " << std::endl;
00674 printvector(residual[level]);
00675 #endif
00676
00677
00678
00679 rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]);
00680
00681 #ifdef DEBUG
00682 std::cout << "Restricted Residual: " << std::endl;
00683 printvector(rhs[level+1]);
00684 #endif
00685 }
00686
00687
00688
00689 result[level] = rhs[level];
00690 boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size());
00691
00692 copy (result[level],result_cpu);
00693 boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu);
00694 copy (result_cpu, result[level]);
00695
00696 #ifdef DEBUG
00697 std::cout << "After direct solve: " << std::endl;
00698 printvector (result[level]);
00699 #endif
00700
00701 for (level=_tag.get_coarselevels()-1; level >= 0; level--)
00702 {
00703 #ifdef DEBUG
00704 std::cout << "Coarse Error: " << std::endl;
00705 printvector(result[level+1]);
00706 #endif
00707
00708
00709 result[level] += viennacl::linalg::prod(P[level],result[level+1]);
00710
00711 #ifdef DEBUG
00712 std::cout << "Corrected Result: " << std::endl;
00713 printvector (result[level]);
00714 #endif
00715
00716
00717 smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);
00718
00719 #ifdef DEBUG
00720 std::cout << "After postsmooth: " << std::endl;
00721 printvector (result[level]);
00722 #endif
00723 }
00724 vec = result[0];
00725 }
00726
00733 template <typename VectorType>
00734 void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const
00735 {
00736 VectorType old_result (x.size());
00737
00738
00739
00740
00741
00742 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::program_name(),
00743 "jacobi");
00744
00745 for (unsigned int i=0; i<iterations; ++i)
00746 {
00747 old_result = x;
00748 x.clear();
00749 viennacl::ocl::enqueue(k(A[level].handle1(), A[level].handle2(), A[level].handle(),
00750 static_cast<ScalarType>(_tag.get_jacobiweight()),
00751 old_result,
00752 x,
00753 rhs,
00754 static_cast<cl_uint>(rhs.size())));
00755
00756 }
00757 }
00758
00759 amg_tag & tag() { return _tag; }
00760 };
00761
00762 }
00763 }
00764
00765
00766
00767 #endif
00768