ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/linalg/amg.hpp Source File

Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_AMG_HPP_
00002 #define VIENNACL_LINALG_AMG_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00026 #include <boost/numeric/ublas/matrix.hpp>
00027 #include <boost/numeric/ublas/lu.hpp>
00028 #include <boost/numeric/ublas/operation.hpp>
00029 #include <boost/numeric/ublas/vector_proxy.hpp>
00030 #include <boost/numeric/ublas/matrix_proxy.hpp>
00031 #include <boost/numeric/ublas/vector.hpp>
00032 #include <boost/numeric/ublas/triangular.hpp> 
00033 #include <vector>
00034 #include <cmath>
00035 #include "viennacl/forwards.h"
00036 #include "viennacl/tools/tools.hpp"
00037 #include "viennacl/linalg/prod.hpp"
00038 #include "viennacl/linalg/direct_solve.hpp"
00039 
00040 #include "viennacl/linalg/detail/amg/amg_base.hpp"
00041 #include "viennacl/linalg/detail/amg/amg_coarse.hpp"
00042 #include "viennacl/linalg/detail/amg/amg_interpol.hpp"
00043 
00044 #include <map>
00045 
00046 #ifdef _OPENMP
00047  #include <omp.h>
00048 #endif
00049 
00050 #include "viennacl/linalg/detail/amg/amg_debug.hpp"
00051 
00052 #define VIENNACL_AMG_COARSE_LIMIT 50
00053 #define VIENNACL_AMG_MAX_LEVELS 100
00054 
00055 namespace viennacl
00056 {
00057   namespace linalg
00058   {    
00059     typedef detail::amg::amg_tag          amg_tag;
00060     
00061     
00062     
00070     template <typename InternalType1, typename InternalType2>
00071     void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00072     {
00073       typedef typename InternalType1::value_type SparseMatrixType;
00074       typedef typename InternalType2::value_type PointVectorType;     
00075       typedef typename SparseMatrixType::value_type ScalarType;   
00076       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00077       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00078       
00079       unsigned int i, iterations, c_points, f_points;
00080       detail::amg::amg_slicing<InternalType1,InternalType2> Slicing;
00081       
00082       // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process.
00083       iterations = tag.get_coarselevels();
00084       if (iterations == 0)
00085         iterations = VIENNACL_AMG_MAX_LEVELS;
00086              
00087       // For parallel coarsenings build data structures (number of threads set automatically).
00088       if (tag.get_coarse() == VIENNACL_AMG_COARSE_RS0 || tag.get_coarse() == VIENNACL_AMG_COARSE_RS3)
00089         Slicing.init(iterations);
00090       
00091       for (i=0; i<iterations; ++i)
00092       {  
00093         // Initialize Pointvector on level i and construct points.
00094         Pointvector[i] = PointVectorType(A[i].size1());
00095         Pointvector[i].init_points();
00096         
00097         // Construct C and F points on coarse level (i is fine level, i+1 coarse level).
00098         detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag);
00099 
00100         // Calculate number of C and F points on level i.
00101         c_points = Pointvector[i].get_cpoints();
00102         f_points = Pointvector[i].get_fpoints();
00103 
00104         #if defined (DEBUG) //or defined(DEBUGBENCH) 
00105         std::cout << "Level " << i << ": ";
00106         std::cout << "No of C points = " << c_points << ", ";
00107         std::cout << "No of F points = " << f_points << std::endl;
00108         #endif
00109         
00110         // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i.
00111         if (c_points == 0 || f_points == 0)
00112           break;
00113           
00114         // Construct interpolation matrix for level i.
00115         detail::amg::amg_interpol (i, A, P, Pointvector, tag);
00116         
00117         // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
00118         detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);
00119         
00120         // Test triple matrix product. Very slow for large matrix sizes (ublas).
00121         // test_triplematprod(A[i],P[i],A[i+1]);
00122         
00123         Pointvector[i].delete_points();
00124         
00125         #ifdef DEBUG
00126         std::cout << "Coarse Grid Operator Matrix:" << std::endl;
00127         printmatrix (A[i+1]);
00128         #endif  
00129         
00130         // If Limit of coarse points is reached then stop. Coarsest level is level i+1.
00131         if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
00132         {
00133           tag.set_coarselevels(i+1);
00134           return;
00135         }
00136       }
00137       tag.set_coarselevels(i);  
00138     }
00139     
00148     template <typename MatrixType, typename InternalType1, typename InternalType2>
00149     void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
00150     {
00151       typedef typename MatrixType::value_type ScalarType;
00152       typedef typename InternalType1::value_type SparseMatrixType;
00153       
00154       if (tag.get_coarselevels() > 0)
00155       {
00156         A.resize(tag.get_coarselevels()+1);
00157         P.resize(tag.get_coarselevels());
00158         Pointvector.resize(tag.get_coarselevels());
00159       }
00160       else
00161       {
00162         A.resize(VIENNACL_AMG_MAX_LEVELS+1);
00163         P.resize(VIENNACL_AMG_MAX_LEVELS);
00164         Pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
00165       }
00166       
00167       // Insert operator matrix as operator for finest level.
00168       SparseMatrixType A0 (mat);
00169       A.insert_element (0, A0);  
00170     }
00171     
00181     template <typename InternalType1, typename InternalType2>
00182     void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00183     {  
00184       typedef typename InternalType1::value_type MatrixType;
00185       
00186       // Resize internal data structures to actual size.
00187       A.resize(tag.get_coarselevels()+1);
00188       P.resize(tag.get_coarselevels());
00189       R.resize(tag.get_coarselevels());
00190       
00191       // Transform into matrix type.
00192       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00193       {
00194         A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00195         A[i] = A_setup[i];
00196       }
00197       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00198       {
00199         P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00200         P[i] = P_setup[i];
00201       }
00202       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00203       {
00204         R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00205         P_setup[i].set_trans(true);
00206         R[i] = P_setup[i];
00207         P_setup[i].set_trans(false);
00208       }
00209     }
00210     
00220     template <typename InternalType1, typename InternalType2>
00221     void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
00222     {
00223       typedef typename InternalType1::value_type MatrixType;
00224       typedef typename InternalType2::value_type::value_type ScalarType;
00225       
00226       // Resize internal data structures to actual size.
00227       A.resize(tag.get_coarselevels()+1);
00228       P.resize(tag.get_coarselevels());
00229       R.resize(tag.get_coarselevels());
00230       
00231       // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>.
00232       for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
00233       {
00234         A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
00235         viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
00236       }
00237       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00238       {
00239         P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
00240         viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]);
00241         //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]);
00242       }
00243       for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
00244       {
00245         R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
00246         P_setup[i].set_trans(true);
00247         viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]);
00248         P_setup[i].set_trans(false);
00249       }
00250     }
00251     
00260     template <typename InternalVectorType, typename SparseMatrixType>
00261     void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag)
00262     {        
00263       typedef typename InternalVectorType::value_type VectorType;
00264       
00265       result.resize(tag.get_coarselevels()+1);
00266       rhs.resize(tag.get_coarselevels()+1);
00267       residual.resize(tag.get_coarselevels());
00268             
00269       for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
00270       {
00271         result[level] = VectorType(A[level].size1());
00272         result[level].clear();
00273         rhs[level] = VectorType(A[level].size1());
00274         rhs[level].clear();
00275       }
00276       for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
00277       {
00278         residual[level] = VectorType(A[level].size1());
00279         residual[level].clear();
00280       }
00281     }
00289     template <typename ScalarType, typename SparseMatrixType>
00290     void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<ScalarType> & Permutation, SparseMatrixType const & A)
00291     {
00292       typedef typename SparseMatrixType::const_iterator1 ConstRowIterator;
00293       typedef typename SparseMatrixType::const_iterator2 ConstColIterator;
00294     
00295       // Copy to operator matrix. Needed 
00296       op.resize(A.size1(),A.size2(),false);
00297       for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
00298         for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00299           op (col_iter.index1(), col_iter.index2()) = *col_iter;
00300       
00301       // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()!
00302       Permutation = boost::numeric::ublas::permutation_matrix<ScalarType> (op.size1());
00303       boost::numeric::ublas::lu_factorize(op,Permutation);
00304     }
00305     
00308     template <typename MatrixType>
00309     class amg_precond
00310     {
00311       typedef typename MatrixType::value_type ScalarType;
00312       typedef boost::numeric::ublas::vector<ScalarType> VectorType;
00313       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00314       typedef detail::amg::amg_pointvector PointVectorType;
00315       
00316       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00317       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00318       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00319       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00320       
00321       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00322       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00323       boost::numeric::ublas::vector <MatrixType> A;
00324       boost::numeric::ublas::vector <MatrixType> P;
00325       boost::numeric::ublas::vector <MatrixType> R;
00326       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00327        
00328       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00329       mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;  
00330       
00331       mutable boost::numeric::ublas::vector <VectorType> result;
00332       mutable boost::numeric::ublas::vector <VectorType> rhs;
00333       mutable boost::numeric::ublas::vector <VectorType> residual;
00334       
00335       mutable bool done_init_apply;
00336           
00337       amg_tag _tag;
00338     public:
00339     
00340       amg_precond(): Permutation(0) {}
00346       amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0)
00347       {
00348         _tag = tag;
00349         // Initialize data structures.
00350         amg_init (mat,A_setup,P_setup,Pointvector,_tag);
00351         
00352         done_init_apply = false;
00353       }
00354       
00357       void setup()
00358       {
00359         // Start setup phase.
00360         amg_setup(A_setup,P_setup,Pointvector,_tag);
00361         // Transform to CPU-Matrixtype for precondition phase.
00362         amg_transform_cpu(A,P,R,A_setup,P_setup,_tag);
00363         
00364         done_init_apply = false;
00365       }
00366       
00371       void init_apply() const
00372       {
00373         // Setup precondition phase (Data structures).
00374         amg_setup_apply(result,rhs,residual,A_setup,_tag);
00375         // Do LU factorization for direct solve.
00376         amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);
00377         
00378         done_init_apply = true;
00379       }
00380       
00386       template <typename VectorType>
00387       ScalarType calc_complexity(VectorType & avgstencil)
00388       {
00389         avgstencil = VectorType (_tag.get_coarselevels()+1);
00390         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00391           
00392         for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
00393         {
00394           level_coefficients = 0;
00395           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00396           { 
00397             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00398             {
00399               if (level == 0)
00400                 systemmat_nonzero++;
00401               nonzero++;
00402               level_coefficients++;
00403             }
00404           }
00405           avgstencil[level] = level_coefficients/(double)A_setup[level].size1();
00406         }
00407         return nonzero/static_cast<double>(systemmat_nonzero);
00408       }
00409 
00414       template <typename VectorType>
00415       void apply(VectorType & vec) const
00416       {   
00417         // Build data structures and do lu factorization before first iteration step.
00418         if (!done_init_apply)
00419           init_apply();
00420         
00421         int level;
00422         
00423         // Precondition operation (Yang, p.3)
00424         rhs[0] = vec;
00425         for (level=0; level <(signed)_tag.get_coarselevels(); level++)
00426         {    
00427           result[level].clear();
00428           
00429           // Apply Smoother _presmooth times.
00430           smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);    
00431           
00432           #ifdef DEBUG
00433           std::cout << "After presmooth:" << std::endl;
00434           printvector(result[level]);
00435           #endif
00436 
00437           // Compute residual.
00438           residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]);
00439           
00440           #ifdef DEBUG          
00441           std::cout << "Residual:" << std::endl;
00442           printvector(residual[level]);
00443           #endif
00444           
00445           // Restrict to coarse level. Restricted residual is RHS of coarse level.
00446           rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]);
00447           
00448           #ifdef DEBUG
00449           std::cout << "Restricted Residual: " << std::endl;
00450           printvector(rhs[level+1]);
00451           #endif
00452         }
00453           
00454         // On highest level use direct solve to solve equation.
00455         result[level] = rhs[level];
00456         boost::numeric::ublas::lu_substitute(op,Permutation,result[level]);
00457 
00458         #ifdef DEBUG
00459         std::cout << "After direct solve: " << std::endl;
00460         printvector (result[level]);
00461         #endif
00462           
00463         for (level=_tag.get_coarselevels()-1; level >= 0; level--)
00464         {       
00465           #ifdef DEBUG
00466           std::cout << "Coarse Error: " << std::endl;
00467           printvector(result[level+1]);
00468           #endif
00469           
00470           // Interpolate error to fine level. Correct solution by adding error.
00471           result[level] += boost::numeric::ublas::prod (P[level], result[level+1]);
00472               
00473           #ifdef DEBUG
00474           std::cout << "Corrected Result: " << std::endl;
00475           printvector (result[level]);
00476           #endif
00477           
00478           // Apply Smoother _postsmooth times.
00479           smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);
00480           
00481           #ifdef DEBUG
00482           std::cout << "After postsmooth: " << std::endl;
00483           printvector (result[level]);
00484           #endif
00485         }    
00486         vec = result[0];
00487       }
00488       
00495       template <typename VectorType>
00496       void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const
00497       {
00498         VectorType old_result (x.size());
00499         unsigned int index;
00500         ScalarType sum = 0, diag = 1;
00501         
00502         for (int i=0; i<iterations; ++i)
00503         {
00504           old_result = x;
00505           x.clear();
00506 #ifdef _OPENMP
00507           #pragma omp parallel for private (sum,diag) shared (rhs,x)
00508 #endif          
00509           for (index=0; index<A_setup[level].size1(); ++index)  
00510           {
00511             InternalConstRowIterator row_iter = A_setup[level].begin1();
00512             row_iter += index; 
00513             sum = 0;
00514             diag = 1;
00515             for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00516             {
00517               if (col_iter.index1() == col_iter.index2())
00518                 diag = *col_iter;
00519               else
00520                 sum += *col_iter * old_result[col_iter.index2()];
00521             }
00522             x[index]= _tag.get_jacobiweight() * (rhs[index] - sum) / diag + (1-_tag.get_jacobiweight()) * old_result[index];
00523           }
00524         }
00525       }
00526       
00527       amg_tag & tag() { return _tag; }
00528     };
00529     
00534     template <typename ScalarType, unsigned int MAT_ALIGNMENT>
00535     class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> >
00536     {
00537       typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
00538       typedef viennacl::vector<ScalarType> VectorType;
00539       typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
00540       typedef detail::amg::amg_pointvector PointVectorType;
00541       
00542       typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
00543       typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
00544       typedef typename SparseMatrixType::iterator1 InternalRowIterator;
00545       typedef typename SparseMatrixType::iterator2 InternalColIterator;
00546       
00547       boost::numeric::ublas::vector <SparseMatrixType> A_setup;
00548       boost::numeric::ublas::vector <SparseMatrixType> P_setup;
00549       boost::numeric::ublas::vector <MatrixType> A;
00550       boost::numeric::ublas::vector <MatrixType> P;
00551       boost::numeric::ublas::vector <MatrixType> R;
00552       boost::numeric::ublas::vector <PointVectorType> Pointvector;
00553       
00554       mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
00555       mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;  
00556       
00557       mutable boost::numeric::ublas::vector <VectorType> result;
00558       mutable boost::numeric::ublas::vector <VectorType> rhs;
00559       mutable boost::numeric::ublas::vector <VectorType> residual;
00560           
00561       mutable bool done_init_apply;
00562     
00563       amg_tag _tag;
00564       
00565     public:
00566       
00567       amg_precond(): Permutation(0) {}
00568       
00574       amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0)
00575       {
00576         _tag = tag;
00577         
00578         // Copy to CPU. Internal structure of sparse matrix is used for copy operation.
00579         std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1());
00580         viennacl::copy(mat, mat2);
00581         
00582         // Initialize data structures.
00583         amg_init (mat2,A_setup,P_setup,Pointvector,_tag);
00584           
00585         done_init_apply = false;
00586       }
00587       
00590       void setup()
00591       {
00592         // Start setup phase.
00593         amg_setup(A_setup,P_setup,Pointvector,_tag);  
00594         // Transform to GPU-Matrixtype for precondition phase.
00595         amg_transform_gpu(A,P,R,A_setup,P_setup,_tag);  
00596         
00597         done_init_apply = false;
00598       }
00599       
00604       void init_apply() const
00605       {
00606         // Setup precondition phase (Data structures).
00607         amg_setup_apply(result,rhs,residual,A_setup,_tag);
00608         // Do LU factorization for direct solve.
00609         amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);
00610         
00611         done_init_apply = true;
00612       }
00613 
00619       template <typename VectorType>
00620       ScalarType calc_complexity(VectorType & avgstencil)
00621       {
00622         avgstencil = VectorType (_tag.get_coarselevels()+1);
00623         unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
00624         
00625         for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
00626         {
00627           level_coefficients = 0;
00628           for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
00629           { 
00630             for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
00631             {
00632               if (level == 0)
00633                 systemmat_nonzero++;
00634               nonzero++;
00635               level_coefficients++;
00636             }
00637           }
00638           avgstencil[level] = level_coefficients/(double)A[level].size1();
00639         }
00640         return nonzero/static_cast<double>(systemmat_nonzero);
00641       }
00642 
00647       template <typename VectorType>
00648       void apply(VectorType & vec) const
00649       {
00650         if (!done_init_apply)
00651           init_apply();  
00652         
00653         int level;
00654         
00655         // Precondition operation (Yang, p.3).
00656         rhs[0] = vec;
00657         for (level=0; level <(signed)_tag.get_coarselevels(); level++)
00658         {    
00659           result[level].clear();
00660           
00661           // Apply Smoother _presmooth times.
00662           smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);
00663 
00664           #ifdef DEBUG
00665           std::cout << "After presmooth: " << std::endl;
00666           printvector(result[level]);
00667           #endif
00668           
00669           // Compute residual.
00670           residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]);
00671           
00672           #ifdef DEBUG             
00673           std::cout << "Residual: " << std::endl;
00674           printvector(residual[level]);
00675           #endif
00676           
00677           // Restrict to coarse level. Result is RHS of coarse level equation.
00678           //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
00679           rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]);
00680           
00681           #ifdef DEBUG
00682           std::cout << "Restricted Residual: " << std::endl;
00683           printvector(rhs[level+1]);
00684           #endif
00685         }
00686           
00687         // On highest level use direct solve to solve equation (on the CPU)
00688         //TODO: Use GPU direct solve!
00689         result[level] = rhs[level];
00690         boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size());
00691 
00692         copy (result[level],result_cpu);
00693         boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu);
00694         copy (result_cpu, result[level]);
00695         
00696         #ifdef DEBUG
00697         std::cout << "After direct solve: " << std::endl;
00698         printvector (result[level]);
00699         #endif
00700           
00701         for (level=_tag.get_coarselevels()-1; level >= 0; level--)
00702         {   
00703           #ifdef DEBUG
00704           std::cout << "Coarse Error: " << std::endl;
00705           printvector(result[level+1]);
00706           #endif
00707           
00708           // Interpolate error to fine level and correct solution.
00709           result[level] += viennacl::linalg::prod(P[level],result[level+1]);
00710               
00711           #ifdef DEBUG
00712           std::cout << "Corrected Result: " << std::endl;
00713           printvector (result[level]);
00714           #endif
00715           
00716           // Apply Smoother _postsmooth times.
00717           smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);
00718           
00719           #ifdef DEBUG
00720           std::cout << "After postsmooth: " << std::endl;
00721           printvector (result[level]);
00722           #endif
00723         }    
00724         vec = result[0];
00725       }
00726       
00733       template <typename VectorType>
00734       void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const
00735       {     
00736         VectorType old_result (x.size());
00737   
00738   //viennacl::ocl::program & p = viennacl::ocl::current_context().add_program
00739   //          (viennacl::tools::make_double_kernel(jacobi_kernel,viennacl::ocl::current_device().info()), "jacobi_kernel");
00740   //viennacl::ocl::kernel & k = p.add_kernel("jacobi");
00741   
00742         viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::program_name(),
00743                     "jacobi");
00744   
00745         for (unsigned int i=0; i<iterations; ++i)
00746         {
00747           old_result = x;    
00748           x.clear();
00749           viennacl::ocl::enqueue(k(A[level].handle1(), A[level].handle2(), A[level].handle(),
00750                                   static_cast<ScalarType>(_tag.get_jacobiweight()), 
00751                                   old_result,
00752                                   x,
00753                                   rhs,
00754                                   static_cast<cl_uint>(rhs.size()))); 
00755           
00756         }
00757       }
00758       
00759       amg_tag & tag() { return _tag; }
00760     };
00761 
00762   }
00763 }
00764 
00765 
00766 
00767 #endif
00768