ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/linalg/gmres.hpp Source File

Go to the documentation of this file.
00001 #ifndef VIENNACL_GMRES_HPP_
00002 #define VIENNACL_GMRES_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00024 #include <vector>
00025 #include <cmath>
00026 #include <limits>
00027 #include "viennacl/forwards.h"
00028 #include "viennacl/tools/tools.hpp"
00029 #include "viennacl/linalg/norm_2.hpp"
00030 #include "viennacl/linalg/prod.hpp"
00031 #include "viennacl/linalg/inner_prod.hpp"
00032 #include "viennacl/traits/clear.hpp"
00033 #include "viennacl/traits/size.hpp"
00034 #include "viennacl/meta/result_of.hpp"
00035 
00036 namespace viennacl
00037 {
00038   namespace linalg
00039   {
00040     
00043     class gmres_tag       //generalized minimum residual
00044     {
00045       public:
00052         gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20) 
00053          : _tol(tol), _iterations(max_iterations), _krylov_dim(krylov_dim), iters_taken_(0) {};
00054         
00056         double tolerance() const { return _tol; }
00058         unsigned int max_iterations() const { return _iterations; }
00060         unsigned int krylov_dim() const { return _krylov_dim; }
00062         unsigned int max_restarts() const
00063         { 
00064           unsigned int ret = _iterations / _krylov_dim;
00065           if (ret > 0 && (ret * _krylov_dim == _iterations) )
00066             return ret - 1;
00067           return ret;
00068         }
00069         
00071         unsigned int iters() const { return iters_taken_; }
00073         void iters(unsigned int i) const { iters_taken_ = i; }
00074         
00076         double error() const { return last_error_; }
00078         void error(double e) const { last_error_ = e; }
00079         
00080       private:
00081         double _tol;
00082         unsigned int _iterations;
00083         unsigned int _krylov_dim;
00084         
00085         //return values from solver
00086         mutable unsigned int iters_taken_;
00087         mutable double last_error_;
00088     };
00089     
00090     namespace
00091     {
00092       
00093       template <typename SRC_VECTOR, typename DEST_VECTOR>
00094       void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, unsigned int len)
00095       {
00096         for (unsigned int i=0; i<len; ++i)
00097           dest[i] = src[i];
00098       }
00099 
00100       template <typename ScalarType, typename DEST_VECTOR>
00101       void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, unsigned int len)
00102       {
00103         viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00104       }
00105 
00106       template <typename ScalarType>
00107       void gmres_copy_helper(viennacl::vector<ScalarType> const & src, viennacl::vector<ScalarType> & dest, unsigned int len)
00108       {
00109         viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00110       }
00111 
00112     }
00113 
00124     template <typename MatrixType, typename VectorType, typename PreconditionerType>
00125     VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
00126     {
00127       typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
00128       typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00129       unsigned int problem_size = viennacl::traits::size(rhs);
00130       VectorType result(problem_size);
00131       viennacl::traits::clear(result);
00132       unsigned int krylov_dim = tag.krylov_dim();
00133       if (problem_size < tag.krylov_dim())
00134         krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
00135       
00136       VectorType res(problem_size);
00137       VectorType v_k_tilde(problem_size);
00138       VectorType v_k_tilde_temp(problem_size);
00139       
00140       std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
00141       std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
00142       std::vector<VectorType> U(krylov_dim);
00143 
00144       const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1);    //representing the scalar '-1' on the GPU. Prevents blocking write operations
00145       const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1);    //representing the scalar '1' on the GPU. Prevents blocking write operations
00146       const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2);    //representing the scalar '2' on the GPU. Prevents blocking write operations
00147       
00148       CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
00149       
00150       unsigned int k;
00151       for (k = 0; k < krylov_dim; ++k)
00152       {
00153         R[k].resize(tag.krylov_dim()); 
00154         viennacl::traits::resize(U[k], problem_size);
00155       }
00156 
00157       //std::cout << "Starting GMRES..." << std::endl;
00158       tag.iters(0);
00159       
00160       for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
00161       {
00162         //std::cout << "-- GMRES Start " << it << " -- " << std::endl;
00163         
00164         res = rhs;
00165         res -= viennacl::linalg::prod(matrix, result);  //initial guess zero
00166         precond.apply(res);
00167         //std::cout << "Residual: " << res << std::endl;
00168         
00169         CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res); 
00170         CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
00171         //std::cout << "rho_0: " << rho_0 << std::endl;
00172 
00173         if (rho_0 / norm_rhs < tag.tolerance() || (norm_rhs == CPU_ScalarType(0.0)) )
00174         {
00175           //std::cout << "Allowed Error reached at begin of loop" << std::endl;
00176           tag.error(rho_0 / norm_rhs);
00177           return result;
00178         }
00179 
00180         res /= rho_0;
00181         //std::cout << "Normalized Residual: " << res << std::endl;
00182         
00183         for (k=0; k<krylov_dim; ++k)
00184         {
00185           viennacl::traits::clear(R[k]);
00186           viennacl::traits::clear(U[k]);
00187           R[k].resize(krylov_dim); 
00188           viennacl::traits::resize(U[k], problem_size);
00189         }
00190 
00191         for (k = 0; k < krylov_dim; ++k)
00192         {
00193           tag.iters( tag.iters() + 1 ); //increase iteration counter
00194 
00195           //compute v_k = A * v_{k-1} via Householder matrices
00196           if (k == 0)
00197           {
00198             v_k_tilde = viennacl::linalg::prod(matrix, res);
00199             precond.apply(v_k_tilde);
00200           }
00201           else
00202           {
00203             viennacl::traits::clear(v_k_tilde);
00204             v_k_tilde[k-1] = gpu_scalar_1;
00205             //Householder rotations part 1
00206             for (int i = k-1; i > -1; --i)
00207               v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00208 
00209             v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
00210             precond.apply(v_k_tilde_temp);
00211             v_k_tilde = v_k_tilde_temp;
00212 
00213             //Householder rotations part 2
00214             for (unsigned int i = 0; i < k; ++i)
00215               v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00216           }
00217           
00218           //std::cout << "v_k_tilde: " << v_k_tilde << std::endl;
00219 
00220           viennacl::traits::clear(U[k]);
00221           viennacl::traits::resize(U[k], problem_size);
00222           //copy first k entries from v_k_tilde to U[k]:
00223           gmres_copy_helper(v_k_tilde, U[k], k);
00224           
00225           U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) );
00226 
00227           if (fabs(U[k][k]) < CPU_ScalarType(10 * std::numeric_limits<CPU_ScalarType>::epsilon()))
00228             break; //Note: Solution is essentially (up to round-off error) already in Krylov space. No need to proceed.
00229           
00230           //copy first k+1 entries from U[k] to R[k]
00231           gmres_copy_helper(U[k], R[k], k+1);
00232           
00233           U[k] -= v_k_tilde;
00234           //std::cout << "U[k] before normalization: " << U[k] << std::endl;
00235           U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] );
00236           //std::cout << "Householder vector U[k]: " << U[k] << std::endl;
00237           
00238           //DEBUG: Make sure that P_k v_k_tilde equals (rho_{1,k}, ... , rho_{k,k}, 0, 0 )
00239 #ifdef VIENNACL_GMRES_DEBUG
00240           std::cout << "P_k v_k_tilde: " << (v_k_tilde - 2.0 * U[k] * inner_prod(U[k], v_k_tilde)) << std::endl;
00241           std::cout << "R[k]: [" << R[k].size() << "](";
00242           for (size_t i=0; i<R[k].size(); ++i)
00243             std::cout << R[k][i] << ",";
00244           std::cout << ")" << std::endl;
00245 #endif
00246           //std::cout << "P_k res: " << (res - 2.0 * U[k] * inner_prod(U[k], res)) << std::endl;
00247           res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2);
00248           //std::cout << "zeta_k: " << viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2 << std::endl;
00249           //std::cout << "Updated res: " << res << std::endl;
00250 
00251 #ifdef VIENNACL_GMRES_DEBUG
00252           VectorType v1(U[k].size()); v1.clear(); v1.resize(U[k].size());
00253           v1(0) = 1.0;
00254           v1 -= U[k] * (viennacl::linalg::inner_prod( U[k], v1 ) * gpu_scalar_2);
00255           std::cout << "v1: " << v1 << std::endl;
00256           boost::numeric::ublas::matrix<ScalarType> P = -2.0 * outer_prod(U[k], U[k]);
00257           P(0,0) += 1.0; P(1,1) += 1.0; P(2,2) += 1.0;
00258           std::cout << "P: " << P << std::endl;
00259 #endif
00260           
00261           if (res[k] > rho) //machine precision reached
00262             res[k] = rho;
00263 
00264           if (res[k] < -1.0 * rho) //machine precision reached
00265             res[k] = -1.0 * rho;
00266           
00267           projection_rhs[k] = res[k];
00268           
00269           rho *= std::sin( std::acos(projection_rhs[k] / rho) );
00270           
00271 #ifdef VIENNACL_GMRES_DEBUG
00272           std::cout << "k-th component of r: " << res[k] << std::endl;
00273           std::cout << "New rho (norm of res): " << rho << std::endl;
00274 #endif        
00275 
00276           if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())
00277           {
00278             //std::cout << "Krylov space big enough" << endl;
00279             tag.error( std::fabs(rho*rho_0 / norm_rhs) );
00280             ++k;
00281             break;
00282           }
00283           
00284           //std::cout << "Current residual: " << rho * rho_0 << std::endl;
00285           //std::cout << " - End of Krylov space setup - " << std::endl;
00286         } // for k
00287         
00288 #ifdef VIENNACL_GMRES_DEBUG
00289         //inplace solution of the upper triangular matrix:
00290         std::cout << "Upper triangular system:" << std::endl;
00291         std::cout << "Size of Krylov space: " << k << std::endl;
00292         for (size_t i=0; i<k; ++i)
00293         {
00294           for (size_t j=0; j<k; ++j)
00295           {
00296             std::cout << R[j][i] << ", ";
00297           }
00298           std::cout << " | " << projection_rhs[i] << std::endl;
00299         }
00300 #endif        
00301         
00302         for (int i=k-1; i>-1; --i)
00303         {
00304           for (unsigned int j=i+1; j<k; ++j)
00305             //temp_rhs[i] -= R[i][j] * temp_rhs[j];   //if R is not transposed
00306             projection_rhs[i] -= R[j][i] * projection_rhs[j];     //R is transposed
00307             
00308           projection_rhs[i] /= R[i][i];
00309         }
00310         
00311 #ifdef VIENNACL_GMRES_DEBUG
00312         std::cout << "Result of triangular solver: ";
00313         for (size_t i=0; i<k; ++i)
00314           std::cout << projection_rhs[i] << ", ";
00315         std::cout << std::endl;
00316 #endif        
00317         res *= projection_rhs[0];
00318         
00319         if (k > 0)
00320         {
00321           for (unsigned int i = 0; i < k-1; ++i)
00322           {
00323             res[i] += projection_rhs[i+1];
00324           }
00325         }
00326 
00327         for (int i = k-1; i > -1; --i)
00328           res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2);
00329 
00330         res *= rho_0;
00331         result += res;
00332 
00333         if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() )
00334         {
00335           //std::cout << "Allowed Error reached at end of loop" << std::endl;
00336           tag.error(std::fabs(rho*rho_0 / norm_rhs));
00337           return result;
00338         }
00339 
00340         //res = rhs;
00341         //res -= viennacl::linalg::prod(matrix, result);
00342         //std::cout << "norm_2(r)=" << norm_2(r) << std::endl;
00343         //std::cout << "std::abs(rho*rho_0)=" << std::abs(rho*rho_0) << std::endl;
00344         //std::cout << r << std::endl; 
00345 
00346         tag.error(std::fabs(rho*rho_0));
00347       }
00348 
00349       return result;
00350     }
00351 
00354     template <typename MatrixType, typename VectorType>
00355     VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
00356     {
00357       return solve(matrix, rhs, tag, no_precond());
00358     }
00359 
00360 
00361   }
00362 }
00363 
00364 #endif