00001 #ifndef VIENNACL_GMRES_HPP_
00002 #define VIENNACL_GMRES_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #include <vector>
00025 #include <cmath>
00026 #include <limits>
00027 #include "viennacl/forwards.h"
00028 #include "viennacl/tools/tools.hpp"
00029 #include "viennacl/linalg/norm_2.hpp"
00030 #include "viennacl/linalg/prod.hpp"
00031 #include "viennacl/linalg/inner_prod.hpp"
00032 #include "viennacl/traits/clear.hpp"
00033 #include "viennacl/traits/size.hpp"
00034 #include "viennacl/meta/result_of.hpp"
00035
00036 namespace viennacl
00037 {
00038 namespace linalg
00039 {
00040
00043 class gmres_tag
00044 {
00045 public:
00052 gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
00053 : _tol(tol), _iterations(max_iterations), _krylov_dim(krylov_dim), iters_taken_(0) {};
00054
00056 double tolerance() const { return _tol; }
00058 unsigned int max_iterations() const { return _iterations; }
00060 unsigned int krylov_dim() const { return _krylov_dim; }
00062 unsigned int max_restarts() const
00063 {
00064 unsigned int ret = _iterations / _krylov_dim;
00065 if (ret > 0 && (ret * _krylov_dim == _iterations) )
00066 return ret - 1;
00067 return ret;
00068 }
00069
00071 unsigned int iters() const { return iters_taken_; }
00073 void iters(unsigned int i) const { iters_taken_ = i; }
00074
00076 double error() const { return last_error_; }
00078 void error(double e) const { last_error_ = e; }
00079
00080 private:
00081 double _tol;
00082 unsigned int _iterations;
00083 unsigned int _krylov_dim;
00084
00085
00086 mutable unsigned int iters_taken_;
00087 mutable double last_error_;
00088 };
00089
00090 namespace
00091 {
00092
00093 template <typename SRC_VECTOR, typename DEST_VECTOR>
00094 void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, unsigned int len)
00095 {
00096 for (unsigned int i=0; i<len; ++i)
00097 dest[i] = src[i];
00098 }
00099
00100 template <typename ScalarType, typename DEST_VECTOR>
00101 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, unsigned int len)
00102 {
00103 viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00104 }
00105
00106 template <typename ScalarType>
00107 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, viennacl::vector<ScalarType> & dest, unsigned int len)
00108 {
00109 viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00110 }
00111
00112 }
00113
00124 template <typename MatrixType, typename VectorType, typename PreconditionerType>
00125 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
00126 {
00127 typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
00128 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
00129 unsigned int problem_size = viennacl::traits::size(rhs);
00130 VectorType result(problem_size);
00131 viennacl::traits::clear(result);
00132 unsigned int krylov_dim = tag.krylov_dim();
00133 if (problem_size < tag.krylov_dim())
00134 krylov_dim = problem_size;
00135
00136 VectorType res(problem_size);
00137 VectorType v_k_tilde(problem_size);
00138 VectorType v_k_tilde_temp(problem_size);
00139
00140 std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
00141 std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
00142 std::vector<VectorType> U(krylov_dim);
00143
00144 const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1);
00145 const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1);
00146 const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2);
00147
00148 CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
00149
00150 unsigned int k;
00151 for (k = 0; k < krylov_dim; ++k)
00152 {
00153 R[k].resize(tag.krylov_dim());
00154 viennacl::traits::resize(U[k], problem_size);
00155 }
00156
00157
00158 tag.iters(0);
00159
00160 for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
00161 {
00162
00163
00164 res = rhs;
00165 res -= viennacl::linalg::prod(matrix, result);
00166 precond.apply(res);
00167
00168
00169 CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res);
00170 CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
00171
00172
00173 if (rho_0 / norm_rhs < tag.tolerance() || (norm_rhs == CPU_ScalarType(0.0)) )
00174 {
00175
00176 tag.error(rho_0 / norm_rhs);
00177 return result;
00178 }
00179
00180 res /= rho_0;
00181
00182
00183 for (k=0; k<krylov_dim; ++k)
00184 {
00185 viennacl::traits::clear(R[k]);
00186 viennacl::traits::clear(U[k]);
00187 R[k].resize(krylov_dim);
00188 viennacl::traits::resize(U[k], problem_size);
00189 }
00190
00191 for (k = 0; k < krylov_dim; ++k)
00192 {
00193 tag.iters( tag.iters() + 1 );
00194
00195
00196 if (k == 0)
00197 {
00198 v_k_tilde = viennacl::linalg::prod(matrix, res);
00199 precond.apply(v_k_tilde);
00200 }
00201 else
00202 {
00203 viennacl::traits::clear(v_k_tilde);
00204 v_k_tilde[k-1] = gpu_scalar_1;
00205
00206 for (int i = k-1; i > -1; --i)
00207 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00208
00209 v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
00210 precond.apply(v_k_tilde_temp);
00211 v_k_tilde = v_k_tilde_temp;
00212
00213
00214 for (unsigned int i = 0; i < k; ++i)
00215 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00216 }
00217
00218
00219
00220 viennacl::traits::clear(U[k]);
00221 viennacl::traits::resize(U[k], problem_size);
00222
00223 gmres_copy_helper(v_k_tilde, U[k], k);
00224
00225 U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) );
00226
00227 if (fabs(U[k][k]) < CPU_ScalarType(10 * std::numeric_limits<CPU_ScalarType>::epsilon()))
00228 break;
00229
00230
00231 gmres_copy_helper(U[k], R[k], k+1);
00232
00233 U[k] -= v_k_tilde;
00234
00235 U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] );
00236
00237
00238
00239 #ifdef VIENNACL_GMRES_DEBUG
00240 std::cout << "P_k v_k_tilde: " << (v_k_tilde - 2.0 * U[k] * inner_prod(U[k], v_k_tilde)) << std::endl;
00241 std::cout << "R[k]: [" << R[k].size() << "](";
00242 for (size_t i=0; i<R[k].size(); ++i)
00243 std::cout << R[k][i] << ",";
00244 std::cout << ")" << std::endl;
00245 #endif
00246
00247 res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2);
00248
00249
00250
00251 #ifdef VIENNACL_GMRES_DEBUG
00252 VectorType v1(U[k].size()); v1.clear(); v1.resize(U[k].size());
00253 v1(0) = 1.0;
00254 v1 -= U[k] * (viennacl::linalg::inner_prod( U[k], v1 ) * gpu_scalar_2);
00255 std::cout << "v1: " << v1 << std::endl;
00256 boost::numeric::ublas::matrix<ScalarType> P = -2.0 * outer_prod(U[k], U[k]);
00257 P(0,0) += 1.0; P(1,1) += 1.0; P(2,2) += 1.0;
00258 std::cout << "P: " << P << std::endl;
00259 #endif
00260
00261 if (res[k] > rho)
00262 res[k] = rho;
00263
00264 if (res[k] < -1.0 * rho)
00265 res[k] = -1.0 * rho;
00266
00267 projection_rhs[k] = res[k];
00268
00269 rho *= std::sin( std::acos(projection_rhs[k] / rho) );
00270
00271 #ifdef VIENNACL_GMRES_DEBUG
00272 std::cout << "k-th component of r: " << res[k] << std::endl;
00273 std::cout << "New rho (norm of res): " << rho << std::endl;
00274 #endif
00275
00276 if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())
00277 {
00278
00279 tag.error( std::fabs(rho*rho_0 / norm_rhs) );
00280 ++k;
00281 break;
00282 }
00283
00284
00285
00286 }
00287
00288 #ifdef VIENNACL_GMRES_DEBUG
00289
00290 std::cout << "Upper triangular system:" << std::endl;
00291 std::cout << "Size of Krylov space: " << k << std::endl;
00292 for (size_t i=0; i<k; ++i)
00293 {
00294 for (size_t j=0; j<k; ++j)
00295 {
00296 std::cout << R[j][i] << ", ";
00297 }
00298 std::cout << " | " << projection_rhs[i] << std::endl;
00299 }
00300 #endif
00301
00302 for (int i=k-1; i>-1; --i)
00303 {
00304 for (unsigned int j=i+1; j<k; ++j)
00305
00306 projection_rhs[i] -= R[j][i] * projection_rhs[j];
00307
00308 projection_rhs[i] /= R[i][i];
00309 }
00310
00311 #ifdef VIENNACL_GMRES_DEBUG
00312 std::cout << "Result of triangular solver: ";
00313 for (size_t i=0; i<k; ++i)
00314 std::cout << projection_rhs[i] << ", ";
00315 std::cout << std::endl;
00316 #endif
00317 res *= projection_rhs[0];
00318
00319 if (k > 0)
00320 {
00321 for (unsigned int i = 0; i < k-1; ++i)
00322 {
00323 res[i] += projection_rhs[i+1];
00324 }
00325 }
00326
00327 for (int i = k-1; i > -1; --i)
00328 res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2);
00329
00330 res *= rho_0;
00331 result += res;
00332
00333 if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() )
00334 {
00335
00336 tag.error(std::fabs(rho*rho_0 / norm_rhs));
00337 return result;
00338 }
00339
00340
00341
00342
00343
00344
00345
00346 tag.error(std::fabs(rho*rho_0));
00347 }
00348
00349 return result;
00350 }
00351
00354 template <typename MatrixType, typename VectorType>
00355 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
00356 {
00357 return solve(matrix, rhs, tag, no_precond());
00358 }
00359
00360
00361 }
00362 }
00363
00364 #endif