ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
cg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CG_HPP_
2 #define VIENNACL_LINALG_CG_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include <numeric>
29 
30 #include "viennacl/forwards.h"
31 #include "viennacl/tools/tools.hpp"
32 #include "viennacl/linalg/ilu.hpp"
33 #include "viennacl/linalg/prod.hpp"
37 #include "viennacl/traits/size.hpp"
40 
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 
48 class cg_tag
49 {
50 public:
56  cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {}
57 
59  double tolerance() const { return tol_; }
60 
62  double abs_tolerance() const { return abs_tol_; }
64  void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
65 
67  unsigned int max_iterations() const { return iterations_; }
68 
70  unsigned int iters() const { return iters_taken_; }
71  void iters(unsigned int i) const { iters_taken_ = i; }
72 
74  double error() const { return last_error_; }
76  void error(double e) const { last_error_ = e; }
77 
78 
79 private:
80  double tol_;
81  double abs_tol_;
82  unsigned int iterations_;
83 
84  //return values from solver
85  mutable unsigned int iters_taken_;
86  mutable double last_error_;
87 };
88 
89 namespace detail
90 {
91 
93  template<typename VectorT, typename PreconditionerT>
94  class z_handler{
95  public:
96  z_handler(VectorT & residual) : z_(residual){ }
97  VectorT & get() { return z_; }
98  private:
99  VectorT z_;
100  };
101 
102  template<typename VectorT>
104  public:
105  z_handler(VectorT & residual) : presidual_(&residual){ }
106  VectorT & get() { return *presidual_; }
107  private:
108  VectorT * presidual_;
109  };
110 
111 }
112 
113 namespace detail
114 {
115 
127  //template<typename MatrixType, typename ScalarType>
128  template<typename MatrixT, typename NumericT>
129  viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
130  viennacl::vector<NumericT> const & rhs,
131  cg_tag const & tag,
133  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
134  void *monitor_data = NULL)
135  {
136  typedef typename viennacl::vector<NumericT>::difference_type difference_type;
137 
138  viennacl::vector<NumericT> result(rhs);
139  viennacl::traits::clear(result);
140 
141  viennacl::vector<NumericT> residual(rhs);
144  viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer
145  std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
146  vcl_size_t buffer_size_per_vector = inner_prod_buffer.size() / 3;
147  difference_type buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector);
148 
149  NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared;
150 
151  if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0
152  return result;
153 
154  NumericT inner_prod_rr = norm_rhs_squared;
155  NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap);
156  NumericT beta = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr;
157  NumericT inner_prod_ApAp = 0;
158  NumericT inner_prod_pAp = 0;
159 
160  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
161  {
162  tag.iters(i+1);
163 
164  viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer);
165  viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer);
166 
167  // bring back the partial results to the host:
168  viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
169 
170  inner_prod_rr = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_offset_per_vector, NumericT(0));
171  inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() + buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0));
172  inner_prod_pAp = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0));
173 
174  if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data))
175  break;
176  if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() * tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
177  break;
178 
179  alpha = inner_prod_rr / inner_prod_pAp;
180  beta = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr;
181  }
182 
183  //store last error estimate:
184  tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared));
185 
186  return result;
187  }
188 
189 
191  template<typename NumericT>
193  viennacl::vector<NumericT> const & rhs,
194  cg_tag const & tag,
196  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
197  void *monitor_data = NULL)
198  {
199  return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
200  }
201 
202 
204  template<typename NumericT>
206  viennacl::vector<NumericT> const & rhs,
207  cg_tag const & tag,
209  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
210  void *monitor_data = NULL)
211  {
212  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
213  }
214 
215 
216 
218  template<typename NumericT>
220  viennacl::vector<NumericT> const & rhs,
221  cg_tag const & tag,
223  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
224  void *monitor_data = NULL)
225  {
226  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
227  }
228 
229 
230 
232  template<typename NumericT>
234  viennacl::vector<NumericT> const & rhs,
235  cg_tag const & tag,
237  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
238  void *monitor_data = NULL)
239  {
240  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
241  }
242 
243 
245  template<typename NumericT>
247  viennacl::vector<NumericT> const & rhs,
248  cg_tag const & tag,
250  bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
251  void *monitor_data = NULL)
252  {
253  return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
254  }
255 
256 
257  template<typename MatrixT, typename VectorT, typename PreconditionerT>
258  VectorT solve_impl(MatrixT const & matrix,
259  VectorT const & rhs,
260  cg_tag const & tag,
261  PreconditionerT const & precond,
262  bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
263  void *monitor_data = NULL)
264  {
265  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
266  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
267 
268  VectorT result = rhs;
269  viennacl::traits::clear(result);
270 
271  VectorT residual = rhs;
272  VectorT tmp = rhs;
274  VectorT & z = zhandler.get();
275 
276  precond.apply(z);
277  VectorT p = z;
278 
279  CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z);
280  CPU_NumericType alpha;
281  CPU_NumericType new_ip_rr = 0;
282  CPU_NumericType beta;
283  CPU_NumericType norm_rhs_squared = ip_rr;
284  CPU_NumericType new_ipp_rr_over_norm_rhs;
285 
286  if (norm_rhs_squared <= tag.abs_tolerance()) //solution is zero if RHS norm is zero
287  return result;
288 
289  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
290  {
291  tag.iters(i+1);
292  tmp = viennacl::linalg::prod(matrix, p);
293 
294  alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
295 
296  result += alpha * p;
297  residual -= alpha * tmp;
298  z = residual;
299  precond.apply(z);
300 
301  if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z))
302  new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2);
303  else
304  new_ip_rr = viennacl::linalg::inner_prod(residual, z);
305 
306  new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared;
307  if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data))
308  break;
309  if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() * tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
310  break;
311 
312  beta = new_ip_rr / ip_rr;
313  ip_rr = new_ip_rr;
314 
315  p = z + beta*p;
316  }
317 
318  //store last error estimate:
319  tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared)));
320 
321  return result;
322  }
323 
324 }
325 
326 
327 
338 template<typename MatrixT, typename VectorT, typename PreconditionerT>
339 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond)
340 {
341  return detail::solve_impl(matrix, rhs, tag, precond);
342 }
343 
349 template<typename IndexT, typename NumericT, typename PreconditionerT>
350 std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond)
351 {
353  viennacl::copy(A, vcl_A);
354 
355  viennacl::vector<NumericT> vcl_rhs(rhs.size());
356  viennacl::copy(rhs, vcl_rhs);
357 
358  viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
359 
360  std::vector<NumericT> result(vcl_result.size());
361  viennacl::copy(vcl_result, result);
362  return result;
363 }
364 
371 template<typename MatrixT, typename VectorT>
372 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag)
373 {
374  return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
375 }
376 
377 
378 
379 template<typename VectorT>
381 {
382 public:
384 
385  cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
386 
387  template<typename MatrixT, typename PreconditionerT>
388  VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
389  {
390  if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
391  {
392  VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
393  mod_rhs = b - mod_rhs;
394  VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
395  return init_guess_ + y;
396  }
397  return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
398  }
399 
400 
401  template<typename MatrixT>
402  VectorT operator()(MatrixT const & A, VectorT const & b) const
403  {
405  }
406 
411  void set_initial_guess(VectorT const & x) { init_guess_ = x; }
412 
420  void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
421  {
422  monitor_callback_ = monitor_fun;
423  user_data_ = user_data;
424  }
425 
427  cg_tag const & tag() const { return tag_; }
428 
429 private:
430  cg_tag tag_;
431  VectorT init_guess_;
432  bool (*monitor_callback_)(VectorT const &, numeric_type, void *);
433  void *user_data_;
434 };
435 
436 
437 }
438 }
439 
440 #endif
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:406
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:96
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
cg_tag(double tol=1e-8, unsigned int max_iterations=300)
The constructor.
Definition: cg.hpp:56
Generic size and resize functionality for different vector and matrix types.
base_type::difference_type difference_type
Definition: vector.hpp:957
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
double tolerance() const
Returns the relative tolerance.
Definition: cg.hpp:59
viennacl::vector< NumericT > solve_impl(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Overload for the pipelined CG implementation for the ViennaCL sparse matrix types.
Definition: bicgstab.hpp:219
handles the no_precond case at minimal overhead
Definition: cg.hpp:94
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:375
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:100
void pipelined_cg_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm.
unsigned int iters() const
Return the number of solver iterations:
Definition: cg.hpp:70
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
void pipelined_cg_prod(MatrixT const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm.
VectorT operator()(MatrixT const &A, VectorT const &b) const
Definition: cg.hpp:402
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: cg.hpp:67
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
Definition: blas3.hpp:36
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:841
A tag class representing the use of no preconditioner.
Definition: forwards.h:873
Implementations of incomplete factorization preconditioners. Convenience header file.
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:403
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:98
double abs_tolerance() const
Returns the absolute tolerance.
Definition: cg.hpp:62
std::size_t vcl_size_t
Definition: forwards.h:75
viennacl::result_of::cpu_value_type< VectorT >::type numeric_type
Definition: cg.hpp:383
Generic clear functionality for different vector and matrix types.
cg_solver(cg_tag const &tag)
Definition: cg.hpp:385
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: cg.hpp:74
z_handler(VectorT &residual)
Definition: cg.hpp:96
void abs_tolerance(double new_tol)
Sets the absolute tolerance.
Definition: cg.hpp:64
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void iters(unsigned int i) const
Definition: cg.hpp:71
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: cg.hpp:76
Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T...
Definition: result_of.hpp:269
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48
void set_monitor(bool(*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor...
Definition: cg.hpp:420
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:848
void set_initial_guess(VectorT const &x)
Specifies an initial guess for the iterative solver.
Definition: cg.hpp:411
A collection of compile time type deductions.
cg_tag const & tag() const
Returns the solver tag containing basic configuration such as tolerances, etc.
Definition: cg.hpp:427
VectorT operator()(MatrixT const &A, VectorT const &b, PreconditionerT const &precond) const
Definition: cg.hpp:388
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)