ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
mixed_precision_cg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
2 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
30 #include "viennacl/linalg/ilu.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
37 
39 
40 namespace viennacl
41 {
42  namespace linalg
43  {
44 
48  {
49  public:
56  mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
57 
59  double tolerance() const { return tol_; }
61  float inner_tolerance() const { return inner_tol_; }
63  unsigned int max_iterations() const { return iterations_; }
64 
66  unsigned int iters() const { return iters_taken_; }
67  void iters(unsigned int i) const { iters_taken_ = i; }
68 
70  double error() const { return last_error_; }
72  void error(double e) const { last_error_ = e; }
73 
74 
75  private:
76  double tol_;
77  unsigned int iterations_;
78  float inner_tol_;
79 
80  //return values from solver
81  mutable unsigned int iters_taken_;
82  mutable double last_error_;
83  };
84 
85 
95  template<typename MatrixType, typename VectorType>
96  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
97  {
98  //typedef typename VectorType::value_type ScalarType;
99  typedef typename viennacl::result_of::cpu_value_type<VectorType>::type CPU_ScalarType;
100 
101  //std::cout << "Starting CG" << std::endl;
102  vcl_size_t problem_size = viennacl::traits::size(rhs);
103  VectorType result(rhs);
104  viennacl::traits::clear(result);
105 
106  VectorType residual = rhs;
107 
108  CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
109  CPU_ScalarType new_ip_rr = 0;
110  CPU_ScalarType norm_rhs_squared = ip_rr;
111 
112  if (norm_rhs_squared <= 0) //solution is zero if RHS norm is zero
113  return result;
114 
115  viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
116  viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
117  viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
118  viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
119  float inner_ip_rr = static_cast<float>(ip_rr);
120  float new_inner_ip_rr = 0;
121  float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
122  float alpha;
123  float beta;
124 
125  // transfer rhs to single precision:
126  p_low_precision = rhs;
127  residual_low_precision = p_low_precision;
128 
129  // transfer matrix to single precision:
130  viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
131  viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() );
132  viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() );
133 
134  viennacl::vector_base<CPU_ScalarType> matrix_elements_high_precision(const_cast<viennacl::backend::mem_handle &>(matrix.handle()), matrix.nnz(), 0, 1);
135  viennacl::vector_base<float> matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1);
136  matrix_elements_low_precision = matrix_elements_high_precision;
137  matrix_low_precision.generate_row_block_information();
138 
139  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
140  {
141  tag.iters(i+1);
142 
143  // lower precision 'inner iteration'
144  tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
145 
146  alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
147  result_low_precision += alpha * p_low_precision;
148  residual_low_precision -= alpha * tmp_low_precision;
149 
150  new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
151 
152  beta = new_inner_ip_rr / inner_ip_rr;
153  inner_ip_rr = new_inner_ip_rr;
154 
155  p_low_precision = residual_low_precision + beta * p_low_precision;
156 
157  //
158  // If enough progress has been achieved, update current residual with high precision evaluation
159  // This is effectively a restart of the CG method
160  //
161  if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
162  {
163  residual = result_low_precision; // reusing residual vector as temporary buffer for conversion. Overwritten below anyway
164  result += residual;
165 
166  // residual = b - Ax (without introducing a temporary)
167  residual = viennacl::linalg::prod(matrix, result);
168  residual = rhs - residual;
169 
170  new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
171  if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here
172  break;
173 
174  p_low_precision = residual;
175 
176  result_low_precision.clear();
177  residual_low_precision = p_low_precision;
178  initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
179  inner_ip_rr = static_cast<float>(new_ip_rr);
180  }
181  }
182 
183  //store last error estimate:
184  tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
185 
186  return result;
187  }
188 
189  template<typename MatrixType, typename VectorType>
190  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
191  {
192  return solve(matrix, rhs, tag);
193  }
194 
195 
196  }
197 }
198 
199 #endif
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
float inner_tolerance() const
Returns the relative tolerance.
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:375
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:100
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
A tag class representing the use of no preconditioner.
Definition: forwards.h:873
Implementations of incomplete factorization preconditioners. Convenience header file.
unsigned int max_iterations() const
Returns the maximum number of iterations.
double tolerance() const
Returns the relative tolerance.
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104
std::size_t vcl_size_t
Definition: forwards.h:75
Generic clear functionality for different vector and matrix types.
mixed_precision_cg_tag(double tol=1e-8, unsigned int max_iterations=300, float inner_tol=1e-2f)
The constructor.
double error() const
Returns the estimated relative error at the end of the solver run.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
Proxy classes for vectors.
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Definition: memory.hpp:140
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
void error(double e) const
Sets the estimated relative error at the end of the solver run.
A collection of compile time type deductions.
unsigned int iters() const
Return the number of solver iterations:
Main interface routines for memory management.