ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
fspai.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include <utility>
22 #include <iostream>
23 #include <fstream>
24 #include <string>
25 #include <algorithm>
26 #include <vector>
27 #include <math.h>
28 #include <map>
29 
30 //boost includes
31 #include "boost/numeric/ublas/vector.hpp"
32 #include "boost/numeric/ublas/matrix.hpp"
33 #include "boost/numeric/ublas/matrix_proxy.hpp"
34 #include "boost/numeric/ublas/vector_proxy.hpp"
35 #include "boost/numeric/ublas/storage.hpp"
36 #include "boost/numeric/ublas/io.hpp"
37 #include "boost/numeric/ublas/lu.hpp"
38 #include "boost/numeric/ublas/triangular.hpp"
39 #include "boost/numeric/ublas/matrix_expression.hpp"
40 
41 // ViennaCL includes
42 #include "viennacl/linalg/prod.hpp"
43 #include "viennacl/matrix.hpp"
47 #include "viennacl/scalar.hpp"
48 #include "viennacl/linalg/cg.hpp"
50 #include "viennacl/linalg/ilu.hpp"
51 //#include <omp.h>
52 
57 namespace viennacl
58 {
59 namespace linalg
60 {
61 namespace detail
62 {
63 namespace spai
64 {
65 
71 class fspai_tag
72 {
73 public:
82  double residual_norm_threshold = 1e-3,
83  unsigned int iteration_limit = 5,
84  bool is_static = false,
85  bool is_right = false)
86  : residual_norm_threshold_(residual_norm_threshold),
87  iteration_limit_(iteration_limit),
88  is_static_(is_static),
89  is_right_(is_right) {}
90 
91  inline double getResidualNormThreshold() const { return residual_norm_threshold_; }
92  inline unsigned long getIterationLimit () const { return iteration_limit_; }
93  inline bool getIsStatic() const { return is_static_; }
94  inline bool getIsRight() const { return is_right_; }
95  inline void setResidualNormThreshold(double residual_norm_threshold)
96  {
97  if (residual_norm_threshold > 0)
98  residual_norm_threshold_ = residual_norm_threshold;
99  }
100  inline void setIterationLimit(unsigned long iteration_limit)
101  {
102  if (iteration_limit > 0)
103  iteration_limit_ = iteration_limit;
104  }
105  inline void setIsRight(bool is_right) { is_right_ = is_right; }
106  inline void setIsStatic(bool is_static) { is_static_ = is_static; }
107 
108 private:
109  double residual_norm_threshold_;
110  unsigned long iteration_limit_;
111  bool is_static_;
112  bool is_right_;
113 };
114 
115 
116 //
117 // Helper: Store A in an STL container of type, exploiting symmetry
118 // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
119 //
120 template<typename MatrixT, typename NumericT>
121 void sym_sparse_matrix_to_stl(MatrixT const & A, std::vector<std::map<unsigned int, NumericT> > & STL_A)
122 {
123  STL_A.resize(A.size1());
124  for (typename MatrixT::const_iterator1 row_it = A.begin1();
125  row_it != A.end1();
126  ++row_it)
127  {
128  for (typename MatrixT::const_iterator2 col_it = row_it.begin();
129  col_it != row_it.end();
130  ++col_it)
131  {
132  if (col_it.index1() >= col_it.index2())
133  STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it;
134  else
135  break; //go to next row
136  }
137  }
138 }
139 
140 
141 //
142 // Generate index sets J_k, k=0,...,N-1
143 //
144 template<typename MatrixT>
145 void generateJ(MatrixT const & A, std::vector<std::vector<vcl_size_t> > & J)
146 {
147  for (typename MatrixT::const_iterator1 row_it = A.begin1();
148  row_it != A.end1();
149  ++row_it)
150  {
151  for (typename MatrixT::const_iterator2 col_it = row_it.begin();
152  col_it != row_it.end();
153  ++col_it)
154  {
155  if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
156  {
157  J[col_it.index2()].push_back(col_it.index1());
158  J[col_it.index1()].push_back(col_it.index2());
159  }
160  else
161  break; //go to next row
162  }
163  }
164 }
165 
166 
167 //
168 // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
169 // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
170 //
171 template<typename NumericT, typename MatrixT, typename VectorT>
172 void fill_blocks(std::vector< std::map<unsigned int, NumericT> > & A,
173  std::vector<MatrixT> & blocks,
174  std::vector<std::vector<vcl_size_t> > const & J,
175  std::vector<VectorT> & Y)
176 {
177  for (vcl_size_t k=0; k<A.size(); ++k)
178  {
179  std::vector<vcl_size_t> const & Jk = J[k];
180  VectorT & yk = Y[k];
181  MatrixT & block_k = blocks[k];
182 
183  yk.resize(Jk.size());
184  block_k.resize(Jk.size(), Jk.size());
185  block_k.clear();
186 
187  for (vcl_size_t i=0; i<Jk.size(); ++i)
188  {
189  vcl_size_t row_index = Jk[i];
190  std::map<unsigned int, NumericT> & A_row = A[row_index];
191 
192  //fill y_k:
193  yk[i] = A_row[static_cast<unsigned int>(k)];
194 
195  for (vcl_size_t j=0; j<Jk.size(); ++j)
196  {
197  vcl_size_t col_index = Jk[j];
198  if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part
199  block_k(i, j) = A_row[static_cast<unsigned int>(col_index)];
200  }
201  }
202  }
203 }
204 
205 
206 //
207 // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
208 //
209 template<typename MatrixT>
210 void cholesky_decompose(MatrixT & A)
211 {
212  for (vcl_size_t k=0; k<A.size2(); ++k)
213  {
214  if (A(k,k) <= 0)
215  {
216  std::cout << "k: " << k << std::endl;
217  std::cout << "A(k,k): " << A(k,k) << std::endl;
218  }
219 
220  assert(A(k,k) > 0 && bool("Matrix not positive definite in Cholesky factorization."));
221 
222  A(k,k) = std::sqrt(A(k,k));
223 
224  for (vcl_size_t i=k+1; i<A.size1(); ++i)
225  {
226  A(i,k) /= A(k,k);
227  for (vcl_size_t j=k+1; j<=i; ++j)
228  A(i,j) -= A(i,k) * A(j,k);
229  }
230  }
231 }
232 
233 
234 //
235 // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
236 //
237 template<typename MatrixT, typename VectorT>
238 void cholesky_solve(MatrixT const & L, VectorT & b)
239 {
240  // inplace forward solve L x = b
241  for (vcl_size_t i=0; i<L.size1(); ++i)
242  {
243  for (vcl_size_t j=0; j<i; ++j)
244  b[i] -= L(i,j) * b[j];
245  b[i] /= L(i,i);
246  }
247 
248  // inplace backward solve L^T x = b:
249  for (vcl_size_t i=L.size1()-1;; --i)
250  {
251  for (vcl_size_t k=i+1; k<L.size1(); ++k)
252  b[i] -= L(k,i) * b[k];
253  b[i] /= L(i,i);
254 
255  if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here
256  break;
257  }
258 }
259 
260 
261 
262 //
263 // Compute the Cholesky factor L from the sparse vectors y_k
264 //
265 template<typename MatrixT, typename VectorT>
266 void computeL(MatrixT const & A,
267  MatrixT & L,
268  MatrixT & L_trans,
269  std::vector<VectorT> & Y,
270  std::vector<std::vector<vcl_size_t> > & J)
271 {
272  typedef typename VectorT::value_type NumericType;
273  typedef std::vector<std::map<unsigned int, NumericType> > STLSparseMatrixType;
274 
275  STLSparseMatrixType L_temp(A.size1());
276 
277  for (vcl_size_t k=0; k<A.size1(); ++k)
278  {
279  std::vector<vcl_size_t> const & Jk = J[k];
280  VectorT const & yk = Y[k];
281 
282  //compute L(k,k):
283  NumericType Lkk = A(k,k);
284  for (vcl_size_t i=0; i<Jk.size(); ++i)
285  Lkk -= A(Jk[i],k) * yk[i];
286 
287  Lkk = NumericType(1) / std::sqrt(Lkk);
288  L_temp[k][static_cast<unsigned int>(k)] = Lkk;
289  L_trans(k,k) = Lkk;
290 
291  //write lower diagonal entries:
292  for (vcl_size_t i=0; i<Jk.size(); ++i)
293  {
294  L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i];
295  L_trans(k, Jk[i]) = -Lkk * yk[i];
296  }
297  } //for k
298 
299 
300  //build L from L_temp
301  for (vcl_size_t i=0; i<L_temp.size(); ++i)
302  for (typename std::map<unsigned int, NumericType>::const_iterator it = L_temp[i].begin();
303  it != L_temp[i].end();
304  ++it)
305  L(i, it->first) = it->second;
306 }
307 
308 
309 //
310 // Top level FSPAI function
311 //
312 template<typename MatrixT>
313 void computeFSPAI(MatrixT const & A,
314  MatrixT const & PatternA,
315  MatrixT & L,
316  MatrixT & L_trans,
317  fspai_tag)
318 {
319  typedef typename MatrixT::value_type NumericT;
320  typedef boost::numeric::ublas::matrix<NumericT> DenseMatrixType;
321  typedef std::vector<std::map<unsigned int, NumericT> > SparseMatrixType;
322 
323  //
324  // preprocessing: Store A in a STL container:
325  //
326  //std::cout << "Transferring to STL container:" << std::endl;
327  std::vector<std::vector<NumericT> > y_k(A.size1());
328  SparseMatrixType STL_A(A.size1());
329  sym_sparse_matrix_to_stl(A, STL_A);
330 
331 
332  //
333  // Step 1: Generate pattern indices
334  //
335  //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
336  std::vector<std::vector<vcl_size_t> > J(A.size1());
337  generateJ(PatternA, J);
338 
339  //
340  // Step 2: Set up matrix blocks
341  //
342  //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
343  std::vector<DenseMatrixType> subblocks_A(A.size1());
344  fill_blocks(STL_A, subblocks_A, J, y_k);
345  STL_A.clear(); //not needed anymore
346 
347  //
348  // Step 3: Cholesky-factor blocks
349  //
350  //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
351  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
352  {
353  //std::cout << "Block before: " << subblocks_A[i] << std::endl;
354  cholesky_decompose(subblocks_A[i]);
355  //std::cout << "Block after: " << subblocks_A[i] << std::endl;
356  }
357 
358 
359  /*vcl_size_t num_bytes = 0;
360  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
361  num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
362  //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
363 
364  //
365  // Step 4: Solve for y_k
366  //
367  //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
368  for (vcl_size_t i=0; i<y_k.size(); ++i)
369  {
370  if (subblocks_A[i].size1() > 0) //block might be empty...
371  {
372  //y_k[i].resize(subblocks_A[i].size1());
373  //std::cout << "y_k[" << i << "]: ";
374  //for (vcl_size_t j=0; j<y_k[i].size(); ++j)
375  // std::cout << y_k[i][j] << " ";
376  //std::cout << std::endl;
377  cholesky_solve(subblocks_A[i], y_k[i]);
378  }
379  }
380 
381 
382  //
383  // Step 5: Set up Cholesky factors L and L_trans
384  //
385  //std::cout << "computeFSPAI(): Computing L..." << std::endl;
386  L.resize(A.size1(), A.size2(), false);
387  L.reserve(A.nnz(), false);
388  L_trans.resize(A.size1(), A.size2(), false);
389  L_trans.reserve(A.nnz(), false);
390  computeL(A, L, L_trans, y_k, J);
391 
392  //std::cout << "L: " << L << std::endl;
393 }
394 
395 
396 
397 }
398 }
399 }
400 }
401 
402 #endif
Implementations of dense matrix related operations including matrix-vector products.
void cholesky_decompose(MatrixT &A)
Definition: fspai.hpp:210
void sym_sparse_matrix_to_stl(MatrixT const &A, std::vector< std::map< unsigned int, NumericT > > &STL_A)
Definition: fspai.hpp:121
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void cholesky_solve(MatrixT const &L, VectorT &b)
Definition: fspai.hpp:238
A tag for FSPAI. Experimental.
Definition: fspai.hpp:71
Definition: blas3.hpp:36
void setIterationLimit(unsigned long iteration_limit)
Definition: fspai.hpp:100
Implementations of incomplete factorization preconditioners. Convenience header file.
Implementation of the compressed_matrix class.
Implementations of operations using sparse matrices.
unsigned long getIterationLimit() const
Definition: fspai.hpp:92
std::size_t vcl_size_t
Definition: forwards.h:75
The conjugate gradient method is implemented here.
void computeL(MatrixT const &A, MatrixT &L, MatrixT &L_trans, std::vector< VectorT > &Y, std::vector< std::vector< vcl_size_t > > &J)
Definition: fspai.hpp:266
void generateJ(MatrixT const &A, std::vector< std::vector< vcl_size_t > > &J)
Definition: fspai.hpp:145
void computeFSPAI(MatrixT const &A, MatrixT const &PatternA, MatrixT &L, MatrixT &L_trans, fspai_tag)
Definition: fspai.hpp:313
void fill_blocks(std::vector< std::map< unsigned int, NumericT > > &A, std::vector< MatrixT > &blocks, std::vector< std::vector< vcl_size_t > > const &J, std::vector< VectorT > &Y)
Definition: fspai.hpp:172
Implementation of the ViennaCL scalar class.
fspai_tag(double residual_norm_threshold=1e-3, unsigned int iteration_limit=5, bool is_static=false, bool is_right=false)
Constructor.
Definition: fspai.hpp:81
void setResidualNormThreshold(double residual_norm_threshold)
Definition: fspai.hpp:95