ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/linalg/detail/spai/fspai.hpp Source File

Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00020 #include <utility>
00021 #include <iostream>
00022 #include <fstream>
00023 #include <string>
00024 #include <algorithm>
00025 #include <vector>
00026 #include <math.h>
00027 #include <map>
00028 
00029 //boost includes
00030 #include "boost/numeric/ublas/vector.hpp"
00031 #include "boost/numeric/ublas/matrix.hpp"
00032 #include "boost/numeric/ublas/matrix_proxy.hpp"
00033 #include "boost/numeric/ublas/vector_proxy.hpp"
00034 #include "boost/numeric/ublas/storage.hpp"
00035 #include "boost/numeric/ublas/io.hpp"
00036 #include "boost/numeric/ublas/lu.hpp"
00037 #include "boost/numeric/ublas/triangular.hpp"
00038 #include "boost/numeric/ublas/matrix_expression.hpp"
00039 
00040 // ViennaCL includes
00041 #include "viennacl/linalg/prod.hpp"
00042 #include "viennacl/matrix.hpp"
00043 #include "viennacl/compressed_matrix.hpp"
00044 #include "viennacl/linalg/compressed_matrix_operations.hpp"
00045 #include "viennacl/linalg/matrix_operations.hpp"
00046 #include "viennacl/scalar.hpp"
00047 #include "viennacl/linalg/cg.hpp"
00048 #include "viennacl/linalg/inner_prod.hpp"
00049 #include "viennacl/linalg/ilu.hpp"
00050 //#include <omp.h>
00051 
00056 namespace viennacl
00057 {
00058     namespace linalg
00059     {
00060       namespace detail
00061       {
00062         namespace spai
00063         {
00064         
00069           class fspai_tag{
00076           public:
00077               fspai_tag(
00078                       double residual_norm_threshold = 1e-3,
00079                       unsigned int iteration_limit = 5, 
00080                       bool is_static = false,
00081                       bool is_right = false) :
00082               _residual_norm_threshold(residual_norm_threshold),
00083               _iteration_limit(iteration_limit),
00084               _is_static(is_static),
00085               _is_right(is_right){};
00086               
00087               inline const double getResidualNormThreshold() const
00088               { return _residual_norm_threshold; }
00089               inline const unsigned long getIterationLimit () const
00090               { return _iteration_limit; }
00091               inline const bool getIsStatic() const
00092               { return _is_static; }
00093               inline const bool getIsRight() const
00094               { return _is_right; }
00095               inline void setResidualNormThreshold(double residual_norm_threshold){
00096                   if(residual_norm_threshold > 0)
00097                       _residual_norm_threshold = residual_norm_threshold;
00098               }
00099               inline void setIterationLimit(unsigned long iteration_limit){
00100                   if(iteration_limit > 0)
00101                       _iteration_limit = iteration_limit;
00102               }
00103               inline void setIsRight(bool is_right){
00104                   _is_right = is_right;
00105               }
00106               inline void setIsStatic(bool is_static){
00107                   _is_static = is_static;
00108               }
00109               
00110           private:
00111               double _residual_norm_threshold;
00112               unsigned long _iteration_limit;
00113               bool _is_static;
00114               bool _is_right;
00115           };
00116           
00117           
00118           //
00119           // Helper: Store A in an STL container of type, exploiting symmetry
00120           // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
00121           //
00122           template <typename MatrixType, typename ScalarType>
00123           void sym_sparse_matrix_to_stl(MatrixType const & A, std::vector<std::map<unsigned int, ScalarType> > & STL_A)
00124           {
00125             STL_A.resize(A.size1());
00126             for (typename MatrixType::const_iterator1 row_it  = A.begin1();
00127                                                       row_it != A.end1();
00128                                                     ++row_it)
00129             {
00130               for (typename MatrixType::const_iterator2 col_it  = row_it.begin();
00131                                                         col_it != row_it.end();
00132                                                       ++col_it)
00133               {
00134                 if (col_it.index1() >= col_it.index2())
00135                   STL_A[col_it.index1()][col_it.index2()] = *col_it;
00136                 else
00137                   break; //go to next row
00138               }
00139             }
00140           }
00141           
00142           
00143           //
00144           // Generate index sets J_k, k=0,...,N-1
00145           //
00146           template <typename MatrixType>
00147           void generateJ(MatrixType const & A, std::vector<std::vector<size_t> > & J)
00148           {
00149             for (typename MatrixType::const_iterator1 row_it  = A.begin1();
00150                                                       row_it != A.end1();
00151                                                     ++row_it)
00152             {
00153               for (typename MatrixType::const_iterator2 col_it  = row_it.begin();
00154                                                         col_it != row_it.end();
00155                                                       ++col_it)
00156               {
00157                 if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
00158                 {
00159                   J[col_it.index2()].push_back(col_it.index1());
00160                   J[col_it.index1()].push_back(col_it.index2());
00161                 }
00162                 else
00163                   break; //go to next row
00164               }
00165             }
00166           }
00167 
00168 
00169           //
00170           // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A 
00171           // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
00172           //
00173           template <typename ScalarType, typename MatrixType, typename VectorType>
00174           void fill_blocks(std::vector< std::map<unsigned int, ScalarType> > & A,
00175                           std::vector<MatrixType> & blocks,
00176                           std::vector<std::vector<size_t> > const & J,
00177                           std::vector<VectorType> & Y)
00178           {
00179             for (size_t k=0; k<A.size(); ++k)
00180             {
00181               std::vector<size_t> const & Jk = J[k];
00182               VectorType & yk = Y[k];
00183               MatrixType & block_k = blocks[k];
00184 
00185               yk.resize(Jk.size());
00186               block_k.resize(Jk.size(), Jk.size());
00187               block_k.clear();
00188               
00189               for (size_t i=0; i<Jk.size(); ++i)
00190               {
00191                 size_t row_index = Jk[i];
00192                 std::map<unsigned int, ScalarType> & A_row = A[row_index];
00193                 
00194                 //fill y_k:
00195                 yk[i] = A_row[k];
00196                 
00197                 for (size_t j=0; j<Jk.size(); ++j)
00198                 {
00199                   size_t col_index = Jk[j];
00200                   if (col_index <= row_index && A_row.find(col_index) != A_row.end()) //block is symmetric, thus store only lower triangular part
00201                     block_k(i, j) = A_row[col_index];
00202                 }
00203               }
00204             }
00205           }
00206           
00207           
00208           //
00209           // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
00210           //
00211           template <typename MatrixType>
00212           void cholesky_decompose(MatrixType & A)
00213           {
00214             for (size_t k=0; k<A.size2(); ++k)
00215             {
00216               if (A(k,k) <= 0)
00217               {
00218                 std::cout << "k: " << k << std::endl;
00219                 std::cout << "A(k,k): " << A(k,k) << std::endl;
00220               }
00221               
00222               assert(A(k,k) > 0);
00223               
00224               A(k,k) = std::sqrt(A(k,k));
00225               
00226               for (size_t i=k+1; i<A.size1(); ++i)
00227               {
00228                 A(i,k) /= A(k,k);
00229                 for (size_t j=k+1; j<=i; ++j)
00230                   A(i,j) -= A(i,k) * A(j,k);
00231               }
00232             }
00233           }
00234           
00235           
00236           //
00237           // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
00238           //
00239           template <typename MatrixType, typename VectorType>
00240           void cholesky_solve(MatrixType const & L, VectorType & b)
00241           {
00242             typedef typename VectorType::value_type  ScalarType;
00243             
00244             // inplace forward solve L x = b
00245             for (size_t i=0; i<L.size1(); ++i)
00246             {
00247               for (size_t j=0; j<i; ++j)
00248                 b[i] -= L(i,j) * b[j];
00249               b[i] /= L(i,i);
00250             }
00251             
00252             // inplace backward solve L^T x = b:
00253             for (size_t i=L.size1()-1; ; --i)
00254             {
00255               for (size_t k=i+1; k<L.size1(); ++k)
00256                 b[i] -= L(k,i) * b[k];
00257               b[i] /= L(i,i);
00258               
00259               if (i==0) //size_t might be unsigned, therefore manual check for equality with zero here
00260                 break;
00261             }
00262           }
00263           
00264           
00265           
00266           //
00267           // Compute the Cholesky factor L from the sparse vectors y_k
00268           //
00269           template <typename MatrixType, typename VectorType1>
00270           void computeL(MatrixType const & A, 
00271                         MatrixType & L,
00272                         MatrixType & L_trans,
00273                         std::vector<VectorType1> & Y,
00274                         std::vector<std::vector<size_t> > & J)
00275           {
00276             typedef typename VectorType1::value_type    ScalarType;
00277             typedef std::vector<std::map<unsigned int, ScalarType> >     STLSparseMatrixType;
00278             
00279             STLSparseMatrixType L_temp(A.size1());
00280             
00281             for (size_t k=0; k<A.size1(); ++k)
00282             {
00283               std::vector<size_t> const & Jk = J[k];
00284               VectorType1 const & yk = Y[k];
00285               
00286               //compute L(k,k):
00287               ScalarType Lkk = A(k,k);
00288               for (size_t i=0; i<Jk.size(); ++i)
00289                 Lkk -= A(Jk[i],k) * yk[i];
00290               
00291               Lkk = 1.0 / sqrt(Lkk);
00292               L_temp[k][k] = Lkk;
00293               L_trans(k,k) = Lkk;
00294               
00295               //write lower diagonal entries:
00296               for (size_t i=0; i<Jk.size(); ++i)
00297               {
00298                 L_temp[Jk[i]][k] = -Lkk * yk[i];
00299                 L_trans(k, Jk[i]) = -Lkk * yk[i];
00300               }
00301             } //for k
00302             
00303             
00304             //build L from L_temp
00305             for (size_t i=0; i<L_temp.size(); ++i)
00306               for (typename std::map<unsigned int, ScalarType>::const_iterator it = L_temp[i].begin();
00307                   it != L_temp[i].end();
00308                 ++it)
00309                   L(i, it->first) = it->second;
00310           }
00311           
00312 
00313           //
00314           // Top level FSPAI function
00315           //
00316           template <typename MatrixType>
00317           void computeFSPAI(MatrixType const & A, 
00318                             MatrixType const & PatternA,
00319                             MatrixType & L, 
00320                             MatrixType & L_trans, 
00321                             fspai_tag const & tag)
00322           {
00323             typedef typename MatrixType::value_type              ScalarType;
00324             typedef boost::numeric::ublas::matrix<ScalarType>    DenseMatrixType;
00325             typedef std::vector<std::map<unsigned int, ScalarType> >     SparseMatrixType;
00326             
00327             //
00328             // preprocessing: Store A in a STL container:
00329             //
00330             //std::cout << "Transferring to STL container:" << std::endl;
00331             std::vector<std::vector<ScalarType> >    y_k(A.size1());
00332             SparseMatrixType   STL_A(A.size1());
00333             sym_sparse_matrix_to_stl(A, STL_A);
00334             
00335             
00336             //
00337             // Step 1: Generate pattern indices
00338             //
00339             //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
00340             std::vector<std::vector<size_t> > J(A.size1());
00341             generateJ(PatternA, J);
00342 
00343             //
00344             // Step 2: Set up matrix blocks
00345             //
00346             //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
00347             std::vector<DenseMatrixType>  subblocks_A(A.size1());
00348             fill_blocks(STL_A, subblocks_A, J, y_k);
00349             STL_A.clear(); //not needed anymore
00350             
00351             //
00352             // Step 3: Cholesky-factor blocks
00353             //
00354             //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
00355             for (size_t i=0; i<subblocks_A.size(); ++i)
00356             {
00357               //std::cout << "Block before: " << subblocks_A[i] << std::endl;
00358               cholesky_decompose(subblocks_A[i]);
00359               //std::cout << "Block after: " << subblocks_A[i] << std::endl;
00360             }
00361             
00362             
00363             /*size_t num_bytes = 0;
00364             for (size_t i=0; i<subblocks_A.size(); ++i)
00365               num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
00366             //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
00367             
00368             //
00369             // Step 4: Solve for y_k
00370             //
00371             //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
00372             for (size_t i=0; i<y_k.size(); ++i)
00373             {
00374               if (subblocks_A[i].size1() > 0) //block might be empty...
00375               {
00376                 //y_k[i].resize(subblocks_A[i].size1());
00377                 //std::cout << "y_k[" << i << "]: ";
00378                 //for (size_t j=0; j<y_k[i].size(); ++j)
00379                 //  std::cout << y_k[i][j] << " ";
00380                 //std::cout << std::endl;
00381                 cholesky_solve(subblocks_A[i], y_k[i]);
00382               }
00383             }
00384             
00385             
00386             //
00387             // Step 5: Set up Cholesky factors L and L_trans
00388             //
00389             //std::cout << "computeFSPAI(): Computing L..." << std::endl;
00390             L.resize(A.size1(), A.size2(), false);
00391             L.reserve(A.nnz(), false);
00392             L_trans.resize(A.size1(), A.size2(), false);
00393             L_trans.reserve(A.nnz(), false);
00394             computeL(A, L, L_trans, y_k, J);
00395             
00396             //std::cout << "L: " << L << std::endl;
00397           }
00398           
00399           
00400           
00401         }
00402       }
00403     }
00404 }
00405 
00406 #endif