00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #include <utility>
00025 #include <iostream>
00026 #include <fstream>
00027 #include <string>
00028 #include <algorithm>
00029 #include <vector>
00030 #include <math.h>
00031 #include <map>
00032
00033
00034 #include "viennacl/linalg/detail/spai/spai_tag.hpp"
00035 #include "viennacl/linalg/qr.hpp"
00036 #include "viennacl/linalg/detail/spai/spai-dynamic.hpp"
00037 #include "viennacl/linalg/detail/spai/spai-static.hpp"
00038 #include "viennacl/linalg/detail/spai/sparse_vector.hpp"
00039 #include "viennacl/linalg/detail/spai/block_matrix.hpp"
00040 #include "viennacl/linalg/detail/spai/block_vector.hpp"
00041
00042
00043 #include "boost/numeric/ublas/vector.hpp"
00044 #include "boost/numeric/ublas/matrix.hpp"
00045 #include "boost/numeric/ublas/matrix_proxy.hpp"
00046 #include "boost/numeric/ublas/vector_proxy.hpp"
00047 #include "boost/numeric/ublas/storage.hpp"
00048 #include "boost/numeric/ublas/io.hpp"
00049 #include "boost/numeric/ublas/lu.hpp"
00050 #include "boost/numeric/ublas/triangular.hpp"
00051 #include "boost/numeric/ublas/matrix_expression.hpp"
00052
00053
00054 #include "viennacl/linalg/prod.hpp"
00055 #include "viennacl/matrix.hpp"
00056 #include "viennacl/compressed_matrix.hpp"
00057 #include "viennacl/linalg/compressed_matrix_operations.hpp"
00058 #include "viennacl/linalg/matrix_operations.hpp"
00059 #include "viennacl/scalar.hpp"
00060 #include "viennacl/linalg/inner_prod.hpp"
00061 #include "viennacl/linalg/ilu.hpp"
00062 #include "viennacl/ocl/backend.hpp"
00063 #include "viennacl/linalg/kernels/spai_source.h"
00064 #include "viennacl/linalg/kernels/spai_kernels.h"
00065
00066
00067
00068 #define VIENNACL_SPAI_K_b 20
00069
00070 namespace viennacl
00071 {
00072 namespace linalg
00073 {
00074 namespace detail
00075 {
00076 namespace spai
00077 {
00078
00079
00080 template<typename SparseVectorType>
00081 void print_sparse_vector(const SparseVectorType& v){
00082 for(typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it!= v.end(); ++vec_it){
00083 std::cout<<"[ "<<vec_it->first<<" ]:"<<vec_it->second<<std::endl;
00084 }
00085 }
00086 template<typename DenseMatrixType>
00087 void print_matrix( DenseMatrixType & m){
00088 for(int i = 0; i < m.size2(); ++i){
00089 for(int j = 0; j < m.size1(); ++j){
00090 std::cout<<m(j, i)<<" ";
00091 }
00092 std::cout<<std::endl;
00093 }
00094 }
00095
00101 template<typename SparseVectorType, typename ScalarType>
00102 void add_sparse_vectors(const SparseVectorType& v, const ScalarType b, SparseVectorType& res_v){
00103 for(typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it){
00104 res_v[v_it->first] += b*v_it->second;
00105 }
00106 }
00107
00114 template<typename SparseVectorType, typename ScalarType>
00115 void compute_spai_residual(const std::vector<SparseVectorType>& A_v_c, const SparseVectorType& v,
00116 const unsigned int ind, SparseVectorType& res){
00117 for(typename SparseVectorType::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it){
00118 add_sparse_vectors(A_v_c[v_it->first], v_it->second, res);
00119 }
00120 res[ind] -= static_cast<ScalarType>(1);
00121 }
00122
00129 template<typename SparseVectorType>
00130 void build_index_set(const std::vector<SparseVectorType>& A_v_c, const SparseVectorType& v, std::vector<unsigned int>& J,
00131 std::vector<unsigned int>& I){
00132 buildColumnIndexSet(v, J);
00133 projectRows(A_v_c, J, I);
00134 }
00135
00136
00146 template<typename SparseMatrixType, typename DenseMatrixType, typename SparseVectorType, typename VectorType>
00147 void block_set_up(const SparseMatrixType& A,
00148 const std::vector<SparseVectorType>& A_v_c,
00149 const std::vector<SparseVectorType>& M_v,
00150 std::vector<std::vector<unsigned int> >& g_I,
00151 std::vector<std::vector<unsigned int> >& g_J,
00152 std::vector<DenseMatrixType>& g_A_I_J,
00153 std::vector<VectorType>& g_b_v){
00154 #ifdef _OPENMP
00155 #pragma omp parallel for
00156 #endif
00157 for(std::size_t i = 0; i < M_v.size(); ++i){
00158 build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
00159 initProjectSubMatrix(A, g_J[i], g_I[i], g_A_I_J[i]);
00160
00161 single_qr(g_A_I_J[i], g_b_v[i]);
00162
00163 }
00164 }
00165
00172 template<typename SparseVectorType>
00173 void index_set_up(const std::vector<SparseVectorType>& A_v_c,
00174 const std::vector<SparseVectorType>& M_v,
00175 std::vector<std::vector<unsigned int> >& g_J,
00176 std::vector<std::vector<unsigned int> >& g_I)
00177 {
00178 #ifdef _OPENMP
00179 #pragma omp parallel for
00180 #endif
00181 for(std::size_t i = 0; i < M_v.size(); ++i){
00182 build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
00183 }
00184 }
00185
00186
00198 template<typename ScalarType, unsigned int MAT_ALIGNMENT, typename SparseVectorType>
00199 void block_set_up(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A,
00200 const std::vector<SparseVectorType>& A_v_c,
00201 const std::vector<SparseVectorType>& M_v,
00202 std::vector<cl_uint> g_is_update,
00203 std::vector<std::vector<unsigned int> >& g_I,
00204 std::vector<std::vector<unsigned int> >& g_J,
00205 block_matrix & g_A_I_J,
00206 block_vector & g_bv,
00207 const unsigned int cur_iter)
00208 {
00209 bool is_empty_block;
00210
00211 index_set_up(A_v_c, M_v, g_J, g_I);
00212 block_assembly(A, g_J, g_I, g_A_I_J, g_is_update, is_empty_block, cur_iter);
00213 block_qr<ScalarType>(g_I, g_J, g_A_I_J, g_bv, g_is_update, cur_iter);
00214
00215 }
00216
00217
00218
00219
00220
00227 template<typename ScalarType, typename SparseVectorType>
00228 void custom_fan_out(const std::vector<ScalarType> & m_in,
00229 unsigned int start_m_ind,
00230 const std::vector<unsigned int> & J,
00231 SparseVectorType & m)
00232 {
00233 unsigned int cnt = 0;
00234 for (std::size_t i = 0; i < J.size(); ++i) {
00235 m[J[i]] = m_in[start_m_ind + cnt++];
00236 }
00237 }
00238
00239
00240
00241
00254 template<typename SparseVectorType, typename ScalarType>
00255 void least_square_solve(std::vector<SparseVectorType> & A_v_c,
00256 std::vector<SparseVectorType> & M_v,
00257 std::vector<std::vector<unsigned int> >& g_I,
00258 std::vector<std::vector<unsigned int> > & g_J,
00259 block_matrix & g_A_I_J_vcl,
00260 block_vector & g_bv_vcl,
00261 std::vector<SparseVectorType> & g_res,
00262 std::vector<cl_uint> & g_is_update,
00263 const spai_tag & tag,
00264 const unsigned int cur_iter){
00265 unsigned int y_sz, m_sz;
00266 std::vector<cl_uint> y_inds(M_v.size() + 1, static_cast<cl_uint>(0));
00267 std::vector<cl_uint> m_inds(M_v.size() + 1, static_cast<cl_uint>(0));
00268 get_size(g_I, y_sz);
00269 init_start_inds(g_I, y_inds);
00270 init_start_inds(g_J, m_inds);
00271
00272 std::vector<ScalarType> y_v(y_sz, static_cast<ScalarType>(0));
00273 for(std::size_t i = 0; i < M_v.size(); ++i){
00274 for(std::size_t j = 0; j < g_I[i].size(); ++j){
00275 if(g_I[i][j] == i)
00276 y_v[y_inds[i] + j] = static_cast<ScalarType>(1.0);
00277 }
00278 }
00279
00280 get_size(g_J, m_sz);
00281 std::vector<ScalarType> m_v(m_sz, static_cast<cl_uint>(0));
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293 block_vector y_v_vcl;
00294 block_vector m_v_vcl;
00295
00296 y_v_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00297 static_cast<unsigned int>(sizeof(ScalarType)*y_v.size()),
00298 &(y_v[0]));
00299 m_v_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00300 static_cast<unsigned int>(sizeof(ScalarType)*m_v.size()),
00301 &(m_v[0]));
00302 y_v_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00303 static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
00304 &(y_inds[0]));
00305 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00306 static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
00307 &(g_is_update[0]));
00308 viennacl::ocl::kernel& ls_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_least_squares");
00309 ls_kernel.local_work_size(0, 1);
00310 ls_kernel.global_work_size(0, 256);
00311 viennacl::ocl::enqueue(ls_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_bv_vcl.handle(), g_bv_vcl.handle1(), m_v_vcl.handle(),
00312 y_v_vcl.handle(), y_v_vcl.handle1(),
00313 g_A_I_J_vcl.handle1(), g_is_update_vcl,
00314
00315 static_cast<unsigned int>(M_v.size())));
00316
00317 cl_int vcl_err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00318 m_v_vcl.handle(), CL_TRUE, 0,
00319 sizeof(ScalarType)*(m_v.size()),
00320 &(m_v[0]), 0, NULL, NULL);
00321 VIENNACL_ERR_CHECK(vcl_err);
00322
00323
00324 for(std::size_t i = 0; i < M_v.size(); ++i){
00325 if(g_is_update[i]){
00326
00327 custom_fan_out(m_v, m_inds[i], g_J[i], M_v[i]);
00328 g_res[i].clear();
00329 compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(i), g_res[i]);
00330 ScalarType res_norm = 0;
00331
00332 sparse_norm_2(g_res[i], res_norm);
00333
00334
00335
00336 g_is_update[i] = (res_norm > tag.getResidualNormThreshold())&& (!tag.getIsStatic())?(1):(0);
00337
00338 }
00339 }
00340 }
00341
00342
00354 template<typename SparseVectorType, typename DenseMatrixType, typename VectorType>
00355 void least_square_solve(const std::vector<SparseVectorType>& A_v_c,
00356 std::vector<DenseMatrixType>& g_R,
00357 std::vector<VectorType>& g_b_v,
00358 std::vector<std::vector<unsigned int> >& g_I,
00359 std::vector<std::vector<unsigned int> >& g_J,
00360 std::vector<SparseVectorType>& g_res,
00361 std::vector<bool>& g_is_update,
00362 std::vector<SparseVectorType>& M_v,
00363 const spai_tag& tag){
00364 typedef typename DenseMatrixType::value_type ScalarType;
00365
00366 #ifdef _OPENMP
00367 #pragma omp parallel for
00368 #endif
00369 for(std::size_t i = 0; i < M_v.size(); ++i){
00370 if(g_is_update[i]){
00371 VectorType y = boost::numeric::ublas::zero_vector<ScalarType>(g_I[i].size());
00372
00373 projectI<VectorType, ScalarType>(g_I[i], y, static_cast<unsigned int>(tag.getBegInd() + i));
00374 apply_q_trans_vec(g_R[i], g_b_v[i], y);
00375 VectorType m_new = boost::numeric::ublas::zero_vector<ScalarType>(g_R[i].size2());
00376 backwardSolve(g_R[i], y, m_new);
00377 fanOutVector(m_new, g_J[i], M_v[i]);
00378 g_res[i].clear();
00379 compute_spai_residual<SparseVectorType, ScalarType>(A_v_c, M_v[i], static_cast<unsigned int>(tag.getBegInd() + i), g_res[i]);
00380 ScalarType res_norm = 0;
00381 sparse_norm_2(g_res[i], res_norm);
00382
00383
00384
00385 g_is_update[i] = (res_norm > tag.getResidualNormThreshold())&& (!tag.getIsStatic());
00386 }
00387 }
00388 }
00389
00390
00391 template<typename VectorType>
00392 bool is_all_update(VectorType& parallel_is_update){
00393
00394 for(unsigned int i = 0; i < parallel_is_update.size(); ++i){
00395 if(parallel_is_update[i])
00396 return true;
00397 }
00398 return false;
00399 }
00400
00401
00402
00407 template<typename SparseMatrixType, typename SparseVectorType>
00408 void vectorize_column_matrix(const SparseMatrixType& M_in, std::vector<SparseVectorType>& M_v){
00409 for(typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it!= M_in.end1(); ++row_it){
00410
00411 for(typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it){
00412 M_v[static_cast<unsigned int>(col_it.index2())][static_cast<unsigned int>(col_it.index1())] = *col_it;
00413 }
00414
00415 }
00416 }
00417
00418
00419 template<typename SparseMatrixType, typename SparseVectorType>
00420 void vectorize_row_matrix(const SparseMatrixType& M_in, std::vector<SparseVectorType>& M_v){
00421 for(typename SparseMatrixType::const_iterator1 row_it = M_in.begin1(); row_it!= M_in.end1(); ++row_it){
00422 for(typename SparseMatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it){
00423 M_v[static_cast<unsigned int>(col_it.index1())][static_cast<unsigned int>(col_it.index2())] = *col_it;
00424 }
00425 }
00426 }
00427
00428
00429
00430
00431
00432 void write_set_to_array(const std::vector<std::vector<unsigned int> >& ind_set, std::vector<cl_uint>& a){
00433 unsigned int cnt = 0;
00434
00435 for(size_t i = 0; i < ind_set.size(); ++i){
00436 for(size_t j = 0; j < ind_set[i].size(); ++j){
00437 a[cnt++] = static_cast<cl_uint>(ind_set[i][j]);
00438 }
00439 }
00440 }
00441
00442
00443
00444
00454 template<typename ScalarType, unsigned int MAT_ALIGNMENT>
00455 void block_assembly(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A, const std::vector<std::vector<unsigned int> >& g_J,
00456 const std::vector<std::vector<unsigned int> >& g_I,
00457 block_matrix& g_A_I_J_vcl,
00458 std::vector<cl_uint>& g_is_update,
00459 bool& is_empty_block,
00460 const unsigned int cur_iter){
00461
00462 unsigned int sz_I, sz_J, sz_blocks;
00463 std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
00464 std::vector<cl_uint> i_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00465 std::vector<cl_uint> j_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00466 std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00467
00468 init_start_inds(g_J, j_ind);
00469 init_start_inds(g_I, i_ind);
00470
00471 get_size(g_J, sz_J);
00472 get_size(g_I, sz_I);
00473 std::vector<cl_uint> I_set(sz_I, static_cast<cl_uint>(0));
00474
00475 std::vector<cl_uint> J_set(sz_J, static_cast<cl_uint>(0));
00476
00477
00478 write_set_to_array(g_I, I_set);
00479 write_set_to_array(g_J, J_set);
00480
00481 if(I_set.size() > 0 && J_set.size() > 0){
00482 compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
00483 std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494 block_vector set_I_vcl, set_J_vcl;
00495
00496
00497 g_A_I_J_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00498 static_cast<unsigned int>(sizeof(ScalarType)*(sz_blocks)),
00499 &(con_A_I_J[0]));
00500
00501 g_A_I_J_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00502 static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<cl_uint>(g_I.size())),
00503 &(matrix_dims[0]));
00504
00505 g_A_I_J_vcl.handle2() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00506 static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
00507 &(blocks_ind[0]));
00508
00509 set_I_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00510 static_cast<unsigned int>(sizeof(cl_uint)*sz_I),
00511 &(I_set[0]));
00512
00513 set_J_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00514 static_cast<unsigned int>(sizeof(cl_uint)*sz_J),
00515 &(J_set[0]));
00516
00517 set_I_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00518 static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
00519 &(i_ind[0]));
00520
00521 set_J_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00522 static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
00523 &(j_ind[0]));
00524
00525 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00526 static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
00527 &(g_is_update[0]));
00528 viennacl::ocl::kernel& assembly_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "assemble_blocks");
00529 assembly_kernel.local_work_size(0, 1);
00530 assembly_kernel.global_work_size(0, 256);
00531 viennacl::ocl::enqueue(assembly_kernel(A.handle1(), A.handle2(), A.handle(),
00532 set_I_vcl.handle(), set_J_vcl.handle(), set_I_vcl.handle1(),
00533 set_J_vcl.handle1(),
00534 g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(), g_A_I_J_vcl.handle(),
00535 g_is_update_vcl,
00536 static_cast<unsigned int>(g_I.size())));
00537 is_empty_block = false;
00538 }else{
00539 is_empty_block = true;
00540 }
00541 }
00542
00543
00544
00550 template<typename SparseMatrixType, typename SparseVectorType>
00551 void insert_sparse_columns(const std::vector<SparseVectorType>& M_v,
00552 SparseMatrixType& M,
00553 bool is_right){
00554 if (is_right)
00555 {
00556 for(unsigned int i = 0; i < M_v.size(); ++i){
00557 for(typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it!=M_v[i].end(); ++vec_it){
00558 M(vec_it->first, i) = vec_it->second;
00559 }
00560 }
00561 }
00562 else
00563 {
00564 for(unsigned int i = 0; i < M_v.size(); ++i){
00565 for(typename SparseVectorType::const_iterator vec_it = M_v[i].begin(); vec_it!=M_v[i].end(); ++vec_it){
00566 M(i, vec_it->first) = vec_it->second;
00567 }
00568 }
00569 }
00570 }
00571
00576 template<typename MatrixType>
00577 void sparse_transpose(const MatrixType& A_in, MatrixType& A){
00578 typedef typename MatrixType::value_type ScalarType;
00579 std::vector<std::map<size_t, ScalarType> > temp_A(A_in.size2());
00580 A.resize(A_in.size2(), A_in.size1(), false);
00581
00582 for (typename MatrixType::const_iterator1 row_it = A_in.begin1();
00583 row_it != A_in.end1();
00584 ++row_it)
00585 {
00586 for (typename MatrixType::const_iterator2 col_it = row_it.begin();
00587 col_it != row_it.end();
00588 ++col_it)
00589 {
00590 temp_A[col_it.index2()][col_it.index1()] = *col_it;
00591 }
00592 }
00593
00594 for (size_t i=0; i<temp_A.size(); ++i)
00595 {
00596 for (typename std::map<size_t, ScalarType>::const_iterator it = temp_A[i].begin();
00597 it != temp_A[i].end();
00598 ++it)
00599 A(i, it->first) = it->second;
00600 }
00601 }
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00619 template <typename MatrixType>
00620 void computeSPAI(const MatrixType & A, MatrixType & M, spai_tag & tag){
00621 typedef typename MatrixType::value_type ScalarType;
00622 typedef typename boost::numeric::ublas::vector<ScalarType> VectorType;
00623 typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;
00624 typedef typename boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;
00625
00626 unsigned int cur_iter = 0;
00627 tag.setBegInd(0); tag.setEndInd(VIENNACL_SPAI_K_b);
00628 bool go_on = true;
00629 std::vector<SparseVectorType> A_v_c(M.size2());
00630 std::vector<SparseVectorType> M_v(M.size2());
00631 vectorize_column_matrix(A, A_v_c);
00632 vectorize_column_matrix(M, M_v);
00633
00634
00635 while(go_on){
00636 go_on = (tag.getEndInd() < static_cast<long>(M.size2()));
00637 cur_iter = 0;
00638 unsigned int l_sz = tag.getEndInd() - tag.getBegInd();
00639
00640 std::vector<bool> g_is_update(l_sz, true);
00641
00642
00643
00644
00645
00646
00647 std::vector<SparseVectorType> l_M_v(l_sz);
00648
00649 std::copy(M_v.begin() + tag.getBegInd(), M_v.begin() + tag.getEndInd(), l_M_v.begin());
00650
00651
00652
00653
00654
00655
00656
00657
00658 std::vector<DenseMatrixType> g_A_I_J(l_sz);
00659
00660 std::vector<VectorType> g_b_v(l_sz);
00661
00662 std::vector<SparseVectorType> g_res(l_sz);
00663
00664 std::vector<std::vector<unsigned int> > g_I(l_sz);
00665
00666 std::vector<std::vector<unsigned int> > g_J(l_sz);
00667 while((cur_iter < tag.getIterationLimit())&&is_all_update(g_is_update)){
00668
00669
00670 if(cur_iter == 0) block_set_up(A, A_v_c, l_M_v, g_I, g_J, g_A_I_J, g_b_v);
00671 else block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);
00672
00673
00674 least_square_solve(A_v_c, g_A_I_J, g_b_v, g_I, g_J, g_res, g_is_update, l_M_v, tag);
00675
00676 if(tag.getIsStatic()) break;
00677 cur_iter++;
00678
00679
00680 }
00681 std::copy(l_M_v.begin(), l_M_v.end(), M_v.begin() + tag.getBegInd());
00682 tag.setBegInd(tag.getEndInd());
00683 tag.setEndInd(std::min(static_cast<long>(tag.getBegInd() + VIENNACL_SPAI_K_b), static_cast<long>(M.size2())));
00684
00685
00686 }
00687 M.resize(M.size1(), M.size2(), false);
00688 insert_sparse_columns(M_v, M, tag.getIsRight());
00689 }
00690
00691
00692
00700 template <typename ScalarType, unsigned int MAT_ALIGNMENT>
00701 void computeSPAI(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A,
00702 const boost::numeric::ublas::compressed_matrix<ScalarType>& cpu_A,
00703 boost::numeric::ublas::compressed_matrix<ScalarType>& cpu_M,
00704 viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& M,
00705 const spai_tag& tag){
00706 typedef typename boost::numeric::ublas::vector<ScalarType> VectorType;
00707 typedef typename viennacl::linalg::detail::spai::sparse_vector<ScalarType> SparseVectorType;
00708 typedef typename boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;
00709 typedef typename boost::numeric::ublas::compressed_matrix<ScalarType> CPUMatrixType;
00710
00711
00712 unsigned int cur_iter = 0;
00713 std::vector<cl_uint> g_is_update(cpu_M.size2(), static_cast<cl_uint>(1));
00714
00715
00716 std::vector<SparseVectorType> A_v_c(cpu_M.size2());
00717 std::vector<SparseVectorType> M_v(cpu_M.size2());
00718 vectorize_column_matrix(cpu_A, A_v_c);
00719 vectorize_column_matrix(cpu_M, M_v);
00720 std::vector<SparseVectorType> g_res(cpu_M.size2());
00721 std::vector<std::vector<unsigned int> > g_I(cpu_M.size2());
00722 std::vector<std::vector<unsigned int> > g_J(cpu_M.size2());
00723
00724
00725 block_matrix g_A_I_J_vcl;
00726 block_vector g_bv_vcl;
00727 while((cur_iter < tag.getIterationLimit())&&is_all_update(g_is_update)){
00728
00729
00730
00731
00732 if(cur_iter == 0) block_set_up(A, A_v_c, M_v, g_is_update, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, cur_iter);
00733 else block_update(A, A_v_c, g_is_update, g_res, g_J, g_I, g_A_I_J_vcl, g_bv_vcl, tag, cur_iter);
00734
00735
00736
00737
00738 least_square_solve<SparseVectorType, ScalarType>(A_v_c, M_v, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, g_res, g_is_update, tag, cur_iter);
00739
00740 if(tag.getIsStatic()) break;
00741 cur_iter++;
00742 }
00743 cpu_M.resize(cpu_M.size1(), cpu_M.size2(), false);
00744 insert_sparse_columns(M_v, cpu_M, tag.getIsRight());
00745
00746 M.resize(static_cast<unsigned int>(cpu_M.size1()), static_cast<unsigned int>(cpu_M.size2()));
00747 viennacl::copy(cpu_M, M);
00748 }
00749
00750 }
00751 }
00752 }
00753 }
00754 #endif