00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00026 #include <utility>
00027 #include <iostream>
00028 #include <fstream>
00029 #include <string>
00030 #include <algorithm>
00031 #include <vector>
00032 #include <math.h>
00033 #include <map>
00034
00035
00036
00037 #include "boost/numeric/ublas/vector.hpp"
00038 #include "boost/numeric/ublas/matrix.hpp"
00039 #include "boost/numeric/ublas/matrix_proxy.hpp"
00040 #include "boost/numeric/ublas/vector_proxy.hpp"
00041 #include "boost/numeric/ublas/storage.hpp"
00042 #include "boost/numeric/ublas/io.hpp"
00043 #include "boost/numeric/ublas/lu.hpp"
00044 #include "boost/numeric/ublas/triangular.hpp"
00045 #include "boost/numeric/ublas/matrix_expression.hpp"
00046
00047 #include "viennacl/linalg/prod.hpp"
00048 #include "viennacl/matrix.hpp"
00049 #include "viennacl/compressed_matrix.hpp"
00050 #include "viennacl/linalg/compressed_matrix_operations.hpp"
00051 #include "viennacl/linalg/matrix_operations.hpp"
00052 #include "viennacl/scalar.hpp"
00053 #include "viennacl/linalg/cg.hpp"
00054 #include "viennacl/linalg/inner_prod.hpp"
00055 #include "viennacl/linalg/ilu.hpp"
00056 #include "viennacl/ocl/backend.hpp"
00057
00058 #include "viennacl/linalg/detail/spai/block_matrix.hpp"
00059 #include "viennacl/linalg/detail/spai/block_vector.hpp"
00060 #include "viennacl/linalg/detail/spai/qr.hpp"
00061 #include "viennacl/linalg/detail/spai/spai_tag.hpp"
00062 #include "viennacl/linalg/kernels/spai_source.h"
00063 #include "viennacl/linalg/kernels/spai_kernels.h"
00064
00065 namespace viennacl
00066 {
00067 namespace linalg
00068 {
00069 namespace detail
00070 {
00071 namespace spai
00072 {
00073
00074 typedef std::pair<unsigned int, double> PairT;
00075 struct CompareSecond{
00076 bool operator()(const PairT& left, const PairT& right)
00077 {
00078 return static_cast<double>(left.second) > static_cast<double>(right.second);
00079 }
00080 };
00081
00082
00089 template<typename SparseMatrixType, typename DenseMatrixType>
00090 void initProjectSubMatrix(const SparseMatrixType& A_in, const std::vector<unsigned int>& J, std::vector<unsigned int>& I,
00091 DenseMatrixType& A_out){
00092 typedef typename DenseMatrixType::value_type ScalarType;
00093 A_out.resize(I.size(), J.size(), false);
00094 for(size_t j = 0; j < J.size(); ++j){
00095 for(size_t i = 0; i < I.size(); ++i){
00096 A_out(i,j) = A_in(I[i],J[j]);
00097 }
00098 }
00099 }
00100
00105 bool isInIndexSet(const std::vector<unsigned int>& J, const unsigned int& ind){
00106 return (std::find(J.begin(), J.end(), ind) != J.end());
00107 }
00108
00114 template<typename MatrixType>
00115 void composeNewR(const MatrixType& A, const MatrixType& R_n, MatrixType& R){
00116 typedef typename MatrixType::value_type ScalarType;
00117 size_t row_n = R_n.size1() - (A.size1() - R.size2());
00118 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(R.size1() + row_n, R.size2() + A.size2());
00119
00120 boost::numeric::ublas::project(C, boost::numeric::ublas::range(0,R.size1()), boost::numeric::ublas::range(0, R.size2())) += R;
00121
00122 boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, R.size2()), boost::numeric::ublas::range(R.size2(),
00123 R.size2() + A.size2())) +=
00124 boost::numeric::ublas::project(A, boost::numeric::ublas::range(0, R.size2()), boost::numeric::ublas::range(0, A.size2()));
00125
00126 if(R_n.size1() > 0 && R_n.size2() > 0)
00127 boost::numeric::ublas::project(C, boost::numeric::ublas::range(R.size2(), R.size1() + row_n),
00128 boost::numeric::ublas::range(R.size2(), R.size2() + A.size2())) += R_n;
00129 R = C;
00130 }
00131
00136 template<typename VectorType>
00137 void composeNewVector(const VectorType& v_n, VectorType& v){
00138 typedef typename VectorType::value_type ScalarType;
00139 VectorType w = boost::numeric::ublas::zero_vector<ScalarType>(v.size() + v_n.size());
00140 boost::numeric::ublas::project(w, boost::numeric::ublas::range(0, v.size())) += v;
00141 boost::numeric::ublas::project(w, boost::numeric::ublas::range(v.size(), v.size() + v_n.size())) += v_n;
00142 v = w;
00143 }
00144
00149 template<typename SparseVectorType, typename ScalarType>
00150 void sparse_norm_2(const SparseVectorType& v, ScalarType& norm){
00151 for(typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it){
00152 norm += (vec_it->second)*(vec_it->second);
00153 }
00154 norm = std::sqrt(norm);
00155 }
00156
00162 template<typename SparseVectorType, typename ScalarType>
00163 void sparse_inner_prod(const SparseVectorType& v1, const SparseVectorType& v2, ScalarType& res_v){
00164 typename SparseVectorType::const_iterator v_it1 = v1.begin();
00165 typename SparseVectorType::const_iterator v_it2 = v2.begin();
00166 while((v_it1 != v1.end())&&(v_it2 != v2.end())){
00167 if(v_it1->first == v_it2->first){
00168 res_v += (v_it1->second)*(v_it2->second);
00169 ++v_it1;
00170 ++v_it2;
00171 }
00172 else if(v_it1->first < v_it2->first){
00173 ++v_it1;
00174 }
00175 else
00176 ++v_it2;
00177
00178
00179 }
00180 }
00181
00189 template <typename SparseVectorType, typename ScalarType>
00190 bool buildAugmentedIndexSet(const std::vector<SparseVectorType>& A_v_c,
00191 const SparseVectorType& res,
00192 std::vector<unsigned int>& J,
00193 std::vector<unsigned int>& J_u,
00194 const spai_tag& tag){
00195 std::vector<std::pair<unsigned int, ScalarType> > p;
00196 size_t cur_size = 0;
00197 ScalarType inprod, norm2;
00198
00199 for(typename SparseVectorType::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it){
00200 if(!isInIndexSet(J, res_it->first) && (std::abs(res_it->second) > tag.getResidualThreshold())){
00201 inprod = norm2 = 0;
00202 sparse_inner_prod(res, A_v_c[res_it->first], inprod);
00203 sparse_norm_2(A_v_c[res_it->first], norm2);
00204 p.push_back(std::pair<size_t, ScalarType>(res_it->first, (inprod*inprod)/(norm2*norm2)));
00205 }
00206 }
00207
00208 std::sort(p.begin(), p.end(), CompareSecond());
00209 while ((cur_size < J.size())&&(p.size() > 0)) {
00210 J_u.push_back(p[0].first);
00211 p.erase(p.begin());
00212 cur_size++;
00213 }
00214 p.clear();
00215 return (cur_size > 0);
00216 }
00217
00224 template<typename SparseVectorType>
00225 void buildNewRowSet(const std::vector<SparseVectorType>& A_v_c, const std::vector<unsigned int>& I,
00226 const std::vector<unsigned int>& J_n, std::vector<unsigned int>& I_n){
00227 for(size_t i = 0; i < J_n.size(); ++i){
00228 for(typename SparseVectorType::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it){
00229 if(!isInIndexSet(I, col_it->first)&&!isInIndexSet(I_n, col_it->first)){
00230 I_n.push_back(col_it->first);
00231 }
00232 }
00233 }
00234 }
00235
00241 template<typename MatrixType>
00242 void QRBlockComposition(const MatrixType& A_I_J, const MatrixType& A_I_J_u, MatrixType& A_I_u_J_u){
00243 typedef typename MatrixType::value_type ScalarType;
00244 size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
00245 size_t row_n2 = A_I_u_J_u.size1();
00246 size_t row_n = row_n1 + row_n2;
00247 size_t col_n = A_I_J_u.size2();
00248 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(row_n, col_n);
00249 boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, row_n1), boost::numeric::ublas::range(0, col_n)) +=
00250 boost::numeric::ublas::project(A_I_J_u, boost::numeric::ublas::range(A_I_J.size2(), A_I_J_u.size1()),
00251 boost::numeric::ublas::range(0, col_n));
00252
00253 boost::numeric::ublas::project(C, boost::numeric::ublas::range(row_n1, row_n1 + row_n2),
00254 boost::numeric::ublas::range(0, col_n)) += A_I_u_J_u;
00255 A_I_u_J_u = C;
00256 }
00257
00269 template<typename SparseMatrixType, typename SparseVectorType, typename DenseMatrixType, typename VectorType>
00270 void block_update(const SparseMatrixType& A, const std::vector<SparseVectorType>& A_v_c,
00271 std::vector<SparseVectorType>& g_res,
00272 std::vector<bool>& g_is_update,
00273 std::vector<std::vector<unsigned int> >& g_I,
00274 std::vector<std::vector<unsigned int> >& g_J,
00275 std::vector<VectorType>& g_b_v,
00276 std::vector<DenseMatrixType>& g_A_I_J,
00277 spai_tag const & tag){
00278 typedef typename DenseMatrixType::value_type ScalarType;
00279
00280 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
00281
00282 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
00283
00284 std::vector<DenseMatrixType> g_A_I_J_u(g_J.size());
00285
00286 std::vector<DenseMatrixType> g_A_I_u_J_u(g_J.size());
00287
00288 std::vector<VectorType> g_b_v_u(g_J.size());
00289 #ifdef _OPENMP
00290 #pragma omp parallel for
00291 #endif
00292 for(std::size_t i = 0; i < g_J.size(); ++i){
00293 if(g_is_update[i]){
00294 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag)){
00295
00296 initProjectSubMatrix(A, g_J_u[i], g_I[i], g_A_I_J_u[i]);
00297
00298 apply_q_trans_mat(g_A_I_J[i], g_b_v[i], g_A_I_J_u[i]);
00299
00300 buildNewRowSet(A_v_c, g_I[i], g_J_u[i], g_I_u[i]);
00301 initProjectSubMatrix(A, g_J_u[i], g_I_u[i], g_A_I_u_J_u[i]);
00302
00303 QRBlockComposition(g_A_I_J[i], g_A_I_J_u[i], g_A_I_u_J_u[i]);
00304
00305 single_qr(g_A_I_u_J_u[i], g_b_v_u[i]);
00306
00307 composeNewR(g_A_I_J_u[i], g_A_I_u_J_u[i], g_A_I_J[i]);
00308 composeNewVector(g_b_v_u[i], g_b_v[i]);
00309
00310 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
00311 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
00312 }else{
00313 g_is_update[i] = false;
00314 }
00315 }
00316 }
00317 }
00318
00319
00320
00321
00331 template<typename ScalarType>
00332 void block_q_multiplication(const std::vector<std::vector<unsigned int> >& g_J_u,
00333 const std::vector<std::vector<unsigned int> >& g_I,
00334 block_matrix& g_A_I_J_vcl,
00335 block_vector& g_bv_vcl,
00336 block_matrix& g_A_I_J_u_vcl,
00337 std::vector<cl_uint>& g_is_update,
00338 const unsigned int cur_iter){
00339 unsigned int local_r_n, local_c_n, sz_blocks;
00340 get_max_block_size(g_I, local_r_n);
00341 get_max_block_size(g_J_u, local_c_n);
00342
00343 std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
00344 std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00345 compute_blocks_size(g_I, g_J_u, sz_blocks, blocks_ind, matrix_dims);
00346 std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00358 static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
00359 &(g_is_update[0]));
00360 viennacl::ocl::kernel& block_q_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_q_mult");
00361 block_q_kernel.local_work_size(0, local_c_n);
00362 block_q_kernel.global_work_size(0, 256);
00363 viennacl::ocl::enqueue(block_q_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
00364 g_bv_vcl.handle(),
00365 g_bv_vcl.handle1(), g_A_I_J_vcl.handle1(), g_A_I_J_u_vcl.handle1(), g_is_update_vcl,
00366 viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
00367 static_cast<cl_uint>(g_I.size())));
00368 }
00369
00376 void assemble_qr_row_inds(const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> > g_J,
00377 const std::vector<std::vector<unsigned int> >& g_I_u,
00378 std::vector<std::vector<unsigned int> >& g_I_q){
00379 #ifdef _OPENMP
00380 #pragma omp parallel for
00381 #endif
00382 for(std::size_t i = 0; i < g_I.size(); ++i){
00383 for(std::size_t j = g_J[i].size(); j < g_I[i].size(); ++j){
00384 g_I_q[i].push_back(g_I[i][j]);
00385 }
00386
00387 for(std::size_t j = 0; j < g_I_u[i].size(); ++j){
00388 g_I_q[i].push_back(g_I_u[i][j]);
00389 }
00390 }
00391 }
00392
00407 template<typename ScalarType>
00408 void assemble_qr_block(
00409 const std::vector<std::vector<unsigned int> >& g_J,
00410 const std::vector<std::vector<unsigned int> >& g_I,
00411 const std::vector<std::vector<unsigned int> >& g_J_u,
00412 const std::vector<std::vector<unsigned int> >& g_I_u,
00413 std::vector<std::vector<unsigned int> >& g_I_q,
00414 block_matrix& g_A_I_J_u_vcl,
00415 viennacl::ocl::handle<cl_mem>& matrix_dimensions,
00416 block_matrix& g_A_I_u_J_u_vcl,
00417 std::vector<cl_uint>& g_is_update,
00418 const bool is_empty_block,
00419 const unsigned int cur_iter){
00420
00421 assemble_qr_row_inds(g_I, g_J, g_I_u, g_I_q);
00422 unsigned int sz_blocks;
00423 std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
00424 std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00425 compute_blocks_size(g_I_q, g_J_u, sz_blocks, blocks_ind, matrix_dims);
00426 std::vector<ScalarType> con_A_I_J_q(sz_blocks, static_cast<ScalarType>(0));
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448 block_matrix g_A_I_J_q_vcl;
00449
00450 g_A_I_J_q_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00451 static_cast<unsigned int>(sizeof(ScalarType)*sz_blocks),
00452 &(con_A_I_J_q[0]));
00453 g_A_I_J_q_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00454 static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
00455 &(matrix_dims[0]));
00456 g_A_I_J_q_vcl.handle2() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00457 static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
00458 &(blocks_ind[0]));
00459 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00460 static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
00461 &(g_is_update[0]));
00462
00463 if(!is_empty_block){
00464 viennacl::ocl::kernel& qr_assembly_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_qr_assembly");
00465 qr_assembly_kernel.local_work_size(0, 1);
00466 qr_assembly_kernel.global_work_size(0, 256);
00467 viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions,
00468 g_A_I_J_u_vcl.handle(),
00469 g_A_I_J_u_vcl.handle2(),
00470 g_A_I_J_u_vcl.handle1(),
00471 g_A_I_u_J_u_vcl.handle(),
00472 g_A_I_u_J_u_vcl.handle2(),
00473 g_A_I_u_J_u_vcl.handle1(),
00474 g_A_I_J_q_vcl.handle(),
00475 g_A_I_J_q_vcl.handle2(),
00476 g_A_I_J_q_vcl.handle1(),
00477 g_is_update_vcl,
00478 static_cast<unsigned int>(g_I.size())));
00479 }else{
00480 viennacl::ocl::kernel& qr_assembly_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_qr_assembly_1");
00481 qr_assembly_kernel.local_work_size(0, 1);
00482 qr_assembly_kernel.global_work_size(0, 256);
00483 viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions, g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
00484 g_A_I_J_u_vcl.handle1(),
00485 g_A_I_J_q_vcl.handle(),
00486 g_A_I_J_q_vcl.handle2(), g_A_I_J_q_vcl.handle1(),
00487 g_is_update_vcl,
00488 static_cast<unsigned int>(g_I.size())));
00489 }
00490 g_A_I_u_J_u_vcl.handle() = g_A_I_J_q_vcl.handle();
00491 g_A_I_u_J_u_vcl.handle1() = g_A_I_J_q_vcl.handle1();
00492 g_A_I_u_J_u_vcl.handle2() = g_A_I_J_q_vcl.handle2();
00493 }
00494
00506 template<typename ScalarType>
00507 void assemble_r(std::vector<std::vector<unsigned int> >& g_I, std::vector<std::vector<unsigned int> >& g_J,
00508 block_matrix& g_A_I_J_vcl,
00509 block_matrix& g_A_I_J_u_vcl,
00510 block_matrix& g_A_I_u_J_u_vcl,
00511 block_vector& g_bv_vcl,
00512 block_vector& g_bv_vcl_u,
00513 std::vector<cl_uint>& g_is_update,
00514 const unsigned int cur_iter){
00515 std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
00516 std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
00517 std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
00518 unsigned int sz_blocks, bv_size;
00519 compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
00520 get_size(g_J, bv_size);
00521 init_start_inds(g_J, start_bv_r_inds);
00522 std::vector<ScalarType> con_A_I_J_r(sz_blocks, static_cast<ScalarType>(0));
00523 std::vector<ScalarType> b_v_r(bv_size, static_cast<ScalarType>(0));
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539 block_matrix g_A_I_J_r_vcl;
00540 block_vector g_bv_r_vcl;
00541 g_A_I_J_r_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00542 static_cast<unsigned int>(sizeof(ScalarType)*sz_blocks),
00543 &(con_A_I_J_r[0]));
00544 g_A_I_J_r_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00545 static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
00546 &(matrix_dims[0]));
00547 g_A_I_J_r_vcl.handle2() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00548 static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
00549 &(blocks_ind[0]));
00550 g_bv_r_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00551 static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
00552 &(b_v_r[0]));
00553 g_bv_r_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00554 static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
00555 &(start_bv_r_inds[0]));
00556 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00557 static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
00558 &(g_is_update[0]));
00559 viennacl::ocl::kernel& r_assembly_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_r_assembly");
00560 r_assembly_kernel.local_work_size(0, 1);
00561 r_assembly_kernel.global_work_size(0, 256);
00562
00563 viennacl::ocl::enqueue(r_assembly_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(),
00564 g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(), g_A_I_J_u_vcl.handle1(),
00565 g_A_I_u_J_u_vcl.handle(), g_A_I_u_J_u_vcl.handle2(), g_A_I_u_J_u_vcl.handle1(),
00566 g_A_I_J_r_vcl.handle(), g_A_I_J_r_vcl.handle2(), g_A_I_J_r_vcl.handle1(),
00567 g_is_update_vcl, static_cast<cl_uint>(g_I.size())));
00568
00569 viennacl::ocl::kernel & bv_assembly_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_bv_assembly");
00570 bv_assembly_kernel.local_work_size(0, 1);
00571 bv_assembly_kernel.global_work_size(0, 256);
00572 viennacl::ocl::enqueue(bv_assembly_kernel(g_bv_vcl.handle(), g_bv_vcl.handle1(), g_A_I_J_vcl.handle1(), g_bv_vcl_u.handle(),
00573 g_bv_vcl_u.handle1(), g_A_I_J_u_vcl.handle1(),
00574 g_bv_r_vcl.handle(), g_bv_r_vcl.handle1(), g_A_I_J_r_vcl.handle1(), g_is_update_vcl,
00575 static_cast<cl_uint>(g_I.size())));
00576 g_bv_vcl.handle() = g_bv_r_vcl.handle();
00577 g_bv_vcl.handle1() = g_bv_r_vcl.handle1();
00578
00579 g_A_I_J_vcl.handle() = g_A_I_J_r_vcl.handle();
00580 g_A_I_J_vcl.handle2() = g_A_I_J_r_vcl.handle2();
00581 g_A_I_J_vcl.handle1() = g_A_I_J_r_vcl.handle1();
00582 }
00583
00596 template<typename ScalarType, unsigned int MAT_ALIGNMENT, typename SparseVectorType>
00597 void block_update(const viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT>& A, const std::vector<SparseVectorType>& A_v_c,
00598 std::vector<cl_uint>& g_is_update,
00599 std::vector<SparseVectorType>& g_res,
00600 std::vector<std::vector<unsigned int> >& g_J,
00601 std::vector<std::vector<unsigned int> >& g_I,
00602 block_matrix& g_A_I_J_vcl,
00603 block_vector& g_bv_vcl,
00604 spai_tag const & tag,
00605 const unsigned int cur_iter){
00606
00607 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
00608
00609 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
00610
00611 std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
00612
00613 block_matrix g_A_I_J_u_vcl;
00614
00615 block_matrix g_A_I_u_J_u_vcl;
00616 bool is_empty_block;
00617
00618 block_vector g_bv_u_vcl;
00619 #ifdef _OPENMP
00620 #pragma omp parallel for
00621 #endif
00622 for(std::size_t i = 0; i < g_J.size(); ++i){
00623 if(g_is_update[i]){
00624 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag)){
00625 buildNewRowSet(A_v_c, g_I[i], g_J_u[i], g_I_u[i]);
00626 }
00627 }
00628 }
00629
00630 block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block, cur_iter);
00631
00632 block_q_multiplication<ScalarType>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, cur_iter);
00633
00634 block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block, cur_iter);
00635 assemble_qr_block<ScalarType>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
00636 g_A_I_u_J_u_vcl, g_is_update, is_empty_block, cur_iter);
00637
00638 block_qr<ScalarType>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, cur_iter);
00639
00640 #ifdef _OPENMP
00641 #pragma omp parallel for
00642 #endif
00643 for(std::size_t i = 0; i < g_J.size(); ++i){
00644 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
00645 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
00646 }
00647 assemble_r<ScalarType>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl, g_bv_vcl, g_bv_u_vcl, g_is_update, cur_iter);
00648 }
00649
00650 }
00651 }
00652 }
00653 }
00654 #endif