ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
spai.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <utility>
26 #include <iostream>
27 #include <fstream>
28 #include <string>
29 #include <algorithm>
30 #include <vector>
31 #include <math.h>
32 #include <map>
33 
34 //local includes
36 #include "viennacl/linalg/qr.hpp"
42 
43 //boost includes
44 #include "boost/numeric/ublas/vector.hpp"
45 #include "boost/numeric/ublas/matrix.hpp"
46 #include "boost/numeric/ublas/matrix_proxy.hpp"
47 #include "boost/numeric/ublas/vector_proxy.hpp"
48 #include "boost/numeric/ublas/storage.hpp"
49 #include "boost/numeric/ublas/io.hpp"
50 #include "boost/numeric/ublas/lu.hpp"
51 #include "boost/numeric/ublas/triangular.hpp"
52 #include "boost/numeric/ublas/matrix_expression.hpp"
53 
54 // ViennaCL includes
55 #include "viennacl/linalg/prod.hpp"
56 #include "viennacl/matrix.hpp"
60 #include "viennacl/scalar.hpp"
62 #include "viennacl/linalg/ilu.hpp"
63 #include "viennacl/ocl/backend.hpp"
65 
66 
67 
68 #define VIENNACL_SPAI_K_b 20
69 
70 namespace viennacl
71 {
72 namespace linalg
73 {
74 namespace detail
75 {
76 namespace spai
77 {
78 
79 //debug function for print
80 template<typename SparseVectorT>
81 void print_sparse_vector(SparseVectorT const & v)
82 {
83  for (typename SparseVectorT::const_iterator vec_it = v.begin(); vec_it!= v.end(); ++vec_it)
84  std::cout << "[ " << vec_it->first << " ]:" << vec_it->second << std::endl;
85 }
86 
87 template<typename DenseMatrixT>
88 void print_matrix(DenseMatrixT & m)
89 {
90  for (int i = 0; i < m.size2(); ++i)
91  {
92  for (int j = 0; j < m.size1(); ++j)
93  std::cout<<m(j, i)<<" ";
94  std::cout<<std::endl;
95  }
96 }
97 
104 template<typename SparseVectorT, typename NumericT>
105 void add_sparse_vectors(SparseVectorT const & v, NumericT b, SparseVectorT & res_v)
106 {
107  for (typename SparseVectorT::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
108  res_v[v_it->first] += b*v_it->second;
109 }
110 
111 //sparse-matrix - vector product
119 template<typename SparseVectorT, typename NumericT>
120 void compute_spai_residual(std::vector<SparseVectorT> const & A_v_c,
121  SparseVectorT const & v,
122  unsigned int ind,
123  SparseVectorT & res)
124 {
125  for (typename SparseVectorT::const_iterator v_it = v.begin(); v_it != v.end(); ++v_it)
126  add_sparse_vectors(A_v_c[v_it->first], v_it->second, res);
127 
128  res[ind] -= NumericT(1);
129 }
130 
138 template<typename SparseVectorT>
139 void build_index_set(std::vector<SparseVectorT> const & A_v_c,
140  SparseVectorT const & v,
141  std::vector<unsigned int> & J,
142  std::vector<unsigned int> & I)
143 {
144  buildColumnIndexSet(v, J);
145  projectRows(A_v_c, J, I);
146 }
147 
155 template<typename SparseMatrixT, typename DenseMatrixT>
156 void initProjectSubMatrix(SparseMatrixT const & A_in,
157  std::vector<unsigned int> const & J,
158  std::vector<unsigned int> & I,
159  DenseMatrixT & A_out)
160 {
161  A_out.resize(I.size(), J.size(), false);
162  for (vcl_size_t j = 0; j < J.size(); ++j)
163  for (vcl_size_t i = 0; i < I.size(); ++i)
164  A_out(i,j) = A_in(I[i],J[j]);
165 }
166 
167 
168 /************************************************** CPU BLOCK SET UP ***************************************/
169 
180 template<typename SparseMatrixT, typename DenseMatrixT, typename SparseVectorT, typename VectorT>
181 void block_set_up(SparseMatrixT const & A,
182  std::vector<SparseVectorT> const & A_v_c,
183  std::vector<SparseVectorT> const & M_v,
184  std::vector<std::vector<unsigned int> >& g_I,
185  std::vector<std::vector<unsigned int> >& g_J,
186  std::vector<DenseMatrixT>& g_A_I_J,
187  std::vector<VectorT>& g_b_v)
188 {
189 #ifdef VIENNACL_WITH_OPENMP
190  #pragma omp parallel for
191 #endif
192  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
193  {
194  vcl_size_t i = static_cast<vcl_size_t>(i2);
195  build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
196  initProjectSubMatrix(A, g_J[i], g_I[i], g_A_I_J[i]);
197  //print_matrix(g_A_I_J[i]);
198  single_qr(g_A_I_J[i], g_b_v[i]);
199  //print_matrix(g_A_I_J[i]);
200  }
201 }
202 
210 template<typename SparseVectorT>
211 void index_set_up(std::vector<SparseVectorT> const & A_v_c,
212  std::vector<SparseVectorT> const & M_v,
213  std::vector<std::vector<unsigned int> > & g_J,
214  std::vector<std::vector<unsigned int> > & g_I)
215 {
216 #ifdef VIENNACL_WITH_OPENMP
217  #pragma omp parallel for
218 #endif
219  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
220  {
221  vcl_size_t i = static_cast<vcl_size_t>(i2);
222  build_index_set(A_v_c, M_v[i], g_J[i], g_I[i]);
223  }
224 }
225 
226 /************************************************** GPU BLOCK SET UP ***************************************/
227 
239 template<typename NumericT, unsigned int AlignmentV, typename SparseVectorT>
241  std::vector<SparseVectorT> const & A_v_c,
242  std::vector<SparseVectorT> const & M_v,
243  std::vector<cl_uint> g_is_update,
244  std::vector<std::vector<unsigned int> > & g_I,
245  std::vector<std::vector<unsigned int> > & g_J,
246  block_matrix & g_A_I_J,
247  block_vector & g_bv)
248 {
250  bool is_empty_block;
251 
252  //build index set
253  index_set_up(A_v_c, M_v, g_J, g_I);
254  block_assembly(A, g_J, g_I, g_A_I_J, g_is_update, is_empty_block);
255  block_qr<NumericT>(g_I, g_J, g_A_I_J, g_bv, g_is_update, ctx);
256 }
257 
258 
259 /***************************************************************************************************/
260 /******************************** SOLVING LS PROBLEMS ON GPU ***************************************/
261 /***************************************************************************************************/
262 
270 template<typename NumericT, typename SparseVectorT>
271 void custom_fan_out(std::vector<NumericT> const & m_in,
272  unsigned int start_m_ind,
273  std::vector<unsigned int> const & J,
274  SparseVectorT & m)
275 {
276  unsigned int cnt = 0;
277  for (vcl_size_t i = 0; i < J.size(); ++i)
278  m[J[i]] = m_in[start_m_ind + cnt++];
279 }
280 
281 
282 
283 //GPU based least square problem
297 template<typename SparseVectorT, typename NumericT>
298 void least_square_solve(std::vector<SparseVectorT> & A_v_c,
299  std::vector<SparseVectorT> & M_v,
300  std::vector<std::vector<unsigned int> >& g_I,
301  std::vector<std::vector<unsigned int> > & g_J,
302  block_matrix & g_A_I_J_vcl,
303  block_vector & g_bv_vcl,
304  std::vector<SparseVectorT> & g_res,
305  std::vector<cl_uint> & g_is_update,
306  const spai_tag & tag,
307  viennacl::context ctx)
308 {
309  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
310  unsigned int y_sz, m_sz;
311  std::vector<cl_uint> y_inds(M_v.size() + 1, static_cast<cl_uint>(0));
312  std::vector<cl_uint> m_inds(M_v.size() + 1, static_cast<cl_uint>(0));
313 
314  get_size(g_I, y_sz);
315  init_start_inds(g_I, y_inds);
316  init_start_inds(g_J, m_inds);
317 
318  //create y_v
319  std::vector<NumericT> y_v(y_sz, NumericT(0));
320  for (vcl_size_t i = 0; i < M_v.size(); ++i)
321  {
322  for (vcl_size_t j = 0; j < g_I[i].size(); ++j)
323  {
324  if (g_I[i][j] == i)
325  y_v[y_inds[i] + j] = NumericT(1.0);
326  }
327  }
328  //compute m_v
329  get_size(g_J, m_sz);
330  std::vector<NumericT> m_v(m_sz, static_cast<cl_uint>(0));
331 
332  block_vector y_v_vcl;
333  block_vector m_v_vcl;
334  //prepearing memory for least square problem on GPU
335  y_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
336  static_cast<unsigned int>(sizeof(NumericT)*y_v.size()),
337  &(y_v[0]));
338  m_v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
339  static_cast<unsigned int>(sizeof(NumericT)*m_v.size()),
340  &(m_v[0]));
341  y_v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
342  static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
343  &(y_inds[0]));
344  viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
345  static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
346  &(g_is_update[0]));
349  ls_kernel.local_work_size(0, 1);
350  ls_kernel.global_work_size(0, 256);
351  viennacl::ocl::enqueue(ls_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle2(), g_bv_vcl.handle(), g_bv_vcl.handle1(), m_v_vcl.handle(),
352  y_v_vcl.handle(), y_v_vcl.handle1(),
353  g_A_I_J_vcl.handle1(), g_is_update_vcl,
354  //viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
355  static_cast<unsigned int>(M_v.size())));
356  //copy vector m_v back from GPU to CPU
357  cl_int vcl_err = clEnqueueReadBuffer(opencl_ctx.get_queue().handle().get(),
358  m_v_vcl.handle().get(), CL_TRUE, 0,
359  sizeof(NumericT)*(m_v.size()),
360  &(m_v[0]), 0, NULL, NULL);
361  VIENNACL_ERR_CHECK(vcl_err);
362 
363  //fan out vector in parallel
364  //#pragma omp parallel for
365  for (long i = 0; i < static_cast<long>(M_v.size()); ++i)
366  {
367  if (g_is_update[static_cast<vcl_size_t>(i)])
368  {
369  //faned out onto sparse vector
370  custom_fan_out(m_v, m_inds[static_cast<vcl_size_t>(i)], g_J[static_cast<vcl_size_t>(i)], M_v[static_cast<vcl_size_t>(i)]);
371  g_res[static_cast<vcl_size_t>(i)].clear();
372  compute_spai_residual<SparseVectorT, NumericT>(A_v_c, M_v[static_cast<vcl_size_t>(i)], static_cast<unsigned int>(i), g_res[static_cast<vcl_size_t>(i)]);
373  NumericT res_norm = 0;
374  //compute norm of res - just to make sure that this implementatino works correct
375  sparse_norm_2(g_res[static_cast<vcl_size_t>(i)], res_norm);
376  //std::cout<<"Residual norm of column #: "<<i<<std::endl;
377  //std::cout<<res_norm<<std::endl;
378  //std::cout<<"************************"<<std::endl;
379  g_is_update[static_cast<vcl_size_t>(i)] = (res_norm > tag.getResidualNormThreshold())&& (!tag.getIsStatic())?(1):(0);
380  }
381  }
382 }
383 
384 //CPU based least square problems
397 template<typename SparseVectorT, typename DenseMatrixT, typename VectorT>
398 void least_square_solve(std::vector<SparseVectorT> const & A_v_c,
399  std::vector<DenseMatrixT> & g_R,
400  std::vector<VectorT> & g_b_v,
401  std::vector<std::vector<unsigned int> > & g_I,
402  std::vector<std::vector<unsigned int> > & g_J,
403  std::vector<SparseVectorT> & g_res,
404  std::vector<bool> & g_is_update,
405  std::vector<SparseVectorT> & M_v,
406  spai_tag const & tag)
407 {
408  typedef typename DenseMatrixT::value_type NumericType;
409 
410 #ifdef VIENNACL_WITH_OPENMP
411  #pragma omp parallel for
412 #endif
413  for (long i2 = 0; i2 < static_cast<long>(M_v.size()); ++i2)
414  {
415  vcl_size_t i = static_cast<vcl_size_t>(i2);
416  if (g_is_update[i])
417  {
418  VectorT y = boost::numeric::ublas::zero_vector<NumericType>(g_I[i].size());
419 
420  projectI<VectorT, NumericType>(g_I[i], y, static_cast<unsigned int>(tag.getBegInd() + long(i)));
421  apply_q_trans_vec(g_R[i], g_b_v[i], y);
422 
423  VectorT m_new = boost::numeric::ublas::zero_vector<NumericType>(g_R[i].size2());
424  backwardSolve(g_R[i], y, m_new);
425  fanOutVector(m_new, g_J[i], M_v[i]);
426  g_res[i].clear();
427 
428  compute_spai_residual<SparseVectorT, NumericType>(A_v_c, M_v[i], static_cast<unsigned int>(tag.getBegInd() + long(i)), g_res[i]);
429 
430  NumericType res_norm = 0;
431  sparse_norm_2(g_res[i], res_norm);
432 // std::cout<<"Residual norm of column #: "<<i<<std::endl;
433 // std::cout<<res_norm<<std::endl;
434 // std::cout<<"************************"<<std::endl;
435  g_is_update[i] = (res_norm > tag.getResidualNormThreshold())&& (!tag.getIsStatic());
436  }
437  }
438 }
439 
440 //************************************ UPDATE CHECK ***************************************************//
441 
442 template<typename VectorType>
443 bool is_all_update(VectorType& parallel_is_update)
444 {
445  for (unsigned int i = 0; i < parallel_is_update.size(); ++i)
446  {
447  if (parallel_is_update[i])
448  return true;
449  }
450  return false;
451 }
452 
453 //********************************** MATRIX VECTORIZATION ***********************************************//
454 
455 //Matrix vectorization, column based approach
461 template<typename SparseMatrixT, typename SparseVectorT>
462 void vectorize_column_matrix(SparseMatrixT const & M_in,
463  std::vector<SparseVectorT> & M_v)
464 {
465  for (typename SparseMatrixT::const_iterator1 row_it = M_in.begin1(); row_it!= M_in.end1(); ++row_it)
466  for (typename SparseMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
467  M_v[static_cast<unsigned int>(col_it.index2())][static_cast<unsigned int>(col_it.index1())] = *col_it;
468 }
469 
470 //Matrix vectorization row based approach
471 template<typename SparseMatrixT, typename SparseVectorT>
472 void vectorize_row_matrix(SparseMatrixT const & M_in,
473  std::vector<SparseVectorT> & M_v)
474 {
475  for (typename SparseMatrixT::const_iterator1 row_it = M_in.begin1(); row_it!= M_in.end1(); ++row_it)
476  for (typename SparseMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
477  M_v[static_cast<unsigned int>(col_it.index1())][static_cast<unsigned int>(col_it.index2())] = *col_it;
478 }
479 
480 //************************************* BLOCK ASSEMBLY CODE *********************************************//
481 
482 
483 template<typename SizeT>
484 void write_set_to_array(std::vector<std::vector<SizeT> > const & ind_set,
485  std::vector<cl_uint> & a)
486 {
487  vcl_size_t cnt = 0;
488 
489  for (vcl_size_t i = 0; i < ind_set.size(); ++i)
490  for (vcl_size_t j = 0; j < ind_set[i].size(); ++j)
491  a[cnt++] = static_cast<cl_uint>(ind_set[i][j]);
492 }
493 
494 
495 
496 //assembling blocks on GPU
506 template<typename NumericT, unsigned int AlignmentV>
508  std::vector<std::vector<unsigned int> > const & g_J,
509  std::vector<std::vector<unsigned int> > const & g_I,
510  block_matrix & g_A_I_J_vcl,
511  std::vector<cl_uint> & g_is_update,
512  bool & is_empty_block)
513 {
514  //computing start indices for index sets and start indices for block matrices
515  unsigned int sz_I, sz_J, sz_blocks;
516  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
517  std::vector<cl_uint> i_ind(g_I.size() + 1, static_cast<cl_uint>(0));
518  std::vector<cl_uint> j_ind(g_I.size() + 1, static_cast<cl_uint>(0));
519  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
520  //
521  init_start_inds(g_J, j_ind);
522  init_start_inds(g_I, i_ind);
523  //
524  get_size(g_J, sz_J);
525  get_size(g_I, sz_I);
526  std::vector<cl_uint> I_set(sz_I, static_cast<cl_uint>(0));
527  //
528  std::vector<cl_uint> J_set(sz_J, static_cast<cl_uint>(0));
529 
530  // computing size for blocks
531  // writing set to arrays
532  write_set_to_array(g_I, I_set);
533  write_set_to_array(g_J, J_set);
534 
535  // if block for assembly does exist
536  if (I_set.size() > 0 && J_set.size() > 0)
537  {
539  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
540  compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
541  std::vector<NumericT> con_A_I_J(sz_blocks, NumericT(0));
542 
543  block_vector set_I_vcl, set_J_vcl;
544  //init memory on GPU
545  //contigious g_A_I_J
546  g_A_I_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
547  static_cast<unsigned int>(sizeof(NumericT)*(sz_blocks)),
548  &(con_A_I_J[0]));
549  g_A_I_J_vcl.handle().context(opencl_ctx);
550 
551  //matrix_dimensions
552  g_A_I_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
553  static_cast<unsigned int>(sizeof(cl_uint)*2*static_cast<cl_uint>(g_I.size())),
554  &(matrix_dims[0]));
555  g_A_I_J_vcl.handle1().context(opencl_ctx);
556 
557  //start_block inds
558  g_A_I_J_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
559  static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
560  &(blocks_ind[0]));
561  g_A_I_J_vcl.handle2().context(opencl_ctx);
562 
563  //set_I
564  set_I_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
565  static_cast<unsigned int>(sizeof(cl_uint)*sz_I),
566  &(I_set[0]));
567  set_I_vcl.handle().context(opencl_ctx);
568 
569  //set_J
570  set_J_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
571  static_cast<unsigned int>(sizeof(cl_uint)*sz_J),
572  &(J_set[0]));
573  set_J_vcl.handle().context(opencl_ctx);
574 
575  //i_ind
576  set_I_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
577  static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
578  &(i_ind[0]));
579  set_I_vcl.handle().context(opencl_ctx);
580 
581  //j_ind
582  set_J_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
583  static_cast<unsigned int>(sizeof(cl_uint)*(g_I.size() + 1)),
584  &(j_ind[0]));
585  set_J_vcl.handle().context(opencl_ctx);
586 
587  viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
588  static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
589  &(g_is_update[0]));
590 
593  assembly_kernel.local_work_size(0, 1);
594  assembly_kernel.global_work_size(0, 256);
595  viennacl::ocl::enqueue(assembly_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
596  set_I_vcl.handle(), set_J_vcl.handle(), set_I_vcl.handle1(),
597  set_J_vcl.handle1(),
598  g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(), g_A_I_J_vcl.handle(),
599  g_is_update_vcl,
600  static_cast<unsigned int>(g_I.size())));
601  is_empty_block = false;
602  }
603  else
604  is_empty_block = true;
605 }
606 
607 /************************************************************************************************************************/
608 
615 template<typename SparseMatrixT, typename SparseVectorT>
616 void insert_sparse_columns(std::vector<SparseVectorT> const & M_v,
617  SparseMatrixT& M,
618  bool is_right)
619 {
620  if (is_right)
621  {
622  for (unsigned int i = 0; i < M_v.size(); ++i)
623  for (typename SparseVectorT::const_iterator vec_it = M_v[i].begin(); vec_it!=M_v[i].end(); ++vec_it)
624  M(vec_it->first, i) = vec_it->second;
625  }
626  else //transposed fill of M
627  {
628  for (unsigned int i = 0; i < M_v.size(); ++i)
629  for (typename SparseVectorT::const_iterator vec_it = M_v[i].begin(); vec_it!=M_v[i].end(); ++vec_it)
630  M(i, vec_it->first) = vec_it->second;
631  }
632 }
633 
639 template<typename MatrixT>
640 void sparse_transpose(MatrixT const & A_in, MatrixT & A)
641 {
642  typedef typename MatrixT::value_type NumericType;
643 
644  std::vector<std::map<vcl_size_t, NumericType> > temp_A(A_in.size2());
645  A.resize(A_in.size2(), A_in.size1(), false);
646 
647  for (typename MatrixT::const_iterator1 row_it = A_in.begin1();
648  row_it != A_in.end1();
649  ++row_it)
650  {
651  for (typename MatrixT::const_iterator2 col_it = row_it.begin();
652  col_it != row_it.end();
653  ++col_it)
654  {
655  temp_A[col_it.index2()][col_it.index1()] = *col_it;
656  }
657  }
658 
659  for (vcl_size_t i=0; i<temp_A.size(); ++i)
660  {
661  for (typename std::map<vcl_size_t, NumericType>::const_iterator it = temp_A[i].begin();
662  it != temp_A[i].end();
663  ++it)
664  A(i, it->first) = it->second;
665  }
666 }
667 
668 
669 
670 
671 // template<typename SparseVectorType>
672 // void custom_copy(std::vector<SparseVectorType> & M_v, std::vector<SparseVectorType> & l_M_v, const unsigned int beg_ind){
673 // for (int i = 0; i < l_M_v.size(); ++i){
674 // l_M_v[i] = M_v[i + beg_ind];
675 // }
676 // }
677 
678 //CPU version
685 template<typename MatrixT>
686 void computeSPAI(MatrixT const & A,
687  MatrixT & M,
688  spai_tag & tag)
689 {
690  typedef typename MatrixT::value_type NumericT;
691  typedef typename boost::numeric::ublas::vector<NumericT> VectorType;
692  typedef typename viennacl::linalg::detail::spai::sparse_vector<NumericT> SparseVectorType;
693  typedef typename boost::numeric::ublas::matrix<NumericT> DenseMatrixType;
694 
695  //sparse matrix transpose...
696  unsigned int cur_iter = 0;
698  bool go_on = true;
699  std::vector<SparseVectorType> A_v_c(M.size2());
700  std::vector<SparseVectorType> M_v(M.size2());
701  vectorize_column_matrix(A, A_v_c);
702  vectorize_column_matrix(M, M_v);
703 
704 
705  while (go_on)
706  {
707  go_on = (tag.getEndInd() < static_cast<long>(M.size2()));
708  cur_iter = 0;
709  unsigned int l_sz = static_cast<unsigned int>(tag.getEndInd() - tag.getBegInd());
710  //std::vector<bool> g_is_update(M.size2(), true);
711  std::vector<bool> g_is_update(l_sz, true);
712 
713  //init is update
714  //init_parallel_is_update(g_is_update);
715  //std::vector<SparseVectorType> A_v_c(K);
716  //std::vector<SparseVectorType> M_v(K);
717  //vectorization of marices
718  //print_matrix(M_v);
719 
720  std::vector<SparseVectorType> l_M_v(l_sz);
721  //custom_copy(M_v, l_M_v, beg_ind);
722  std::copy(M_v.begin() + tag.getBegInd(), M_v.begin() + tag.getEndInd(), l_M_v.begin());
723 
724  //print_matrix(l_M_v);
725  //std::vector<SparseVectorType> l_A_v_c(K);
726  //custom_copy(A_v_c, l_A_v_c, beg_ind);
727  //std::copy(A_v_c.begin() + beg_ind, A_v_c.begin() + end_ind, l_A_v_c.begin());
728  //print_matrix(l_A_v_c);
729  //vectorize_row_matrix(A, A_v_r);
730  //working blocks
731 
732  std::vector<DenseMatrixType> g_A_I_J(l_sz);
733  std::vector<VectorType> g_b_v(l_sz);
734  std::vector<SparseVectorType> g_res(l_sz);
735  std::vector<std::vector<unsigned int> > g_I(l_sz);
736  std::vector<std::vector<unsigned int> > g_J(l_sz);
737 
738  while ((cur_iter < tag.getIterationLimit())&&is_all_update(g_is_update))
739  {
740  // SET UP THE BLOCKS..
741  // PHASE ONE
742  if (cur_iter == 0)
743  block_set_up(A, A_v_c, l_M_v, g_I, g_J, g_A_I_J, g_b_v);
744  else
745  block_update(A, A_v_c, g_res, g_is_update, g_I, g_J, g_b_v, g_A_I_J, tag);
746 
747  //PHASE TWO, LEAST SQUARE SOLUTION
748  least_square_solve(A_v_c, g_A_I_J, g_b_v, g_I, g_J, g_res, g_is_update, l_M_v, tag);
749 
750  if (tag.getIsStatic()) break;
751  cur_iter++;
752  }
753 
754  std::copy(l_M_v.begin(), l_M_v.end(), M_v.begin() + tag.getBegInd());
755  tag.setBegInd(tag.getEndInd());//beg_ind = end_ind;
756  tag.setEndInd(std::min(static_cast<long>(tag.getBegInd() + VIENNACL_SPAI_K_b), static_cast<long>(M.size2())));
757  //std::copy(l_M_v.begin(), l_M_v.end(), M_v.begin() + tag.getBegInd());
758  }
759 
760  M.resize(M.size1(), M.size2(), false);
761  insert_sparse_columns(M_v, M, tag.getIsRight());
762 }
763 
764 
765 //GPU - based version
774 template<typename NumericT, unsigned int AlignmentV>
776  boost::numeric::ublas::compressed_matrix<NumericT> const & cpu_A,
777  boost::numeric::ublas::compressed_matrix<NumericT> & cpu_M, //output
779  spai_tag const & tag)
780 {
781  typedef typename viennacl::linalg::detail::spai::sparse_vector<NumericT> SparseVectorType;
782 
783  //typedef typename viennacl::compressed_matrix<ScalarType> GPUSparseMatrixType;
784  //sparse matrix transpose...
785  unsigned int cur_iter = 0;
786  std::vector<cl_uint> g_is_update(cpu_M.size2(), static_cast<cl_uint>(1));
787  //init is update
788  //init_parallel_is_update(g_is_update);
789  std::vector<SparseVectorType> A_v_c(cpu_M.size2());
790  std::vector<SparseVectorType> M_v(cpu_M.size2());
791  vectorize_column_matrix(cpu_A, A_v_c);
792  vectorize_column_matrix(cpu_M, M_v);
793  std::vector<SparseVectorType> g_res(cpu_M.size2());
794  std::vector<std::vector<unsigned int> > g_I(cpu_M.size2());
795  std::vector<std::vector<unsigned int> > g_J(cpu_M.size2());
796 
797  //OpenCL variables
798  block_matrix g_A_I_J_vcl;
799  block_vector g_bv_vcl;
800  while ((cur_iter < tag.getIterationLimit())&&is_all_update(g_is_update))
801  {
802  // SET UP THE BLOCKS..
803  // PHASE ONE..
804  //timer.start();
805  //index set up on CPU
806  if (cur_iter == 0)
807  block_set_up(A, A_v_c, M_v, g_is_update, g_I, g_J, g_A_I_J_vcl, g_bv_vcl);
808  else
809  block_update(A, A_v_c, g_is_update, g_res, g_J, g_I, g_A_I_J_vcl, g_bv_vcl, tag);
810  //std::cout<<"Phase 2 timing: "<<timer.get()<<std::endl;
811  //PERFORM LEAST SQUARE problems solution
812  //PHASE TWO
813  //timer.start();
814  least_square_solve<SparseVectorType, NumericT>(A_v_c, M_v, g_I, g_J, g_A_I_J_vcl, g_bv_vcl, g_res, g_is_update, tag, viennacl::traits::context(A));
815  //std::cout<<"Phase 3 timing: "<<timer.get()<<std::endl;
816  if (tag.getIsStatic())
817  break;
818  cur_iter++;
819  }
820 
821  cpu_M.resize(cpu_M.size1(), cpu_M.size2(), false);
822  insert_sparse_columns(M_v, cpu_M, tag.getIsRight());
823  //copy back to GPU
824  M.resize(static_cast<unsigned int>(cpu_M.size1()), static_cast<unsigned int>(cpu_M.size2()));
825  viennacl::copy(cpu_M, M);
826 }
827 
828 }
829 }
830 }
831 }
832 #endif
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Definition: spai.hpp:587
void insert_sparse_columns(std::vector< SparseVectorT > const &M_v, SparseMatrixT &M, bool is_right)
Insertion of vectorized matrix column into original sparse matrix.
Definition: spai.hpp:616
Represents contigious matrices on GPU.
bool is_all_update(VectorType &parallel_is_update)
Definition: spai.hpp:443
void add_sparse_vectors(SparseVectorT const &v, NumericT b, SparseVectorT &res_v)
Add two sparse vectors res_v = b*v.
Definition: spai.hpp:105
Implementations of dense matrix related operations including matrix-vector products.
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
void sparse_norm_2(SparseVectorT const &v, NumericT &norm)
Computation of Euclidean norm for sparse vector.
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
viennacl::ocl::command_queue & get_queue()
Definition: context.hpp:266
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Implementation of the dense matrix class.
void sparse_transpose(MatrixT const &A_in, MatrixT &A)
Transposition of sparse matrix.
Definition: spai.hpp:640
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
OpenCL kernel file for sparse approximate inverse operations.
void build_index_set(std::vector< SparseVectorT > const &A_v_c, SparseVectorT const &v, std::vector< unsigned int > &J, std::vector< unsigned int > &I)
Setting up index set of columns and rows for certain column.
Definition: spai.hpp:139
void computeSPAI(MatrixT const &A, MatrixT &M, spai_tag &tag)
Construction of SPAI preconditioner on CPU.
Definition: spai.hpp:686
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
void compute_spai_residual(std::vector< SparseVectorT > const &A_v_c, SparseVectorT const &v, unsigned int ind, SparseVectorT &res)
Computation of residual res = A*v - e.
Definition: spai.hpp:120
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
Definition: qr.hpp:129
void custom_fan_out(std::vector< NumericT > const &m_in, unsigned int start_m_ind, std::vector< unsigned int > const &J, SparseVectorT &m)
Elicitation of sparse vector m for particular column from m_in - contigious vector for all columns...
Definition: spai.hpp:271
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set.
Definition: qr.hpp:151
void least_square_solve(std::vector< SparseVectorT > &A_v_c, std::vector< SparseVectorT > &M_v, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< SparseVectorT > &g_res, std::vector< cl_uint > &g_is_update, const spai_tag &tag, viennacl::context ctx)
Solution of Least square problem on GPU.
Definition: spai.hpp:298
void vectorize_row_matrix(SparseMatrixT const &M_in, std::vector< SparseVectorT > &M_v)
Definition: spai.hpp:472
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Implementation of a helper sparse vector class for SPAI. Experimental.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
void print_matrix(DenseMatrixT &m)
Definition: spai.hpp:88
void block_update(SparseMatrixT const &A, std::vector< SparseVectorT > const &A_v_c, std::vector< SparseVectorT > &g_res, std::vector< bool > &g_is_update, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, std::vector< VectorT > &g_b_v, std::vector< DenseMatrixT > &g_A_I_J, spai_tag const &tag)
CPU-based dynamic update for SPAI preconditioner.
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
viennacl::ocl::handle< cl_command_queue > const & handle() const
float NumericT
Definition: bisect.cpp:40
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
#define VIENNACL_ERR_CHECK(err)
Definition: error.hpp:681
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
Definition: qr.hpp:311
void apply_q_trans_vec(MatrixT const &R, VectorT const &b_v, VectorT &y)
Recovery Q from matrix R and vector of betas b_v.
Definition: qr.hpp:377
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void block_assembly(viennacl::compressed_matrix< NumericT, AlignmentV > const &A, std::vector< std::vector< unsigned int > > const &g_J, std::vector< std::vector< unsigned int > > const &g_I, block_matrix &g_A_I_J_vcl, std::vector< cl_uint > &g_is_update, bool &is_empty_block)
Assembly of blocks on GPU by a gived set of row indices: g_I and column indices: g_J.
Definition: spai.hpp:507
void projectRows(std::vector< SparseVectorT > const &A_v_c, std::vector< unsigned int > const &J, std::vector< unsigned int > &I)
Row projection for matrix A(:,J) -> A(I,J), building index set of non-zero rows.
Implementation of a bunch of (small) matrices on GPU. Experimental.
const OCL_TYPE & get() const
Definition: handle.hpp:189
void backwardSolve(MatrixT const &R, VectorT const &y, VectorT &x)
Solution of linear:R*x=y system by backward substitution.
Definition: blas3.hpp:36
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Definition: qr.hpp:165
Implementations of incomplete factorization preconditioners. Convenience header file.
Implementation of a static SPAI. Experimental.
Implementation of the compressed_matrix class.
Implementation of a bunch of vectors on GPU. Experimental.
Implementations of operations using sparse matrices.
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
void initProjectSubMatrix(SparseMatrixT const &A_in, std::vector< unsigned int > const &J, std::vector< unsigned int > &I, DenseMatrixT &A_out)
Initializes a dense matrix from a sparse one.
Definition: spai.hpp:156
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents a sparse vector based on std::map
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Provides a QR factorization using a block-based approach.
Implementation of the spai tag holding SPAI configuration parameters. Experimental.
std::size_t vcl_size_t
Definition: forwards.h:75
#define VIENNACL_SPAI_K_b
Definition: spai.hpp:68
Implementations of the OpenCL backend, where all contexts are stored in.
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
void block_set_up(SparseMatrixT const &A, std::vector< SparseVectorT > const &A_v_c, std::vector< SparseVectorT > const &M_v, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, std::vector< DenseMatrixT > &g_A_I_J, std::vector< VectorT > &g_b_v)
Setting up blocks and QR factorizing them on CPU.
Definition: spai.hpp:181
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
void write_set_to_array(std::vector< std::vector< SizeT > > const &ind_set, std::vector< cl_uint > &a)
Definition: spai.hpp:484
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Represents a contiguous vector on the GPU to represent a concatentation of small vectors.
void fanOutVector(VectorT const &m_in, std::vector< unsigned int > const &J, SparseVectorT &m)
Projects solution of LS problem onto original column m.
Definition: spai-static.hpp:88
void buildColumnIndexSet(SparseVectorT const &v, std::vector< unsigned int > &J)
Builds index set of projected columns for current column of preconditioner.
unsigned int getIterationLimit() const
Definition: spai_tag.hpp:90
static void init(viennacl::ocl::context &ctx)
Definition: spai.hpp:594
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
void print_sparse_vector(SparseVectorT const &v)
Definition: spai.hpp:81
void vectorize_column_matrix(SparseMatrixT const &M_in, std::vector< SparseVectorT > &M_v)
Solution of Least square problem on CPU.
Definition: spai.hpp:462
A sparse square matrix in compressed sparse rows format.
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
void index_set_up(std::vector< SparseVectorT > const &A_v_c, std::vector< SparseVectorT > const &M_v, std::vector< std::vector< unsigned int > > &g_J, std::vector< std::vector< unsigned int > > &g_I)
Setting up index set of columns and rows for all columns.
Definition: spai.hpp:211
Implementation of the ViennaCL scalar class.
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix.
Implementation of a dynamic SPAI. Provides the routines for automatic pattern updates Experimental...
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:216