ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DIRECT_SOLVE_HPP_
2 #define VIENNACL_LINALG_DIRECT_SOLVE_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
27 #include "viennacl/vector.hpp"
29 #include "viennacl/matrix.hpp"
31 #include "viennacl/linalg/prod.hpp"
33 
34 #ifdef VIENNACL_WITH_OPENCL
36 #endif
37 
38 #ifdef VIENNACL_WITH_CUDA
40 #endif
41 
42 #define VIENNACL_DIRECT_SOLVE_BLOCKSIZE 128
43 
44 namespace viennacl
45 {
46 namespace linalg
47 {
48 
49 namespace detail
50 {
51 
52  //
53  // A \ B:
54  //
55 
61  template<typename NumericT, typename SolverTagT>
62  void inplace_solve_kernel(const matrix_base<NumericT> & A, const matrix_base<NumericT> & B, SolverTagT)
63  {
64  assert( (viennacl::traits::size1(A) == viennacl::traits::size2(A)) && bool("Size check failed in inplace_solve(): size1(A) != size2(A)"));
65  assert( (viennacl::traits::size1(A) == viennacl::traits::size1(B)) && bool("Size check failed in inplace_solve(): size1(A) != size1(B)"));
66  switch (viennacl::traits::handle(A).get_active_handle_id())
67  {
69  viennacl::linalg::host_based::inplace_solve(A, const_cast<matrix_base<NumericT> &>(B), SolverTagT());
70  break;
71  #ifdef VIENNACL_WITH_OPENCL
73  viennacl::linalg::opencl::inplace_solve(A, const_cast<matrix_base<NumericT> &>(B), SolverTagT());
74  break;
75  #endif
76  #ifdef VIENNACL_WITH_CUDA
78  viennacl::linalg::cuda::inplace_solve(A, const_cast<matrix_base<NumericT> &>(B), SolverTagT());
79  break;
80  #endif
82  throw memory_exception("not initialised!");
83  default:
84  throw memory_exception("not implemented");
85  }
86  }
87 
88 
89  //
90  // A \ b
91  //
92 
93  template<typename NumericT, typename SolverTagT>
95  const vector_base<NumericT> & vec,
96  SolverTagT)
97  {
98  assert( (mat.size1() == vec.size()) && bool("Size check failed in inplace_solve(): size1(A) != size(b)"));
99  assert( (mat.size2() == vec.size()) && bool("Size check failed in inplace_solve(): size2(A) != size(b)"));
100 
102  {
104  viennacl::linalg::host_based::inplace_solve(mat, const_cast<vector_base<NumericT> &>(vec), SolverTagT());
105  break;
106  #ifdef VIENNACL_WITH_OPENCL
108  viennacl::linalg::opencl::inplace_solve(mat, const_cast<vector_base<NumericT> &>(vec), SolverTagT());
109  break;
110  #endif
111  #ifdef VIENNACL_WITH_CUDA
113  viennacl::linalg::cuda::inplace_solve(mat, const_cast<vector_base<NumericT> &>(vec), SolverTagT());
114  break;
115  #endif
117  throw memory_exception("not initialised!");
118  default:
119  throw memory_exception("not implemented");
120  }
121  }
122 
123 
124  template<typename MatrixT1, typename MatrixT2, typename SolverTagT>
125  void inplace_solve_lower_impl(MatrixT1 const & A, MatrixT2 & B, SolverTagT)
126  {
127  typedef typename viennacl::result_of::cpu_value_type<MatrixT1>::type NumericType;
128 
130  if (A.size1() <= blockSize)
131  inplace_solve_kernel(A, B, SolverTagT());
132  else
133  {
134  for (vcl_size_t i = 0; i < A.size1(); i = i + blockSize)
135  {
136  vcl_size_t Apos1 = i;
137  vcl_size_t Apos2 = std::min<vcl_size_t>(A.size1(), i + blockSize);
138  vcl_size_t Bpos = B.size2();
140  viennacl::project(B, viennacl::range(Apos1, Apos2), viennacl::range(0, Bpos)),
141  SolverTagT());
142  if (Apos2 < A.size1())
143  {
144  viennacl::matrix_range<MatrixT2> B_lower(B, viennacl::range(Apos2, B.size1()), viennacl::range(0, Bpos));
146  viennacl::project(B, viennacl::range(Apos1, Apos2), viennacl::range(0, Bpos)),
147  B_lower,
148  NumericType(-1.0), NumericType(1.0));
149  }
150  }
151  }
152  }
153 
154  template<typename MatrixT1, typename MatrixT2>
155  void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, viennacl::linalg::lower_tag)
156  {
158  }
159 
160  template<typename MatrixT1, typename MatrixT2>
161  void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, viennacl::linalg::unit_lower_tag)
162  {
164  }
165 
166  template<typename MatrixT1, typename MatrixT2, typename SolverTagT>
167  void inplace_solve_upper_impl(MatrixT1 const & A, MatrixT2 & B, SolverTagT)
168  {
169  typedef typename viennacl::result_of::cpu_value_type<MatrixT1>::type NumericType;
170 
171  int blockSize = VIENNACL_DIRECT_SOLVE_BLOCKSIZE;
172  if (static_cast<int>(A.size1()) <= blockSize)
173  inplace_solve_kernel(A, B, SolverTagT());
174  else
175  {
176  for (int i = static_cast<int>(A.size1()); i > 0; i = i - blockSize)
177  {
178  vcl_size_t Apos1 = vcl_size_t(std::max<int>(0, i - blockSize));
179  vcl_size_t Apos2 = vcl_size_t(i);
180  vcl_size_t Bpos = B.size2();
182  viennacl::project(B, viennacl::range(Apos1, Apos2), viennacl::range(0, Bpos)),
183  SolverTagT());
184  if (Apos1 > 0)
185  {
186  viennacl::matrix_range<MatrixT2> B_upper(B, viennacl::range(0, Apos1), viennacl::range(0, Bpos));
187 
189  viennacl::project(B, viennacl::range(Apos1, Apos2), viennacl::range(0, Bpos)),
190  B_upper,
191  NumericType(-1.0), NumericType(1.0));
192  }
193  }
194  }
195  }
196 
197  template<typename MatrixT1, typename MatrixT2>
198  void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, viennacl::linalg::upper_tag)
199  {
201  }
202 
203  template<typename MatrixT1, typename MatrixT2>
204  void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, viennacl::linalg::unit_upper_tag)
205  {
207  }
208 
209 } // namespace detail
210 
216 template<typename NumericT, typename SolverTagT>
219  SolverTagT)
220 {
221  detail::inplace_solve_impl(A,B,SolverTagT());
222 }
223 
229 template<typename NumericT, typename SolverTagT>
232  SolverTagT)
233 {
235 
236  matrix_base<NumericT> B(const_cast<handle_type &>(proxy_B.lhs().handle()),
237  proxy_B.lhs().size2(), proxy_B.lhs().start2(), proxy_B.lhs().stride2(), proxy_B.lhs().internal_size2(),
238  proxy_B.lhs().size1(), proxy_B.lhs().start1(), proxy_B.lhs().stride1(), proxy_B.lhs().internal_size1(),
239  !proxy_B.lhs().row_major());
240 
241  detail::inplace_solve_impl(A,B,SolverTagT());
242 }
243 
244 //upper triangular solver for transposed lower triangular matrices
250 template<typename NumericT, typename SolverTagT>
253  SolverTagT)
254 {
256 
257  matrix_base<NumericT> A(const_cast<handle_type &>(proxy_A.lhs().handle()),
258  proxy_A.lhs().size2(), proxy_A.lhs().start2(), proxy_A.lhs().stride2(), proxy_A.lhs().internal_size2(),
259  proxy_A.lhs().size1(), proxy_A.lhs().start1(), proxy_A.lhs().stride1(), proxy_A.lhs().internal_size1(),
260  !proxy_A.lhs().row_major());
261 
262  detail::inplace_solve_impl(A,B,SolverTagT());
263 }
264 
270 template<typename NumericT, typename SolverTagT>
273  SolverTagT)
274 {
276 
277  matrix_base<NumericT> A(const_cast<handle_type &>(proxy_A.lhs().handle()),
278  proxy_A.lhs().size2(), proxy_A.lhs().start2(), proxy_A.lhs().stride2(), proxy_A.lhs().internal_size2(),
279  proxy_A.lhs().size1(), proxy_A.lhs().start1(), proxy_A.lhs().stride1(), proxy_A.lhs().internal_size1(),
280  !proxy_A.lhs().row_major());
281 
282  matrix_base<NumericT> B(const_cast<handle_type &>(proxy_B.lhs().handle()),
283  proxy_B.lhs().size2(), proxy_B.lhs().start2(), proxy_B.lhs().stride2(), proxy_B.lhs().internal_size2(),
284  proxy_B.lhs().size1(), proxy_B.lhs().start1(), proxy_B.lhs().stride1(), proxy_B.lhs().internal_size1(),
285  !proxy_B.lhs().row_major());
286 
287  detail::inplace_solve_impl(A,B,SolverTagT());
288 }
289 
290 
292 
293 
300 template<typename NumericT, typename SolverTagT>
302  const matrix_base<NumericT> & B,
303  SolverTagT tag)
304 {
305  // do an inplace solve on the result vector:
306  matrix_base<NumericT> result(B);
307  inplace_solve(A, result, tag);
308  return result;
309 }
310 
317 template<typename NumericT, typename SolverTagT>
320  SolverTagT tag)
321 {
322  // do an inplace solve on the result vector:
323  matrix_base<NumericT> result(proxy);
324  inplace_solve(A, result, tag);
325  return result;
326 }
327 
334 template<typename NumericT, typename SolverTagT>
336  const matrix_base<NumericT> & B,
337  SolverTagT tag)
338 {
339  // do an inplace solve on the result vector:
340  matrix_base<NumericT> result(B);
341  inplace_solve(proxy, result, tag);
342  return result;
343 }
344 
351 template<typename NumericT, typename SolverTagT>
354  SolverTagT tag)
355 {
356  // run an inplace solve on the result vector:
357  matrix_base<NumericT> result(proxy_B);
358  inplace_solve(proxy_A, result, tag);
359  return result;
360 }
361 
362 //
364 //
365 
366 namespace detail
367 {
368  template<typename MatrixT1, typename VectorT, typename SolverTagT>
369  void inplace_solve_lower_vec_impl(MatrixT1 const & A, VectorT & b, SolverTagT)
370  {
372  if (A.size1() <= blockSize)
373  inplace_solve_vec_kernel(A, b, SolverTagT());
374  else
375  {
376  VectorT temp(b);
377  for (vcl_size_t i = 0; i < A.size1(); i = i + blockSize)
378  {
379  vcl_size_t Apos1 = i;
380  vcl_size_t Apos2 = std::min<vcl_size_t>(A.size1(), i + blockSize);
382  viennacl::project(b, viennacl::range(Apos1, Apos2)),
383  SolverTagT());
384  if (Apos2 < A.size1())
385  {
386  viennacl::project(temp, viennacl::range(Apos2, A.size1())) = viennacl::linalg::prod(viennacl::project(A, viennacl::range(Apos2, A.size1()), viennacl::range(Apos1, Apos2)),
387  viennacl::project(b, viennacl::range(Apos1, Apos2)));
388  viennacl::project(b, viennacl::range(Apos2, A.size1())) -= viennacl::project(temp, viennacl::range(Apos2, A.size1()));
389  }
390  }
391  }
392  }
393 
394  template<typename MatrixT1, typename VectorT>
395  void inplace_solve_vec_impl(MatrixT1 const & A, VectorT & B, viennacl::linalg::lower_tag)
396  {
398  }
399 
400  template<typename MatrixT1, typename VectorT>
401  void inplace_solve_vec_impl(MatrixT1 const & A, VectorT & B, viennacl::linalg::unit_lower_tag)
402  {
404  }
405 
406  template<typename MatrixT1, typename VectorT, typename SolverTagT>
407  void inplace_solve_upper_vec_impl(MatrixT1 const & A, VectorT & b, SolverTagT)
408  {
409  int blockSize = VIENNACL_DIRECT_SOLVE_BLOCKSIZE;
410  if (static_cast<int>(A.size1()) <= blockSize)
411  inplace_solve_vec_kernel(A, b, SolverTagT());
412  else
413  {
414  VectorT temp(b);
415  for (int i = static_cast<int>(A.size1()); i > 0; i = i - blockSize)
416  {
417  vcl_size_t Apos1 = vcl_size_t(std::max<int>(0, i - blockSize));
418  vcl_size_t Apos2 = vcl_size_t(i);
420  viennacl::project(b, viennacl::range(Apos1, Apos2)),
421  SolverTagT());
422  if (Apos1 > 0)
423  {
425  viennacl::project(b, viennacl::range(Apos1, Apos2)));
426  viennacl::project(b, viennacl::range(0, Apos1)) -= viennacl::project(temp, viennacl::range(0, Apos1));
427  }
428  }
429  }
430  }
431 
432  template<typename MatrixT1, typename VectorT>
433  void inplace_solve_vec_impl(MatrixT1 const & A, VectorT & b, viennacl::linalg::upper_tag)
434  {
436  }
437 
438  template<typename MatrixT1, typename VectorT>
439  void inplace_solve_vec_impl(MatrixT1 const & A, VectorT & b, viennacl::linalg::unit_upper_tag)
440  {
442  }
443 
444 } // namespace detail
445 
452 template<typename NumericT, typename SolverTagT>
454  vector_base<NumericT> & vec,
455  SolverTagT const & tag)
456 {
457 
458  detail::inplace_solve_vec_impl(mat, vec, tag);
459 }
460 
467 template<typename NumericT, typename SolverTagT>
469  vector_base<NumericT> & vec,
470  SolverTagT const & tag)
471 {
473 
474  // wrap existing matrix in a new matrix_base object (no data copy)
475  matrix_base<NumericT> mat(const_cast<handle_type &>(proxy.lhs().handle()),
476  proxy.lhs().size2(), proxy.lhs().start2(), proxy.lhs().stride2(), proxy.lhs().internal_size2(),
477  proxy.lhs().size1(), proxy.lhs().start1(), proxy.lhs().stride1(), proxy.lhs().internal_size1(),
478  !proxy.lhs().row_major());
479  detail::inplace_solve_vec_impl(mat, vec, tag);
480 }
481 
482 
491 template<typename NumericT>
493  const vector_base<NumericT> & vec,
494  viennacl::linalg::upper_tag const & tag)
495 {
496  // run an inplace solve on the result vector:
497  vector<NumericT> result(vec);
498  inplace_solve(mat, result, tag);
499  return result;
500 }
501 
510 template<typename NumericT>
512  const vector_base<NumericT> & vec,
514 {
515  // run an inplace solve on the result vector:
516  vector<NumericT> result(vec);
517  inplace_solve(mat, result, tag);
518  return result;
519 }
520 
529 template<typename NumericT>
531  const vector_base<NumericT> & vec,
532  viennacl::linalg::lower_tag const & tag)
533 {
534  // run an inplace solve on the result vector:
535  vector<NumericT> result(vec);
536  inplace_solve(mat, result, tag);
537  return result;
538 }
539 
548 template<typename NumericT>
550  const vector_base<NumericT> & vec,
552 {
553  // run an inplace solve on the result vector:
554  vector<NumericT> result(vec);
555  inplace_solve(mat, result, tag);
556  return result;
557 }
558 
565 template<typename NumericT, typename SolverTagT>
567  const vector_base<NumericT> & vec,
568  SolverTagT const & tag)
569 {
570  // run an inplace solve on the result vector:
571  vector<NumericT> result(vec);
572  inplace_solve(proxy, result, tag);
573  return result;
574 }
575 
576 
577 }
578 }
579 
580 #endif
viennacl::tools::shared_ptr< char > handle_type
Definition: cpu_ram.hpp:40
Implementations of dense direct triangular solvers are found here.
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
void inplace_solve(const matrix_base< NumericT > &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
void inplace_solve_upper_impl(MatrixT1 const &A, MatrixT2 &B, SolverTagT)
Exception class in case of memory errors.
Definition: forwards.h:572
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
A tag class representing a lower triangular matrix.
Definition: forwards.h:849
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
void inplace_solve_upper_vec_impl(MatrixT1 const &A, VectorT &b, SolverTagT)
basic_range range
Definition: forwards.h:424
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
void inplace_solve_vec_impl(MatrixT1 const &A, VectorT &B, viennacl::linalg::lower_tag)
Definition: blas3.hpp:36
A tag class representing an upper triangular matrix.
Definition: forwards.h:854
void inplace_solve_kernel(const matrix_base< NumericT > &A, const matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems using a single kernel launch. Matlab notation: A \...
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
void inplace_solve_impl(MatrixT1 const &A, MatrixT2 &B, viennacl::linalg::lower_tag)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
Proxy classes for vectors.
#define VIENNACL_DIRECT_SOLVE_BLOCKSIZE
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
Proxy classes for matrices.
void inplace_solve_lower_impl(MatrixT1 const &A, MatrixT2 &B, SolverTagT)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void inplace_solve_lower_vec_impl(MatrixT1 const &A, VectorT &b, SolverTagT)
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
Implementations of dense direct solvers using CUDA are found here.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:859
A tag class representing transposed matrices.
Definition: forwards.h:220
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
void inplace_solve_vec_kernel(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, SolverTagT)
Implementations of dense direct solvers are found here.
Simple enable-if variant that uses the SFINAE pattern.
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:864
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Definition: mem_handle.hpp:118