ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
svd.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_SVD_HPP
2 #define VIENNACL_LINALG_SVD_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
28 // Note: Boost.uBLAS is required at the moment
29 #include <boost/numeric/ublas/vector.hpp>
30 #include <boost/numeric/ublas/matrix.hpp>
31 
32 
33 #include <cmath>
34 
35 #include "viennacl/matrix.hpp"
38 
39 namespace viennacl
40 {
41  namespace linalg
42  {
43 
44  namespace detail
45  {
46 
47  template<typename MatrixType, typename VectorType>
48  void givens_prev(MatrixType & matrix,
49  VectorType & tmp1,
50  VectorType & tmp2,
51  int n,
52  int l,
53  int k
54  )
55  {
56  typedef typename MatrixType::value_type ScalarType;
57  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
58 
59  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
61 
62  kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 256));
63  kernel.local_work_size(0, 256);
64 
66  matrix,
67  tmp1,
68  tmp2,
69  static_cast<cl_uint>(n),
70  static_cast<cl_uint>(matrix.internal_size1()),
71  static_cast<cl_uint>(l + 1),
72  static_cast<cl_uint>(k + 1)
73  ));
74  }
75 
76 
77  template<typename MatrixType, typename VectorType>
78  void change_signs(MatrixType& matrix, VectorType& signs, int n)
79  {
80  typedef typename MatrixType::value_type ScalarType;
81  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
82 
83  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
85 
86  kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 16));
87  kernel.global_work_size(1, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size2(matrix), 16));
88 
89  kernel.local_work_size(0, 16);
90  kernel.local_work_size(1, 16);
91 
93  matrix,
94  signs,
95  static_cast<cl_uint>(n),
96  static_cast<cl_uint>(matrix.internal_size1())
97  ));
98  }
99 
100  template<typename MatrixType, typename CPU_VectorType>
101  void svd_qr_shift(MatrixType & vcl_u,
102  MatrixType & vcl_v,
103  CPU_VectorType & q,
104  CPU_VectorType & e)
105  {
106  typedef typename MatrixType::value_type ScalarType;
107  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
108 
109  vcl_size_t n = q.size();
110  int m = static_cast<int>(vcl_u.size1());
111 
112  detail::transpose(vcl_u);
113  detail::transpose(vcl_v);
114 
115  std::vector<CPU_ScalarType> signs_v(n, 1);
116  std::vector<CPU_ScalarType> cs1(n), ss1(n), cs2(n), ss2(n);
117 
119 
120  bool goto_test_conv = false;
121 
122  for (int k = static_cast<int>(n) - 1; k >= 0; k--)
123  {
124  // std::cout << "K = " << k << std::endl;
125 
126  vcl_size_t iter = 0;
127  for (iter = 0; iter < detail::ITER_MAX; iter++)
128  {
129  // test for split
130  int l;
131  for (l = k; l >= 0; l--)
132  {
133  goto_test_conv = false;
134  if (std::fabs(e[vcl_size_t(l)]) <= detail::EPS)
135  {
136  // set it
137  goto_test_conv = true;
138  break;
139  }
140 
141  if (std::fabs(q[vcl_size_t(l) - 1]) <= detail::EPS)
142  {
143  // goto
144  break;
145  }
146  }
147 
148  if (!goto_test_conv)
149  {
150  CPU_ScalarType c = 0.0;
151  CPU_ScalarType s = 1.0;
152 
153  //int l1 = l - 1;
154  //int l2 = k;
155 
156  for (int i = l; i <= k; i++)
157  {
158  CPU_ScalarType f = s * e[vcl_size_t(i)];
159  e[vcl_size_t(i)] = c * e[vcl_size_t(i)];
160 
161  if (std::fabs(f) <= detail::EPS)
162  {
163  //l2 = i - 1;
164  break;
165  }
166 
167  CPU_ScalarType g = q[vcl_size_t(i)];
168  CPU_ScalarType h = detail::pythag(f, g);
169  q[vcl_size_t(i)] = h;
170  c = g / h;
171  s = -f / h;
172 
173  cs1[vcl_size_t(i)] = c;
174  ss1[vcl_size_t(i)] = s;
175  }
176 
177  // std::cout << "Hitted!" << l1 << " " << l2 << "\n";
178 
179  // for (int i = l; i <= l2; i++)
180  // {
181  // for (int j = 0; j < m; j++)
182  // {
183  // CPU_ScalarType y = u(j, l1);
184  // CPU_ScalarType z = u(j, i);
185  // u(j, l1) = y * cs1[i] + z * ss1[i];
186  // u(j, i) = -y * ss1[i] + z * cs1[i];
187  // }
188  // }
189  }
190 
191  CPU_ScalarType z = q[vcl_size_t(k)];
192 
193  if (l == k)
194  {
195  if (z < 0)
196  {
197  q[vcl_size_t(k)] = -z;
198 
199  signs_v[vcl_size_t(k)] *= -1;
200  }
201 
202  break;
203  }
204 
205  if (iter >= detail::ITER_MAX - 1)
206  break;
207 
208  CPU_ScalarType x = q[vcl_size_t(l)];
209  CPU_ScalarType y = q[vcl_size_t(k) - 1];
210  CPU_ScalarType g = e[vcl_size_t(k) - 1];
211  CPU_ScalarType h = e[vcl_size_t(k)];
212  CPU_ScalarType f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y);
213 
214  g = detail::pythag<CPU_ScalarType>(f, 1);
215 
216  if (f < 0) {
217  f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x;
218  } else {
219  f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x;
220  }
221 
222  CPU_ScalarType c = 1;
223  CPU_ScalarType s = 1;
224 
225  for (vcl_size_t i = static_cast<vcl_size_t>(l) + 1; i <= static_cast<vcl_size_t>(k); i++)
226  {
227  g = e[i];
228  y = q[i];
229  h = s * g;
230  g = c * g;
231  CPU_ScalarType z2 = detail::pythag(f, h);
232  e[i - 1] = z2;
233  c = f / z2;
234  s = h / z2;
235  f = x * c + g * s;
236  g = -x * s + g * c;
237  h = y * s;
238  y = y * c;
239 
240  cs1[i] = c;
241  ss1[i] = s;
242 
243  z2 = detail::pythag(f, h);
244  q[i - 1] = z2;
245  c = f / z2;
246  s = h / z2;
247  f = c * g + s * y;
248  x = -s * g + c * y;
249 
250  cs2[i] = c;
251  ss2[i] = s;
252  }
253 
254  {
255  viennacl::copy(cs1, tmp1);
256  viennacl::copy(ss1, tmp2);
257 
258  givens_prev(vcl_v, tmp1, tmp2, static_cast<int>(n), l, k);
259  }
260 
261  {
262  viennacl::copy(cs2, tmp1);
263  viennacl::copy(ss2, tmp2);
264 
265  givens_prev(vcl_u, tmp1, tmp2, m, l, k);
266  }
267 
268  e[vcl_size_t(l)] = 0.0;
269  e[vcl_size_t(k)] = f;
270  q[vcl_size_t(k)] = x;
271  }
272 
273  }
274 
275 
276  viennacl::copy(signs_v, tmp1);
277  change_signs(vcl_v, tmp1, static_cast<int>(n));
278 
279  // transpose singular matrices again
280  detail::transpose(vcl_u);
281  detail::transpose(vcl_v);
282  }
283 
284 
285  /*template<typename SCALARTYPE, unsigned int ALIGNMENT>
286  bool householder_c(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A,
287  viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Q,
288  viennacl::vector<SCALARTYPE, ALIGNMENT> & D,
289  vcl_size_t start)
290  {
291 
292  vcl_size_t row_start = start;
293  vcl_size_t col_start = start;
294 
295  if (row_start + 1 >= A.size1())
296  return false;
297 
298  std::vector<SCALARTYPE> tmp(A.size1(), 0);
299 
300  copy_vec(A, D, row_start, col_start, true);
301  fast_copy(D.begin(), D.begin() + (A.size1() - row_start), tmp.begin() + row_start);
302 
303  detail::householder_vector(tmp, row_start);
304 
305  fast_copy(tmp, D);
306 
307  viennacl::ocl::kernel & kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_COL_KERNEL);
308 
309  //kernel.global_work_size(0, A.size1() << 1);
310 
311  viennacl::ocl::enqueue(kernel(
312  A,
313  Q,
314  D,
315  static_cast<cl_uint>(row_start),
316  static_cast<cl_uint>(col_start),
317  static_cast<cl_uint>(A.size1()),
318  static_cast<cl_uint>(A.size2()),
319  static_cast<cl_uint>(A.internal_size2()),
320  static_cast<cl_uint>(Q.internal_size2()),
321  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
322  ));
323 
324  return true;
325  }*/
326 
327  template<typename SCALARTYPE, unsigned int ALIGNMENT>
331  vcl_size_t row_start, vcl_size_t col_start)
332  {
333  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
334 
335  if (row_start + 1 >= A.size1())
336  return false;
337 
338  prepare_householder_vector(A, D, A.size1(), row_start, col_start, row_start, true);
339 
340  {
342 
343  viennacl::ocl::enqueue(kernel(
344  A,
345  D,
346  static_cast<cl_uint>(row_start),
347  static_cast<cl_uint>(col_start),
348  static_cast<cl_uint>(A.size1()),
349  static_cast<cl_uint>(A.size2()),
350  static_cast<cl_uint>(A.internal_size2()),
351  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
352  ));
353 
354  }
355 
356  {
358 
359  viennacl::ocl::enqueue(kernel(
360  Q,
361  D,
362  static_cast<cl_uint>(A.size1()),
363  // static_cast<cl_uint>(A.size2()),
364  static_cast<cl_uint>(Q.internal_size2()),
365  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
366  ));
367 
368  }
369 
370  return true;
371  }
372 
373  /*
374  template<typename SCALARTYPE, unsigned int ALIGNMENT>
375  bool householder_r(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A,
376  viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& Q,
377  viennacl::vector<SCALARTYPE, ALIGNMENT>& S,
378  vcl_size_t start)
379  {
380 
381  vcl_size_t row_start = start;
382  vcl_size_t col_start = start + 1;
383 
384  if (col_start + 1 >= A.size2())
385  return false;
386 
387  std::vector<SCALARTYPE> tmp(A.size2(), 0);
388 
389  copy_vec(A, S, row_start, col_start, false);
390  fast_copy(S.begin(),
391  S.begin() + (A.size2() - col_start),
392  tmp.begin() + col_start);
393 
394  detail::householder_vector(tmp, col_start);
395  fast_copy(tmp, S);
396 
397  viennacl::ocl::kernel& kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_ROW_KERNEL);
398 
399  viennacl::ocl::enqueue(kernel(
400  A,
401  Q,
402  S,
403  static_cast<cl_uint>(row_start),
404  static_cast<cl_uint>(col_start),
405  static_cast<cl_uint>(A.size1()),
406  static_cast<cl_uint>(A.size2()),
407  static_cast<cl_uint>(A.internal_size2()),
408  static_cast<cl_uint>(Q.internal_size2()),
409  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
410  ));
411  return true;
412  } */
413 
414  template<typename SCALARTYPE, unsigned int ALIGNMENT>
418  vcl_size_t row_start, vcl_size_t col_start)
419  {
420  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
421 
422  if (col_start + 1 >= A.size2())
423  return false;
424 
425  prepare_householder_vector(A, D, A.size2(), row_start, col_start, col_start, false);
426 
427  {
429 
430  viennacl::ocl::enqueue(kernel(
431  A,
432  D,
433  static_cast<cl_uint>(row_start),
434  static_cast<cl_uint>(col_start),
435  static_cast<cl_uint>(A.size1()),
436  static_cast<cl_uint>(A.size2()),
437  static_cast<cl_uint>(A.internal_size2()),
438  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
439  ));
440  }
441 
442  {
444 
445  viennacl::ocl::enqueue(kernel(
446  Q,
447  D,
448  static_cast<cl_uint>(A.size1()),
449  static_cast<cl_uint>(A.size2()),
450  static_cast<cl_uint>(Q.internal_size2()),
451  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
452  ));
453  }
454 
455  return true;
456  }
457 
458  template<typename SCALARTYPE, unsigned int ALIGNMENT>
462  {
463  vcl_size_t row_num = Ai.size1();
464  vcl_size_t col_num = Ai.size2();
465 
466  vcl_size_t to = std::min(row_num, col_num);
467  vcl_size_t big_to = std::max(row_num, col_num);
468 
469  //for storing householder vector
471 
474 
475  for (vcl_size_t i = 0; i < to; i++)
476  {
477  householder_c(Ai, QL, hh_vector, i, i);
478  householder_r(Ai, QR, hh_vector, i, i+1);
479  }
480  }
481 
482  } // namespace detail
483 
484 
491  template<typename SCALARTYPE, unsigned int ALIGNMENT>
495  {
496  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
498 
499  vcl_size_t row_num = A.size1();
500  vcl_size_t col_num = A.size2();
501 
502  vcl_size_t to = std::min(row_num, col_num);
503 
504 
505  //viennacl::vector<SCALARTYPE, ALIGNMENT> d(to);
506  //viennacl::vector<SCALARTYPE, ALIGNMENT> s(to + 1);
507 
508  // first stage
509  detail::bidiag(A, QL, QR);
510 
511  // second stage
512  //std::vector<SCALARTYPE> dh(to, 0);
513  //std::vector<SCALARTYPE> sh(to + 1, 0);
514  boost::numeric::ublas::vector<SCALARTYPE> dh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to, 0);
515  boost::numeric::ublas::vector<SCALARTYPE> sh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to + 1, 0);
516 
517 
519 
520  detail::svd_qr_shift( QL, QR, dh, sh);
521 
522  // Write resulting diagonal matrix with singular values to A:
523  boost::numeric::ublas::matrix<SCALARTYPE> h_Sigma(row_num, col_num);
524  h_Sigma.clear();
525 
526  for (vcl_size_t i = 0; i < to; i++)
527  h_Sigma(i, i) = dh[i];
528 
529  copy(h_Sigma, A);
530  }
531  }
532 }
533 #endif
void change_signs(MatrixType &matrix, VectorType &signs, int n)
Definition: svd.hpp:78
const std::string SVD_INVERSE_SIGNS_KERNEL
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Implementation of the dense matrix class.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_QR_KERNEL
A dense matrix class.
Definition: forwards.h:375
void transpose(matrix_base< SCALARTYPE > &A)
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:652
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
void prepare_householder_vector(matrix_base< SCALARTYPE > &A, vector_base< SCALARTYPE > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
void svd(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &QL, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &QR)
Computes the singular value decomposition of a matrix A. Experimental in 1.3.x.
Definition: svd.hpp:492
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
OpenCL kernel file for singular value decomposition.
bool householder_r(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t row_start, vcl_size_t col_start)
Definition: svd.hpp:415
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
Common routines used for the QR method and SVD. Experimental.
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:644
Definition: blas3.hpp:36
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
size_type size1() const
Definition: matrix_def.hpp:44
bool householder_c(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t row_start, vcl_size_t col_start)
Definition: svd.hpp:328
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
void bidiag(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Ai, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &QL, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &QR)
Definition: svd.hpp:459
SCALARTYPE pythag(SCALARTYPE a, SCALARTYPE b)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
void givens_prev(MatrixType &matrix, VectorType &tmp1, VectorType &tmp2, int n, int l, int k)
Definition: svd.hpp:48
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
void svd_qr_shift(MatrixType &vcl_u, MatrixType &vcl_v, CPU_VectorType &q, CPU_VectorType &e)
Definition: svd.hpp:101
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
const std::string SVD_GIVENS_PREV_KERNEL