1 #ifndef VIENNACL_LINALG_QR_METHOD_HPP_
2 #define VIENNACL_LINALG_QR_METHOD_HPP_
28 #include <boost/numeric/ublas/vector.hpp>
29 #include <boost/numeric/ublas/matrix.hpp>
41 template <
typename SCALARTYPE>
49 (void)A; (void)n; (void)last_n; (void)q; (void)p;
50 #ifdef VIENNACL_WITH_OPENCL
60 static_cast<cl_uint>(n),
61 static_cast<cl_uint
>(last_n),
73 static_cast<cl_uint>(n),
74 static_cast<cl_uint
>(last_n),
81 template <
typename SCALARTYPE,
typename VectorType>
83 const VectorType& buf,
91 (void)A; (void)buf; (void)buf_vcl; (void)m; (void)n; (void)last_n;
92 #ifdef VIENNACL_WITH_OPENCL
105 static_cast<cl_uint>(m),
106 static_cast<cl_uint
>(n),
107 static_cast<cl_uint>(last_n)
118 static_cast<cl_uint>(m),
119 static_cast<cl_uint
>(n),
120 static_cast<cl_uint>(last_n)
127 template<
typename SCALARTYPE,
typename MatrixT>
135 for (
int i = 0; i < last_n; i++)
137 SCALARTYPE v_in = A(i, n);
138 SCALARTYPE z = A(i, n - 1);
139 A(i, n - 1) = q * z + p * v_in;
140 A(i, n) = q * v_in - p * z;
144 template<
typename SCALARTYPE,
typename MatrixT>
146 const std::vector<SCALARTYPE>& buf,
153 for (
int i = 0; i < last_i; i++)
155 int start_k = is_triangular?
std::max(i + 1, m):m;
157 SCALARTYPE* a_row = A.row(i);
159 SCALARTYPE a_ik = a_row[start_k];
160 SCALARTYPE a_ik_1 = 0;
161 SCALARTYPE a_ik_2 = 0;
164 a_ik_1 = a_row[start_k + 1];
166 for (
int k = start_k; k < n; k++)
168 bool notlast = (k != n - 1);
170 SCALARTYPE p = buf[5 *
static_cast<vcl_size_t>(k)] * a_ik + buf[5 * static_cast<vcl_size_t>(k) + 1] * a_ik_1;
174 a_ik_2 = a_row[k + 2];
175 p = p + buf[5 *
static_cast<vcl_size_t>(k) + 2] * a_ik_2;
176 a_ik_2 = a_ik_2 - p * buf[5 *
static_cast<vcl_size_t>(k) + 4];
180 a_ik_1 = a_ik_1 - p * buf[5 *
static_cast<vcl_size_t>(k) + 3];
192 template<
typename SCALARTYPE>
203 data.resize(internal_size * internal_size);
208 return data[
static_cast<vcl_size_t>(i) * internal_size_ + static_cast<vcl_size_t>(j)];
235 template <
typename SCALARTYPE,
typename VectorType>
243 int nn =
static_cast<int>(vcl_H.
size1());
247 std::vector<SCALARTYPE> buf(5 *
vcl_size_t(nn));
256 SCALARTYPE eps = 2 *
static_cast<SCALARTYPE
>(
EPS);
257 SCALARTYPE exshift = 0;
268 SCALARTYPE out1, out2;
272 for (
int i = 0; i < nn; i++)
274 for (
int j =
std::max(i - 1, 0); j < nn; j++)
275 norm = norm + std::fabs(H(i, j));
286 s = std::fabs(H(l - 1, l - 1)) + std::fabs(H(l, l));
289 if (std::fabs(H(l, l - 1)) < eps * s)
299 H(n, n) = H(n, n) + exshift;
308 w = H(n, n - 1) * H(n - 1, n);
309 p = (H(n - 1, n - 1) - H(n, n)) / 2;
311 z =
static_cast<SCALARTYPE
>(std::sqrt(std::fabs(q)));
312 H(n, n) = H(n, n) + exshift;
313 H(n - 1, n - 1) = H(n - 1, n - 1) + exshift;
319 z = (p >= 0) ? (p + z) : (p - z);
322 if (z <= 0 && z >= 0)
327 s = std::fabs(x) + std::fabs(z);
330 r =
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q));
335 for (
int j = n - 1; j < nn; j++)
337 SCALARTYPE h_nj = H(n, j);
339 H(n - 1, j) = q * z + p * h_nj;
340 H(n, j) = q * h_nj - p * z;
369 w = H(n, n - 1) * H(n - 1, n);
376 for (
int i = 0; i <= n; i++)
379 s = std::fabs(H(n, n - 1)) + std::fabs(H(n - 1, n - 2));
380 x = y = SCALARTYPE(0.75) * s;
381 w = SCALARTYPE(-0.4375) * s * s;
391 s =
static_cast<SCALARTYPE
>(std::sqrt(s));
393 s = x - w / ((y - x) / 2 + s);
394 for (
int i = 0; i <= n; i++)
397 x = y = w = SCALARTYPE(0.964);
407 SCALARTYPE h_m1_m1 = H(m + 1, m + 1);
411 p = (r * s - w) / H(m + 1, m) + H(m, m + 1);
412 q = h_m1_m1 - z - r - s;
414 s = std::fabs(p) + std::fabs(q) + std::fabs(r);
420 if (std::fabs(H(m, m - 1)) * (std::fabs(q) + std::fabs(r)) < eps * (std::fabs(p) * (std::fabs(H(m - 1, m - 1)) + std::fabs(z) + std::fabs(h_m1_m1))))
425 for (
int i = m + 2; i <= n; i++)
433 for (
int k = m; k < n; k++)
435 bool notlast = (k != n - 1);
440 r = (notlast ? H(k + 2, k - 1) : 0);
441 x = std::fabs(p) + std::fabs(q) + std::fabs(r);
450 if (x <= 0 && x >= 0)
break;
452 s =
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q + r * r));
458 H(k, k - 1) = -s * x;
461 H(k, k - 1) = -H(k, k - 1);
477 SCALARTYPE* a_row_k = H.
row(k);
478 SCALARTYPE* a_row_k_1 = H.row(k + 1);
479 SCALARTYPE* a_row_k_2 = H.row(k + 2);
481 for (
int j = k; j < nn; j++)
483 SCALARTYPE h_kj = a_row_k[j];
484 SCALARTYPE h_k1_j = a_row_k_1[j];
486 p = h_kj + q * h_k1_j;
489 SCALARTYPE h_k2_j = a_row_k_2[j];
491 a_row_k_2[j] = h_k2_j - p * z;
494 a_row_k[j] = h_kj - p * x;
495 a_row_k_1[j] = h_k1_j - p * y;
502 for (
int i = k; i <
std::min(nn, k + 4); i++)
504 p = x * H(i, k) + y * H(i, k + 1);
507 p = p + z * H(i, k + 2);
508 H(i, k + 2) = H(i, k + 2) - p * r;
511 H(i, k) = H(i, k) - p;
512 H(i, k + 1) = H(i, k + 1) - p * q;
528 update_float_QR_column<SCALARTYPE>(H, buf, m, n, n,
true);
541 for (n = nn - 1; n >= 0; n--)
547 if (q <= 0 && q >= 0)
551 for (
int i = n - 1; i >= 0; i--)
555 for (
int j = l; j <= n; j++)
556 r = r + H(i, j) * H(j, n);
568 H(i, n) = (w > 0 || w < 0) ? (-r / w) : (-r / (eps * norm));
576 t = (x * s - z * r) / q;
578 H(i + 1, n) = (std::fabs(x) > std::fabs(z)) ? ((-r - w * t) / x) : ((-s - y * t) / z);
582 t = std::fabs(H(i, n));
583 if ((eps * t) * t > 1)
584 for (
int j = i; j <= n; j++)
595 if (std::fabs(H(n, n - 1)) > std::fabs(H(n - 1, n)))
597 H(n - 1, n - 1) = q / H(n, n - 1);
598 H(n - 1, n) = -(H(n, n) - p) / H(n, n - 1);
602 cdiv<SCALARTYPE>(0, -H(n - 1, n), H(n - 1, n - 1) - p, q, out1, out2);
604 H(n - 1, n - 1) = out1;
610 for (
int i = n - 2; i >= 0; i--)
612 SCALARTYPE ra, sa, vr, vi;
615 for (
int j = l; j <= n; j++)
617 SCALARTYPE h_ij = H(i, j);
618 ra = ra + h_ij * H(j, n - 1);
619 sa = sa + h_ij * H(j, n);
635 cdiv<SCALARTYPE>(-ra, -sa, w, q, out1, out2);
646 if ( (vr <= 0 && vr >= 0) && (vi <= 0 && vi >= 0) )
647 vr = eps * norm * (std::fabs(w) + std::fabs(q) + std::fabs(x) + std::fabs(y) + std::fabs(z));
649 cdiv<SCALARTYPE>(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi, out1, out2);
655 if (std::fabs(x) > (std::fabs(z) + std::fabs(q)))
657 H(i + 1, n - 1) = (-ra - w * H(i, n - 1) + q * H(i, n)) / x;
658 H(i + 1, n) = (-sa - w * H(i, n) - q * H(i, n - 1)) / x;
662 cdiv<SCALARTYPE>(-r - y * H(i, n - 1), -s - y * H(i, n), z, q, out1, out2);
664 H(i + 1, n - 1) = out1;
670 t =
std::max(std::fabs(H(i, n - 1)), std::fabs(H(i, n)));
671 if ((eps * t) * t > 1)
673 for (
int j = i; j <= n; j++)
693 template <
typename SCALARTYPE>
701 if(start + 2 >= A_size1)
712 template <
typename SCALARTYPE>
727 template <
typename SCALARTYPE>
730 std::vector<SCALARTYPE> & D,
731 std::vector<SCALARTYPE> & E,
732 bool is_symmetric =
true)
735 assert(A.
size1() == A.
size2() && bool(
"Input matrix must be square for QR method!"));
772 boost::numeric::ublas::matrix<SCALARTYPE> eigen_values(A.
size1(), A.
size1());
773 eigen_values.clear();
777 if(std::fabs(E[i]) <
EPS)
779 eigen_values(i, i) = D[i];
783 eigen_values(i, i) = D[i];
784 eigen_values(i, i + 1) = E[i];
785 eigen_values(i + 1, i) = -E[i];
786 eigen_values(i + 1, i + 1) = D[i];
791 copy(eigen_values, A);
795 template <
typename SCALARTYPE>
798 std::vector<SCALARTYPE>& D,
799 std::vector<SCALARTYPE>& E
805 template <
typename SCALARTYPE>
808 std::vector<SCALARTYPE>& D
811 std::vector<SCALARTYPE> E(A.
size1());
816 template <
typename SCALARTYPE>
822 std::vector<SCALARTYPE> std_D(D.
size());
823 std::vector<SCALARTYPE> E(A.
size1());
void final_iter_update(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Internal helper class representing a row-major dense matrix used for the QR method for the purpose of...
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Implementation of the dense matrix class.
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
void tql2(matrix_base< SCALARTYPE, F > &Q, VectorType &d, VectorType &e)
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void final_iter_update_gpu(matrix_base< SCALARTYPE > &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
const std::string SVD_UPDATE_QR_COLUMN_KERNEL
Implementation of the tql2-algorithm for eigenvalue computations.
void transpose(matrix_base< SCALARTYPE > &A)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
T max(const T &lhs, const T &rhs)
Maximum.
void prepare_householder_vector(matrix_base< SCALARTYPE > &A, vector_base< SCALARTYPE > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
void update_float_QR_column_gpu(matrix_base< SCALARTYPE > &A, const VectorType &buf, viennacl::vector< SCALARTYPE > &buf_vcl, int m, int n, int last_n, bool)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Common routines used for the QR method and SVD. Experimental.
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
result_of::size_type< T >::type start(T const &obj)
void qr_method_nsm(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E)
Common base class for dense vectors, vector ranges, and vector slices.
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
size_type size2() const
Returns the number of columns.
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void tridiagonal_reduction(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q)
size_type size1() const
Returns the number of rows.
std::vector< SCALARTYPE > data
void qr_method(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E, bool is_symmetric=true)
SCALARTYPE & operator()(int i, int j)
void update_float_QR_column(MatrixT &A, const std::vector< SCALARTYPE > &buf, int m, int n, int last_i, bool is_triangular)
FastMatrix(vcl_size_t sz, vcl_size_t internal_size)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
const std::string SVD_FINAL_ITER_UPDATE_KERNEL
void hqr2(viennacl::matrix< SCALARTYPE > &vcl_H, viennacl::matrix< SCALARTYPE > &V, VectorType &d, VectorType &e)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
T min(const T &lhs, const T &rhs)
Minimum.
bool householder_twoside(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q, vector_base< SCALARTYPE > &D, vcl_size_t start)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void qr_method_sym(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D)