ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 
27 #include "viennacl/ocl/device.hpp"
28 #include "viennacl/ocl/handle.hpp"
29 #include "viennacl/ocl/kernel.hpp"
30 #include "viennacl/scalar.hpp"
31 #include "viennacl/vector.hpp"
33 #include "viennacl/tools/tools.hpp"
37 
39 
40 #include "viennacl/traits/size.hpp"
44 
47 
49 
50 namespace viennacl
51 {
52 namespace linalg
53 {
54 namespace opencl
55 {
56 
57 namespace detail
58 {
59 
60  template<typename NumericT>
61  viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base<NumericT> const & M, std::string const & kernel_name)
62  {
63  viennacl::ocl::context & ctx = traits::opencl_context(M);
64  viennacl::ocl::program * program;
65  if (M.row_major())
66  {
68  KernelClass::init(ctx);
69  program = &ctx.get_program(KernelClass::program_name());
70  }
71  else
72  {
74  KernelClass::init(ctx);
75  program = &ctx.get_program(KernelClass::program_name());
76  }
77  return program->get_kernel(kernel_name);
78  }
79 
80 }
81 
82 //
83 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
84 //
85 
86 const std::string SVD_BIDIAG_PACK_KERNEL = "bidiag_pack";
87 const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL = "house_update_A_left";
88 const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL = "house_update_A_right";
89 const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL = "house_update_QL";
90 const std::string SVD_GIVENS_NEXT_KERNEL = "givens_next";
91 const std::string SVD_COPY_COL_KERNEL = "copy_col";
92 const std::string SVD_COPY_ROW_KERNEL = "copy_row";
93 
94 template<typename DestNumericT, typename SrcNumericT>
96 {
97  assert(dest.row_major() == src.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
98 
99  assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
100 
101  std::string kernel_name("convert_");
102  kernel_name += dest.row_major() ? "row_" : "col_";
104  kernel_name += "_";
106 
107  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(dest).context());
110 
111  viennacl::ocl::enqueue(k( dest, cl_uint(dest.start1()), cl_uint(dest.stride1()), cl_uint(dest.size1()), cl_uint(dest.internal_size1()), cl_uint(dest.start2()), cl_uint(dest.stride2()), cl_uint(dest.size2()), cl_uint(dest.internal_size2()),
112  src, cl_uint( src.start1()), cl_uint( src.stride1()), cl_uint( src.size1()), cl_uint( src.internal_size1()), cl_uint( src.start2()), cl_uint( src.stride2()), cl_uint( src.size2()), cl_uint( src.internal_size2())
113  ) );
114 }
115 
116 template<typename NumericT,
117  typename ScalarT1>
119  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha)
120 {
121  assert(A.row_major() == B.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
122 
123  std::string kernel_name("assign_*m_**00");
124  bool is_scalar_cpu = is_cpu_scalar<ScalarT1>::value;
125  kernel_name[7] = is_scalar_cpu ? 'h' : 'd';
126  kernel_name[10] = flip_sign_alpha ? '1' : '0';
127  kernel_name[11] = reciprocal_alpha ? '1' : '0';
128 
129  scheduler::statement statement = scheduler::preset::av(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha);
130  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
131 }
132 
133 
134 template<typename NumericT,
135  typename ScalarT1, typename ScalarT2>
137  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
138  matrix_base<NumericT> const & C, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
139 {
140  assert(A.row_major() == B.row_major() && A.row_major() == C.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
141 
142  std::string kernel_name("assign_*m*m_****");
143  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
144  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
145  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
146  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
147  kernel_name[12] = flip_sign_alpha ? '1' : '0';
148  kernel_name[13] = reciprocal_alpha ? '1' : '0';
149  kernel_name[14] = flip_sign_beta ? '1' : '0';
150  kernel_name[15] = reciprocal_beta ? '1' : '0';
151 
152  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha, &C, &beta, flip_sign_beta, reciprocal_beta);
153  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
154 }
155 
156 
157 template<typename NumericT,
158  typename ScalarT1, typename ScalarT2>
160  matrix_base<NumericT> const & B, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
161  matrix_base<NumericT> const & C, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
162 {
163  assert(A.row_major() == B.row_major() && A.row_major() == C.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
164 
165  std::string kernel_name("ip_add_*v*v_****");
166  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
167  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
168  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
169  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
170  kernel_name[12] = flip_sign_alpha ? '1' : '0';
171  kernel_name[13] = reciprocal_alpha ? '1' : '0';
172  kernel_name[14] = flip_sign_beta ? '1' : '0';
173  kernel_name[15] = reciprocal_beta ? '1' : '0';
174 
175 
176  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &A, &B, &alpha, flip_sign_alpha, reciprocal_alpha, &C, &beta, flip_sign_beta, reciprocal_beta);
177  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
178 }
179 
180 template<typename NumericT,
181  typename SizeT, typename DistanceT>
183  matrix_base<NumericT> & temp_trans)
184 {
185  std::string kernel_name("trans_kernel");
186  viennacl::ocl::kernel& kernel = detail::legacy_kernel_for_matrix(proxy.lhs(),kernel_name);
187  viennacl::ocl::enqueue(kernel(proxy.lhs(),
188  static_cast<cl_uint>(proxy.lhs().start1()), static_cast<cl_uint>(proxy.lhs().start2()),
189  static_cast<cl_uint>(proxy.lhs().internal_size1()), static_cast<cl_uint>(proxy.lhs().internal_size2()),
190  static_cast<cl_uint>(proxy.lhs().size1()), static_cast<cl_uint>(proxy.lhs().size2()),
191  static_cast<cl_uint>(proxy.lhs().stride1()), static_cast<cl_uint>(proxy.lhs().stride2()),
192 
193  temp_trans,
194  static_cast<cl_uint>(temp_trans.start1()), static_cast<cl_uint>(temp_trans.start2()),
195  static_cast<cl_uint>(temp_trans.internal_size1()), static_cast<cl_uint>(temp_trans.internal_size2()),
196  static_cast<cl_uint>(temp_trans.stride1()), static_cast<cl_uint>(temp_trans.stride2())));
197 }
198 
199 template<typename NumericT>
200 void matrix_assign(matrix_base<NumericT> & A, NumericT s, bool up_to_internal_size = false)
201 {
204 
205  dynamic_cast<device_specific::matrix_axpy_template*>(kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).template_of("assign_cpu"))->up_to_internal_size(up_to_internal_size);
206  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("assign_cpu", statement);
207 }
208 
209 template<typename NumericT>
211 {
214  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("diagonal_assign_cpu", statement);
215 }
216 
217 template<typename NumericT>
219 {
221  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_diag_from_vector", statement);
222 }
223 
224 template<typename NumericT>
226 {
228  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_diag_to_vector", statement);
229 }
230 
231 template<typename NumericT>
232 void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & vec)
233 {
234  scheduler::statement statement = scheduler::preset::matrix_row(&vec, &A, i);
235  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_row", statement);
236 }
237 
238 template<typename NumericT>
239 void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & vec)
240 {
242  kernels::matrix<NumericT>::execution_handler(A.row_major(), viennacl::traits::opencl_context(A)).execute("matrix_column", statement);
243 }
244 
245 
246 //
248 //
249 
250 // Binary operations A = B .* C and A = B ./ C
256 template<typename NumericT, typename OpT>
259 {
260  assert(A.row_major() == proxy.lhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
261  assert(A.row_major() == proxy.rhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
262  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
263  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
264 
266  scheduler::statement statement = scheduler::preset::binary_element_op(&A, &proxy.lhs(), &proxy.rhs(),TYPE);
268 }
269 
270 
271 // Unary operations
272 
278 template<typename NumericT, typename OpT>
281 {
282  assert(A.row_major() == proxy.lhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
283  assert(A.row_major() == proxy.rhs().row_major() && bool("Elementwise operations on mixed matrix layouts not supported yet!"));
284 
285  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
286  assert(viennacl::traits::opencl_handle(A).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
287 
289  scheduler::statement statement = scheduler::preset::unary_element_op(&A, &proxy.lhs(),TYPE);
291 }
292 
293 
294 
304 template<typename NumericT>
305 void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
306  const vector_base<NumericT> & vec,
307  vector_base<NumericT> & result)
308 {
309  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
310  assert(viennacl::traits::handle(vec) != viennacl::traits::handle(result) && bool("No direct inplace matrix-vector product possible. Introduce a temporary!"));
311 
312  std::string kernel_name = std::string("mat_vec_") + (trans_A ^ A.row_major()?"T":"N");
313  scheduler::statement statement = scheduler::preset::mat_vec_prod(&A, trans_A, &vec, &result);
314  kernels::row_wise_reduction<NumericT>::execution_handler(viennacl::traits::opencl_context(A)).execute(kernel_name, statement);
315 }
316 
317 //
318 
319 
325 template<typename NumericT, typename ScalarType >
326 void prod_impl(matrix_base<NumericT> const & A, bool A_trans,
327  matrix_base<NumericT> const & B, bool B_trans,
329  ScalarType alpha,
330  ScalarType beta)
331 {
332  bool effective_A_trans = A_trans ^ A.row_major();
333  bool effective_B_trans = B_trans ^ B.row_major();
334 
335  char cAt = effective_A_trans ? 'T' : 'N';
336  char cBt = effective_B_trans ? 'T' : 'N';
337 
338  std::string kernel_prefix("prod_");
339  kernel_prefix+=cAt;
340  kernel_prefix+=cBt;
341 
342  scheduler::statement statement = scheduler::preset::mat_mat_prod(alpha, &A, effective_A_trans, &B, effective_B_trans, beta, &C);
343  kernels::matrix_prod<NumericT>::execution_handler(C.row_major(), viennacl::traits::opencl_context(C)).execute(kernel_prefix, statement);
344 }
345 
346 //
348 //
349 
350 
363 template<typename NumericT, typename ScalarT1>
365  ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
366  const vector_base<NumericT> & vec1,
367  const vector_base<NumericT> & vec2)
368 {
369  assert( (viennacl::traits::size1(A) == viennacl::traits::size(vec1)) && bool("Size mismatch in scaled_rank_1_update: size1(A) != size(v1)"));
370  assert( (viennacl::traits::size2(A) == viennacl::traits::size(vec2)) && bool("Size mismatch in scaled_rank_1_update: size2(A) != size(v2)"));
371 
372  cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
374  viennacl::ocl::kernel& kernel= detail::legacy_kernel_for_matrix(A, is_cpu ? "scaled_rank1_update_cpu" : "scaled_rank1_update_gpu");
375 
376  viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(A),
377  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
378  cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
379  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
381 
382  viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
383  options_alpha,
384 
385  viennacl::traits::opencl_handle(vec1),
386  cl_uint(viennacl::traits::start(vec1)),
387  cl_uint(viennacl::traits::stride(vec1)),
388  cl_uint(viennacl::traits::size(vec1)),
389 
390  viennacl::traits::opencl_handle(vec2),
391  cl_uint(viennacl::traits::start(vec2)),
392  cl_uint(viennacl::traits::stride(vec2)),
393  cl_uint(viennacl::traits::size(vec2))
394  )
395  );
396 }
397 
398 //
399 template <typename SCALARTYPE, typename VectorType>
401  VectorType & dh,
402  VectorType & sh
403  )
404 {
405  viennacl::vector<SCALARTYPE> D(dh.size());
406  viennacl::vector<SCALARTYPE> S(sh.size());
407 
408  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
409  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_BIDIAG_PACK_KERNEL);
410 
411  viennacl::ocl::enqueue(kernel(
412  A,
413  D,
414  S,
415  static_cast<cl_uint>(A.size1()),
416  static_cast<cl_uint>(A.size2()),
417  static_cast<cl_uint>(A.internal_size2())
418  ));
419 
420  fast_copy(D, dh);
421  fast_copy(S, sh);
422 }
423 
424 
425 template <typename NumericT>
429  )
430 {
431  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
432 
433  if(A.row_major())
434  {
437 
438  viennacl::ocl::enqueue(kernel(
439  A,
440  dh,
441  sh,
442  cl_uint(viennacl::traits::size1(A)),
443  cl_uint(viennacl::traits::size2(A)),
445  ));
446  }
447  else
448  {
451 
452  viennacl::ocl::enqueue(kernel(
453  A,
454  dh,
455  sh,
456  cl_uint(viennacl::traits::size1(A)),
457  cl_uint(viennacl::traits::size2(A)),
459  ));
460  }
461 }
462 
463 
464 template <typename NumericT>
468 {
469 
470  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
471  if(A.row_major())
472  {
475  viennacl::ocl::enqueue(kernel(
476  A,
477  D,
478  static_cast<cl_uint>(start + 1),
479  static_cast<cl_uint>(start),
480  cl_uint(viennacl::traits::size1(A)),
481  cl_uint(viennacl::traits::size2(A)),
483  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * 4))
484  ));
485  }
486  else
487  {
490  viennacl::ocl::enqueue(kernel(
491  A,
492  D,
493  static_cast<cl_uint>(start + 1),
494  static_cast<cl_uint>(start),
495  cl_uint(viennacl::traits::size1(A)),
496  cl_uint(viennacl::traits::size2(A)),
498  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * 4))
499  ));
500  }
501 
502 
503 
504 
505 }
506 
507 template <typename NumericT>
510 {
511  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
512 
513  if(A.row_major())
514  {
517 
518  viennacl::ocl::enqueue(kernel(
519  A,
520  D,
521  static_cast<cl_uint>(0),
522  static_cast<cl_uint>(0),
523  cl_uint(viennacl::traits::size1(A)),
524  cl_uint(viennacl::traits::size2(A)),
526  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
527  ));
528  }
529  else
530  {
533 
534  viennacl::ocl::enqueue(kernel(
535  A,
536  D,
537  static_cast<cl_uint>(0),
538  static_cast<cl_uint>(0),
539  cl_uint(viennacl::traits::size1(A)),
540  cl_uint(viennacl::traits::size2(A)),
542  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
543  ));
544  }
545 
546 
547 }
548 
549 
550 
551 template <typename NumericT>
554  vcl_size_t A_size1)
555 
556 {
557  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(Q).context());
558 
559  if(Q.row_major())
560  {
563 
564  viennacl::ocl::enqueue(kernel(
565  Q,
566  D,
567  cl_uint(A_size1),
569  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
570  ));
571  }
572  else
573  {
576 
577  viennacl::ocl::enqueue(kernel(
578  Q,
579  D,
580  cl_uint(A_size1),
582  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(NumericT)))
583  ));
584  }
585 
586 }
587 
588 
589 template<typename NumericT>
591  vector_base<NumericT>& tmp1,
592  vector_base<NumericT>& tmp2,
593  int l,
594  int m
595  )
596  {
597  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
598 
599  if(matrix.row_major())
600  {
603  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
604  kernel.local_work_size(0, 256);
605 
606  viennacl::ocl::enqueue(kernel(
607  matrix,
608  tmp1,
609  tmp2,
610  cl_uint(viennacl::traits::size1(matrix)),
611  cl_uint(viennacl::traits::internal_size2(matrix)),
612  static_cast<cl_uint>(l),
613  static_cast<cl_uint>(m - 1)
614  ));
615  }
616  else
617  {
620  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
621  kernel.local_work_size(0, 256);
622 
623  viennacl::ocl::enqueue(kernel(
624  matrix,
625  tmp1,
626  tmp2,
627  cl_uint(viennacl::traits::size1(matrix)),
628  cl_uint(viennacl::traits::internal_size2(matrix)),
629  static_cast<cl_uint>(l),
630  static_cast<cl_uint>(m - 1)
631  ));
632  }
633 
634 
635  }
636 
637  template <typename NumericT>
640  vcl_size_t row_start,
641  vcl_size_t col_start,
642  bool copy_col
643  )
644  {
645  std::string kernel_name = copy_col ? SVD_COPY_COL_KERNEL : SVD_COPY_ROW_KERNEL;
646  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
647 
648  if(A.row_major())
649  {
652 
653  viennacl::ocl::enqueue(kernel(
654  A,
655  V,
656  static_cast<cl_uint>(row_start),
657  static_cast<cl_uint>(col_start),
658  copy_col ? cl_uint(viennacl::traits::size1(A))
659  : cl_uint(viennacl::traits::size2(A)),
660  static_cast<cl_uint>(A.internal_size2())
661  ));
662  }
663  else
664  {
667 
668  viennacl::ocl::enqueue(kernel(
669  A,
670  V,
671  static_cast<cl_uint>(row_start),
672  static_cast<cl_uint>(col_start),
673  copy_col ? cl_uint(viennacl::traits::size1(A))
674  : cl_uint(viennacl::traits::size2(A)),
675  static_cast<cl_uint>(A.internal_size2())
676  ));
677  }
678 
679 
680  }
681 
682 } // namespace opencl
683 } //namespace linalg
684 } //namespace viennacl
685 
686 
687 #endif
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &A)
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void am(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &vec)
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
statement matrix_diag_from_vector(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, int id)
Definition: preset.hpp:363
Generic size and resize functionality for different vector and matrix types.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
viennacl::ocl::program & get_program(std::string const &name)
Returns the program with the provided name.
Definition: context.hpp:532
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:382
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
const std::string SVD_BIDIAG_PACK_KERNEL
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:390
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
size_type stride2() const
Returns the number of columns.
Definition: matrix_def.hpp:234
const std::string SVD_GIVENS_NEXT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: matrix.hpp:696
A dense matrix class.
Definition: forwards.h:375
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:652
Determines row and column increments for matrices and matrix proxies.
void bidiag_pack(matrix_base< NumericT > &A, viennacl::vector< NumericT > &dh, viennacl::vector< NumericT > &sh)
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:33
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
void scaled_rank_1_update(matrix_base< NumericT > &A, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
OpenCL kernel file for singular value decomposition.
const std::string SVD_COPY_ROW_KERNEL
Common implementations shared by OpenCL-based operations.
float NumericT
Definition: bisect.cpp:40
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void ambm(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &C, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:726
statement binary_element_op(NumericT const *x, NumericT const *y, NumericT const *z, scheduler::operation_node_type TYPE)
Definition: preset.hpp:284
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:448
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:644
Definition: blas3.hpp:36
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
statement mat_vec_prod(viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::vector_base< NumericT > const *x, viennacl::vector_base< NumericT > const *y)
Definition: preset.hpp:410
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:42
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:772
Metafunction for querying type informations.
Definition: forwards.h:156
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
size_type stride1() const
Returns the number of rows.
Definition: matrix_def.hpp:232
statement unary_element_op(NumericT const *x, NumericT const *y, scheduler::operation_node_type TYPE)
Definition: preset.hpp:305
scheduler::statement av(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a)
Definition: preset.hpp:88
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
Wrapper class for an OpenCL program.
Definition: program.hpp:42
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
void execute(template_base const &T, statements_container const &statements, viennacl::ocl::context &ctx=viennacl::ocl::current_context(), bool force_compilation=false)
Definition: execute.hpp:44
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
statement mat_mat_prod(NumericT alpha, viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::matrix_base< NumericT > const *B, bool B_trans, NumericT beta, viennacl::matrix_base< NumericT > const *C)
Definition: preset.hpp:416
statement matrix_diag_to_vector(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, int id)
Definition: preset.hpp:357
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &vec)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:541
viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:248
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
void matrix_assign(matrix_base< NumericT > &A, NumericT s, bool up_to_internal_size=false)
scheduler::statement diagonal_assign_cpu(matrix_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:147
float ScalarType
Definition: fft_1d.cpp:42
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
statement matrix_row(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, unsigned int id)
Definition: preset.hpp:344
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
Definition: matrix.hpp:609
void givens_next(matrix_base< NumericT > &matrix, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
A tag class representing transposed matrices.
Definition: forwards.h:220
statement matrix_column(viennacl::vector_base< NumericT > const *x, viennacl::matrix_base< NumericT > const *A, unsigned int id)
Definition: preset.hpp:350
size_type start2() const
Returns the number of columns.
Definition: matrix_def.hpp:230
void execute(container_type::key_type const &key, statements_container const &statements)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
const std::string SVD_COPY_COL_KERNEL
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &vec)
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:123
viennacl::ocl::kernel & get_kernel(std::string const &name)
Returns the kernel with the provided name.
Definition: context.hpp:773
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
const char * operator_string(scheduler::operation_node_type type)
static void init(viennacl::ocl::context &ctx)
Definition: matrix.hpp:869
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void ambm_m(matrix_base< NumericT > &A, matrix_base< NumericT > const &B, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &C, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void matrix_diagonal_assign(matrix_base< NumericT > &A, NumericT s)
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the number of rows.
Definition: matrix_def.hpp:228
Runtime generation of OpenCL kernels for matrix operations.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)