ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/range.hpp"
27 #include "viennacl/scalar.hpp"
28 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/traits/size.hpp"
37 
38 #ifdef VIENNACL_WITH_OPENCL
40 #endif
41 
42 #ifdef VIENNACL_WITH_CUDA
44 #endif
45 
46 namespace viennacl
47 {
48  namespace linalg
49  {
50  template<typename DestNumericT, typename SrcNumericT>
52  {
53  assert(viennacl::traits::size(dest) == viennacl::traits::size(src) && bool("Incompatible vector sizes in v1 = v2 (convert): size(v1) != size(v2)"));
54 
55  switch (viennacl::traits::handle(dest).get_active_handle_id())
56  {
59  break;
60 #ifdef VIENNACL_WITH_OPENCL
63  break;
64 #endif
65 #ifdef VIENNACL_WITH_CUDA
68  break;
69 #endif
71  throw memory_exception("not initialised!");
72  default:
73  throw memory_exception("not implemented");
74  }
75  }
76 
77  template<typename T, typename ScalarType1>
78  void av(vector_base<T> & vec1,
79  vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
80  {
81  assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 = v2 @ alpha: size(v1) != size(v2)"));
82 
83  switch (viennacl::traits::handle(vec1).get_active_handle_id())
84  {
86  viennacl::linalg::host_based::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
87  break;
88 #ifdef VIENNACL_WITH_OPENCL
90  viennacl::linalg::opencl::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
91  break;
92 #endif
93 #ifdef VIENNACL_WITH_CUDA
95  viennacl::linalg::cuda::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
96  break;
97 #endif
99  throw memory_exception("not initialised!");
100  default:
101  throw memory_exception("not implemented");
102  }
103  }
104 
105 
106  template<typename T, typename ScalarType1, typename ScalarType2>
107  void avbv(vector_base<T> & vec1,
108  vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
109  vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
110  {
111  assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 = v2 @ alpha + v3 @ beta: size(v1) != size(v2)"));
112  assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 = v2 @ alpha + v3 @ beta: size(v2) != size(v3)"));
113 
114  switch (viennacl::traits::handle(vec1).get_active_handle_id())
115  {
118  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
119  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
120  break;
121 #ifdef VIENNACL_WITH_OPENCL
124  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
125  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
126  break;
127 #endif
128 #ifdef VIENNACL_WITH_CUDA
131  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
132  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
133  break;
134 #endif
136  throw memory_exception("not initialised!");
137  default:
138  throw memory_exception("not implemented");
139  }
140  }
141 
142 
143  template<typename T, typename ScalarType1, typename ScalarType2>
144  void avbv_v(vector_base<T> & vec1,
145  vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
146  vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
147  {
148  assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v1) != size(v2)"));
149  assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v2) != size(v3)"));
150 
151  switch (viennacl::traits::handle(vec1).get_active_handle_id())
152  {
155  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
156  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
157  break;
158 #ifdef VIENNACL_WITH_OPENCL
161  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
162  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
163  break;
164 #endif
165 #ifdef VIENNACL_WITH_CUDA
168  vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
169  vec3, beta, len_beta, reciprocal_beta, flip_sign_beta);
170  break;
171 #endif
173  throw memory_exception("not initialised!");
174  default:
175  throw memory_exception("not implemented");
176  }
177  }
178 
179 
186  template<typename T>
187  void vector_assign(vector_base<T> & vec1, const T & alpha, bool up_to_internal_size = false)
188  {
189  switch (viennacl::traits::handle(vec1).get_active_handle_id())
190  {
192  viennacl::linalg::host_based::vector_assign(vec1, alpha, up_to_internal_size);
193  break;
194 #ifdef VIENNACL_WITH_OPENCL
196  viennacl::linalg::opencl::vector_assign(vec1, alpha, up_to_internal_size);
197  break;
198 #endif
199 #ifdef VIENNACL_WITH_CUDA
201  viennacl::linalg::cuda::vector_assign(vec1, alpha, up_to_internal_size);
202  break;
203 #endif
205  throw memory_exception("not initialised!");
206  default:
207  throw memory_exception("not implemented");
208  }
209  }
210 
211 
217  template<typename T>
219  {
220  assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in vector_swap()"));
221 
222  switch (viennacl::traits::handle(vec1).get_active_handle_id())
223  {
226  break;
227 #ifdef VIENNACL_WITH_OPENCL
230  break;
231 #endif
232 #ifdef VIENNACL_WITH_CUDA
235  break;
236 #endif
238  throw memory_exception("not initialised!");
239  default:
240  throw memory_exception("not implemented");
241  }
242  }
243 
244 
246 
247 
248 
254  template<typename T, typename OP>
256  vector_expression<const vector_base<T>, const vector_base<T>, OP> const & proxy)
257  {
258  assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()"));
259 
260  switch (viennacl::traits::handle(vec1).get_active_handle_id())
261  {
264  break;
265 #ifdef VIENNACL_WITH_OPENCL
268  break;
269 #endif
270 #ifdef VIENNACL_WITH_CUDA
273  break;
274 #endif
276  throw memory_exception("not initialised!");
277  default:
278  throw memory_exception("not implemented");
279  }
280  }
281 
284 // Helper macro for generating binary element-wise operations such as element_prod(), element_div(), element_pow() without unnecessary code duplication.
// For a given OPNAME it defines four overloads of element_##OPNAME(), one for each combination of
// vector_base<T> and vector_expression<...> as left and right operand. Each overload only builds and
// returns a vector_expression tagged with op_element_binary<op_##OPNAME> that references its two
// arguments; no element-wise computation is carried out inside these functions.
285 #define VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(OPNAME) \
286  template<typename T> \
287  viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> > \
288  element_##OPNAME(vector_base<T> const & v1, vector_base<T> const & v2) \
289  { \
290  return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> >(v1, v2); \
291  } \
292 \
293  template<typename V1, typename V2, typename OP, typename T> \
294  viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> > \
295  element_##OPNAME(vector_expression<const V1, const V2, OP> const & proxy, vector_base<T> const & v2) \
296  { \
297  return viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> >(proxy, v2); \
298  } \
299 \
300  template<typename T, typename V2, typename V3, typename OP> \
301  viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> > \
302  element_##OPNAME(vector_base<T> const & v1, vector_expression<const V2, const V3, OP> const & proxy) \
303  { \
304  return viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> >(v1, proxy); \
305  } \
306 \
307  template<typename V1, typename V2, typename OP1, \
308  typename V3, typename V4, typename OP2> \
309  viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \
310  const vector_expression<const V3, const V4, OP2>, \
311  op_element_binary<op_##OPNAME> > \
312  element_##OPNAME(vector_expression<const V1, const V2, OP1> const & proxy1, \
313  vector_expression<const V3, const V4, OP2> const & proxy2) \
314  {\
315  return viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \
316  const vector_expression<const V3, const V4, OP2>, \
317  op_element_binary<op_##OPNAME> >(proxy1, proxy2); \
318  }
319 
320  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(prod) //for element_prod()
321  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(div) //for element_div()
322  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(pow) //for element_pow()
323 
324  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(eq)
325  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(neq)
326  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(greater)
327  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(less)
328  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(geq)
329  VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(leq)
330 
331 #undef VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS
332 
333 // Helper macro for generating unary element-wise operations such as element_exp(), element_sin(), etc. without unnecessary code duplication.
// For a given funcname it defines two overloads of element_##funcname(): one taking a vector_base<T>
// and one taking a vector_expression<...>. Each overload returns a vector_expression tagged with
// op_element_unary<op_##funcname> in which the single argument is supplied as both the left and the
// right operand; no element-wise computation is carried out inside these functions.
334 #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \
335  template<typename T> \
336  viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> > \
337  element_##funcname(vector_base<T> const & v) \
338  { \
339  return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> >(v, v); \
340  } \
341  template<typename LHS, typename RHS, typename OP> \
342  viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \
343  const vector_expression<const LHS, const RHS, OP>, \
344  op_element_unary<op_##funcname> > \
345  element_##funcname(vector_expression<const LHS, const RHS, OP> const & proxy) \
346  { \
347  return viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \
348  const vector_expression<const LHS, const RHS, OP>, \
349  op_element_unary<op_##funcname> >(proxy, proxy); \
350  } \
351 
369 
370 #undef VIENNACL_MAKE_UNARY_ELEMENT_OP
371 
374 
376 
377  //implementation of inner product:
378  //namespace {
379 
386  template<typename T>
387  void inner_prod_impl(vector_base<T> const & vec1,
388  vector_base<T> const & vec2,
389  scalar<T> & result)
390  {
391  assert( vec1.size() == vec2.size() && bool("Size mismatch") );
392 
394  {
397  break;
398 #ifdef VIENNACL_WITH_OPENCL
400  viennacl::linalg::opencl::inner_prod_impl(vec1, vec2, result);
401  break;
402 #endif
403 #ifdef VIENNACL_WITH_CUDA
405  viennacl::linalg::cuda::inner_prod_impl(vec1, vec2, result);
406  break;
407 #endif
409  throw memory_exception("not initialised!");
410  default:
411  throw memory_exception("not implemented");
412  }
413  }
414 
415  // vector expression on lhs
416  template<typename LHS, typename RHS, typename OP, typename T>
418  vector_base<T> const & vec2,
419  scalar<T> & result)
420  {
421  viennacl::vector<T> temp = vec1;
422  inner_prod_impl(temp, vec2, result);
423  }
424 
425 
426  // vector expression on rhs
427  template<typename T, typename LHS, typename RHS, typename OP>
428  void inner_prod_impl(vector_base<T> const & vec1,
430  scalar<T> & result)
431  {
432  viennacl::vector<T> temp = vec2;
433  inner_prod_impl(vec1, temp, result);
434  }
435 
436 
437  // vector expression on lhs and rhs
438  template<typename LHS1, typename RHS1, typename OP1,
439  typename LHS2, typename RHS2, typename OP2, typename T>
442  scalar<T> & result)
443  {
444  viennacl::vector<T> temp1 = vec1;
445  viennacl::vector<T> temp2 = vec2;
446  inner_prod_impl(temp1, temp2, result);
447  }
448 
449 
450 
451 
458  template<typename T>
459  void inner_prod_cpu(vector_base<T> const & vec1,
460  vector_base<T> const & vec2,
461  T & result)
462  {
463  assert( vec1.size() == vec2.size() && bool("Size mismatch") );
464 
466  {
469  break;
470 #ifdef VIENNACL_WITH_OPENCL
472  viennacl::linalg::opencl::inner_prod_cpu(vec1, vec2, result);
473  break;
474 #endif
475 #ifdef VIENNACL_WITH_CUDA
477  viennacl::linalg::cuda::inner_prod_cpu(vec1, vec2, result);
478  break;
479 #endif
481  throw memory_exception("not initialised!");
482  default:
483  throw memory_exception("not implemented");
484  }
485  }
486 
487  // vector expression on lhs
488  template<typename LHS, typename RHS, typename OP, typename T>
490  vector_base<T> const & vec2,
491  T & result)
492  {
493  viennacl::vector<T> temp = vec1;
494  inner_prod_cpu(temp, vec2, result);
495  }
496 
497 
498  // vector expression on rhs
499  template<typename T, typename LHS, typename RHS, typename OP>
500  void inner_prod_cpu(vector_base<T> const & vec1,
502  T & result)
503  {
504  viennacl::vector<T> temp = vec2;
505  inner_prod_cpu(vec1, temp, result);
506  }
507 
508 
509  // vector expression on lhs and rhs
510  template<typename LHS1, typename RHS1, typename OP1,
511  typename LHS2, typename RHS2, typename OP2, typename S3>
514  S3 & result)
515  {
516  viennacl::vector<S3> temp1 = vec1;
517  viennacl::vector<S3> temp2 = vec2;
518  inner_prod_cpu(temp1, temp2, result);
519  }
520 
521 
522 
529  template<typename T>
531  vector_tuple<T> const & y_tuple,
532  vector_base<T> & result)
533  {
534  assert( x.size() == y_tuple.const_at(0).size() && bool("Size mismatch") );
535  assert( result.size() == y_tuple.const_size() && bool("Number of elements does not match result size") );
536 
538  {
541  break;
542 #ifdef VIENNACL_WITH_OPENCL
544  viennacl::linalg::opencl::inner_prod_impl(x, y_tuple, result);
545  break;
546 #endif
547 #ifdef VIENNACL_WITH_CUDA
549  viennacl::linalg::cuda::inner_prod_impl(x, y_tuple, result);
550  break;
551 #endif
553  throw memory_exception("not initialised!");
554  default:
555  throw memory_exception("not implemented");
556  }
557  }
558 
559 
565  template<typename T>
566  void norm_1_impl(vector_base<T> const & vec,
567  scalar<T> & result)
568  {
569  switch (viennacl::traits::handle(vec).get_active_handle_id())
570  {
573  break;
574 #ifdef VIENNACL_WITH_OPENCL
577  break;
578 #endif
579 #ifdef VIENNACL_WITH_CUDA
582  break;
583 #endif
585  throw memory_exception("not initialised!");
586  default:
587  throw memory_exception("not implemented");
588  }
589  }
590 
591 
597  template<typename LHS, typename RHS, typename OP, typename S2>
599  S2 & result)
600  {
602  norm_1_impl(temp, result);
603  }
604 
605 
606 
612  template<typename T>
613  void norm_1_cpu(vector_base<T> const & vec,
614  T & result)
615  {
616  switch (viennacl::traits::handle(vec).get_active_handle_id())
617  {
620  break;
621 #ifdef VIENNACL_WITH_OPENCL
624  break;
625 #endif
626 #ifdef VIENNACL_WITH_CUDA
629  break;
630 #endif
632  throw memory_exception("not initialised!");
633  default:
634  throw memory_exception("not implemented");
635  }
636  }
637 
643  template<typename LHS, typename RHS, typename OP, typename S2>
645  S2 & result)
646  {
648  norm_1_cpu(temp, result);
649  }
650 
651 
652 
653 
659  template<typename T>
660  void norm_2_impl(vector_base<T> const & vec,
661  scalar<T> & result)
662  {
663  switch (viennacl::traits::handle(vec).get_active_handle_id())
664  {
667  break;
668 #ifdef VIENNACL_WITH_OPENCL
671  break;
672 #endif
673 #ifdef VIENNACL_WITH_CUDA
676  break;
677 #endif
679  throw memory_exception("not initialised!");
680  default:
681  throw memory_exception("not implemented");
682  }
683  }
684 
690  template<typename LHS, typename RHS, typename OP, typename T>
692  scalar<T> & result)
693  {
694  viennacl::vector<T> temp = vec;
695  norm_2_impl(temp, result);
696  }
697 
698 
704  template<typename T>
705  void norm_2_cpu(vector_base<T> const & vec,
706  T & result)
707  {
708  switch (viennacl::traits::handle(vec).get_active_handle_id())
709  {
712  break;
713 #ifdef VIENNACL_WITH_OPENCL
716  break;
717 #endif
718 #ifdef VIENNACL_WITH_CUDA
721  break;
722 #endif
724  throw memory_exception("not initialised!");
725  default:
726  throw memory_exception("not implemented");
727  }
728  }
729 
735  template<typename LHS, typename RHS, typename OP, typename S2>
737  S2 & result)
738  {
740  norm_2_cpu(temp, result);
741  }
742 
743 
744 
745 
751  template<typename T>
752  void norm_inf_impl(vector_base<T> const & vec,
753  scalar<T> & result)
754  {
755  switch (viennacl::traits::handle(vec).get_active_handle_id())
756  {
759  break;
760 #ifdef VIENNACL_WITH_OPENCL
763  break;
764 #endif
765 #ifdef VIENNACL_WITH_CUDA
768  break;
769 #endif
771  throw memory_exception("not initialised!");
772  default:
773  throw memory_exception("not implemented");
774  }
775  }
776 
782  template<typename LHS, typename RHS, typename OP, typename T>
784  scalar<T> & result)
785  {
786  viennacl::vector<T> temp = vec;
787  norm_inf_impl(temp, result);
788  }
789 
790 
796  template<typename T>
797  void norm_inf_cpu(vector_base<T> const & vec,
798  T & result)
799  {
800  switch (viennacl::traits::handle(vec).get_active_handle_id())
801  {
804  break;
805 #ifdef VIENNACL_WITH_OPENCL
808  break;
809 #endif
810 #ifdef VIENNACL_WITH_CUDA
813  break;
814 #endif
816  throw memory_exception("not initialised!");
817  default:
818  throw memory_exception("not implemented");
819  }
820  }
821 
827  template<typename LHS, typename RHS, typename OP, typename S2>
829  S2 & result)
830  {
832  norm_inf_cpu(temp, result);
833  }
834 
835 
836  //This function should return a CPU scalar, otherwise statements like
837  // vcl_rhs[index_norm_inf(vcl_rhs)]
838  // are ambiguous
844  template<typename T>
846  {
847  switch (viennacl::traits::handle(vec).get_active_handle_id())
848  {
851 #ifdef VIENNACL_WITH_OPENCL
854 #endif
855 #ifdef VIENNACL_WITH_CUDA
858 #endif
860  throw memory_exception("not initialised!");
861  default:
862  throw memory_exception("not implemented");
863  }
864  }
865 
870  template<typename LHS, typename RHS, typename OP>
872  {
874  return index_norm_inf(temp);
875  }
876 
878 
884  template<typename NumericT>
886  {
887  switch (viennacl::traits::handle(vec).get_active_handle_id())
888  {
891  break;
892 #ifdef VIENNACL_WITH_OPENCL
895  break;
896 #endif
897 #ifdef VIENNACL_WITH_CUDA
900  break;
901 #endif
903  throw memory_exception("not initialised!");
904  default:
905  throw memory_exception("not implemented");
906  }
907  }
908 
914  template<typename LHS, typename RHS, typename OP, typename NumericT>
916  {
917  viennacl::vector<NumericT> temp = vec;
918  max_impl(temp, result);
919  }
920 
921 
927  template<typename T>
928  void max_cpu(vector_base<T> const & vec, T & result)
929  {
930  switch (viennacl::traits::handle(vec).get_active_handle_id())
931  {
934  break;
935 #ifdef VIENNACL_WITH_OPENCL
938  break;
939 #endif
940 #ifdef VIENNACL_WITH_CUDA
942  viennacl::linalg::cuda::max_cpu(vec, result);
943  break;
944 #endif
946  throw memory_exception("not initialised!");
947  default:
948  throw memory_exception("not implemented");
949  }
950  }
951 
957  template<typename LHS, typename RHS, typename OP, typename S2>
958  void max_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, S2 & result)
959  {
961  max_cpu(temp, result);
962  }
963 
965 
971  template<typename NumericT>
973  {
974  switch (viennacl::traits::handle(vec).get_active_handle_id())
975  {
978  break;
979 #ifdef VIENNACL_WITH_OPENCL
982  break;
983 #endif
984 #ifdef VIENNACL_WITH_CUDA
987  break;
988 #endif
990  throw memory_exception("not initialised!");
991  default:
992  throw memory_exception("not implemented");
993  }
994  }
995 
1001  template<typename LHS, typename RHS, typename OP, typename NumericT>
1003  {
1004  viennacl::vector<NumericT> temp = vec;
1005  min_impl(temp, result);
1006  }
1007 
1008 
1014  template<typename T>
1015  void min_cpu(vector_base<T> const & vec, T & result)
1016  {
1017  switch (viennacl::traits::handle(vec).get_active_handle_id())
1018  {
1019  case viennacl::MAIN_MEMORY:
1021  break;
1022 #ifdef VIENNACL_WITH_OPENCL
1024  viennacl::linalg::opencl::min_cpu(vec, result);
1025  break;
1026 #endif
1027 #ifdef VIENNACL_WITH_CUDA
1028  case viennacl::CUDA_MEMORY:
1029  viennacl::linalg::cuda::min_cpu(vec, result);
1030  break;
1031 #endif
1033  throw memory_exception("not initialised!");
1034  default:
1035  throw memory_exception("not implemented");
1036  }
1037  }
1038 
1044  template<typename LHS, typename RHS, typename OP, typename S2>
1046  {
1048  min_cpu(temp, result);
1049  }
1050 
1052 
1058  template<typename NumericT>
1060  {
1061  switch (viennacl::traits::handle(vec).get_active_handle_id())
1062  {
1063  case viennacl::MAIN_MEMORY:
1065  break;
1066 #ifdef VIENNACL_WITH_OPENCL
1069  break;
1070 #endif
1071 #ifdef VIENNACL_WITH_CUDA
1072  case viennacl::CUDA_MEMORY:
1073  viennacl::linalg::cuda::sum_impl(vec, result);
1074  break;
1075 #endif
1077  throw memory_exception("not initialised!");
1078  default:
1079  throw memory_exception("not implemented");
1080  }
1081  }
1082 
1088  template<typename LHS, typename RHS, typename OP, typename NumericT>
1090  {
1091  viennacl::vector<NumericT> temp = vec;
1092  sum_impl(temp, result);
1093  }
1094 
1095 
1101  template<typename T>
1102  void sum_cpu(vector_base<T> const & vec, T & result)
1103  {
1104  switch (viennacl::traits::handle(vec).get_active_handle_id())
1105  {
1106  case viennacl::MAIN_MEMORY:
1108  break;
1109 #ifdef VIENNACL_WITH_OPENCL
1111  viennacl::linalg::opencl::sum_cpu(vec, result);
1112  break;
1113 #endif
1114 #ifdef VIENNACL_WITH_CUDA
1115  case viennacl::CUDA_MEMORY:
1116  viennacl::linalg::cuda::sum_cpu(vec, result);
1117  break;
1118 #endif
1120  throw memory_exception("not initialised!");
1121  default:
1122  throw memory_exception("not implemented");
1123  }
1124  }
1125 
1131  template<typename LHS, typename RHS, typename OP, typename S2>
1133  {
1135  sum_cpu(temp, result);
1136  }
1137 
1138 
1139 
1140 
1141 
1151  template<typename T>
1153  vector_base<T> & vec2,
1154  T alpha, T beta)
1155  {
1156  switch (viennacl::traits::handle(vec1).get_active_handle_id())
1157  {
1158  case viennacl::MAIN_MEMORY:
1159  viennacl::linalg::host_based::plane_rotation(vec1, vec2, alpha, beta);
1160  break;
1161 #ifdef VIENNACL_WITH_OPENCL
1163  viennacl::linalg::opencl::plane_rotation(vec1, vec2, alpha, beta);
1164  break;
1165 #endif
1166 #ifdef VIENNACL_WITH_CUDA
1167  case viennacl::CUDA_MEMORY:
1168  viennacl::linalg::cuda::plane_rotation(vec1, vec2, alpha, beta);
1169  break;
1170 #endif
1172  throw memory_exception("not initialised!");
1173  default:
1174  throw memory_exception("not implemented");
1175  }
1176  }
1177 
1189  template<typename NumericT>
1191  vector_base<NumericT> & vec2)
1192  {
1193  switch (viennacl::traits::handle(vec1).get_active_handle_id())
1194  {
1195  case viennacl::MAIN_MEMORY:
1197  break;
1198  #ifdef VIENNACL_WITH_OPENCL
1201  break;
1202  #endif
1203 
1204  #ifdef VIENNACL_WITH_CUDA
1205  case viennacl::CUDA_MEMORY:
1207  break;
1208  #endif
1209 
1211  throw memory_exception("not initialised!");
1212  default:
1213  throw memory_exception("not implemented");
1214  }
1215  }
1216 
1222  template<typename NumericT>
1224  {
1225  inclusive_scan(vec, vec);
1226  }
1227 
1239  template<typename NumericT>
1241  vector_base<NumericT> & vec2)
1242  {
1243  switch (viennacl::traits::handle(vec1).get_active_handle_id())
1244  {
1245  case viennacl::MAIN_MEMORY:
1247  break;
1248  #ifdef VIENNACL_WITH_OPENCL
1251  break;
1252  #endif
1253 
1254  #ifdef VIENNACL_WITH_CUDA
1255  case viennacl::CUDA_MEMORY:
1257  break;
1258  #endif
1259 
1261  throw memory_exception("not initialised!");
1262  default:
1263  throw memory_exception("not implemented");
1264  }
1265  }
1266 
1272  template<typename NumericT>
1274  {
1275  exclusive_scan(vec, vec);
1276  }
1277  } //namespace linalg
1278 
1279  template<typename T, typename LHS, typename RHS, typename OP>
1281  {
1282  assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!"));
1283  assert( (v1.size() > 0) && bool("Vector not yet initialized!") );
1284 
1286 
1287  return v1;
1288  }
1289 
1290  template<typename T, typename LHS, typename RHS, typename OP>
1292  {
1293  assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!"));
1294  assert( (v1.size() > 0) && bool("Vector not yet initialized!") );
1295 
1297 
1298  return v1;
1299  }
1300 
1301 } //namespace viennacl
1302 
1303 
1304 #endif
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector with final reduction on the CPU.
void norm_2_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^2-norm of a vector with final reduction on CPU.
vcl_size_t const_size() const
Definition: vector.hpp:1143
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void vector_assign(vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
void norm_2_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^2-norm of a vector - implementation.
void norm_1_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^1-norm of a vector with final reduction on CPU.
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP.
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors with the final reduction step on the CPU - dispatcher inter...
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector.
void min_cpu(vector_base< T > const &vec, T &result)
Computes the minimum of a vector with final reduction on the CPU.
vector< NumericT > operator-=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 -= A * v2, where A is a matrix.
Implementations of NMF operations using OpenCL.
void inner_prod_cpu(vector_base< NumericT > const &x, vector_base< NumericT > const &y, NumericT &result)
Exception class in case of memory errors.
Definition: forwards.h:572
void max_cpu(vector_base< T > const &vec, T &result)
Computes the maximum of a vector with final reduction on the CPU.
Generic size and resize functionality for different vector and matrix types.
Defines the worker class for decomposing an expression tree into small chunks, which can be processed...
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector.
void plane_rotation(vector_base< NumericT > &x, vector_base< NumericT > &y, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void norm_inf_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the supremum-norm of a vector.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the sum over all entries of a vector.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vector< NumericT > operator+=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 += A * v2, where A is a matrix.
void sum_impl(vector_base< T > const &vec, scalar< T > &result)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on the CPU - dispatcher interface.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
cl_uint index_norm_inf(vector_base< NumericT > const &x)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void norm_1_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
void avbv_v(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void max_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the maximum of a vector, both reduction stages run on the GPU.
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA.
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector.
void norm_1_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^1-norm of a vector.
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
A tag class representing inplace addition.
Definition: forwards.h:83
void vector_assign(vector_base< NumericT > &x, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
void max_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - dispatcher interface.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
viennacl::vector< float > v1
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
void max_impl(vector_base< T > const &vec, scalar< T > &result)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA.
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation.
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum of a vector.
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, scalar< T > &result)
Computes the inner product of two vectors - dispatcher interface.
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum of a vector.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on the CPU.
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:269
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector.
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname)
void inner_prod_impl(vector_base< NumericT > const &x, vector_base< NumericT > const &y, scalar< NumericT > &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(x...
void norm_inf_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
void inner_prod_cpu(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector.
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector - dispatcher interface.
void sum_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the sum of a vector, first reduction stage on the GPU, second stage on the CPU...
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104
void sum_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the sum of a vector, both reduction stages run on the GPU.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
std::size_t vcl_size_t
Definition: forwards.h:75
void min_impl(vector_base< T > const &vec, scalar< T > &result)
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA.
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP.
void norm_inf_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
void min_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU...
void norm_1_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void inclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan.
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void vector_swap(vector_base< NumericT > &x, vector_base< NumericT > &y)
Swaps the contents of two vectors, data is copied.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void min_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the minimum of a vector, both reduction stages run on the GPU.
A tag class representing inplace subtraction.
Definition: forwards.h:85
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
void sum_cpu(vector_base< T > const &vec, T &result)
Computes the sum of a vector with final reduction on the CPU.
void av(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void element_op(matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA.
void norm_inf_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the supremum-norm of a vector.
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void avbv(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1146
Implementation of a range object for use with proxy objects.
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
Implementation of the ViennaCL scalar class.
Implementations of NMF operations using CUDA.
void norm_2_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector with final reduction on the CPU.
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Simple enable-if variant that uses the SFINAE pattern.
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Definition: mem_handle.hpp:118
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...