ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cmath>
26 #include <algorithm> //for std::max and std::min
27 
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
33 #include "viennacl/traits/size.hpp"
38 
39 #ifdef VIENNACL_WITH_OPENMP
40 #include <omp.h>
41 #endif
42 
43 // Minimum vector size for using OpenMP on vector operations:
44 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
45  #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
46 #endif
47 
48 namespace viennacl
49 {
50 namespace linalg
51 {
52 namespace host_based
53 {
54 namespace detail
55 {
56  template<typename NumericT>
57  NumericT flip_sign(NumericT val) { return -val; }
58  inline unsigned long flip_sign(unsigned long val) { return val; }
59  inline unsigned int flip_sign(unsigned int val) { return val; }
60  inline unsigned short flip_sign(unsigned short val) { return val; }
61  inline unsigned char flip_sign(unsigned char val) { return val; }
62 }
63 
64 //
65 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
66 //
67 template<typename DestNumericT, typename SrcNumericT>
69 {
70  DestNumericT * data_dest = detail::extract_raw_pointer<DestNumericT>(dest);
71  SrcNumericT const * data_src = detail::extract_raw_pointer<SrcNumericT>(src);
72 
73  vcl_size_t start_dest = viennacl::traits::start(dest);
74  vcl_size_t inc_dest = viennacl::traits::stride(dest);
75  vcl_size_t size_dest = viennacl::traits::size(dest);
76 
77  vcl_size_t start_src = viennacl::traits::start(src);
78  vcl_size_t inc_src = viennacl::traits::stride(src);
79 
80 #ifdef VIENNACL_WITH_OPENMP
81  #pragma omp parallel for if (size_dest > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
82 #endif
83  for (long i = 0; i < static_cast<long>(size_dest); ++i)
84  data_dest[static_cast<vcl_size_t>(i)*inc_dest+start_dest] = static_cast<DestNumericT>(data_src[static_cast<vcl_size_t>(i)*inc_src+start_src]);
85 }
86 
87 template<typename NumericT, typename ScalarT1>
89  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
90 {
91  typedef NumericT value_type;
92 
93  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
94  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
95 
96  value_type data_alpha = alpha;
97  if (flip_sign_alpha)
98  data_alpha = detail::flip_sign(data_alpha);
99 
103 
106 
107  if (reciprocal_alpha)
108  {
109 #ifdef VIENNACL_WITH_OPENMP
110  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
111 #endif
112  for (long i = 0; i < static_cast<long>(size1); ++i)
113  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
114  }
115  else
116  {
117 #ifdef VIENNACL_WITH_OPENMP
118  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
119 #endif
120  for (long i = 0; i < static_cast<long>(size1); ++i)
121  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
122  }
123 }
124 
125 
126 template<typename NumericT, typename ScalarT1, typename ScalarT2>
128  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
129  vector_base<NumericT> const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
130 {
131  typedef NumericT value_type;
132 
133  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
134  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
135  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
136 
137  value_type data_alpha = alpha;
138  if (flip_sign_alpha)
139  data_alpha = detail::flip_sign(data_alpha);
140 
141  value_type data_beta = beta;
142  if (flip_sign_beta)
143  data_beta = detail::flip_sign(data_beta);
144 
148 
151 
152  vcl_size_t start3 = viennacl::traits::start(vec3);
154 
155  if (reciprocal_alpha)
156  {
157  if (reciprocal_beta)
158  {
159 #ifdef VIENNACL_WITH_OPENMP
160  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
161 #endif
162  for (long i = 0; i < static_cast<long>(size1); ++i)
163  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
164  }
165  else
166  {
167 #ifdef VIENNACL_WITH_OPENMP
168  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
169 #endif
170  for (long i = 0; i < static_cast<long>(size1); ++i)
171  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
172  }
173  }
174  else
175  {
176  if (reciprocal_beta)
177  {
178 #ifdef VIENNACL_WITH_OPENMP
179  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
180 #endif
181  for (long i = 0; i < static_cast<long>(size1); ++i)
182  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
183  }
184  else
185  {
186 #ifdef VIENNACL_WITH_OPENMP
187  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
188 #endif
189  for (long i = 0; i < static_cast<long>(size1); ++i)
190  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
191  }
192  }
193 }
194 
195 
196 template<typename NumericT, typename ScalarT1, typename ScalarT2>
198  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
199  vector_base<NumericT> const & vec3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
200 {
201  typedef NumericT value_type;
202 
203  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
204  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
205  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
206 
207  value_type data_alpha = alpha;
208  if (flip_sign_alpha)
209  data_alpha = detail::flip_sign(data_alpha);
210 
211  value_type data_beta = beta;
212  if (flip_sign_beta)
213  data_beta = detail::flip_sign(data_beta);
214 
218 
221 
222  vcl_size_t start3 = viennacl::traits::start(vec3);
224 
225  if (reciprocal_alpha)
226  {
227  if (reciprocal_beta)
228  {
229 #ifdef VIENNACL_WITH_OPENMP
230  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
231 #endif
232  for (long i = 0; i < static_cast<long>(size1); ++i)
233  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
234  }
235  else
236  {
237 #ifdef VIENNACL_WITH_OPENMP
238  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
239 #endif
240  for (long i = 0; i < static_cast<long>(size1); ++i)
241  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
242  }
243  }
244  else
245  {
246  if (reciprocal_beta)
247  {
248 #ifdef VIENNACL_WITH_OPENMP
249  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
250 #endif
251  for (long i = 0; i < static_cast<long>(size1); ++i)
252  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
253  }
254  else
255  {
256 #ifdef VIENNACL_WITH_OPENMP
257  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
258 #endif
259  for (long i = 0; i < static_cast<long>(size1); ++i)
260  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
261  }
262  }
263 }
264 
265 
266 
267 
274 template<typename NumericT>
275 void vector_assign(vector_base<NumericT> & vec1, const NumericT & alpha, bool up_to_internal_size = false)
276 {
277  typedef NumericT value_type;
278 
279  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
280 
284  vcl_size_t loop_bound = up_to_internal_size ? vec1.internal_size() : size1; //Note: Do NOT use traits::internal_size() here, because vector proxies don't require padding.
285 
286  value_type data_alpha = static_cast<value_type>(alpha);
287 
288 #ifdef VIENNACL_WITH_OPENMP
289  #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
290 #endif
291  for (long i = 0; i < static_cast<long>(loop_bound); ++i)
292  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha;
293 }
294 
295 
301 template<typename NumericT>
303 {
304  typedef NumericT value_type;
305 
306  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
307  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
308 
312 
315 
316 #ifdef VIENNACL_WITH_OPENMP
317  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
318 #endif
319  for (long i = 0; i < static_cast<long>(size1); ++i)
320  {
321  value_type temp = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
322  data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
323  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = temp;
324  }
325 }
326 
327 
329 
335 template<typename NumericT, typename OpT>
338 {
339  typedef NumericT value_type;
341 
342  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
343  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
344  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
345 
349 
351  vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs());
352 
353  vcl_size_t start3 = viennacl::traits::start(proxy.rhs());
354  vcl_size_t inc3 = viennacl::traits::stride(proxy.rhs());
355 
356 #ifdef VIENNACL_WITH_OPENMP
357  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
358 #endif
359  for (long i = 0; i < static_cast<long>(size1); ++i)
360  OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
361 }
362 
368 template<typename NumericT, typename OpT>
371 {
372  typedef NumericT value_type;
374 
375  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
376  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
377 
381 
383  vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs());
384 
385 #ifdef VIENNACL_WITH_OPENMP
386  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
387 #endif
388  for (long i = 0; i < static_cast<long>(size1); ++i)
389  OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]);
390 }
391 
392 
394 
395 
396 //implementation of inner product:
397 
398 namespace detail
399 {
400 
401 // the following circumvents problems when trying to use a variable of template parameter type for a reduction.
402 // Such a behavior is not covered by the OpenMP standard, hence we manually apply some preprocessor magic to resolve the problem.
403 // See https://github.com/viennacl/viennacl-dev/issues/112 for a detailed explanation and discussion.
404 
405 #define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \
406  inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \
407  RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \
408  TEMPSCALART temp = 0;
409 
410 #define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \
411  for (long i = 0; i < static_cast<long>(size1); ++i) \
412  temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \
413  return static_cast<RESULTSCALART>(temp); \
414  }
415 
416 // char
418 #ifdef VIENNACL_WITH_OPENMP
419  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
420 #endif
422 
423 VIENNACL_INNER_PROD_IMPL_1(unsigned char, int)
424 #ifdef VIENNACL_WITH_OPENMP
425  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
426 #endif
427 VIENNACL_INNER_PROD_IMPL_2(unsigned char)
428 
429 
430 // short
431 VIENNACL_INNER_PROD_IMPL_1(short, int)
432 #ifdef VIENNACL_WITH_OPENMP
433  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
434 #endif
436 
437 VIENNACL_INNER_PROD_IMPL_1(unsigned short, int)
438 #ifdef VIENNACL_WITH_OPENMP
439  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
440 #endif
441 VIENNACL_INNER_PROD_IMPL_2(unsigned short)
442 
443 
444 // int
446 #ifdef VIENNACL_WITH_OPENMP
447  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
448 #endif
450 
451 VIENNACL_INNER_PROD_IMPL_1(unsigned int, unsigned int)
452 #ifdef VIENNACL_WITH_OPENMP
453  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
454 #endif
455 VIENNACL_INNER_PROD_IMPL_2(unsigned int)
456 
457 
458 // long
459 VIENNACL_INNER_PROD_IMPL_1(long, long)
460 #ifdef VIENNACL_WITH_OPENMP
461  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
462 #endif
464 
465 VIENNACL_INNER_PROD_IMPL_1(unsigned long, unsigned long)
466 #ifdef VIENNACL_WITH_OPENMP
467  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
468 #endif
469 VIENNACL_INNER_PROD_IMPL_2(unsigned long)
470 
471 
472 // float
473 VIENNACL_INNER_PROD_IMPL_1(float, float)
474 #ifdef VIENNACL_WITH_OPENMP
475  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
476 #endif
478 
479 // double
480 VIENNACL_INNER_PROD_IMPL_1(double, double)
481 #ifdef VIENNACL_WITH_OPENMP
482  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
483 #endif
485 
486 #undef VIENNACL_INNER_PROD_IMPL_1
487 #undef VIENNACL_INNER_PROD_IMPL_2
488 }
489 
496 template<typename NumericT, typename ScalarT>
498  vector_base<NumericT> const & vec2,
499  ScalarT & result)
500 {
501  typedef NumericT value_type;
502 
503  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
504  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
505 
509 
512 
513  result = detail::inner_prod_impl(data_vec1, start1, inc1, size1,
514  data_vec2, start2, inc2); //Note: Assignment to result might be expensive, thus a temporary is introduced here
515 }
516 
517 template<typename NumericT>
519  vector_tuple<NumericT> const & vec_tuple,
520  vector_base<NumericT> & result)
521 {
522  typedef NumericT value_type;
523 
524  value_type const * data_x = detail::extract_raw_pointer<value_type>(x);
525 
526  vcl_size_t start_x = viennacl::traits::start(x);
529 
530  std::vector<value_type> temp(vec_tuple.const_size());
531  std::vector<value_type const *> data_y(vec_tuple.const_size());
532  std::vector<vcl_size_t> start_y(vec_tuple.const_size());
533  std::vector<vcl_size_t> stride_y(vec_tuple.const_size());
534 
535  for (vcl_size_t j=0; j<vec_tuple.const_size(); ++j)
536  {
537  data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.const_at(j));
538  start_y[j] = viennacl::traits::start(vec_tuple.const_at(j));
539  stride_y[j] = viennacl::traits::stride(vec_tuple.const_at(j));
540  }
541 
542  // Note: No OpenMP here because it cannot perform a reduction on temp-array. Savings in memory bandwidth are expected to still justify this approach...
543  for (vcl_size_t i = 0; i < size_x; ++i)
544  {
545  value_type entry_x = data_x[i*inc_x+start_x];
546  for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
547  temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
548  }
549 
550  for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
551  result[j] = temp[j]; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
552 }
553 
554 
555 namespace detail
556 {
557 
558 #define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \
559  inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
560  TEMPSCALART temp = 0;
561 
562 #define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \
563  for (long i = 0; i < static_cast<long>(size1); ++i) \
564  temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \
565  return static_cast<RESULTSCALART>(temp); \
566  }
567 
568 // char
569 VIENNACL_NORM_1_IMPL_1(char, int)
570 #ifdef VIENNACL_WITH_OPENMP
571  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
572 #endif
573 VIENNACL_NORM_1_IMPL_2(char, int)
574 
575 VIENNACL_NORM_1_IMPL_1(unsigned char, int)
576 #ifdef VIENNACL_WITH_OPENMP
577  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
578 #endif
579 VIENNACL_NORM_1_IMPL_2(unsigned char, int)
580 
581 // short
582 VIENNACL_NORM_1_IMPL_1(short, int)
583 #ifdef VIENNACL_WITH_OPENMP
584  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
585 #endif
586 VIENNACL_NORM_1_IMPL_2(short, int)
587 
588 VIENNACL_NORM_1_IMPL_1(unsigned short, int)
589 #ifdef VIENNACL_WITH_OPENMP
590  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
591 #endif
592 VIENNACL_NORM_1_IMPL_2(unsigned short, int)
593 
594 
595 // int
596 VIENNACL_NORM_1_IMPL_1(int, int)
597 #ifdef VIENNACL_WITH_OPENMP
598  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
599 #endif
600 VIENNACL_NORM_1_IMPL_2(int, int)
601 
602 VIENNACL_NORM_1_IMPL_1(unsigned int, unsigned int)
603 #ifdef VIENNACL_WITH_OPENMP
604  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
605 #endif
606 VIENNACL_NORM_1_IMPL_2(unsigned int, unsigned int)
607 
608 
609 // long
610 VIENNACL_NORM_1_IMPL_1(long, long)
611 #ifdef VIENNACL_WITH_OPENMP
612  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
613 #endif
614 VIENNACL_NORM_1_IMPL_2(long, long)
615 
616 VIENNACL_NORM_1_IMPL_1(unsigned long, unsigned long)
617 #ifdef VIENNACL_WITH_OPENMP
618  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
619 #endif
620 VIENNACL_NORM_1_IMPL_2(unsigned long, unsigned long)
621 
622 
623 // float
624 VIENNACL_NORM_1_IMPL_1(float, float)
625 #ifdef VIENNACL_WITH_OPENMP
626  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
627 #endif
628 VIENNACL_NORM_1_IMPL_2(float, float)
629 
630 // double
631 VIENNACL_NORM_1_IMPL_1(double, double)
632 #ifdef VIENNACL_WITH_OPENMP
633  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
634 #endif
635 VIENNACL_NORM_1_IMPL_2(double, double)
636 
637 #undef VIENNACL_NORM_1_IMPL_1
638 #undef VIENNACL_NORM_1_IMPL_2
639 
640 }
641 
647 template<typename NumericT, typename ScalarT>
649  ScalarT & result)
650 {
651  typedef NumericT value_type;
652 
653  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
654 
658 
659  result = detail::norm_1_impl(data_vec1, start1, inc1, size1); //Note: Assignment to result might be expensive, thus using a temporary for accumulation
660 }
661 
662 
663 
664 namespace detail
665 {
666 
667 #define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \
668  inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
669  TEMPSCALART temp = 0;
670 
671 #define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \
672  for (long i = 0; i < static_cast<long>(size1); ++i) { \
673  RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \
674  temp += static_cast<TEMPSCALART>(data * data); \
675  } \
676  return static_cast<RESULTSCALART>(temp); \
677  }
678 
679 // char
680 VIENNACL_NORM_2_IMPL_1(char, int)
681 #ifdef VIENNACL_WITH_OPENMP
682  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
683 #endif
684 VIENNACL_NORM_2_IMPL_2(char, int)
685 
686 VIENNACL_NORM_2_IMPL_1(unsigned char, int)
687 #ifdef VIENNACL_WITH_OPENMP
688  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
689 #endif
690 VIENNACL_NORM_2_IMPL_2(unsigned char, int)
691 
692 
693 // short
694 VIENNACL_NORM_2_IMPL_1(short, int)
695 #ifdef VIENNACL_WITH_OPENMP
696  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
697 #endif
698 VIENNACL_NORM_2_IMPL_2(short, int)
699 
700 VIENNACL_NORM_2_IMPL_1(unsigned short, int)
701 #ifdef VIENNACL_WITH_OPENMP
702  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
703 #endif
704 VIENNACL_NORM_2_IMPL_2(unsigned short, int)
705 
706 
707 // int
708 VIENNACL_NORM_2_IMPL_1(int, int)
709 #ifdef VIENNACL_WITH_OPENMP
710  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
711 #endif
712 VIENNACL_NORM_2_IMPL_2(int, int)
713 
714 VIENNACL_NORM_2_IMPL_1(unsigned int, unsigned int)
715 #ifdef VIENNACL_WITH_OPENMP
716  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
717 #endif
718 VIENNACL_NORM_2_IMPL_2(unsigned int, unsigned int)
719 
720 
721 // long
722 VIENNACL_NORM_2_IMPL_1(long, long)
723 #ifdef VIENNACL_WITH_OPENMP
724  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
725 #endif
726 VIENNACL_NORM_2_IMPL_2(long, long)
727 
728 VIENNACL_NORM_2_IMPL_1(unsigned long, unsigned long)
729 #ifdef VIENNACL_WITH_OPENMP
730  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
731 #endif
732 VIENNACL_NORM_2_IMPL_2(unsigned long, unsigned long)
733 
734 
735 // float
736 VIENNACL_NORM_2_IMPL_1(float, float)
737 #ifdef VIENNACL_WITH_OPENMP
738  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
739 #endif
740 VIENNACL_NORM_2_IMPL_2(float, float)
741 
742 // double
743 VIENNACL_NORM_2_IMPL_1(double, double)
744 #ifdef VIENNACL_WITH_OPENMP
745  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
746 #endif
747 VIENNACL_NORM_2_IMPL_2(double, double)
748 
749 #undef VIENNACL_NORM_2_IMPL_1
750 #undef VIENNACL_NORM_2_IMPL_2
751 
752 }
753 
754 
760 template<typename NumericT, typename ScalarT>
762  ScalarT & result)
763 {
764  typedef NumericT value_type;
765 
766  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
767 
771 
772  result = std::sqrt(detail::norm_2_impl(data_vec1, start1, inc1, size1)); //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
773 }
774 
780 template<typename NumericT, typename ScalarT>
782  ScalarT & result)
783 {
784  typedef NumericT value_type;
785 
786  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
787 
791 
792  value_type temp = 0;
793 
794  // Note: No max() reduction in OpenMP yet
795  for (vcl_size_t i = 0; i < size1; ++i)
796  temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])))); //casting to double in order to avoid problems if T is an integer type
797 
798  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
799 }
800 
801 //This function should return a CPU scalar, otherwise statements like
802 // vcl_rhs[index_norm_inf(vcl_rhs)]
803 // are ambiguous
809 template<typename NumericT>
811 {
812  typedef NumericT value_type;
813 
814  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
815 
819 
820  value_type temp = 0;
821  value_type data;
822  vcl_size_t index = start1;
823 
824  // Note: No suitable reduction in OpenMP yet
825  for (vcl_size_t i = 0; i < size1; ++i)
826  {
827  data = static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))); //casting to double in order to avoid problems if T is an integer type
828  if (data > temp)
829  {
830  index = i;
831  temp = data;
832  }
833  }
834 
835  return index;
836 }
837 
843 template<typename NumericT, typename ScalarT>
844 void max_impl(vector_base<NumericT> const & vec1,
845  ScalarT & result)
846 {
847  typedef NumericT value_type;
848 
849  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
850 
854 
855  value_type temp = data_vec1[start1];
856 
857  // Note: No max() reduction in OpenMP yet
858  for (vcl_size_t i = 1; i < size1; ++i)
859  temp = std::max<value_type>(temp, data_vec1[i*inc1+start1]);
860 
861  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
862 }
863 
869 template<typename NumericT, typename ScalarT>
870 void min_impl(vector_base<NumericT> const & vec1,
871  ScalarT & result)
872 {
873  typedef NumericT value_type;
874 
875  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
876 
880 
881  value_type temp = data_vec1[start1];
882 
883  // Note: No max() reduction in OpenMP yet
884  for (vcl_size_t i = 1; i < size1; ++i)
885  temp = std::min<value_type>(temp, data_vec1[i*inc1+start1]);
886 
887  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
888 }
889 
895 template<typename NumericT, typename ScalarT>
896 void sum_impl(vector_base<NumericT> const & vec1,
897  ScalarT & result)
898 {
899  typedef NumericT value_type;
900 
901  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
902 
906 
907  value_type temp = 0;
908 
909  // Note: Have a look at inner_prod and norm_X before adding OpenMP pragmas here
910  for (vcl_size_t i = 0; i < size1; ++i)
911  temp += data_vec1[i*inc1+start1];
912 
913  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
914 }
915 
916 
926 template<typename NumericT>
928  vector_base<NumericT> & vec2,
929  NumericT alpha, NumericT beta)
930 {
931  typedef NumericT value_type;
932 
933  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
934  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
935 
939 
942 
943  value_type data_alpha = alpha;
944  value_type data_beta = beta;
945 
946 #ifdef VIENNACL_WITH_OPENMP
947  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
948 #endif
949  for (long i = 0; i < static_cast<long>(size1); ++i)
950  {
951  value_type temp1 = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
952  value_type temp2 = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
953 
954  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
955  data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
956  }
957 }
958 
959 namespace detail
960 {
962  template<typename NumericT>
964  vector_base<NumericT> & vec2,
965  bool is_inclusive)
966  {
967  NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
968  NumericT * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);
969 
973  if (size1 < 1)
974  return;
975 
978 
979 #ifdef VIENNACL_WITH_OPENMP
981  {
982  std::vector<NumericT> thread_results(omp_get_max_threads());
983 
984  // inclusive scan each thread segment:
985  #pragma omp parallel
986  {
987  vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
988  vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
989  vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread, size1);
990 
991  NumericT thread_sum = 0;
992  for(vcl_size_t i = thread_start; i < thread_stop; i++)
993  thread_sum += data_vec1[i * inc1 + start1];
994 
995  thread_results[omp_get_thread_num()] = thread_sum;
996  }
997 
998  // exclusive-scan of thread results:
999  NumericT current_offset = 0;
1000  for (vcl_size_t i=0; i<thread_results.size(); ++i)
1001  {
1002  NumericT tmp = thread_results[i];
1003  thread_results[i] = current_offset;
1004  current_offset += tmp;
1005  }
1006 
1007  // exclusive/inclusive scan of each segment with correct offset:
1008  #pragma omp parallel
1009  {
1010  vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
1011  vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
1012  vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread, size1);
1013 
1014  NumericT thread_sum = thread_results[omp_get_thread_num()];
1015  if (is_inclusive)
1016  {
1017  for(vcl_size_t i = thread_start; i < thread_stop; i++)
1018  {
1019  thread_sum += data_vec1[i * inc1 + start1];
1020  data_vec2[i * inc2 + start2] = thread_sum;
1021  }
1022  }
1023  else
1024  {
1025  for(vcl_size_t i = thread_start; i < thread_stop; i++)
1026  {
1027  NumericT tmp = data_vec1[i * inc1 + start1];
1028  data_vec2[i * inc2 + start2] = thread_sum;
1029  thread_sum += tmp;
1030  }
1031  }
1032  }
1033  } else
1034 #endif
1035  {
1036  NumericT sum = 0;
1037  if (is_inclusive)
1038  {
1039  for(vcl_size_t i = 0; i < size1; i++)
1040  {
1041  sum += data_vec1[i * inc1 + start1];
1042  data_vec2[i * inc2 + start2] = sum;
1043  }
1044  }
1045  else
1046  {
1047  for(vcl_size_t i = 0; i < size1; i++)
1048  {
1049  NumericT tmp = data_vec1[i * inc1 + start1];
1050  data_vec2[i * inc2 + start2] = sum;
1051  sum += tmp;
1052  }
1053  }
1054  }
1055 
1056  }
1057 }
1058 
1067 template<typename NumericT>
1069  vector_base<NumericT> & vec2)
1070 {
1071  detail::vector_scan_impl(vec1, vec2, true);
1072 }
1073 
1082 template<typename NumericT>
1084  vector_base<NumericT> & vec2)
1085 {
1086  detail::vector_scan_impl(vec1, vec2, false);
1087 }
1088 
1089 
1090 } //namespace host_based
1091 } //namespace linalg
1092 } //namespace viennacl
1093 
1094 
1095 #endif
vcl_size_t const_size() const
Definition: vector.hpp:1143
#define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART)
#define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART)
#define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART)
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP.
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector.
Generic size and resize functionality for different vector and matrix types.
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
#define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART)
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Worker class for decomposing expression templates.
Definition: op_applier.hpp:43
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
Determines row and column increments for matrices and matrix proxies.
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation.
#define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART)
Definition: blas3.hpp:36
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:269
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104
std::size_t vcl_size_t
Definition: forwards.h:75
Common routines for single-threaded or OpenMP-enabled execution on CPU.
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void vector_scan_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2, bool is_inclusive)
Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend.
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
#define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART)
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1146
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Definition: vector_def.hpp:120
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void inner_prod_impl(vector_base< T > const &x, vector_tuple< T > const &y_tuple, vector_base< T > &result)
Computes the inner products , , ..., and writes the result to a (sub-)vector...
Implementation of the ViennaCL scalar class.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_1_impl(viennacl::vector_expression< LHS, RHS, OP > const &vec, S2 &result)
Computes the l^1-norm of a vector - interface for a vector expression. Creates a temporary.
#define VIENNACL_OPENMP_VECTOR_MIN_SIZE
Simple enable-if variant that uses the SFINAE pattern.