1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
39 #ifdef VIENNACL_WITH_OPENMP
44 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
45 #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
56 template<
typename NumericT>
/** @brief Sign-flip overload for unsigned long arguments.
 *
 * No negation is applied: the argument is handed back exactly as received.
 *
 * @param val  The value supplied by the caller
 * @return     val, unmodified
 */
inline unsigned long flip_sign(unsigned long val)
{
  return val;
}
/** @brief Sign-flip overload for unsigned int arguments.
 *
 * No negation is applied: the argument is handed back exactly as received.
 *
 * @param val  The value supplied by the caller
 * @return     val, unmodified
 */
inline unsigned int flip_sign(unsigned int val)
{
  return val;
}
/** @brief Sign-flip overload for unsigned short arguments.
 *
 * No negation is applied: the argument is handed back exactly as received.
 *
 * @param val  The value supplied by the caller
 * @return     val, unmodified
 */
inline unsigned short flip_sign(unsigned short val)
{
  return val;
}
/** @brief Sign-flip overload for unsigned char arguments.
 *
 * No negation is applied: the argument is handed back exactly as received.
 *
 * @param val  The value supplied by the caller
 * @return     val, unmodified
 */
inline unsigned char flip_sign(unsigned char val)
{
  return val;
}
67 template<
typename DestNumericT,
typename SrcNumericT>
70 DestNumericT * data_dest = detail::extract_raw_pointer<DestNumericT>(dest);
71 SrcNumericT
const * data_src = detail::extract_raw_pointer<SrcNumericT>(src);
80 #ifdef VIENNACL_WITH_OPENMP
81 #pragma omp parallel for if (size_dest > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
83 for (
long i = 0; i < static_cast<long>(size_dest); ++i)
84 data_dest[static_cast<vcl_size_t>(i)*inc_dest+start_dest] =
static_cast<DestNumericT
>(data_src[
static_cast<vcl_size_t>(i)*inc_src+start_src]);
87 template<
typename NumericT,
typename ScalarT1>
93 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
94 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
96 value_type data_alpha = alpha;
107 if (reciprocal_alpha)
109 #ifdef VIENNACL_WITH_OPENMP
110 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
112 for (
long i = 0; i < static_cast<long>(
size1); ++i)
113 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
117 #ifdef VIENNACL_WITH_OPENMP
118 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
120 for (
long i = 0; i < static_cast<long>(
size1); ++i)
121 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
126 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
133 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
134 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
135 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
137 value_type data_alpha = alpha;
141 value_type data_beta = beta;
155 if (reciprocal_alpha)
159 #ifdef VIENNACL_WITH_OPENMP
160 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
162 for (
long i = 0; i < static_cast<long>(
size1); ++i)
163 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
167 #ifdef VIENNACL_WITH_OPENMP
168 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
170 for (
long i = 0; i < static_cast<long>(
size1); ++i)
171 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
178 #ifdef VIENNACL_WITH_OPENMP
179 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
181 for (
long i = 0; i < static_cast<long>(
size1); ++i)
182 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
186 #ifdef VIENNACL_WITH_OPENMP
187 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
189 for (
long i = 0; i < static_cast<long>(
size1); ++i)
190 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
196 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
203 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
204 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
205 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
207 value_type data_alpha = alpha;
211 value_type data_beta = beta;
225 if (reciprocal_alpha)
229 #ifdef VIENNACL_WITH_OPENMP
230 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
232 for (
long i = 0; i < static_cast<long>(
size1); ++i)
233 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
237 #ifdef VIENNACL_WITH_OPENMP
238 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
240 for (
long i = 0; i < static_cast<long>(
size1); ++i)
241 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
248 #ifdef VIENNACL_WITH_OPENMP
249 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
251 for (
long i = 0; i < static_cast<long>(
size1); ++i)
252 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
256 #ifdef VIENNACL_WITH_OPENMP
257 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
259 for (
long i = 0; i < static_cast<long>(
size1); ++i)
260 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
274 template<
typename NumericT>
279 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
286 value_type data_alpha =
static_cast<value_type
>(alpha);
288 #ifdef VIENNACL_WITH_OPENMP
289 #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
291 for (
long i = 0; i < static_cast<long>(loop_bound); ++i)
292 data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_alpha;
301 template<
typename NumericT>
306 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
307 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
316 #ifdef VIENNACL_WITH_OPENMP
317 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
319 for (
long i = 0; i < static_cast<long>(
size1); ++i)
321 value_type temp = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
322 data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1];
323 data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = temp;
335 template<
typename NumericT,
typename OpT>
342 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
343 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
344 value_type
const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
356 #ifdef VIENNACL_WITH_OPENMP
357 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
359 for (
long i = 0; i < static_cast<long>(
size1); ++i)
360 OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
368 template<
typename NumericT,
typename OpT>
375 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
376 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
385 #ifdef VIENNACL_WITH_OPENMP
386 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
388 for (
long i = 0; i < static_cast<long>(
size1); ++i)
389 OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2]);
/** @brief Opening part of the code generator for the raw-pointer inner_prod_impl() overloads.
 *
 * Expands to the signature of an inline function inner_prod_impl() returning RESULTSCALART, which takes
 * a raw pointer, start offset and increment for each of the two operands plus the common element count size1,
 * followed by the declaration of the accumulator 'temp' of type TEMPSCALART, initialized to 0.
 * The function body is intentionally left open by this macro.
 * NOTE(review): the expansion sites are not visible here; presumably each use is followed by an OpenMP
 * reduction pragma on 'temp' and by VIENNACL_INNER_PROD_IMPL_2 (loop and return) - confirm.
 */
405 #define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \
406 inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \
407 RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \
408 TEMPSCALART temp = 0;
410 #define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \
411 for (long i = 0; i < static_cast<long>(size1); ++i) \
412 temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \
413 return static_cast<RESULTSCALART>(temp); \
418 #ifdef VIENNACL_WITH_OPENMP
419 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
424 #ifdef VIENNACL_WITH_OPENMP
425 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
432 #ifdef VIENNACL_WITH_OPENMP
433 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
438 #ifdef VIENNACL_WITH_OPENMP
439 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
446 #ifdef VIENNACL_WITH_OPENMP
447 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
452 #ifdef VIENNACL_WITH_OPENMP
453 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
460 #ifdef VIENNACL_WITH_OPENMP
461 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
466 #ifdef VIENNACL_WITH_OPENMP
467 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
474 #ifdef VIENNACL_WITH_OPENMP
475 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
481 #ifdef VIENNACL_WITH_OPENMP
482 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
486 #undef VIENNACL_INNER_PROD_IMPL_1
487 #undef VIENNACL_INNER_PROD_IMPL_2
496 template<
typename NumericT,
typename ScalarT>
503 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
504 value_type
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
514 data_vec2, start2, inc2);
517 template<
typename NumericT>
524 value_type
const * data_x = detail::extract_raw_pointer<value_type>(x);
530 std::vector<value_type> temp(vec_tuple.
const_size());
531 std::vector<value_type const *> data_y(vec_tuple.
const_size());
532 std::vector<vcl_size_t> start_y(vec_tuple.
const_size());
533 std::vector<vcl_size_t> stride_y(vec_tuple.
const_size());
537 data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.
const_at(j));
545 value_type entry_x = data_x[i*inc_x+start_x];
547 temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
/** @brief Opening part of the code generator for the raw-pointer norm_1_impl() overloads.
 *
 * Expands to the signature of an inline function norm_1_impl() returning RESULTSCALART, which takes
 * a raw pointer, start offset, increment and element count, followed by the declaration of the
 * accumulator 'temp' of type TEMPSCALART, initialized to 0. The function body is intentionally left open.
 * NOTE(review): the expansion sites are not visible here; presumably each use is followed by an OpenMP
 * reduction pragma on 'temp' and by VIENNACL_NORM_1_IMPL_2 (loop and return) - confirm.
 */
558 #define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \
559 inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
560 TEMPSCALART temp = 0;
562 #define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \
563 for (long i = 0; i < static_cast<long>(size1); ++i) \
564 temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \
565 return static_cast<RESULTSCALART>(temp); \
570 #ifdef VIENNACL_WITH_OPENMP
571 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
576 #ifdef VIENNACL_WITH_OPENMP
577 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
583 #ifdef VIENNACL_WITH_OPENMP
584 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
589 #ifdef VIENNACL_WITH_OPENMP
590 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
597 #ifdef VIENNACL_WITH_OPENMP
598 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
603 #ifdef VIENNACL_WITH_OPENMP
604 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
611 #ifdef VIENNACL_WITH_OPENMP
612 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
617 #ifdef VIENNACL_WITH_OPENMP
618 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
625 #ifdef VIENNACL_WITH_OPENMP
626 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
632 #ifdef VIENNACL_WITH_OPENMP
633 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
637 #undef VIENNACL_NORM_1_IMPL_1
638 #undef VIENNACL_NORM_1_IMPL_2
647 template<
typename NumericT,
typename ScalarT>
653 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
/** @brief Opening part of the code generator for the raw-pointer norm_2_impl() overloads.
 *
 * Expands to the signature of an inline function norm_2_impl() returning RESULTSCALART, which takes
 * a raw pointer, start offset, increment and element count, followed by the declaration of the
 * accumulator 'temp' of type TEMPSCALART, initialized to 0. The function body is intentionally left open.
 * NOTE(review): the expansion sites are not visible here; presumably each use is followed by an OpenMP
 * reduction pragma on 'temp' and by VIENNACL_NORM_2_IMPL_2 (loop over squared entries and return) - confirm.
 */
667 #define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \
668 inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
669 TEMPSCALART temp = 0;
671 #define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \
672 for (long i = 0; i < static_cast<long>(size1); ++i) { \
673 RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \
674 temp += static_cast<TEMPSCALART>(data * data); \
676 return static_cast<RESULTSCALART>(temp); \
681 #ifdef VIENNACL_WITH_OPENMP
682 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
687 #ifdef VIENNACL_WITH_OPENMP
688 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
695 #ifdef VIENNACL_WITH_OPENMP
696 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
701 #ifdef VIENNACL_WITH_OPENMP
702 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
709 #ifdef VIENNACL_WITH_OPENMP
710 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
715 #ifdef VIENNACL_WITH_OPENMP
716 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
723 #ifdef VIENNACL_WITH_OPENMP
724 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
729 #ifdef VIENNACL_WITH_OPENMP
730 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
737 #ifdef VIENNACL_WITH_OPENMP
738 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
744 #ifdef VIENNACL_WITH_OPENMP
745 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
749 #undef VIENNACL_NORM_2_IMPL_1
750 #undef VIENNACL_NORM_2_IMPL_2
760 template<
typename NumericT,
typename ScalarT>
766 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
780 template<
typename NumericT,
typename ScalarT>
786 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
796 temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));
809 template<
typename NumericT>
814 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
827 data =
static_cast<value_type
>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));
843 template<
typename NumericT,
typename ScalarT>
849 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
855 value_type temp = data_vec1[
start1];
859 temp = std::max<value_type>(temp, data_vec1[i*inc1+start1]);
869 template<
typename NumericT,
typename ScalarT>
875 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
881 value_type temp = data_vec1[
start1];
885 temp = std::min<value_type>(temp, data_vec1[i*inc1+start1]);
895 template<
typename NumericT,
typename ScalarT>
901 value_type
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
911 temp += data_vec1[i*inc1+start1];
926 template<
typename NumericT>
933 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
934 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
943 value_type data_alpha = alpha;
944 value_type data_beta = beta;
946 #ifdef VIENNACL_WITH_OPENMP
947 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
949 for (
long i = 0; i < static_cast<long>(
size1); ++i)
951 value_type temp1 = data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1];
952 value_type temp2 = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
954 data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
955 data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
962 template<
typename NumericT>
967 NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
968 NumericT * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);
979 #ifdef VIENNACL_WITH_OPENMP
982 std::vector<NumericT> thread_results(omp_get_max_threads());
987 vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
988 vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
989 vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread,
size1);
992 for(
vcl_size_t i = thread_start; i < thread_stop; i++)
993 thread_sum += data_vec1[i * inc1 + start1];
995 thread_results[omp_get_thread_num()] = thread_sum;
1000 for (
vcl_size_t i=0; i<thread_results.size(); ++i)
1003 thread_results[i] = current_offset;
1004 current_offset += tmp;
1008 #pragma omp parallel
1010 vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
1011 vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
1012 vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread,
size1);
1014 NumericT thread_sum = thread_results[omp_get_thread_num()];
1017 for(
vcl_size_t i = thread_start; i < thread_stop; i++)
1019 thread_sum += data_vec1[i * inc1 +
start1];
1020 data_vec2[i * inc2 +
start2] = thread_sum;
1025 for(
vcl_size_t i = thread_start; i < thread_stop; i++)
1028 data_vec2[i * inc2 +
start2] = thread_sum;
1041 sum += data_vec1[i * inc1 +
start1];
1067 template<
typename NumericT>
1082 template<
typename NumericT>
vcl_size_t const_size() const
#define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART)
#define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART)
#define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART)
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP.
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector.
Generic size and resize functionality for different vector and matrix types.
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector.
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the sum of all elements of a vector.
#define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART)
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Worker class for decomposing expression templates.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Determines row and column increments for matrices and matrix proxies.
An expression template class that represents a binary operation that yields a vector.
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation.
#define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART)
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector.
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
NumericT flip_sign(NumericT val)
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Common base class for dense vectors, vector ranges, and vector slices.
Common routines for single-threaded or OpenMP-enabled execution on CPU.
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void vector_scan_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2, bool is_inclusive)
Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend.
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector.
#define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART)
VectorType const & const_at(vcl_size_t i) const
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void inner_prod_impl(vector_base< T > const &x, vector_tuple< T > const &y_tuple, vector_base< T > &result)
Computes the inner products $\langle x, y_1 \rangle$, $\langle x, y_2 \rangle$, ..., $\langle x, y_N \rangle$ and writes the result to a (sub-)vector...
Implementation of the ViennaCL scalar class.
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_1_impl(viennacl::vector_expression< LHS, RHS, OP > const &vec, S2 &result)
Computes the l^1-norm of a vector - interface for a vector expression. Creates a temporary.
#define VIENNACL_OPENMP_VECTOR_MIN_SIZE
Simple enable-if variant that uses the SFINAE pattern.