ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cmath>
26 
27 #include "viennacl/forwards.h"
29 #include "viennacl/ocl/device.hpp"
30 #include "viennacl/ocl/handle.hpp"
31 #include "viennacl/ocl/kernel.hpp"
32 #include "viennacl/scalar.hpp"
33 #include "viennacl/tools/tools.hpp"
40 #include "viennacl/traits/size.hpp"
44 
45 namespace viennacl
46 {
47 namespace linalg
48 {
49 namespace opencl
50 {
51 
52 //
53 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
54 //
/** @brief Enqueues an OpenCL kernel whose name starts with "convert_", passing dest (buffer, start, stride, size) and src (buffer, start, stride).
*
* Presumably converts the entries of src (SrcNumericT) to DestNumericT and writes them to dest - confirm against the kernel source.
*
* NOTE(review): the declaration line and the lines that complete kernel_name and obtain the kernel k are missing from this listing.
*/
55 template<typename DestNumericT, typename SrcNumericT>
57 {
 // Both operands have to live in the same OpenCL context (checked in debug builds only).
58  assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
59 
 // Kernel name is assembled piecewise; the pieces appended around the "_" separator are not visible here.
60  std::string kernel_name("convert_");
62  kernel_name += "_";
64 
 // The handle exposes its context as const; the const is cast away to obtain a mutable context reference.
65  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(dest).context());
68 
 // Kernel arguments: destination buffer with start/stride/size, then source buffer with start/stride.
69  viennacl::ocl::enqueue(k( dest, cl_uint(dest.start()), cl_uint(dest.stride()), cl_uint(dest.size()),
70  src, cl_uint( src.start()), cl_uint( src.stride())
71  ) );
72 
73 }
74 
/** @brief Builds an "av" assignment statement on x from y and the scalar alpha and executes it through the vector kernel execution handler.
*
* Presumably x = y * alpha, with alpha optionally sign-flipped and/or inverted - confirm against scheduler::preset::av.
*
* @param x                 Vector that is assigned to
* @param y                 Source vector; must reside in the same OpenCL context as x
* @param alpha             Scalar operand; whether ScalarT1 is a CPU scalar type selects the kernel variant
* @param reciprocal_alpha  Forwarded to the statement and encoded in the kernel name
* @param flip_sign_alpha   Forwarded to the statement and encoded in the kernel name
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
75 template<typename NumericT, typename ScalarT1>
77  vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha)
78 {
79  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 // Kernel key template; the '*' placeholders are patched by index below.
80  std::string kernel_name("assign_*v_**00");
81  bool is_scalar_cpu = is_cpu_scalar<ScalarT1>::value;
 // Index 7: 'h' if alpha is a CPU scalar type, 'd' otherwise.
82  kernel_name[7] = is_scalar_cpu ? 'h' : 'd';
 // Indices 10 and 11: '1'/'0' for the sign-flip and reciprocal flags of alpha.
83  kernel_name[10] = flip_sign_alpha ? '1' : '0';
84  kernel_name[11] = reciprocal_alpha ? '1' : '0';
85 
86  scheduler::statement statement = scheduler::preset::av(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha);
87  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute(kernel_name, statement);
88 }
89 
90 
/** @brief Builds an "avbv" assignment statement on x from (y, alpha) and (z, beta) and executes it through the vector kernel execution handler.
*
* Presumably x = y * alpha + z * beta, with each scalar optionally sign-flipped and/or inverted - confirm against scheduler::preset::avbv.
*
* @param x                 Vector that is assigned to
* @param y                 First source vector; must reside in the same OpenCL context as x
* @param alpha             Scalar paired with y; whether ScalarT1 is a CPU scalar type selects the kernel variant
* @param reciprocal_alpha  Forwarded to the statement and encoded in the kernel name
* @param flip_sign_alpha   Forwarded to the statement and encoded in the kernel name
* @param z                 Second source vector; must reside in the same OpenCL context as y
* @param beta              Scalar paired with z; whether ScalarT2 is a CPU scalar type selects the kernel variant
* @param reciprocal_beta   Forwarded to the statement and encoded in the kernel name
* @param flip_sign_beta    Forwarded to the statement and encoded in the kernel name
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
91 template<typename NumericT, typename ScalarT1, typename ScalarT2>
93  vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
94  vector_base<NumericT> const & z, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
95 {
96  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
97  assert(viennacl::traits::opencl_handle(y).context() == viennacl::traits::opencl_handle(z).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
98 
 // Kernel key template; the '*' placeholders are patched by index below.
99  std::string kernel_name("assign_*v*v_****");
100  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
101  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
 // Indices 7 and 9: 'h' if the respective scalar is a CPU scalar type, 'd' otherwise.
102  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
103  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
 // Indices 12-15: '1'/'0' for flip/reciprocal of alpha, then flip/reciprocal of beta.
104  kernel_name[12] = flip_sign_alpha ? '1' : '0';
105  kernel_name[13] = reciprocal_alpha ? '1' : '0';
106  kernel_name[14] = flip_sign_beta ? '1' : '0';
107  kernel_name[15] = reciprocal_beta ? '1' : '0';
108 
109  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha, &z, &beta, flip_sign_beta, reciprocal_beta);
110  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute(kernel_name, statement);
111 }
112 
113 
/** @brief Builds an "avbv" in-place-add statement on x from (y, alpha) and (z, beta) and executes it through the vector kernel execution handler.
*
* Differs from the assigning variant only in the kernel key prefix ("ip_add_") and the operation type OPERATION_BINARY_INPLACE_ADD_TYPE.
* Presumably x += y * alpha + z * beta, with each scalar optionally sign-flipped and/or inverted - confirm against scheduler::preset::avbv.
*
* @param x                 Vector that is updated
* @param y                 First source vector; must reside in the same OpenCL context as x
* @param alpha             Scalar paired with y; whether ScalarT1 is a CPU scalar type selects the kernel variant
* @param reciprocal_alpha  Forwarded to the statement and encoded in the kernel name
* @param flip_sign_alpha   Forwarded to the statement and encoded in the kernel name
* @param z                 Second source vector; must reside in the same OpenCL context as y
* @param beta              Scalar paired with z; whether ScalarT2 is a CPU scalar type selects the kernel variant
* @param reciprocal_beta   Forwarded to the statement and encoded in the kernel name
* @param flip_sign_beta    Forwarded to the statement and encoded in the kernel name
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
114 template<typename NumericT, typename ScalarT1, typename ScalarT2>
116  vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
117  vector_base<NumericT> const & z, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
118 {
119  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
120  assert(viennacl::traits::opencl_handle(y).context() == viennacl::traits::opencl_handle(z).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
121 
 // Kernel key template; the '*' placeholders are patched by index below.
122  std::string kernel_name("ip_add_*v*v_****");
123  bool is_scalar_cpu1 = is_cpu_scalar<ScalarT1>::value;
124  bool is_scalar_cpu2 = is_cpu_scalar<ScalarT2>::value;
 // Indices 7 and 9: 'h' if the respective scalar is a CPU scalar type, 'd' otherwise.
125  kernel_name[7] = is_scalar_cpu1 ? 'h' : 'd';
126  kernel_name[9] = is_scalar_cpu2 ? 'h' : 'd';
 // Indices 12-15: '1'/'0' for flip/reciprocal of alpha, then flip/reciprocal of beta.
127  kernel_name[12] = flip_sign_alpha ? '1' : '0';
128  kernel_name[13] = reciprocal_alpha ? '1' : '0';
129  kernel_name[14] = flip_sign_beta ? '1' : '0';
130  kernel_name[15] = reciprocal_beta ? '1' : '0';
131 
132  scheduler::statement statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha, &z, &beta, flip_sign_beta, reciprocal_beta);
133  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute(kernel_name, statement);
134 }
135 
136 
/** @brief Assigns a constant value to a vector by executing the "assign_cpu" kernel template.
*
* @param x                    Vector to be written
* @param alpha                Value to assign
* @param up_to_internal_size  Forwarded to the "assign_cpu" template before execution; presumably selects whether padding entries beyond size() are written as well - confirm
*
* NOTE(review): the lines that build `statement` are missing from this listing.
*/
143 template<typename NumericT>
144 void vector_assign(vector_base<NumericT> & x, const NumericT & alpha, bool up_to_internal_size = false)
145 {
148 
 // Configure the registered "assign_cpu" template, then run it.
 // NOTE(review): the dynamic_cast result is dereferenced without a null check.
149  dynamic_cast<device_specific::vector_axpy_template*>(kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).template_of("assign_cpu"))->up_to_internal_size(up_to_internal_size);
150  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("assign_cpu", statement);
151 }
152 
153 
/** @brief Swaps the contents of the vectors x and y by executing the "swap" kernel.
*
* Both vectors must reside in the same OpenCL context (checked in debug builds only).
*
* NOTE(review): the declaration line and the line that builds `statement` are missing from this listing.
*/
159 template<typename NumericT>
161 {
162  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
164  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("swap", statement);
165 }
166 
168 
/** @brief Element-wise binary operation: builds a binary_element_op statement with result x and operands proxy.lhs() and proxy.rhs().
*
* x and both operands of the proxy must reside in the same OpenCL context (checked in debug builds only).
*
* NOTE(review): the declaration, the definition of TYPE (presumably derived from OP) and the line that executes the statement are missing from this listing.
*/
174 template<typename NumericT, typename OP>
177 {
178  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
179  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
180 
182  scheduler::statement statement = scheduler::preset::binary_element_op(&x, &proxy.lhs(), &proxy.rhs(),TYPE);
184 }
185 
187 
/** @brief Element-wise unary operation: builds a unary_element_op statement with result x and operand proxy.lhs().
*
* Only proxy.lhs() enters the statement; proxy.rhs() is used solely in the context check below.
*
* NOTE(review): the declaration, the definition of TYPE (presumably derived from OP) and the line that executes the statement are missing from this listing.
*/
193 template<typename NumericT, typename OP>
196 {
197  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
198  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
199 
201  scheduler::statement statement = scheduler::preset::unary_element_op(&x, &proxy.lhs(),TYPE);
203 
204 }
205 
207 
/** @brief Computes the inner product of the vectors x and y and stores it in the scalar `result` by executing the "inner_prod" kernel.
*
* @param y       Second vector; must reside in the same OpenCL context as x
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
214 template<typename NumericT>
216  vector_base<NumericT> const & y,
217  scalar<NumericT> & result)
218 {
219  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
220  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
221 
222  scheduler::statement statement = scheduler::preset::inner_prod(&result, &x, &y);
223  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("inner_prod", statement);
224 }
225 
226 namespace detail
227 {
 /** @brief Fills a layout descriptor `ret` with the start offset, stride and size of `vec` (each narrowed to cl_uint) and returns it.
 *
 * NOTE(review): the declaration line, the declaration of `ret` and one further line of the body are missing from this listing.
 */
228  template<typename NumericT>
230  {
232  ret.start = cl_uint(viennacl::traits::start(vec));
233  ret.stride = cl_uint(viennacl::traits::stride(vec));
234  ret.size = cl_uint(viennacl::traits::size(vec));
236  return ret;
237  }
238 }
239 
/** @brief Builds inner-product statements of x with each vector in vec_tuple, targeting one-element sub-ranges of `result`.
*
* The tuple is processed in chunks of 8, 4, 3, 2 or 1 vectors; entry i of `result` is paired with vector i of the tuple.
*
* @param vec_tuple  Tuple of vectors to be multiplied with x
* @param result     Vector receiving one entry per tuple element; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration, the definitions of `range_t` and `statements`, and the line that executes each chunk are missing from this listing.
*/
246 template<typename NumericT>
248  vector_tuple<NumericT> const & vec_tuple,
249  vector_base<NumericT> & result)
250 {
251  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
253 
254  vcl_size_t current_index = 0;
255  while (current_index < vec_tuple.const_size())
256  {
258 
 // Pick the largest supported chunk size that still fits into the remaining vectors.
259  vcl_size_t diff = vec_tuple.const_size() - current_index;
260  vcl_size_t upper_bound;
261  std::string kernel_prefix;
262  if (diff>=8) upper_bound = 8, kernel_prefix = "inner_prod_8";
263  else if (diff>=4) upper_bound = 4, kernel_prefix = "inner_prod_4";
264  else if (diff>=3) upper_bound = 3, kernel_prefix = "inner_prod_3";
265  else if (diff>=2) upper_bound = 2, kernel_prefix = "inner_prod_2";
266  else upper_bound = 1, kernel_prefix = "inner_prod_1";
267 
 // One single-entry range of `result` per vector in this chunk.
 // All ranges are created before any address is taken below, so the pointers stored in the statements are not invalidated by push_back.
268  std::vector<range_t> ranges;
269  ranges.reserve(upper_bound);
270  for (unsigned int i = 0; i < upper_bound; ++i)
271  ranges.push_back(range_t(result, viennacl::range(current_index+i, current_index+i+1)));
272 
273  for (unsigned int i = 0; i < upper_bound; ++i)
274  statements.push_back(scheduler::preset::inner_prod(&ranges[i], &x, &vec_tuple.const_at(current_index+i)));
275 
277  current_index += upper_bound;
278  }
279 }
280 
281 
/** @brief Computes the inner product of x and y via inner_prod_impl() and assigns it to the host variable `result`.
*
* @param y       Second vector
* @param result  Host variable receiving the value of `tmp` after inner_prod_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
282 template<typename NumericT>
284  vector_base<NumericT> const & y,
285  NumericT & result)
286 {
288  inner_prod_impl(x, y, tmp);
289  result = tmp;
290 }
291 
292 
294 
/** @brief Computes the l^1-norm of the vector x and stores it in the scalar `result` by executing the "norm_1" kernel.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
300 template<typename NumericT>
302  scalar<NumericT> & result)
303 {
304  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
305 
306  scheduler::statement statement = scheduler::preset::norm_1(&result, &x);
307  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("norm_1", statement);
308 }
309 
/** @brief Computes the l^1-norm of x via norm_1_impl() and assigns it to the host variable `result`.
*
* @param result  Host variable receiving the value of `tmp` after norm_1_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
315 template<typename NumericT>
317  NumericT & result)
318 {
320  norm_1_impl(x, tmp);
321  result = tmp;
322 }
323 
324 
325 
327 
328 
/** @brief Computes the l^2-norm of the vector x and stores it in the scalar `result` by executing the "norm_2" kernel.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
334 template<typename NumericT>
336  scalar<NumericT> & result)
337 {
338  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
339 
340  scheduler::statement statement = scheduler::preset::norm_2(&result, &x);
341  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("norm_2", statement);
342 }
343 
/** @brief Computes the l^2-norm of x via norm_2_impl() and assigns it to the host variable `result`.
*
* @param result  Host variable receiving the value of `tmp` after norm_2_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
349 template<typename NumericT>
351  NumericT & result)
352 {
354  norm_2_impl(x, tmp);
355  result = tmp;
356 }
357 
358 
359 
361 
/** @brief Computes the supremum-norm of the vector x and stores it in the scalar `result` by executing the "norm_inf" kernel.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
367 template<typename NumericT>
369  scalar<NumericT> & result)
370 {
371  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
372 
373  scheduler::statement statement = scheduler::preset::norm_inf(&result, &x);
374  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("norm_inf", statement);
375 }
376 
/** @brief Computes the supremum-norm of x via norm_inf_impl() and assigns it to the host variable `result`.
*
* @param result  Host variable receiving the value of `tmp` after norm_inf_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
382 template<typename NumericT>
384  NumericT & result)
385 {
387  norm_inf_impl(x, tmp);
388  result = tmp;
389 }
390 
391 
393 
394 //This function should return a CPU scalar, otherwise statements like
395 // vcl_rhs[index_norm_inf(vcl_rhs)]
396 // are ambiguous
/** @brief Executes the "index_norm_inf" kernel on x and returns the resulting index as a host-side cl_uint.
*
* @return The kernel result, read back as NumericT and then cast to cl_uint
*
* NOTE(review): the declaration line and the lines that create `result` and `statement` are missing from this listing.
* NOTE(review): the index passes through NumericT; if NumericT is a floating-point type, large indices may not be exactly representable - confirm.
*/
402 template<typename NumericT>
404 {
407  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("index_norm_inf", statement);
 // Read the result back into a host value before converting it to an index type.
408  NumericT host_result = result;
409  return static_cast<cl_uint>(host_result);
410 }
411 
413 
/** @brief Computes the maximum of the vector x and stores it in the scalar `result` by executing the "max" kernel.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
419 template<typename NumericT>
421  scalar<NumericT> & result)
422 {
423  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
424 
425  scheduler::statement statement = scheduler::preset::max(&result, &x);
426  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("max", statement);
427 }
428 
/** @brief Computes the maximum of x via max_impl() and assigns it to the host variable `result`.
*
* @param result  Host variable receiving the value of `tmp` after max_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
434 template<typename NumericT>
436  NumericT & result)
437 {
439  max_impl(x, tmp);
440  result = tmp;
441 }
442 
443 
445 
/** @brief Computes the minimum of the vector x and stores it in the scalar `result` by executing the "min" kernel.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration (function name and parameter x) is missing from this listing.
*/
451 template<typename NumericT>
453  scalar<NumericT> & result)
454 {
455  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
456 
457  scheduler::statement statement = scheduler::preset::min(&result, &x);
458  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("min", statement);
459 }
460 
/** @brief Computes the minimum of x via min_impl() and assigns it to the host variable `result`.
*
* @param result  Host variable receiving the value of `tmp` after min_impl() has been called
*
* NOTE(review): the first line of the declaration and the declaration of `tmp` are missing from this listing.
*/
466 template<typename NumericT>
468  NumericT & result)
469 {
471  min_impl(x, tmp);
472  result = tmp;
473 }
474 
476 
/** @brief Computes the sum over all entries of x and stores it in the scalar `result`.
*
* The sum is obtained as the inner product of x with the vector `all_ones`.
*
* @param result  Scalar receiving the result; must reside in the same OpenCL context as x
*
* NOTE(review): the first line of the declaration and the definition of `all_ones` (presumably a vector of ones with the size of x) are missing from this listing.
*/
482 template<typename NumericT>
484  scalar<NumericT> & result)
485 {
486  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
487 
489  viennacl::linalg::opencl::inner_prod_impl(x, all_ones, result);
490 }
491 
/** @brief Computes the sum over all entries of x via sum_impl() and assigns it to the host variable `result`.
*
* @param x       Vector whose entries are summed
* @param result  Host variable receiving the value of `tmp` after sum_impl() has been called
*
* NOTE(review): the declaration of `tmp` is missing from this listing.
*/
497 template<typename NumericT>
498 void sum_cpu(vector_base<NumericT> const & x, NumericT & result)
499 {
501  sum_impl(x, tmp);
502  result = tmp;
503 }
504 
505 
506 //TODO: Special case x == y allows improvement!!
/** @brief Computes a plane rotation of the two vectors x and y by executing the "plane_rotation" kernel.
*
* @param alpha  First rotation coefficient, passed by value
* @param beta   Second rotation coefficient, passed by value
*
* x and y must reside in the same OpenCL context (checked in debug builds only).
*
* NOTE(review): the first lines of the declaration (function name, parameters x and y) and the line that builds `statement` are missing from this listing.
*/
516 template<typename NumericT>
519  NumericT alpha, NumericT beta)
520 {
521  assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
523 
525  kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x)).execute("plane_rotation", statement);
526 }
527 
529 
530 
531 namespace detail
532 {
 /** @brief Worker routine for the scan routines: enqueues three kernels (k1, k2, k3) over `input`, `output` and a temporary carry buffer.
 *
 * @param input         Vector to be scanned
 * @param output        Vector receiving the scan result
 * @param is_inclusive  Selects inclusive (true) or exclusive (false) scan; passed to the first kernel as 0 or 1 respectively
 *
 * NOTE(review): the lines that obtain the kernels k1, k2 and k3 are missing from this listing.
 */
538  template<typename NumericT>
539  void scan_impl(vector_base<NumericT> const & input,
540  vector_base<NumericT> & output,
541  bool is_inclusive)
542  {
 // Fixed launch configuration: 128 work groups with 128 work items each.
543  vcl_size_t local_worksize = 128;
544  vcl_size_t workgroups = 128;
545 
 // Temporary buffer with one NumericT entry per work group, created in the context of `input`.
546  viennacl::backend::mem_handle opencl_carries;
547  viennacl::backend::memory_create(opencl_carries, sizeof(NumericT)*workgroups, viennacl::traits::context(input));
548 
549  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
554 
555  // First step: Scan within each thread group and write carries
556  k1.local_work_size(0, local_worksize);
557  k1.global_work_size(0, workgroups * local_worksize);
558  viennacl::ocl::enqueue(k1( input, cl_uint( input.start()), cl_uint( input.stride()), cl_uint(input.size()),
559  output, cl_uint(output.start()), cl_uint(output.stride()),
560  cl_uint(is_inclusive ? 0 : 1), opencl_carries.opencl_handle())
561  );
562 
563  // Second step: Compute offset for each thread group (exclusive scan for each thread group)
 // Local and global size are both `workgroups`, i.e. a single work group processes the carry buffer.
564  k2.local_work_size(0, workgroups);
565  k2.global_work_size(0, workgroups);
566  viennacl::ocl::enqueue(k2(opencl_carries.opencl_handle()));
567 
568  // Third step: Offset each thread group accordingly
569  k3.local_work_size(0, local_worksize);
570  k3.global_work_size(0, workgroups * local_worksize);
571  viennacl::ocl::enqueue(k3(output, cl_uint(output.start()), cl_uint(output.stride()), cl_uint(output.size()),
572  opencl_carries.opencl_handle())
573  );
574  }
575 }
576 
577 
/** @brief Inclusive scan of `input` into `output`; forwards to detail::scan_impl() with is_inclusive = true.
*
* @param output  Vector receiving the scan result
*
* NOTE(review): the first line of the declaration (function name and parameter input) is missing from this listing.
*/
583 template<typename NumericT>
585  vector_base<NumericT> & output)
586 {
587  detail::scan_impl(input, output, true);
588 }
589 
590 
/** @brief Exclusive scan of `input` into `output`; forwards to detail::scan_impl() with is_inclusive = false.
*
* @param output  Vector receiving the scan result
*
* NOTE(review): the first line of the declaration (function name and parameter input) is missing from this listing.
*/
596 template<typename NumericT>
598  vector_base<NumericT> & output)
599 {
600  detail::scan_impl(input, output, false);
601 }
602 
603 
604 } //namespace opencl
605 } //namespace linalg
606 } //namespace viennacl
607 
608 
609 #endif
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector and writes the result to a host (CPU) scalar.
cl_uint stride
Increment between integers.
Definition: kernel.hpp:50
void norm_2_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^2-norm of a vector with final reduction on CPU.
vcl_size_t const_size() const
Definition: vector.hpp:1143
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:167
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:45
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
void norm_1_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^1-norm of a vector with final reduction on CPU.
Represents an OpenCL device within ViennaCL.
void inner_prod_cpu(vector_base< NumericT > const &x, vector_base< NumericT > const &y, NumericT &result)
Generic size and resize functionality for different vector and matrix types.
void plane_rotation(vector_base< NumericT > &x, vector_base< NumericT > &y, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void norm_inf_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the supremum-norm of a vector.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
Definition: preset.hpp:229
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:48
Various little tools used here and there in ViennaCL.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
cl_uint index_norm_inf(vector_base< NumericT > const &x)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void norm_1_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
void avbv_v(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector.
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:269
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:371
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239
static void init(viennacl::ocl::context &ctx)
Definition: scan.hpp:162
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:33
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:241
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:206
Main kernel class for generating OpenCL kernels for scan operations.
Definition: scan.hpp:155
void vector_assign(vector_base< NumericT > &x, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:54
Common implementations shared by OpenCL-based operations.
float NumericT
Definition: bisect.cpp:40
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
size_type stride() const
Returns the stride within the buffer (in multiples of sizeof(NumericT))
Definition: vector_def.hpp:124
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:276
statement binary_element_op(NumericT const *x, NumericT const *y, NumericT const *z, scheduler::operation_node_type TYPE)
Definition: preset.hpp:284
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum of a vector.
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:448
Definition: blas3.hpp:36
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum of a vector and writes the result to a host (CPU) scalar.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:269
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
OpenCL kernel file for vector operations.
void inner_prod_impl(vector_base< NumericT > const &x, vector_base< NumericT > const &y, scalar< NumericT > &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(x...
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
template_base * template_of(std::string const &key)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:255
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:235
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:247
Metafunction for querying type information.
Definition: forwards.h:156
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104
statement unary_element_op(NumericT const *x, NumericT const *y, scheduler::operation_node_type TYPE)
Definition: preset.hpp:305
scheduler::statement av(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a)
Definition: preset.hpp:88
std::size_t vcl_size_t
Definition: forwards.h:75
std::list< scheduler::statement > data_type
Definition: forwards.h:265
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using OpenCL.
OpenCL kernel file for scan operations. To be merged back to vector operations.
void norm_inf_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68
void vector_swap(vector_base< NumericT > &x, vector_base< NumericT > &y)
Swaps the contents of two vectors, data is copied.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void av(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using OpenCL.
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
Definition: preset.hpp:103
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:113
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
void avbv(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1146
viennacl::ocl::packed_cl_uint make_layout(vector_base< NumericT > const &vec)
void execute(container_type::key_type const &key, statements_container const &statements)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:123
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
Definition: preset.hpp:95
const char * operator_string(scheduler::operation_node_type type)
size_type start() const
Returns the offset within the buffer.
Definition: vector_def.hpp:122
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void scan_impl(vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
Worker routine for scan routines using OpenCL.
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
Definition: blas3_solve.cpp:69
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:296
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Simple enable-if variant that uses the SFINAE pattern.
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:52