1 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
55 template<
typename DestNumericT,
typename SrcNumericT>
58 assert(viennacl::traits::opencl_handle(dest).
context() == viennacl::traits::opencl_handle(src).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
60 std::string kernel_name(
"convert_");
75 template<
typename NumericT,
typename ScalarT1>
79 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
80 std::string kernel_name(
"assign_*v_**00");
82 kernel_name[7] = is_scalar_cpu ?
'h' :
'd';
83 kernel_name[10] = flip_sign_alpha ?
'1' :
'0';
84 kernel_name[11] = reciprocal_alpha ?
'1' :
'0';
91 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
96 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
97 assert(viennacl::traits::opencl_handle(y).
context() == viennacl::traits::opencl_handle(z).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
99 std::string kernel_name(
"assign_*v*v_****");
102 kernel_name[7] = is_scalar_cpu1 ?
'h' :
'd';
103 kernel_name[9] = is_scalar_cpu2 ?
'h' :
'd';
104 kernel_name[12] = flip_sign_alpha ?
'1' :
'0';
105 kernel_name[13] = reciprocal_alpha ?
'1' :
'0';
106 kernel_name[14] = flip_sign_beta ?
'1' :
'0';
107 kernel_name[15] = reciprocal_beta ?
'1' :
'0';
114 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2>
119 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
120 assert(viennacl::traits::opencl_handle(y).
context() == viennacl::traits::opencl_handle(z).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
122 std::string kernel_name(
"ip_add_*v*v_****");
125 kernel_name[7] = is_scalar_cpu1 ?
'h' :
'd';
126 kernel_name[9] = is_scalar_cpu2 ?
'h' :
'd';
127 kernel_name[12] = flip_sign_alpha ?
'1' :
'0';
128 kernel_name[13] = reciprocal_alpha ?
'1' :
'0';
129 kernel_name[14] = flip_sign_beta ?
'1' :
'0';
130 kernel_name[15] = reciprocal_beta ?
'1' :
'0';
143 template<
typename NumericT>
159 template<
typename NumericT>
162 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
174 template<
typename NumericT,
typename OP>
178 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
179 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
193 template<
typename NumericT,
typename OP>
197 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
198 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
214 template<
typename NumericT>
219 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
220 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
228 template<
typename NumericT>
246 template<
typename NumericT>
251 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
255 while (current_index < vec_tuple.
const_size())
261 std::string kernel_prefix;
262 if (diff>=8) upper_bound = 8, kernel_prefix =
"inner_prod_8";
263 else if (diff>=4) upper_bound = 4, kernel_prefix =
"inner_prod_4";
264 else if (diff>=3) upper_bound = 3, kernel_prefix =
"inner_prod_3";
265 else if (diff>=2) upper_bound = 2, kernel_prefix =
"inner_prod_2";
266 else upper_bound = 1, kernel_prefix =
"inner_prod_1";
268 std::vector<range_t> ranges;
269 ranges.reserve(upper_bound);
270 for (
unsigned int i = 0; i < upper_bound; ++i)
271 ranges.push_back(range_t(result,
viennacl::range(current_index+i, current_index+i+1)));
273 for (
unsigned int i = 0; i < upper_bound; ++i)
277 current_index += upper_bound;
282 template<
typename NumericT>
300 template<
typename NumericT>
304 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
315 template<
typename NumericT>
334 template<
typename NumericT>
338 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
349 template<
typename NumericT>
367 template<
typename NumericT>
371 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
382 template<
typename NumericT>
402 template<
typename NumericT>
409 return static_cast<cl_uint
>(host_result);
419 template<
typename NumericT>
423 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
434 template<
typename NumericT>
451 template<
typename NumericT>
455 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
466 template<
typename NumericT>
482 template<
typename NumericT>
486 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
497 template<
typename NumericT>
516 template<
typename NumericT>
521 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(y).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
538 template<
typename NumericT>
559 output, cl_uint(output.
start()), cl_uint(output.
stride()),
560 cl_uint(is_inclusive ? 0 : 1), opencl_carries.opencl_handle())
572 opencl_carries.opencl_handle())
583 template<
typename NumericT>
596 template<
typename NumericT>
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector, writing the result to a host (CPU) scalar.
cl_uint stride
Increment between integers.
void norm_2_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^2-norm of a vector with final reduction on CPU.
vcl_size_t const_size() const
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
void norm_1_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^1-norm of a vector with final reduction on CPU.
Represents an OpenCL device within ViennaCL.
void inner_prod_cpu(vector_base< NumericT > const &x, vector_base< NumericT > const &y, NumericT &result)
Generic size and resize functionality for different vector and matrix types.
void plane_rotation(vector_base< NumericT > &x, vector_base< NumericT > &y, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
void norm_inf_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the supremum-norm of a vector.
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
cl_uint start
Starting value of the integer stride.
static std::string program_name()
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
cl_uint index_norm_inf(vector_base< NumericT > const &x)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
void norm_1_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
void avbv_v(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector.
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
An expression template class that represents a binary operation that yields a vector.
static void init(viennacl::ocl::context &ctx)
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
void vector_assign(vector_base< NumericT > &x, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Common implementations shared by OpenCL-based operations.
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
size_type stride() const
Returns the stride within the buffer (in multiples of sizeof(NumericT))
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement binary_element_op(NumericT const *x, NumericT const *y, NumericT const *z, scheduler::operation_node_type TYPE)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum of a vector.
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Class for representing non-strided subvectors of a bigger vector x.
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum of a vector, writing the result to a host (CPU) scalar.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
OpenCL kernel file for vector operations.
void inner_prod_impl(vector_base< NumericT > const &x, vector_base< NumericT > const &y, scalar< NumericT > &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(x...
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
template_base * template_of(std::string const &key)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Metafunction for querying type information.
Common base class for dense vectors, vector ranges, and vector slices.
statement unary_element_op(NumericT const *x, NumericT const *y, scheduler::operation_node_type TYPE)
scheduler::statement av(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a)
std::list< scheduler::statement > data_type
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using OpenCL.
OpenCL kernel file for scan operations. To be merged back to vector operations.
void norm_inf_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
operation_node_type
Enumeration for identifying the possible operations.
void vector_swap(vector_base< NumericT > &x, vector_base< NumericT > &y)
Swaps the contents of two vectors, data is copied.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
void av(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using OpenCL.
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void avbv(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
VectorType const & const_at(vcl_size_t i) const
viennacl::ocl::packed_cl_uint make_layout(vector_base< NumericT > const &vec)
void execute(container_type::key_type const &key, statements_container const &statements)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
const char * operator_string(scheduler::operation_node_type type)
size_type start() const
Returns the offset within the buffer.
Helper class for converting a type to its string representation.
void scan_impl(vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
Worker routine for scan routines using OpenCL.
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Simple enable-if variant that uses the SFINAE pattern.
cl_uint size
Number of values in the stride.