doc/html/opencl_2vector__operations_8hpp_source.html

 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_

 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_


 /* =========================================================================

    Copyright (c) 2010-2015, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include <cmath>


 #include "viennacl/forwards.h"

 #include "viennacl/detail/vector_def.hpp"

 #include "viennacl/ocl/device.hpp"

 #include "viennacl/ocl/handle.hpp"

 #include "viennacl/ocl/kernel.hpp"

 #include "viennacl/scalar.hpp"

 #include "viennacl/tools/tools.hpp"

 #include "viennacl/linalg/opencl/common.hpp"

 #include "viennacl/linalg/opencl/kernels/vector.hpp"

 #include "viennacl/linalg/opencl/kernels/scan.hpp"

 #include "viennacl/meta/predicate.hpp"

 #include "viennacl/meta/enable_if.hpp"

 #include "viennacl/scheduler/preset.hpp"

 #include "viennacl/traits/size.hpp"

 #include "viennacl/traits/start.hpp"

 #include "viennacl/traits/handle.hpp"

 #include "viennacl/traits/stride.hpp"


 namespace viennacl

 {

 namespace linalg

 {

 namespace opencl

 {


 //

 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!

 //

 /** @brief Copies the entries of src into dest while converting between numeric types.
  *
  * The work is done by the OpenCL kernel named "convert_<dest type>_<src type>" from the
  * vector_convert program, where the type names come from viennacl::ocl::type_to_string.
  * Both vectors must live in the same OpenCL context (checked by an assertion only).
  *
  * @param dest   Vector receiving the converted entries (start, stride and size are passed to the kernel)
  * @param src    Vector providing the entries (start and stride are passed to the kernel)
  */
 template<typename DestNumericT, typename SrcNumericT>
 void convert(vector_base<DestNumericT> & dest, vector_base<SrcNumericT> const & src)
 {
   assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   // Kernel naming scheme: convert_<destination type>_<source type>
   std::string const kernel_name = std::string("convert_")
                                 + viennacl::ocl::type_to_string<DestNumericT>::apply()
                                 + "_"
                                 + viennacl::ocl::type_to_string<SrcNumericT>::apply();

   // The handle only exposes a const context, but kernel compilation/lookup needs a mutable one.
   viennacl::ocl::context & ocl_ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(dest).context());
   viennacl::linalg::opencl::kernels::vector_convert::init(ocl_ctx);
   viennacl::ocl::kernel & convert_kernel = ocl_ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_convert::program_name(), kernel_name);

   cl_uint const dest_start  = cl_uint(dest.start());
   cl_uint const dest_stride = cl_uint(dest.stride());
   cl_uint const dest_size   = cl_uint(dest.size());
   cl_uint const src_start   = cl_uint(src.start());
   cl_uint const src_stride  = cl_uint(src.stride());

   viennacl::ocl::enqueue(convert_kernel(dest, dest_start, dest_stride, dest_size,
                                         src,  src_start,  src_stride));
 }


 /** @brief Dispatches the scaled vector assignment kernel for x, y and the scalar alpha.
  *
  * Builds the kernel name "assign_?v_??00" (host/device scalar marker, sign-flip flag,
  * reciprocal flag) and runs the statement created by scheduler::preset::av with
  * OPERATION_BINARY_ASSIGN_TYPE. NOTE(review): presumably this evaluates x = (+/-) y * alpha
  * or x = (+/-) y / alpha - confirm against the kernel generator.
  *
  * @param x                 Result vector
  * @param y                 Input vector
  * @param alpha             Scaling factor, either a host scalar or a device scalar
  * @param reciprocal_alpha  Selects the kernel variant with the reciprocal flag set
  * @param flip_sign_alpha   Selects the kernel variant with the sign-flip flag set
  */
 template<typename NumericT, typename ScalarT1>
 void av(vector_base<NumericT> & x,
         vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   bool const alpha_on_host = is_cpu_scalar<ScalarT1>::value;

   // Assemble "assign_<h|d>v_<flip><reciprocal>00" piece by piece.
   std::string kernel_name("assign_");
   kernel_name += (alpha_on_host    ? 'h' : 'd');
   kernel_name += "v_";
   kernel_name += (flip_sign_alpha  ? '1' : '0');
   kernel_name += (reciprocal_alpha ? '1' : '0');
   kernel_name += "00";

   scheduler::statement av_statement = scheduler::preset::av(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha);
   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute(kernel_name, av_statement);
 }


 /** @brief Dispatches the kernel assigning a combination of two scaled vectors to x.
  *
  * Builds the kernel name "assign_?v?v_????" (host/device marker for each scalar followed by
  * the sign-flip and reciprocal flags of alpha and beta) and runs the statement created by
  * scheduler::preset::avbv with OPERATION_BINARY_ASSIGN_TYPE.
  * NOTE(review): presumably evaluates x = alpha * y + beta * z with the flags applied - confirm.
  *
  * @param x                 Result vector
  * @param y                 First input vector
  * @param alpha             Scalar belonging to y (host or device scalar)
  * @param reciprocal_alpha  Reciprocal flag for alpha
  * @param flip_sign_alpha   Sign-flip flag for alpha
  * @param z                 Second input vector
  * @param beta              Scalar belonging to z (host or device scalar)
  * @param reciprocal_beta   Reciprocal flag for beta
  * @param flip_sign_beta    Sign-flip flag for beta
  */
 template<typename NumericT, typename ScalarT1, typename ScalarT2>
 void avbv(vector_base<NumericT> & x,
           vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
           vector_base<NumericT> const & z, ScalarT2 const & beta,  vcl_size_t /* len_beta */,  bool reciprocal_beta,  bool flip_sign_beta)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::opencl_handle(y).context() == viennacl::traits::opencl_handle(z).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   bool const alpha_on_host = is_cpu_scalar<ScalarT1>::value;
   bool const beta_on_host  = is_cpu_scalar<ScalarT2>::value;

   // Assemble "assign_<h|d>v<h|d>v_<flip a><recip a><flip b><recip b>".
   std::string kernel_name("assign_");
   kernel_name += (alpha_on_host    ? 'h' : 'd');
   kernel_name += 'v';
   kernel_name += (beta_on_host     ? 'h' : 'd');
   kernel_name += "v_";
   kernel_name += (flip_sign_alpha  ? '1' : '0');
   kernel_name += (reciprocal_alpha ? '1' : '0');
   kernel_name += (flip_sign_beta   ? '1' : '0');
   kernel_name += (reciprocal_beta  ? '1' : '0');

   scheduler::statement avbv_statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha, &z, &beta, flip_sign_beta, reciprocal_beta);
   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute(kernel_name, avbv_statement);
 }


 /** @brief Dispatches the kernel adding a combination of two scaled vectors to x in place.
  *
  * Builds the kernel name "ip_add_?v?v_????" (host/device marker for each scalar followed by
  * the sign-flip and reciprocal flags of alpha and beta) and runs the statement created by
  * scheduler::preset::avbv with OPERATION_BINARY_INPLACE_ADD_TYPE.
  * NOTE(review): presumably evaluates x += alpha * y + beta * z with the flags applied - confirm.
  *
  * @param x                 Vector that is updated in place
  * @param y                 First input vector
  * @param alpha             Scalar belonging to y (host or device scalar)
  * @param reciprocal_alpha  Reciprocal flag for alpha
  * @param flip_sign_alpha   Sign-flip flag for alpha
  * @param z                 Second input vector
  * @param beta              Scalar belonging to z (host or device scalar)
  * @param reciprocal_beta   Reciprocal flag for beta
  * @param flip_sign_beta    Sign-flip flag for beta
  */
 template<typename NumericT, typename ScalarT1, typename ScalarT2>
 void avbv_v(vector_base<NumericT> & x,
             vector_base<NumericT> const & y, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
             vector_base<NumericT> const & z, ScalarT2 const & beta,  vcl_size_t /* len_beta */,  bool reciprocal_beta,  bool flip_sign_beta)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::opencl_handle(y).context() == viennacl::traits::opencl_handle(z).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   bool const alpha_on_host = is_cpu_scalar<ScalarT1>::value;
   bool const beta_on_host  = is_cpu_scalar<ScalarT2>::value;

   // Assemble "ip_add_<h|d>v<h|d>v_<flip a><recip a><flip b><recip b>".
   std::string kernel_name("ip_add_");
   kernel_name += (alpha_on_host    ? 'h' : 'd');
   kernel_name += 'v';
   kernel_name += (beta_on_host     ? 'h' : 'd');
   kernel_name += "v_";
   kernel_name += (flip_sign_alpha  ? '1' : '0');
   kernel_name += (reciprocal_alpha ? '1' : '0');
   kernel_name += (flip_sign_beta   ? '1' : '0');
   kernel_name += (reciprocal_beta  ? '1' : '0');

   scheduler::statement avbv_statement = scheduler::preset::avbv(scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &alpha, flip_sign_alpha, reciprocal_alpha, &z, &beta, flip_sign_beta, reciprocal_beta);
   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute(kernel_name, avbv_statement);
 }


 /** @brief Assign a constant value to a vector (-range/-slice)
  *
  * The constant is wrapped in a scalar_vector of the same size as x and assigned through the
  * "assign_cpu" kernel template.
  *
  * @param x                     The vector to which the value should be assigned
  * @param alpha                 The value to be assigned
  * @param up_to_internal_size   Forwarded to the "assign_cpu" kernel template before execution.
  *                              NOTE(review): presumably extends the assignment to the padded part of the buffer - confirm.
  */
 template<typename NumericT>
 void vector_assign(vector_base<NumericT> & x, const NumericT & alpha, bool up_to_internal_size = false)
 {
   scalar_vector<NumericT> constant_vec(viennacl::traits::size(x), alpha, viennacl::traits::context(x));
   scheduler::statement assign_statement = scheduler::preset::assign_cpu(&x, &constant_vec);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));

   // Configure the kernel template first, then run it.
   device_specific::vector_axpy_template * assign_template = dynamic_cast<device_specific::vector_axpy_template*>(handler.template_of("assign_cpu"));
   assign_template->up_to_internal_size(up_to_internal_size);

   handler.execute("assign_cpu", assign_statement);
 }


 /** @brief Runs the "swap" kernel on the statements produced by scheduler::preset::swap for x and y.
  *
  * Both vectors must live in the same OpenCL context (checked by an assertion only).
  *
  * @param x   The first vector (or -range, or -slice)
  * @param y   The second vector (or -range, or -slice)
  */
 template<typename NumericT>
 void vector_swap(vector_base<NumericT> & x, vector_base<NumericT> & y)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   device_specific::statements_container swap_statements = scheduler::preset::swap(&x, &y);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("swap", swap_statements);
 }


 /** @brief Implementation of binary element-wise operations x = OP(lhs, rhs) on vectors.
  *
  * The scheduler operation id is looked up from op_element_binary<OP>; its string
  * representation is used as the kernel key.
  *
  * @param x       The result vector (or -range, or -slice)
  * @param proxy   Expression holding both operands (lhs and rhs) and the operation tag
  */
 template<typename NumericT, typename OP>
 void element_op(vector_base<NumericT> & x,
                 vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_binary<OP> > const & proxy)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::operation_node_type const op_type = scheduler::operation_node_type(scheduler::result_of::op_type_info<op_element_binary<OP> >::id);
   scheduler::statement op_statement = scheduler::preset::binary_element_op(&x, &proxy.lhs(), &proxy.rhs(), op_type);

   device_specific::execution_handler & handler = kernels::vector_element<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute(device_specific::tree_parsing::operator_string(op_type), op_statement);
 }


 /** @brief Implementation of unary element-wise operations x = OP(lhs) on vectors.
  *
  * The scheduler operation id is looked up from op_element_unary<OP>; its string
  * representation is used as the kernel key. Only proxy.lhs() enters the statement;
  * proxy.rhs() is touched by the context assertion only.
  *
  * @param x       The result vector (or -range, or -slice)
  * @param proxy   Expression holding the operand and the operation tag
  */
 template<typename NumericT, typename OP>
 void element_op(vector_base<NumericT> & x,
                 vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_unary<OP> > const & proxy)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::operation_node_type const op_type = scheduler::operation_node_type(scheduler::result_of::op_type_info<op_element_unary<OP> >::id);
   scheduler::statement op_statement = scheduler::preset::unary_element_op(&x, &proxy.lhs(), op_type);

   device_specific::execution_handler & handler = kernels::vector_element<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute(device_specific::tree_parsing::operator_string(op_type), op_statement);
 }


 /** @brief Computes the inner product of two vectors - implementation. The result is written to a device scalar.
  *
  * All three operands must live in the same OpenCL context (checked by assertions only).
  *
  * @param x        The first vector
  * @param y        The second vector
  * @param result   Device scalar receiving the inner product
  */
 template<typename NumericT>
 void inner_prod_impl(vector_base<NumericT> const & x,
                      vector_base<NumericT> const & y,
                      scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement dot_statement = scheduler::preset::inner_prod(&result, &x, &y);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("inner_prod", dot_statement);
 }


 namespace detail
 {
   /** @brief Packs start, stride, size and internal size of a vector into a packed_cl_uint.
    *
    * Each quantity is obtained through viennacl::traits and narrowed to cl_uint.
    *
    * @param vec   The vector whose layout is queried
    * @return      The filled layout descriptor
    */
   template<typename NumericT>
   viennacl::ocl::packed_cl_uint make_layout(vector_base<NumericT> const & vec)
   {
     viennacl::ocl::packed_cl_uint layout;

     layout.start         = static_cast<cl_uint>(viennacl::traits::start(vec));
     layout.stride        = static_cast<cl_uint>(viennacl::traits::stride(vec));
     layout.size          = static_cast<cl_uint>(viennacl::traits::size(vec));
     layout.internal_size = static_cast<cl_uint>(viennacl::traits::internal_size(vec));

     return layout;
   }
 }


 /** @brief Computes the inner products of x with every vector in vec_tuple, writing one entry of result per vector.
  *
  * The tuple is processed in batches of 8, 4, 3, 2 or 1 vectors (largest batch size that still fits),
  * each batch being handled by a single "inner_prod_<batch size>" kernel launch.
  * Entry i of result receives the inner product of x with the i-th tuple vector.
  *
  * @param x           The common left-hand vector
  * @param vec_tuple   The vectors forming the right-hand sides
  * @param result      Vector receiving one inner product per tuple entry
  */
 template<typename NumericT>
 void inner_prod_impl(vector_base<NumericT> const & x,
                      vector_tuple<NumericT> const & vec_tuple,
                      vector_base<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   typedef viennacl::vector_range< viennacl::vector_base<NumericT> > range_t;

   vcl_size_t const num_vectors = vec_tuple.const_size();
   vcl_size_t processed = 0;

   while (processed < num_vectors)
   {
     device_specific::statements_container::data_type statements;

     vcl_size_t const remaining = num_vectors - processed;

     // Pick the largest supported batch size not exceeding the remaining number of vectors.
     vcl_size_t batch_size;
     std::string kernel_prefix;
     if (remaining >= 8)
     {
       batch_size = 8;
       kernel_prefix = "inner_prod_8";
     }
     else if (remaining >= 4)
     {
       batch_size = 4;
       kernel_prefix = "inner_prod_4";
     }
     else if (remaining >= 3)
     {
       batch_size = 3;
       kernel_prefix = "inner_prod_3";
     }
     else if (remaining >= 2)
     {
       batch_size = 2;
       kernel_prefix = "inner_prod_2";
     }
     else
     {
       batch_size = 1;
       kernel_prefix = "inner_prod_1";
     }

     // The statements store pointers into 'ranges', so the storage must never reallocate:
     // reserve the full batch up front.
     std::vector<range_t> ranges;
     ranges.reserve(batch_size);

     for (vcl_size_t i = 0; i < batch_size; ++i)
     {
       vcl_size_t const entry = processed + i;
       ranges.push_back(range_t(result, viennacl::range(entry, entry + 1)));   // single-entry view on result[entry]
       statements.push_back(scheduler::preset::inner_prod(&ranges[i], &x, &vec_tuple.const_at(entry)));
     }

     device_specific::execution_handler & handler = kernels::vector_multi_inner_prod<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
     handler.execute(kernel_prefix, device_specific::statements_container(statements, device_specific::statements_container::INDEPENDENT));

     processed += batch_size;
   }
 }


 /** @brief Computes the inner product of two vectors and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of inner_prod_impl()
  * and is then copied to the host.
  *
  * @param x        The first vector
  * @param y        The second vector
  * @param result   Host scalar receiving the inner product
  */
 template<typename NumericT>
 void inner_prod_cpu(vector_base<NumericT> const & x,
                     vector_base<NumericT> const & y,
                     NumericT & result)
 {
   viennacl::scalar<NumericT> device_result(0, viennacl::traits::context(x));

   inner_prod_impl(x, y, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the l^1-norm of a vector.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the norm; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void norm_1_impl(vector_base<NumericT> const & x,
                  scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement norm_statement = scheduler::preset::norm_1(&result, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("norm_1", norm_statement);
 }


 /** @brief Computes the l^1-norm of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of norm_1_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the norm
  */
 template<typename NumericT>
 void norm_1_cpu(vector_base<NumericT> const & x,
                 NumericT & result)
 {
   viennacl::scalar<NumericT> device_result(0, viennacl::traits::context(x));

   norm_1_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the l^2-norm of a vector.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the norm; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void norm_2_impl(vector_base<NumericT> const & x,
                  scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement norm_statement = scheduler::preset::norm_2(&result, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("norm_2", norm_statement);
 }


 /** @brief Computes the l^2-norm of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of norm_2_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the norm
  */
 template<typename NumericT>
 void norm_2_cpu(vector_base<NumericT> const & x,
                 NumericT & result)
 {
   scalar<NumericT> device_result(0, viennacl::traits::context(x));

   norm_2_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the supremum-norm of a vector.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the norm; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void norm_inf_impl(vector_base<NumericT> const & x,
                    scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement norm_statement = scheduler::preset::norm_inf(&result, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("norm_inf", norm_statement);
 }


 /** @brief Computes the supremum-norm of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of norm_inf_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the norm
  */
 template<typename NumericT>
 void norm_inf_cpu(vector_base<NumericT> const & x,
                   NumericT & result)
 {
   scalar<NumericT> device_result(0, viennacl::traits::context(x));

   norm_inf_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the index of the first entry that is equal to the supremum-norm in modulus.
  *
  * This function returns a CPU scalar on purpose; otherwise statements like
  *   vcl_rhs[index_norm_inf(vcl_rhs)]
  * are ambiguous.
  *
  * The kernel writes the index into a device scalar of type NumericT, which is copied
  * to the host and converted to cl_uint.
  *
  * @param x   The vector
  * @return    The index as a host-side cl_uint
  */
 template<typename NumericT>
 cl_uint index_norm_inf(vector_base<NumericT> const & x)
 {
   viennacl::scalar<NumericT> device_index(0, viennacl::traits::context(x));

   scheduler::statement index_statement = scheduler::preset::index_norm_inf(&device_index, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("index_norm_inf", index_statement);

   NumericT host_index = device_index;   // device -> host transfer
   return static_cast<cl_uint>(host_index);
 }


 /** @brief Computes the maximum of a vector.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the maximum; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void max_impl(vector_base<NumericT> const & x,
               scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement max_statement = scheduler::preset::max(&result, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("max", max_statement);
 }


 /** @brief Computes the maximum of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of max_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the maximum
  */
 template<typename NumericT>
 void max_cpu(vector_base<NumericT> const & x,
              NumericT & result)
 {
   scalar<NumericT> device_result(0, viennacl::traits::context(x));

   max_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the minimum of a vector.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the minimum; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void min_impl(vector_base<NumericT> const & x,
               scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   scheduler::statement min_statement = scheduler::preset::min(&result, &x);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("min", min_statement);
 }


 /** @brief Computes the minimum of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of min_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the minimum
  */
 template<typename NumericT>
 void min_cpu(vector_base<NumericT> const & x,
              NumericT & result)
 {
   scalar<NumericT> device_result(0, viennacl::traits::context(x));

   min_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 /** @brief Computes the sum over all entries of a vector.
  *
  * The sum is obtained as the inner product of x with a temporary vector of ones
  * of the same size, created in the context of x.
  *
  * @param x        The vector
  * @param result   Device scalar receiving the sum; must live in the same OpenCL context as x
  */
 template<typename NumericT>
 void sum_impl(vector_base<NumericT> const & x,
               scalar<NumericT> & result)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));

   // sum(x) == <x, (1, 1, ..., 1)>
   viennacl::vector<NumericT> ones = viennacl::scalar_vector<NumericT>(x.size(), NumericT(1), viennacl::traits::context(x));

   viennacl::linalg::opencl::inner_prod_impl(x, ones, result);
 }


 /** @brief Computes the sum over all entries of a vector and returns it in a host scalar.
  *
  * A temporary device scalar in the context of x receives the result of sum_impl()
  * and is then copied to the host.
  *
  * @param x        The vector
  * @param result   Host scalar receiving the sum
  */
 template<typename NumericT>
 void sum_cpu(vector_base<NumericT> const & x, NumericT & result)
 {
   scalar<NumericT> device_result(0, viennacl::traits::context(x));

   sum_impl(x, device_result);

   result = device_result;   // device -> host transfer
 }


 //TODO: Special case x == y allows improvement!!
 /** @brief Computes a plane rotation of two vectors.
  *
  * Runs the "plane_rotation" kernel on the statements produced by scheduler::preset::plane_rotation.
  * Both vectors must live in the same OpenCL context and have the same size (checked by assertions only).
  *
  * @param x       The first vector (modified)
  * @param y       The second vector (modified)
  * @param alpha   The first rotation coefficient
  * @param beta    The second rotation coefficient
  */
 template<typename NumericT>
 void plane_rotation(vector_base<NumericT> & x,
                     vector_base<NumericT> & y,
                     NumericT alpha, NumericT beta)
 {
   assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(y).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
   assert(viennacl::traits::size(x) == viennacl::traits::size(y));

   device_specific::statements_container rotation_statements = scheduler::preset::plane_rotation(&x, &y, &alpha, &beta);

   device_specific::execution_handler & handler = kernels::vector<NumericT>::execution_handler(viennacl::traits::opencl_context(x));
   handler.execute("plane_rotation", rotation_statements);
 }


 namespace detail
 {
   /** @brief Shared implementation of inclusive and exclusive scan, carried out by three kernel launches.
    *
    * Uses a fixed launch configuration of 128 work groups with 128 work items each and a temporary
    * buffer holding one carry value per work group:
    *  - scan_1 scans within each work group and writes the carries,
    *  - scan_2 turns the carries into per-group offsets (launched as a single work group),
    *  - scan_3 applies the offsets to the output.
    *
    * @param input          Input vector
    * @param output         Output vector receiving the scan result
    * @param is_inclusive   true for an inclusive scan, false for an exclusive scan
    */
   template<typename NumericT>
   void scan_impl(vector_base<NumericT> const & input,
                  vector_base<NumericT>       & output,
                  bool is_inclusive)
   {
     vcl_size_t const group_size = 128;
     vcl_size_t const num_groups = 128;
     vcl_size_t const total_work_items = num_groups * group_size;

     // One carry entry per work group
     viennacl::backend::mem_handle carries;
     viennacl::backend::memory_create(carries, sizeof(NumericT)*num_groups, viennacl::traits::context(input));

     viennacl::ocl::context & ocl_ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
     viennacl::linalg::opencl::kernels::scan<NumericT>::init(ocl_ctx);

     std::string const program = viennacl::linalg::opencl::kernels::scan<NumericT>::program_name();
     viennacl::ocl::kernel & scan_groups   = ocl_ctx.get_kernel(program, "scan_1");
     viennacl::ocl::kernel & scan_carries  = ocl_ctx.get_kernel(program, "scan_2");
     viennacl::ocl::kernel & apply_offsets = ocl_ctx.get_kernel(program, "scan_3");

     // First step: Scan within each thread group and write carries
     scan_groups.local_work_size(0, group_size);
     scan_groups.global_work_size(0, total_work_items);
     viennacl::ocl::enqueue(scan_groups( input, cl_uint( input.start()), cl_uint( input.stride()), cl_uint(input.size()),
                                        output, cl_uint(output.start()), cl_uint(output.stride()),
                                        cl_uint(is_inclusive ? 0 : 1), carries.opencl_handle())
                           );

     // Second step: Compute offset for each thread group (exclusive scan for each thread group)
     scan_carries.local_work_size(0, num_groups);
     scan_carries.global_work_size(0, num_groups);
     viennacl::ocl::enqueue(scan_carries(carries.opencl_handle()));

     // Third step: Offset each thread group accordingly
     apply_offsets.local_work_size(0, group_size);
     apply_offsets.global_work_size(0, total_work_items);
     viennacl::ocl::enqueue(apply_offsets(output, cl_uint(output.start()), cl_uint(output.stride()), cl_uint(output.size()),
                                          carries.opencl_handle())
                           );
   }
 }


 /** @brief Computes the inclusive scan of input and writes it to output.
  *
  * Thin wrapper around detail::scan_impl() with the inclusive flag set.
  *
  * @param input    Input vector
  * @param output   Output vector receiving the scan result
  */
 template<typename NumericT>
 void inclusive_scan(vector_base<NumericT> const & input,
                     vector_base<NumericT>       & output)
 {
   bool const inclusive = true;
   detail::scan_impl(input, output, inclusive);
 }


 /** @brief Computes the exclusive scan of input and writes it to output.
  *
  * Thin wrapper around detail::scan_impl() with the inclusive flag cleared.
  *
  * @param input    Input vector
  * @param output   Output vector receiving the scan result
  */
 template<typename NumericT>
 void exclusive_scan(vector_base<NumericT> const & input,
                     vector_base<NumericT>       & output)
 {
   bool const inclusive = false;
   detail::scan_impl(input, output, inclusive);
 }


 } //namespace opencl

 } //namespace linalg

 } //namespace viennacl


 #endif

viennacl::linalg::opencl::min_cpu
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector and returns the result in a host (CPU) scalar.
Definition: vector_operations.hpp:467

viennacl::ocl::packed_cl_uint::stride
cl_uint stride
Increment between integers.
Definition: kernel.hpp:50

viennacl::linalg::opencl::norm_2_cpu
void norm_2_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^2-norm of a vector with final reduction on CPU.
Definition: vector_operations.hpp:350

viennacl::vector_tuple::const_size
vcl_size_t const_size() const
Definition: vector.hpp:1143

viennacl::linalg::opencl::kernels::vector_multi_inner_prod::execution_handler
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:167

viennacl::ocl::packed_cl_uint
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:45

viennacl::scalar
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227

viennacl::linalg::opencl::norm_1_cpu
void norm_1_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the l^1-norm of a vector with final reduction on CPU.
Definition: vector_operations.hpp:316

device.hpp
Represents an OpenCL device within ViennaCL.

viennacl::device_specific::statements_container::INDEPENDENT
Definition: forwards.h:266

viennacl::linalg::opencl::inner_prod_cpu
void inner_prod_cpu(vector_base< NumericT > const &x, vector_base< NumericT > const &y, NumericT &result)
Definition: vector_operations.hpp:283

size.hpp
Generic size and resize functionality for different vector and matrix types.

viennacl::linalg::opencl::plane_rotation
void plane_rotation(vector_base< NumericT > &x, vector_base< NumericT > &y, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors.
Definition: vector_operations.hpp:517

viennacl::linalg::opencl::norm_inf_cpu
void norm_inf_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the supremum-norm of a vector.
Definition: vector_operations.hpp:383

viennacl::ocl::kernel
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58

start.hpp
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...

viennacl::scheduler::preset::inner_prod
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
Definition: preset.hpp:229

viennacl::ocl::packed_cl_uint::start
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:48

viennacl::linalg::opencl::kernels::vector_convert::program_name
static std::string program_name()
Definition: vector.hpp:291

tools.hpp
Various little tools used here and there in ViennaCL.

viennacl::ocl::kernel::local_work_size
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742

viennacl::ocl::context
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55

viennacl::linalg::opencl::index_norm_inf
cl_uint index_norm_inf(vector_base< NumericT > const &x)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Definition: vector_operations.hpp:403

viennacl::linalg::opencl::norm_1_impl
void norm_1_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^1-norm of a vector.
Definition: vector_operations.hpp:301

viennacl::linalg::opencl::avbv_v
void avbv_v(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:115

viennacl::traits::stride
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45

forwards.h
This file provides the forward declarations for the main types used within ViennaCL.

stride.hpp
Determines row and column increments for matrices and matrix proxies.

viennacl::linalg::opencl::min_impl
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector.
Definition: vector_operations.hpp:452

viennacl::scheduler::OPERATION_BINARY_ASSIGN_TYPE
Definition: forwards.h:115

viennacl::scheduler::preset::max
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:269

viennacl::traits::internal_size
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:371

viennacl::vector_expression
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239

viennacl::linalg::opencl::kernels::scan::init
static void init(viennacl::ocl::context &ctx)
Definition: scan.hpp:162

viennacl::scheduler::preset::avbv
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:33

viennacl::scheduler::preset::norm_2
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:241

viennacl::linalg::opencl::kernels::vector_element::execution_handler
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:206

viennacl::linalg::opencl::kernels::scan
Main kernel class for generating OpenCL kernels for inclusive and exclusive scans of vectors.
Definition: scan.hpp:155

viennacl::linalg::opencl::vector_assign
void vector_assign(vector_base< NumericT > &x, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Definition: vector_operations.hpp:144

viennacl::ocl::packed_cl_uint::internal_size
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:54

common.hpp
Common implementations shared by OpenCL-based operations.

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::linalg::opencl::element_op
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
Definition: matrix_operations.hpp:257

viennacl::vector_base::stride
size_type stride() const
Returns the stride within the buffer (in multiples of sizeof(NumericT))
Definition: vector_def.hpp:124

viennacl
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34

viennacl::scheduler::preset::min
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:276

viennacl::scheduler::preset::binary_element_op
statement binary_element_op(NumericT const *x, NumericT const *y, NumericT const *z, scheduler::operation_node_type TYPE)
Definition: preset.hpp:284

viennacl::traits::size
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235

viennacl::linalg::opencl::sum_impl
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector.
Definition: vector_operations.hpp:483

preset.hpp

viennacl::linalg::opencl::max_impl
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum of a vector.
Definition: vector_operations.hpp:420

viennacl::is_cpu_scalar
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:448

detail
Definition: blas3.hpp:36

viennacl::vector_range
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434

viennacl::linalg::opencl::max_cpu
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum of a vector and returns the result in a host (CPU) scalar.
Definition: vector_operations.hpp:435

viennacl::ocl::context::get_kernel
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605

viennacl::scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE
Definition: forwards.h:116

viennacl::vector_tuple
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:269

viennacl::linalg::opencl::norm_2_impl
void norm_2_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
Definition: vector_operations.hpp:335

vector.hpp
OpenCL kernel file for vector operations.

viennacl::linalg::opencl::inner_prod_impl
void inner_prod_impl(vector_base< NumericT > const &x, vector_base< NumericT > const &y, scalar< NumericT > &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(x...
Definition: vector_operations.hpp:215

handle.hpp
Implementation of a smart-pointer-like class for handling OpenCL handles.

viennacl::traits::start
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44

viennacl::device_specific::execution_handler::template_of
template_base * template_of(std::string const &key)
Definition: execution_handler.hpp:81

viennacl::scheduler::preset::index_norm_inf
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:255

viennacl::scheduler::preset::norm_1
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:235

viennacl::scheduler::preset::norm_inf
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:247

viennacl::scheduler::result_of::op_type_info
Metafunction for querying type information.
Definition: forwards.h:156

viennacl::vector_base
Common base class for dense vectors, vector ranges, and vector slices.
Definition: vector_def.hpp:104

viennacl::scheduler::preset::unary_element_op
statement unary_element_op(NumericT const *x, NumericT const *y, scheduler::operation_node_type TYPE)
Definition: preset.hpp:305

viennacl::scheduler::preset::av
scheduler::statement av(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a)
Definition: preset.hpp:88

viennacl::vcl_size_t
std::size_t vcl_size_t
Definition: forwards.h:75

viennacl::device_specific::statements_container::data_type
std::list< scheduler::statement > data_type
Definition: forwards.h:265

viennacl::vector< NumericT >

viennacl::linalg::opencl::inclusive_scan
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using OpenCL.
Definition: vector_operations.hpp:584

scan.hpp
OpenCL kernel file for scan operations. To be merged back to vector operations.

viennacl::linalg::opencl::norm_inf_impl
void norm_inf_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the supremum-norm of a vector.
Definition: vector_operations.hpp:368

viennacl::scheduler::operation_node_type
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68

viennacl::linalg::opencl::vector_swap
void vector_swap(vector_base< NumericT > &x, vector_base< NumericT > &y)
Swaps the contents of two vectors, data is copied.
Definition: vector_operations.hpp:160

predicate.hpp
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.

viennacl::linalg::opencl::convert
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
Definition: matrix_operations.hpp:95

viennacl::traits::context
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40

viennacl::linalg::opencl::av
void av(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: vector_operations.hpp:76

viennacl::ocl::enqueue
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50

kernel.hpp
Representation of an OpenCL kernel in ViennaCL.

viennacl::scalar_vector
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87

viennacl::linalg::opencl::exclusive_scan
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using OpenCL.
Definition: vector_operations.hpp:597

viennacl::scheduler::preset::swap
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
Definition: preset.hpp:103

viennacl::vector_base::size
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118

viennacl::basic_range
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424

viennacl::linalg::opencl::kernels::vector::execution_handler
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:113

viennacl::ocl::kernel::global_work_size
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751

viennacl::linalg::opencl::avbv
void avbv(vector_base< NumericT > &x, vector_base< NumericT > const &y, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &z, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:92

viennacl::backend::mem_handle
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89

viennacl::vector_tuple::const_at
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1146

viennacl::linalg::opencl::detail::make_layout
viennacl::ocl::packed_cl_uint make_layout(vector_base< NumericT > const &vec)
Definition: vector_operations.hpp:229

viennacl::device_specific::execution_handler::execute
void execute(container_type::key_type const &key, statements_container const &statements)
Definition: execution_handler.hpp:86

viennacl::op_element_binary
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130

viennacl::scheduler::statement
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502

viennacl::backend::memory_create
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87

vector_def.hpp
Forward declarations of the implicit_vector_base and vector_base classes.

handle.hpp
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...

viennacl::scheduler::preset::assign_cpu
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:123

viennacl::scheduler::preset::plane_rotation
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
Definition: preset.hpp:95

viennacl::device_specific::tree_parsing::operator_string
const char * operator_string(scheduler::operation_node_type type)
Definition: tree_parsing.hpp:205

viennacl::vector_base::start
size_type start() const
Returns the offset within the buffer.
Definition: vector_def.hpp:122

viennacl::ocl::type_to_string
Helper class for converting a type to its string representation.
Definition: utils.hpp:57

viennacl::linalg::opencl::detail::scan_impl
void scan_impl(vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
Worker routine for scan routines using OpenCL.
Definition: vector_operations.hpp:539

diff
ScalarType diff(ScalarType &s1, viennacl::scalar< ScalarType > &s2)
Definition: blas3_solve.cpp:69

viennacl::op_element_unary
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134

scalar.hpp
Implementation of the ViennaCL scalar class.

viennacl::linalg::opencl::kernels::vector_convert::init
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:296

viennacl::device_specific::vector_axpy_template
Definition: vector_axpy_template.hpp:60

viennacl::linalg::opencl::sum_cpu
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector.
Definition: vector_operations.hpp:498

enable_if.hpp
Simple enable-if variant that uses the SFINAE pattern.

viennacl::device_specific::statements_container
Definition: forwards.h:262

viennacl::ocl::packed_cl_uint::size
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:52