1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
47 template<
typename NumericT,
typename ScalarT>
51 namespace ds = device_specific;
52 ds::statements_container::data_type statements;
53 for (
unsigned int i = 0; i < vector_num; ++i)
55 handler.add(prefix, ds::reduction_template(parameters), ds::statements_container(statements,ds::statements_container::INDEPENDENT));
62 template<
typename NumericT>
67 template<
typename ScalarT1,
typename ScalarT2>
72 namespace ds = device_specific;
73 handler.
add(prefix +
"0000", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
false,
false));
74 handler.
add(prefix +
"1000", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
false,
false));
75 handler.
add(prefix +
"0100", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
false,
false));
76 handler.
add(prefix +
"1100", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
false,
false));
79 handler.
add(prefix +
"0010", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
true,
false));
80 handler.
add(prefix +
"1010", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
true,
false));
81 handler.
add(prefix +
"0110", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
true,
false));
82 handler.
add(prefix +
"1110", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
true,
false));
84 handler.
add(prefix +
"0001", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
false,
true));
85 handler.
add(prefix +
"1001", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
false,
true));
86 handler.
add(prefix +
"0101", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
false,
true));
87 handler.
add(prefix +
"1101", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
false,
true));
89 handler.
add(prefix +
"0011", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
true,
true));
90 handler.
add(prefix +
"1011", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
true,
true));
91 handler.
add(prefix +
"0111", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
true,
true));
92 handler.
add(prefix +
"1111", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
true,
true));
96 template<
typename ScalarT>
106 generate_avbv_impl2(handler, prefix +
"hvhv_", parameters, ASSIGN_OP, x, y, ha, z, hb);
107 generate_avbv_impl2(handler, prefix +
"dvhv_", parameters, ASSIGN_OP, x, y, da, z, hb);
108 generate_avbv_impl2(handler, prefix +
"hvdv_", parameters, ASSIGN_OP, x, y, ha, z, db);
109 generate_avbv_impl2(handler, prefix +
"dvdv_", parameters, ASSIGN_OP, x, y, da, z, db);
115 static std::map<cl_context, device_specific::execution_handler> handlers_map;
117 if (handlers_map.find(h) == handlers_map.end())
124 ds::execution_handler & handler =
at(handlers_map, h);
135 ds::vector_axpy_template::parameters_type
vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
136 ds::reduction_template::parameters_type
reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
138 generate_avbv_impl(handler,
"assign_", vector_axpy_params,
scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
139 generate_avbv_impl(handler,
"ip_add_", vector_axpy_params,
scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
145 generate_inner_prod_impl(handler,
"inner_prod", reduction_params, 1, &x, &y, &da);
148 bool is_float_or_double = is_floating_point<NumericT>::value;
149 if (is_float_or_double)
157 return at(handlers_map, h);
163 template<
typename NumericT>
169 static std::map<cl_context, device_specific::execution_handler> handlers_map;
171 if (handlers_map.find(h) == handlers_map.end())
181 ds::reduction_template::parameters_type
reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
189 generate_inner_prod_impl(handler,
"inner_prod_1", reduction_params, 1, &x, &y, &da);
190 generate_inner_prod_impl(handler,
"inner_prod_2", reduction_params, 2, &x, &y, &da);
191 generate_inner_prod_impl(handler,
"inner_prod_3", reduction_params, 3, &x, &y, &da);
192 generate_inner_prod_impl(handler,
"inner_prod_4", reduction_params, 4, &x, &y, &da);
193 generate_inner_prod_impl(handler,
"inner_prod_8", reduction_params, 8, &x, &y, &da);
201 template<
typename NumericT>
208 static std::map<cl_context, device_specific::execution_handler> handlers_map;
210 if (handlers_map.find(h) == handlers_map.end())
215 using namespace scheduler;
222 ds::vector_axpy_template::parameters_type
vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
229 #define VIENNACL_ADD_UNARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::unary_element_op(&x, &y, OPTYPE))
230 if (numeric_string ==
"float" || numeric_string ==
"double")
253 #undef VIENNACL_ADD_UNARY
256 #define VIENNACL_ADD_BINARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::binary_element_op(&x, &y, &z, OPTYPE))
259 if (numeric_string ==
"float" || numeric_string ==
"double")
263 #undef VIENNACL_ADD_BINARY
272 template<
typename StringT>
275 source.append(
" __kernel void convert_" + dest_type +
"_" + src_type +
"( \n");
276 source.append(
" __global " + dest_type +
" * dest, \n");
277 source.append(
" unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, \n");
278 source.append(
" __global const " + src_type +
" * src, \n");
279 source.append(
" unsigned int start_src, unsigned int inc_src) \n");
280 source.append(
" { \n");
281 source.append(
" for (unsigned int i = get_global_id(0); i < size_dest; i += get_global_size(0)) \n");
282 source.append(
" dest[start_dest + i * inc_dest] = src[start_src + i * inc_src]; \n");
283 source.append(
" } \n");
293 return "vector_convert";
298 static std::map<cl_context, bool> init_done;
302 source.reserve(4096);
357 std::string prog_name = program_name();
358 #ifdef VIENNACL_BUILD_INFO
359 std::cout <<
"Creating program " << prog_name << std::endl;
viennacl::ocl::device const & current_device() const
Returns the current device.
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
static std::string program_name()
void append_double_precision_pragma< double >(viennacl::ocl::context const &ctx, std::string &source)
Some helper routines for reading/writing/printing scheduler expressions.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
reduction_template::parameters_type const & reduction_params(ocl::device const &device)
Provides OpenCL-related utilities.
Main kernel class for vector conversion routines (e.g. convert vector to vector).
A class representing a compute device (e.g. a GPU)
void add(std::string const &key, template_base const &T, statements_container const &statements)
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
statement sum(scalar< NumericT > const *s, vector_base< NumericT > const *x)
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
static void apply(viennacl::ocl::context const &)
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
const OCL_TYPE & get() const
Class for representing non-strided subvectors of a bigger vector x.
vector_axpy_template::parameters_type const & vector_axpy_params(ocl::device const &device)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
operation_node_type
Enumeration for identifying the possible operations.
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Provides an OpenCL kernel generator.
#define VIENNACL_ADD_UNARY(OPTYPE)
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
const char * operator_string(scheduler::operation_node_type type)
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>, const-version.
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Helper class for converting a type to its string representation.
void generate_vector_convert(StringT &source, std::string const &dest_type, std::string const &src_type)
#define VIENNACL_ADD_BINARY(OPTYPE)
static void init(viennacl::ocl::context &ctx)
Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc.
reduction_parameters parameters_type