ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 
26 
27 #include "viennacl/ocl/kernel.hpp"
29 #include "viennacl/ocl/utils.hpp"
30 
32 
35 
38 namespace viennacl
39 {
40 namespace linalg
41 {
42 namespace opencl
43 {
44 namespace kernels
45 {
46 
47 template<typename NumericT, typename ScalarT>
48 static void generate_inner_prod_impl(device_specific::execution_handler & handler, std::string const & prefix, device_specific::reduction_template::parameters_type const & parameters, vcl_size_t vector_num,
49  viennacl::vector<NumericT> const * x, viennacl::vector<NumericT> const * y, ScalarT const* s)
50 {
51  namespace ds = device_specific;
52  ds::statements_container::data_type statements;
53  for (unsigned int i = 0; i < vector_num; ++i)
54  statements.push_back(scheduler::preset::inner_prod(s, x, y));
55  handler.add(prefix, ds::reduction_template(parameters), ds::statements_container(statements,ds::statements_container::INDEPENDENT));
56 }
57 
58 
59 
60 // main kernel class
62 template<typename NumericT>
63 class vector
64 {
65 private:
66 
67  template<typename ScalarT1, typename ScalarT2>
68  static void generate_avbv_impl2(device_specific::execution_handler & handler, std::string const & prefix, device_specific::vector_axpy_template::parameters_type const & parameters, scheduler::operation_node_type ASSIGN_OP,
69  viennacl::vector_base<NumericT> const * x, viennacl::vector_base<NumericT> const * y, ScalarT1 const * a,
70  viennacl::vector_base<NumericT> const * z, ScalarT2 const * b)
71  {
72  namespace ds = device_specific;
73  handler.add(prefix + "0000", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, false, false));
74  handler.add(prefix + "1000", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, false, false));
75  handler.add(prefix + "0100", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, false, false));
76  handler.add(prefix + "1100", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, false, false));
77  if (b)
78  {
79  handler.add(prefix + "0010", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, true, false));
80  handler.add(prefix + "1010", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, true, false));
81  handler.add(prefix + "0110", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, true, false));
82  handler.add(prefix + "1110", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, true, false));
83 
84  handler.add(prefix + "0001", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, false, true));
85  handler.add(prefix + "1001", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, false, true));
86  handler.add(prefix + "0101", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, false, true));
87  handler.add(prefix + "1101", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, false, true));
88 
89  handler.add(prefix + "0011", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, false, z, b, true, true));
90  handler.add(prefix + "1011", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, false, z, b, true, true));
91  handler.add(prefix + "0111", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, false, true, z, b, true, true));
92  handler.add(prefix + "1111", ds::vector_axpy_template(parameters), scheduler::preset::avbv(ASSIGN_OP, x, y, a, true, true, z, b, true, true));
93  }
94  }
95 
96  template<typename ScalarT>
97  static void generate_avbv_impl(device_specific::execution_handler & handler, std::string const & prefix, device_specific::vector_axpy_template::parameters_type const & parameters, scheduler::operation_node_type ASSIGN_OP,
98  viennacl::vector_base<NumericT> const * x, viennacl::vector_base<NumericT> const * y, ScalarT const * ha, viennacl::scalar<NumericT> const * da,
99  viennacl::vector_base<NumericT> const * z, ScalarT const * hb, viennacl::scalar<NumericT> const * db)
100  {
101  //x ASSIGN_OP a*y
102  generate_avbv_impl2(handler, prefix + "hv_", parameters, ASSIGN_OP, x, y, ha, (viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
103  generate_avbv_impl2(handler, prefix + "dv_", parameters, ASSIGN_OP, x, y, da, (viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
104 
105  //x ASSIGN_OP a*y + b*z
106  generate_avbv_impl2(handler, prefix + "hvhv_", parameters, ASSIGN_OP, x, y, ha, z, hb);
107  generate_avbv_impl2(handler, prefix + "dvhv_", parameters, ASSIGN_OP, x, y, da, z, hb);
108  generate_avbv_impl2(handler, prefix + "hvdv_", parameters, ASSIGN_OP, x, y, ha, z, db);
109  generate_avbv_impl2(handler, prefix + "dvdv_", parameters, ASSIGN_OP, x, y, da, z, db);
110  }
111 
112 public:
114  {
115  static std::map<cl_context, device_specific::execution_handler> handlers_map;
116  cl_context h = ctx.handle().get();
117  if (handlers_map.find(h) == handlers_map.end())
118  {
120 
121  namespace ds = viennacl::device_specific;
122  viennacl::ocl::device const & device = ctx.current_device();
123  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector", ctx, device)));
124  ds::execution_handler & handler = at(handlers_map, h);
125 
132  NumericT ha;
133  NumericT hb;
134 
135  ds::vector_axpy_template::parameters_type vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
136  ds::reduction_template::parameters_type reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
137 
138  generate_avbv_impl(handler, "assign_", vector_axpy_params, scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
139  generate_avbv_impl(handler, "ip_add_", vector_axpy_params, scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
140 
141  handler.add("plane_rotation", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::plane_rotation(&x, &y, &ha, &hb));
142  handler.add("swap", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::swap(&x, &y));
143  handler.add("assign_cpu", ds::vector_axpy_template(vector_axpy_params), scheduler::preset::assign_cpu(&x, &scalary));
144 
145  generate_inner_prod_impl(handler, "inner_prod", reduction_params, 1, &x, &y, &da);
146 
147  handler.add("norm_1", ds::reduction_template(reduction_params), scheduler::preset::norm_1(&da, &x));
148  bool is_float_or_double = is_floating_point<NumericT>::value;
149  if (is_float_or_double) //BIND_TO_HANDLE for optimization (will load x once in the internal inner product)
150  handler.add("norm_2", ds::reduction_template(reduction_params, ds::BIND_TO_HANDLE), scheduler::preset::norm_2(&da, &x));
151  handler.add("norm_inf", ds::reduction_template(reduction_params), scheduler::preset::norm_inf(&da, &x));
152  handler.add("index_norm_inf", ds::reduction_template(reduction_params), scheduler::preset::index_norm_inf(&da, &x));
153  handler.add("sum", ds::reduction_template(reduction_params), scheduler::preset::sum(&da, &x));
154  handler.add("max", ds::reduction_template(reduction_params), scheduler::preset::max(&da, &x));
155  handler.add("min", ds::reduction_template(reduction_params), scheduler::preset::min(&da, &x));
156  }
157  return at(handlers_map, h);
158  }
159 };
160 
161 // main kernel class
163 template<typename NumericT>
165 {
166 public:
168  {
169  static std::map<cl_context, device_specific::execution_handler> handlers_map;
170  cl_context h = ctx.handle().get();
171  if (handlers_map.find(h) == handlers_map.end())
172  {
174 
175  namespace ds = viennacl::device_specific;
176 
177  viennacl::ocl::device const & device = ctx.current_device();
178  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector_multi_inner_prod", ctx, device)));
179  ds::execution_handler & handler = viennacl::device_specific::at(handlers_map, h);
180 
181  ds::reduction_template::parameters_type reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
182 
183  //Dummy holders for the statements
188 
189  generate_inner_prod_impl(handler, "inner_prod_1", reduction_params, 1, &x, &y, &da);
190  generate_inner_prod_impl(handler, "inner_prod_2", reduction_params, 2, &x, &y, &da);
191  generate_inner_prod_impl(handler, "inner_prod_3", reduction_params, 3, &x, &y, &da);
192  generate_inner_prod_impl(handler, "inner_prod_4", reduction_params, 4, &x, &y, &da);
193  generate_inner_prod_impl(handler, "inner_prod_8", reduction_params, 8, &x, &y, &da);
194  }
195  return viennacl::device_specific::at(handlers_map, h);
196  }
197 };
198 
199 // main kernel class
201 template<typename NumericT>
203 {
204 
205 public:
207  {
208  static std::map<cl_context, device_specific::execution_handler> handlers_map;
209  cl_context h = ctx.handle().get();
210  if (handlers_map.find(h) == handlers_map.end())
211  {
213 
214  namespace ds = viennacl::device_specific;
215  using namespace scheduler;
217 
218  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
219  viennacl::ocl::device const & device = ctx.current_device();
220  handlers_map.insert(std::make_pair(h, ds::execution_handler(viennacl::ocl::type_to_string<NumericT>::apply() + "_vector_element", ctx, device)));
221  ds::execution_handler & handler = viennacl::device_specific::at(handlers_map, h);
222  ds::vector_axpy_template::parameters_type vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
223 
227 
228  // unary operations
229 #define VIENNACL_ADD_UNARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::unary_element_op(&x, &y, OPTYPE))
230  if (numeric_string == "float" || numeric_string == "double")
231  {
248  }
249  else
250  {
252  }
253 #undef VIENNACL_ADD_UNARY
254 
255  // binary operations
256 #define VIENNACL_ADD_BINARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::binary_element_op(&x, &y, &z, OPTYPE))
259  if (numeric_string == "float" || numeric_string == "double")
260  {
262  }
263 #undef VIENNACL_ADD_BINARY
264 
265  }
266  return viennacl::device_specific::at(handlers_map, h);
267  }
268 };
269 
270 
271 
272 template<typename StringT>
273 void generate_vector_convert(StringT & source, std::string const & dest_type, std::string const & src_type)
274 {
275  source.append(" __kernel void convert_" + dest_type + "_" + src_type + "( \n");
276  source.append(" __global " + dest_type + " * dest, \n");
277  source.append(" unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, \n");
278  source.append(" __global const " + src_type + " * src, \n");
279  source.append(" unsigned int start_src, unsigned int inc_src) \n");
280  source.append(" { \n");
281  source.append(" for (unsigned int i = get_global_id(0); i < size_dest; i += get_global_size(0)) \n");
282  source.append(" dest[start_dest + i * inc_dest] = src[start_src + i * inc_src]; \n");
283  source.append(" } \n");
284 }
285 
288 {
289 
290 public:
291  static std::string program_name()
292  {
293  return "vector_convert";
294  }
295 
296  static void init(viennacl::ocl::context & ctx)
297  {
298  static std::map<cl_context, bool> init_done;
299  if (!init_done[ctx.handle().get()])
300  {
301  std::string source;
302  source.reserve(4096);
303 
304  // int
310 
311  // unsigned int
317 
318  // long
324 
325  // unsigned long
331 
332  // float
338 
339  if (ctx.current_device().double_support())
340  {
342 
348 
355  }
356 
357  std::string prog_name = program_name();
358  #ifdef VIENNACL_BUILD_INFO
359  std::cout << "Creating program " << prog_name << std::endl;
360  #endif
361  ctx.add_program(source, prog_name);
362  init_done[ctx.handle().get()] = true;
363  } //if
364  } //init
365 
366 };
367 
368 
369 } // namespace kernels
370 } // namespace opencl
371 } // namespace linalg
372 } // namespace viennacl
373 #endif
374 
viennacl::ocl::device const & current_device() const
Returns the current device.
Definition: context.hpp:112
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:167
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
Implements a OpenCL platform within ViennaCL.
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
Definition: preset.hpp:229
void append_double_precision_pragma< double >(viennacl::ocl::context const &ctx, std::string &source)
Definition: utils.hpp:78
Various little tools used here and there in ViennaCL.
Some helper routines for reading/writing/printing scheduler expressions.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
reduction_template::parameters_type const & reduction_params(ocl::device const &device)
Definition: reduction.hpp:109
Provides OpenCL-related utilities.
Main kernel class for vector conversion routines (e.g. convert vector to vector).
Definition: vector.hpp:287
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
void add(std::string const &key, template_base const &T, statements_container const &statements)
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:269
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
Definition: preset.hpp:33
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:241
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:206
statement sum(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:263
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:164
float NumericT
Definition: bisect.cpp:40
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:276
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:368
const OCL_TYPE & get() const
Definition: handle.hpp:189
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434
vector_axpy_template::parameters_type const & vector_axpy_params(ocl::device const &device)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:255
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:235
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:247
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
std::size_t vcl_size_t
Definition: forwards.h:75
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
operation_node_type
Enumeration for identifying the possible operations.
Definition: forwards.h:68
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
Definition: vector.hpp:202
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
Definition: preset.hpp:103
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
Definition: vector.hpp:113
Provides an OpenCL kernel generator.
Definition: common.hpp:34
#define VIENNACL_ADD_UNARY(OPTYPE)
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
Definition: preset.hpp:123
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
Definition: preset.hpp:95
const char * operator_string(scheduler::operation_node_type type)
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>, const-version.
Definition: forwards.h:142
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:63
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_vector_convert(StringT &source, std::string const &dest_type, std::string const &src_type)
Definition: vector.hpp:273
#define VIENNACL_ADD_BINARY(OPTYPE)
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:296
Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc.