ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
scalar.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
28 namespace viennacl
29 {
30 namespace linalg
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36 
38 
41 {
42  VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute
45 };
46 
49 {
51 
53  std::string assign_op;
56 };
57 
58 // just returns the assignment string
59 template<typename StringT>
60 void generate_asbs_impl3(StringT & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta)
61 {
62  source.append(" *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 ");
63  if (mult_alpha)
64  source.append("* alpha ");
65  else
66  source.append("/ alpha ");
67  if (cfg.b != VIENNACL_ASBS_NONE)
68  {
69  source.append(1, sign_b); source.append(" *s3 ");
70  if (mult_beta)
71  source.append("* beta");
72  else
73  source.append("/ beta");
74  }
75  source.append("; \n");
76 }
77 
78 template<typename StringT>
79 void generate_asbs_impl2(StringT & source, char sign_a, char sign_b, asbs_config const & cfg)
80 {
81  source.append(" if (options2 & (1 << 1)) { \n");
82  if (cfg.b != VIENNACL_ASBS_NONE)
83  {
84  source.append(" if (options3 & (1 << 1)) \n");
85  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false);
86  source.append(" else \n");
87  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
88  }
89  else
90  generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
91  source.append(" } else { \n");
92  if (cfg.b != VIENNACL_ASBS_NONE)
93  {
94  source.append(" if (options3 & (1 << 1)) \n");
95  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false);
96  source.append(" else \n");
97  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
98  }
99  else
100  generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
101  source.append(" } \n");
102 
103 }
104 
105 template<typename StringT>
106 void generate_asbs_impl(StringT & source, std::string const & numeric_string, asbs_config const & cfg)
107 {
108  source.append("__kernel void as");
109  if (cfg.b != VIENNACL_ASBS_NONE)
110  source.append("bs");
111  if (cfg.assign_op != "=")
112  source.append("_s");
113 
114  if (cfg.a == VIENNACL_ASBS_CPU)
115  source.append("_cpu");
116  else if (cfg.a == VIENNACL_ASBS_GPU)
117  source.append("_gpu");
118 
119  if (cfg.b == VIENNACL_ASBS_CPU)
120  source.append("_cpu");
121  else if (cfg.b == VIENNACL_ASBS_GPU)
122  source.append("_gpu");
123  source.append("( \n");
124  source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n");
125  source.append(" \n");
126  if (cfg.a == VIENNACL_ASBS_CPU)
127  {
128  source.append(" "); source.append(numeric_string); source.append(" fac2, \n");
129  }
130  else if (cfg.a == VIENNACL_ASBS_GPU)
131  {
132  source.append(" __global "); source.append(numeric_string); source.append(" * fac2, \n");
133  }
134  source.append(" unsigned int options2, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
135  source.append(" __global const "); source.append(numeric_string); source.append(" * s2");
136 
137  if (cfg.b != VIENNACL_ASBS_NONE)
138  {
139  source.append(", \n\n");
140  if (cfg.b == VIENNACL_ASBS_CPU)
141  {
142  source.append(" "); source.append(numeric_string); source.append(" fac3, \n");
143  }
144  else if (cfg.b == VIENNACL_ASBS_GPU)
145  {
146  source.append(" __global "); source.append(numeric_string); source.append(" * fac3, \n");
147  }
148  source.append(" unsigned int options3, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
149  source.append(" __global const "); source.append(numeric_string); source.append(" * s3");
150  }
151  source.append(") \n{ \n");
152 
153  if (cfg.a == VIENNACL_ASBS_CPU)
154  {
155  source.append(" "); source.append(numeric_string); source.append(" alpha = fac2; \n");
156  }
157  else if (cfg.a == VIENNACL_ASBS_GPU)
158  {
159  source.append(" "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n");
160  }
161  source.append(" \n");
162 
163  if (cfg.b == VIENNACL_ASBS_CPU)
164  {
165  source.append(" "); source.append(numeric_string); source.append(" beta = fac3; \n");
166  }
167  else if (cfg.b == VIENNACL_ASBS_GPU)
168  {
169  source.append(" "); source.append(numeric_string); source.append(" beta = fac3[0]; \n");
170  }
171 
172  source.append(" if (options2 & (1 << 0)) { \n");
173  if (cfg.b != VIENNACL_ASBS_NONE)
174  {
175  source.append(" if (options3 & (1 << 0)) { \n");
176  generate_asbs_impl2(source, '-', '-', cfg);
177  source.append(" } else { \n");
178  generate_asbs_impl2(source, '-', '+', cfg);
179  source.append(" } \n");
180  }
181  else
182  generate_asbs_impl2(source, '-', '+', cfg);
183  source.append(" } else { \n");
184  if (cfg.b != VIENNACL_ASBS_NONE)
185  {
186  source.append(" if (options3 & (1 << 0)) { \n");
187  generate_asbs_impl2(source, '+', '-', cfg);
188  source.append(" } else { \n");
189  generate_asbs_impl2(source, '+', '+', cfg);
190  source.append(" } \n");
191  }
192  else
193  generate_asbs_impl2(source, '+', '+', cfg);
194 
195  source.append(" } \n");
196  source.append("} \n");
197 }
198 
199 template<typename StringT>
200 void generate_asbs(StringT & source, std::string const & numeric_string)
201 {
202  asbs_config cfg;
203  cfg.assign_op = "=";
204  cfg.with_stride_and_range = true;
205 
206  // as
207  cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
208  cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
209 
210  // asbs
211  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
212  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
213  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
214  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
215 
216  // asbs
217  cfg.assign_op = "+=";
218 
219  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
220  cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
221  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
222  cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
223 }
224 
225 template<typename StringT>
226 void generate_scalar_swap(StringT & source, std::string const & numeric_string)
227 {
228  source.append("__kernel void swap( \n");
229  source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n");
230  source.append(" __global "); source.append(numeric_string); source.append(" * s2) \n");
231  source.append("{ \n");
232  source.append(" "); source.append(numeric_string); source.append(" tmp = *s2; \n");
233  source.append(" *s2 = *s1; \n");
234  source.append(" *s1 = tmp; \n");
235  source.append("} \n");
236 }
237 
239 
240 // main kernel class
242 template<typename NumericT>
243 struct scalar
244 {
245  static std::string program_name()
246  {
248  }
249 
250  static void init(viennacl::ocl::context & ctx)
251  {
252  static std::map<cl_context, bool> init_done;
253  if (!init_done[ctx.handle().get()])
254  {
256  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
257 
258  std::string source;
259  source.reserve(8192);
260 
261  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
262 
263  // fully parametrized kernels:
264  generate_asbs(source, numeric_string);
265  generate_scalar_swap(source, numeric_string);
266 
267 
268  std::string prog_name = program_name();
269  #ifdef VIENNACL_BUILD_INFO
270  std::cout << "Creating program " << prog_name << std::endl;
271  #endif
272  ctx.add_program(source, prog_name);
273  init_done[ctx.handle().get()] = true;
274  } //if
275  } //init
276 };
277 
278 } // namespace kernels
279 } // namespace opencl
280 } // namespace linalg
281 } // namespace viennacl
282 #endif
283 
Implements a OpenCL platform within ViennaCL.
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
void generate_asbs_impl3(StringT &source, char sign_a, char sign_b, asbs_config const &cfg, bool mult_alpha, bool mult_beta)
Definition: scalar.hpp:60
Provides OpenCL-related utilities.
void generate_asbs(StringT &source, std::string const &numeric_string)
Definition: scalar.hpp:200
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:611
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void generate_scalar_swap(StringT &source, std::string const &numeric_string)
Definition: scalar.hpp:226
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
Main kernel class for generating OpenCL kernels for operations involving viennacl::scalar<>, but not viennacl::vector<> or viennacl::matrix<>.
Definition: scalar.hpp:243
const OCL_TYPE & get() const
Definition: handle.hpp:189
static void init(viennacl::ocl::context &ctx)
Definition: scalar.hpp:250
Configuration struct for generating OpenCL kernels for linear combinations of viennacl::scalar<> obje...
Definition: scalar.hpp:48
void generate_asbs_impl(StringT &source, std::string const &numeric_string, asbs_config const &cfg)
Definition: scalar.hpp:106
asbs_scalar_type
Enumeration for the scalar type in avbv-like operations.
Definition: scalar.hpp:40
Representation of an OpenCL kernel in ViennaCL.
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
void generate_asbs_impl2(StringT &source, char sign_a, char sign_b, asbs_config const &cfg)
Definition: scalar.hpp:79