ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
scalar_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/tools/tools.hpp"
29 #include "viennacl/traits/size.hpp"
33 
34 // includes CUDA
35 #include <cuda_runtime.h>
36 
37 
38 namespace viennacl
39 {
40 namespace linalg
41 {
42 namespace cuda
43 {
44 
46 
47 template<typename NumericT>
48 __global__ void as_kernel(NumericT * s1, const NumericT * fac2, unsigned int options2, const NumericT * s2)
49 {
50  NumericT alpha = *fac2;
51  if (options2 & (1 << 0))
52  alpha = -alpha;
53  if (options2 & (1 << 1))
54  alpha = NumericT(1) / alpha;
55 
56  *s1 = *s2 * alpha;
57 }
58 
59 template<typename NumericT>
60 __global__ void as_kernel(NumericT * s1, NumericT fac2, unsigned int options2, const NumericT * s2)
61 {
62  NumericT alpha = fac2;
63  if (options2 & (1 << 0))
64  alpha = -alpha;
65  if (options2 & (1 << 1))
66  alpha = NumericT(1) / alpha;
67 
68  *s1 = *s2 * alpha;
69 }
70 
71 template<typename ScalarT1,
72  typename ScalarT2, typename NumericT>
76  >::type
77 as(ScalarT1 & s1,
78  ScalarT2 const & s2, NumericT const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
79 {
80  typedef typename viennacl::result_of::cpu_value_type<ScalarT1>::type value_type;
81 
82  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
83 
84  value_type temporary_alpha = 0;
86  temporary_alpha = alpha;
87 
88  as_kernel<<<1, 1>>>(viennacl::cuda_arg(s1),
89  viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
90  options_alpha,
91  viennacl::cuda_arg(s2));
92  VIENNACL_CUDA_LAST_ERROR_CHECK("as_kernel");
93 }
94 
96 
97 // alpha and beta on GPU
98 template<typename NumericT>
99 __global__ void asbs_kernel(NumericT * s1,
100  const NumericT * fac2, unsigned int options2, const NumericT * s2,
101  const NumericT * fac3, unsigned int options3, const NumericT * s3)
102 {
103  NumericT alpha = *fac2;
104  if (options2 & (1 << 0))
105  alpha = -alpha;
106  if (options2 & (1 << 1))
107  alpha = NumericT(1) / alpha;
108 
109  NumericT beta = *fac3;
110  if (options3 & (1 << 0))
111  beta = -beta;
112  if (options3 & (1 << 1))
113  beta = NumericT(1) / beta;
114 
115  *s1 = *s2 * alpha + *s3 * beta;
116 }
117 
118 // alpha on CPU, beta on GPU
119 template<typename NumericT>
120 __global__ void asbs_kernel(NumericT * s1,
121  NumericT fac2, unsigned int options2, const NumericT * s2,
122  NumericT const * fac3, unsigned int options3, const NumericT * s3)
123 {
124  NumericT alpha = fac2;
125  if (options2 & (1 << 0))
126  alpha = -alpha;
127  if (options2 & (1 << 1))
128  alpha = NumericT(1) / alpha;
129 
130  NumericT beta = *fac3;
131  if (options3 & (1 << 0))
132  beta = -beta;
133  if (options3 & (1 << 1))
134  beta = NumericT(1) / beta;
135 
136  *s1 = *s2 * alpha + *s3 * beta;
137 }
138 
139 // alpha on GPU, beta on CPU
140 template<typename NumericT>
141 __global__ void asbs_kernel(NumericT * s1,
142  NumericT const * fac2, unsigned int options2, const NumericT * s2,
143  NumericT fac3, unsigned int options3, const NumericT * s3)
144 {
145  NumericT alpha = *fac2;
146  if (options2 & (1 << 0))
147  alpha = -alpha;
148  if (options2 & (1 << 1))
149  alpha = NumericT(1) / alpha;
150 
151  NumericT beta = fac3;
152  if (options3 & (1 << 0))
153  beta = -beta;
154  if (options3 & (1 << 1))
155  beta = NumericT(1) / beta;
156 
157  *s1 = *s2 * alpha + *s3 * beta;
158 }
159 
160 // alpha and beta on CPU
161 template<typename NumericT>
162 __global__ void asbs_kernel(NumericT * s1,
163  NumericT fac2, unsigned int options2, const NumericT * s2,
164  NumericT fac3, unsigned int options3, const NumericT * s3)
165 {
166  NumericT alpha = fac2;
167  if (options2 & (1 << 0))
168  alpha = -alpha;
169  if (options2 & (1 << 1))
170  alpha = NumericT(1) / alpha;
171 
172  NumericT beta = fac3;
173  if (options3 & (1 << 0))
174  beta = -beta;
175  if (options3 & (1 << 1))
176  beta = NumericT(1) / beta;
177 
178  *s1 = *s2 * alpha + *s3 * beta;
179 }
180 
181 
182 template<typename ScalarT1,
183  typename ScalarT2, typename NumericT1,
184  typename ScalarT3, typename NumericT2>
190  >::type
191 asbs(ScalarT1 & s1,
192  ScalarT2 const & s2, NumericT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
193  ScalarT3 const & s3, NumericT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
194 {
195  typedef typename viennacl::result_of::cpu_value_type<ScalarT1>::type value_type;
196 
197  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
198  unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
199 
200  value_type temporary_alpha = 0;
202  temporary_alpha = alpha;
203 
204  value_type temporary_beta = 0;
206  temporary_beta = beta;
207 
208  asbs_kernel<<<1, 1>>>(viennacl::cuda_arg(s1),
209  viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
210  options_alpha,
211  viennacl::cuda_arg(s2),
212  viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
213  options_beta,
214  viennacl::cuda_arg(s3) );
215  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_kernel");
216 }
217 
219 
220 // alpha and beta on GPU
221 template<typename NumericT>
222 __global__ void asbs_s_kernel(NumericT * s1,
223  const NumericT * fac2, unsigned int options2, const NumericT * s2,
224  const NumericT * fac3, unsigned int options3, const NumericT * s3)
225 {
226  NumericT alpha = *fac2;
227  if (options2 & (1 << 0))
228  alpha = -alpha;
229  if (options2 & (1 << 1))
230  alpha = NumericT(1) / alpha;
231 
232  NumericT beta = *fac3;
233  if (options3 & (1 << 0))
234  beta = -beta;
235  if (options3 & (1 << 1))
236  beta = NumericT(1) / beta;
237 
238  *s1 += *s2 * alpha + *s3 * beta;
239 }
240 
241 // alpha on CPU, beta on GPU
242 template<typename NumericT>
243 __global__ void asbs_s_kernel(NumericT * s1,
244  NumericT fac2, unsigned int options2, const NumericT * s2,
245  NumericT const * fac3, unsigned int options3, const NumericT * s3)
246 {
247  NumericT alpha = fac2;
248  if (options2 & (1 << 0))
249  alpha = -alpha;
250  if (options2 & (1 << 1))
251  alpha = NumericT(1) / alpha;
252 
253  NumericT beta = *fac3;
254  if (options3 & (1 << 0))
255  beta = -beta;
256  if (options3 & (1 << 1))
257  beta = NumericT(1) / beta;
258 
259  *s1 += *s2 * alpha + *s3 * beta;
260 }
261 
262 // alpha on GPU, beta on CPU
263 template<typename NumericT>
264 __global__ void asbs_s_kernel(NumericT * s1,
265  NumericT const * fac2, unsigned int options2, const NumericT * s2,
266  NumericT fac3, unsigned int options3, const NumericT * s3)
267 {
268  NumericT alpha = *fac2;
269  if (options2 & (1 << 0))
270  alpha = -alpha;
271  if (options2 & (1 << 1))
272  alpha = NumericT(1) / alpha;
273 
274  NumericT beta = fac3;
275  if (options3 & (1 << 0))
276  beta = -beta;
277  if (options3 & (1 << 1))
278  beta = NumericT(1) / beta;
279 
280  *s1 += *s2 * alpha + *s3 * beta;
281 }
282 
283 // alpha and beta on CPU
284 template<typename NumericT>
285 __global__ void asbs_s_kernel(NumericT * s1,
286  NumericT fac2, unsigned int options2, const NumericT * s2,
287  NumericT fac3, unsigned int options3, const NumericT * s3)
288 {
289  NumericT alpha = fac2;
290  if (options2 & (1 << 0))
291  alpha = -alpha;
292  if (options2 & (1 << 1))
293  alpha = NumericT(1) / alpha;
294 
295  NumericT beta = fac3;
296  if (options3 & (1 << 0))
297  beta = -beta;
298  if (options3 & (1 << 1))
299  beta = NumericT(1) / beta;
300 
301  *s1 += *s2 * alpha + *s3 * beta;
302 }
303 
304 
305 template<typename ScalarT1,
306  typename ScalarT2, typename NumericT1,
307  typename ScalarT3, typename NumericT2>
313  >::type
314 asbs_s(ScalarT1 & s1,
315  ScalarT2 const & s2, NumericT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
316  ScalarT3 const & s3, NumericT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
317 {
318  typedef typename viennacl::result_of::cpu_value_type<ScalarT1>::type value_type;
319 
320  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
321  unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta);
322 
323  value_type temporary_alpha = 0;
325  temporary_alpha = alpha;
326 
327  value_type temporary_beta = 0;
329  temporary_beta = beta;
330 
331  std::cout << "Launching asbs_s_kernel..." << std::endl;
332  asbs_s_kernel<<<1, 1>>>(viennacl::cuda_arg(s1),
333  viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
334  options_alpha,
335  viennacl::cuda_arg(s2),
336  viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
337  options_beta,
338  viennacl::cuda_arg(s3) );
339  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_s_kernel");
340 }
341 
343 
344 template<typename NumericT>
345 __global__ void scalar_swap_kernel(NumericT * s1, NumericT * s2)
346 {
347  NumericT tmp = *s2;
348  *s2 = *s1;
349  *s1 = tmp;
350 }
351 
357 template<typename ScalarT1, typename ScalarT2>
360  >::type
361 swap(ScalarT1 & s1, ScalarT2 & s2)
362 {
363  typedef typename viennacl::result_of::cpu_value_type<ScalarT1>::type value_type;
364 
365  scalar_swap_kernel<<<1, 1>>>(viennacl::cuda_arg(s1), viennacl::cuda_arg(s2));
366 }
367 
368 
369 
370 } //namespace single_threaded
371 } //namespace linalg
372 } //namespace viennacl
373 
374 
375 #endif
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30
unsigned int make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:160
Generic size and resize functionality for different vector and matrix types.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
viennacl::scalar< int > s2
viennacl::scalar< float > s1
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs(ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
__global__ void as_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2)
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:448
Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy)
Definition: forwards.h:469
std::size_t vcl_size_t
Definition: forwards.h:75
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:271
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs_s(ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
Helper struct for checking whether a type is a viennacl::scalar<>
Definition: forwards.h:455
Common routines for CUDA execution.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type as(ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
__global__ void scalar_swap_kernel(NumericT *s1, NumericT *s2)
__global__ void asbs_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
Definition: common.hpp:30
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39
__global__ void asbs_s_kernel(NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
viennacl::backend::mem_handle::cuda_handle_type & arg_reference(viennacl::scalar< NumericT > &s, OtherT)
Definition: common.hpp:188
Simple enable-if variant that uses the SFINAE pattern.