ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
libviennacl_blas3.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
23 // include necessary system headers
24 #include <iostream>
25 #include <vector>
26 
27 // Some helper functions for this tutorial:
28 #include "viennacl.hpp"
29 
31 
32 
33 #include "viennacl/vector.hpp"
34 
35 template<typename ScalarType>
37 {
38  if (s1 > s2 || s1 < s2)
39  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
40  return ScalarType(0);
41 }
42 
43 template<typename ScalarType, typename ViennaCLVectorType>
44 ScalarType diff(std::vector<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
45 {
46  std::vector<ScalarType> v2_cpu(vcl_vec.size());
48  viennacl::copy(vcl_vec, v2_cpu);
49 
50  ScalarType inf_norm = 0;
51  for (unsigned int i=0;i<v1.size(); ++i)
52  {
53  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
54  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
55  else
56  v2_cpu[i] = 0.0;
57 
58  if (v2_cpu[i] > inf_norm)
59  inf_norm = v2_cpu[i];
60  }
61 
62  return inf_norm;
63 }
64 
65 template<typename T, typename U, typename EpsilonT>
66 void check(T const & t, U const & u, EpsilonT eps)
67 {
68  EpsilonT rel_error = std::fabs(static_cast<EpsilonT>(diff(t,u)));
69  if (rel_error > eps)
70  {
71  std::cerr << "Relative error: " << rel_error << std::endl;
72  std::cerr << "Aborting!" << std::endl;
73  exit(EXIT_FAILURE);
74  }
75  std::cout << "SUCCESS ";
76 }
77 
78 
79 template<typename T>
80 T get_value(std::vector<T> & array, ViennaCLInt i, ViennaCLInt j,
83  ViennaCLInt rows, ViennaCLInt cols,
85 {
86  // row-major
87  if (order == ViennaCLRowMajor && trans == ViennaCLTrans)
88  return array[static_cast<std::size_t>((j*stride1 + start1) * cols + (i*stride2 + start2))];
89  else if (order == ViennaCLRowMajor && trans != ViennaCLTrans)
90  return array[static_cast<std::size_t>((i*stride1 + start1) * cols + (j*stride2 + start2))];
91 
92  // column-major
93  else if (order != ViennaCLRowMajor && trans == ViennaCLTrans)
94  return array[static_cast<std::size_t>((j*stride1 + start1) + (i*stride2 + start2) * rows)];
95  return array[static_cast<std::size_t>((i*stride1 + start1) + (j*stride2 + start2) * rows)];
96 }
97 
98 
99 void test_blas(ViennaCLBackend my_backend,
100  float eps_float, double eps_double,
101  std::vector<float> & C_float, std::vector<double> & C_double,
102  std::vector<float> & A_float, std::vector<double> & A_double,
103  std::vector<float> & B_float, std::vector<double> & B_double,
104  ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B,
105  ViennaCLTranspose trans_A, ViennaCLTranspose trans_B,
106  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
107  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
108  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
109 #ifdef VIENNACL_WITH_CUDA
110  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
111  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
112  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
113 #endif
115  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
116  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
117  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
118 #endif
119  );
120 
121 void test_blas(ViennaCLBackend my_backend,
122  float eps_float, double eps_double,
123  std::vector<float> & C_float, std::vector<double> & C_double,
124  std::vector<float> & A_float, std::vector<double> & A_double,
125  std::vector<float> & B_float, std::vector<double> & B_double,
126  ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B,
127  ViennaCLTranspose trans_A, ViennaCLTranspose trans_B,
128  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
129  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
130  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
131 #ifdef VIENNACL_WITH_CUDA
132  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
133  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
134  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
135 #endif
137  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
138  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
139  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
140 #endif
141  )
142 {
143  ViennaCLInt C_size1 = 42;
144  ViennaCLInt C_size2 = 43;
145  ViennaCLInt C_start1 = 10;
146  ViennaCLInt C_start2 = 11;
147  ViennaCLInt C_stride1 = 2;
148  ViennaCLInt C_stride2 = 3;
149  ViennaCLInt C_rows = C_size1 * C_stride1 + C_start1 + 5;
150  ViennaCLInt C_columns = C_size2 * C_stride2 + C_start2 + 5;
151 
152  ViennaCLInt A_size1 = trans_A ? 44 : 42;
153  ViennaCLInt A_size2 = trans_A ? 42 : 44;
154  ViennaCLInt A_start1 = 12;
155  ViennaCLInt A_start2 = 13;
156  ViennaCLInt A_stride1 = 4;
157  ViennaCLInt A_stride2 = 5;
158  ViennaCLInt A_rows = A_size1 * A_stride1 + A_start1 + 5;
159  ViennaCLInt A_columns = A_size2 * A_stride2 + A_start2 + 5;
160 
161  ViennaCLInt B_size1 = trans_B ? 43 : 44;
162  ViennaCLInt B_size2 = trans_B ? 44 : 43;
163  ViennaCLInt B_start1 = 14;
164  ViennaCLInt B_start2 = 15;
165  ViennaCLInt B_stride1 = 6;
166  ViennaCLInt B_stride2 = 7;
167  ViennaCLInt B_rows = B_size1 * B_stride1 + B_start1 + 5;
168  ViennaCLInt B_columns = B_size2 * B_stride2 + B_start2 + 5;
169 
170  // Compute reference:
171  ViennaCLInt size_k = trans_A ? A_size1 : A_size2;
172  for (ViennaCLInt i=0; i<C_size1; ++i)
173  for (ViennaCLInt j=0; j<C_size2; ++j)
174  {
175  float val_float = 0;
176  double val_double = 0;
177  for (ViennaCLInt k=0; k<size_k; ++k)
178  {
179  float val_A_float = get_value(A_float, i, k, A_start1, A_start2, A_stride1, A_stride2, A_rows, A_columns, order_A, trans_A);
180  double val_A_double = get_value(A_double, i, k, A_start1, A_start2, A_stride1, A_stride2, A_rows, A_columns, order_A, trans_A);
181 
182  float val_B_float = get_value(B_float, k, j, B_start1, B_start2, B_stride1, B_stride2, B_rows, B_columns, order_B, trans_B);
183  double val_B_double = get_value(B_double, k, j, B_start1, B_start2, B_stride1, B_stride2, B_rows, B_columns, order_B, trans_B);
184 
185  val_float += val_A_float * val_B_float;
186  val_double += val_A_double * val_B_double;
187  }
188 
189  // write result
190  if (order_C == ViennaCLRowMajor)
191  {
192  C_float [static_cast<std::size_t>((i*C_stride1 + C_start1) * C_columns + (j*C_stride2 + C_start2))] = val_float;
193  C_double[static_cast<std::size_t>((i*C_stride1 + C_start1) * C_columns + (j*C_stride2 + C_start2))] = val_double;
194  }
195  else
196  {
197  C_float [static_cast<std::size_t>((i*C_stride1 + C_start1) + (j*C_stride2 + C_start2) * C_rows)] = val_float;
198  C_double[static_cast<std::size_t>((i*C_stride1 + C_start1) + (j*C_stride2 + C_start2) * C_rows)] = val_double;
199  }
200  }
201 
202  // Run GEMM and compare results:
203  ViennaCLHostSgemm(my_backend,
204  order_A, trans_A, order_B, trans_B, order_C,
205  C_size1, C_size2, size_k,
206  1.0f,
207  viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
208  viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
209  0.0f,
210  viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
211  check(C_float, host_C_float, eps_float);
212 
213  ViennaCLHostDgemm(my_backend,
214  order_A, trans_A, order_B, trans_B, order_C,
215  C_size1, C_size2, size_k,
216  1.0,
217  viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
218  viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
219  0.0,
220  viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
221  check(C_double, host_C_double, eps_double);
222 
223 #ifdef VIENNACL_WITH_CUDA
224  ViennaCLCUDASgemm(my_backend,
225  order_A, trans_A, order_B, trans_B, order_C,
226  C_size1, C_size2, size_k,
227  1.0f,
228  viennacl::cuda_arg(cuda_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
229  viennacl::cuda_arg(cuda_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
230  0.0f,
231  viennacl::cuda_arg(cuda_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
232  check(C_float, cuda_C_float, eps_float);
233 
234  ViennaCLCUDADgemm(my_backend,
235  order_A, trans_A, order_B, trans_B, order_C,
236  C_size1, C_size2, size_k,
237  1.0,
238  viennacl::cuda_arg(cuda_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
239  viennacl::cuda_arg(cuda_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
240  0.0,
241  viennacl::cuda_arg(cuda_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
242  check(C_double, cuda_C_double, eps_double);
243 #endif
244 
245 #ifdef VIENNACL_WITH_OPENCL
246  ViennaCLOpenCLSgemm(my_backend,
247  order_A, trans_A, order_B, trans_B, order_C,
248  C_size1, C_size2, size_k,
249  1.0f,
250  viennacl::traits::opencl_handle(opencl_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
251  viennacl::traits::opencl_handle(opencl_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
252  0.0f,
253  viennacl::traits::opencl_handle(opencl_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
254  check(C_float, opencl_C_float, eps_float);
255 
256  if (opencl_A_double != NULL && opencl_B_double != NULL && opencl_C_double != NULL)
257  {
258  ViennaCLOpenCLDgemm(my_backend,
259  order_A, trans_A, order_B, trans_B, order_C,
260  C_size1, C_size2, size_k,
261  1.0,
262  viennacl::traits::opencl_handle(*opencl_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows,
263  viennacl::traits::opencl_handle(*opencl_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows,
264  0.0,
265  viennacl::traits::opencl_handle(*opencl_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows);
266  check(C_double, *opencl_C_double, eps_double);
267  }
268 #endif
269 
270  std::cout << std::endl;
271 }
272 
273 void test_blas(ViennaCLBackend my_backend,
274  float eps_float, double eps_double,
275  std::vector<float> & C_float, std::vector<double> & C_double,
276  std::vector<float> & A_float, std::vector<double> & A_double,
277  std::vector<float> & B_float, std::vector<double> & B_double,
278  ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B,
279  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
280  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
281  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
282 #ifdef VIENNACL_WITH_CUDA
283  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
284  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
285  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
286 #endif
288  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
289  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
290  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
291 #endif
292  );
293 
294 void test_blas(ViennaCLBackend my_backend,
295  float eps_float, double eps_double,
296  std::vector<float> & C_float, std::vector<double> & C_double,
297  std::vector<float> & A_float, std::vector<double> & A_double,
298  std::vector<float> & B_float, std::vector<double> & B_double,
299  ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B,
300  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
301  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
302  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
303 #ifdef VIENNACL_WITH_CUDA
304  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
305  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
306  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
307 #endif
309  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
310  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
311  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
312 #endif
313  )
314 {
315  std::cout << " -> trans-trans: ";
316  test_blas(my_backend,
317  eps_float, eps_double,
318  C_float, C_double, A_float, A_double, B_float, B_double,
319  order_C, order_A, order_B,
321  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
322 #ifdef VIENNACL_WITH_CUDA
323  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
324 #endif
325 #ifdef VIENNACL_WITH_OPENCL
326  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
327 #endif
328  );
329 
330  std::cout << " -> trans-no: ";
331  test_blas(my_backend,
332  eps_float, eps_double,
333  C_float, C_double, A_float, A_double, B_float, B_double,
334  order_C, order_A, order_B,
336  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
337 #ifdef VIENNACL_WITH_CUDA
338  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
339 #endif
340 #ifdef VIENNACL_WITH_OPENCL
341  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
342 #endif
343  );
344 
345  std::cout << " -> no-trans: ";
346  test_blas(my_backend,
347  eps_float, eps_double,
348  C_float, C_double, A_float, A_double, B_float, B_double,
349  order_C, order_A, order_B,
351  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
352 #ifdef VIENNACL_WITH_CUDA
353  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
354 #endif
355 #ifdef VIENNACL_WITH_OPENCL
356  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
357 #endif
358  );
359 
360  std::cout << " -> no-no: ";
361  test_blas(my_backend,
362  eps_float, eps_double,
363  C_float, C_double, A_float, A_double, B_float, B_double,
364  order_C, order_A, order_B,
366  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
367 #ifdef VIENNACL_WITH_CUDA
368  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
369 #endif
370 #ifdef VIENNACL_WITH_OPENCL
371  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
372 #endif
373  );
374 
375 }
376 
377 
378 void test_blas(ViennaCLBackend my_backend,
379  float eps_float, double eps_double,
380  std::vector<float> & C_float, std::vector<double> & C_double,
381  std::vector<float> & A_float, std::vector<double> & A_double,
382  std::vector<float> & B_float, std::vector<double> & B_double,
383  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
384  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
385  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
386 #ifdef VIENNACL_WITH_CUDA
387  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
388  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
389  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
390 #endif
392  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
393  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
394  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
395 #endif
396  );
397 
398 void test_blas(ViennaCLBackend my_backend,
399  float eps_float, double eps_double,
400  std::vector<float> & C_float, std::vector<double> & C_double,
401  std::vector<float> & A_float, std::vector<double> & A_double,
402  std::vector<float> & B_float, std::vector<double> & B_double,
403  viennacl::vector<float> & host_C_float, viennacl::vector<double> & host_C_double,
404  viennacl::vector<float> & host_A_float, viennacl::vector<double> & host_A_double,
405  viennacl::vector<float> & host_B_float, viennacl::vector<double> & host_B_double
406 #ifdef VIENNACL_WITH_CUDA
407  , viennacl::vector<float> & cuda_C_float, viennacl::vector<double> & cuda_C_double
408  , viennacl::vector<float> & cuda_A_float, viennacl::vector<double> & cuda_A_double
409  , viennacl::vector<float> & cuda_B_float, viennacl::vector<double> & cuda_B_double
410 #endif
412  , viennacl::vector<float> & opencl_C_float, viennacl::vector<double> * opencl_C_double
413  , viennacl::vector<float> & opencl_A_float, viennacl::vector<double> * opencl_A_double
414  , viennacl::vector<float> & opencl_B_float, viennacl::vector<double> * opencl_B_double
415 #endif
416  )
417 {
418  std::cout << " -> C: row, A: row, B: row" << std::endl;
419  test_blas(my_backend,
420  eps_float, eps_double,
421  C_float, C_double, A_float, A_double, B_float, B_double,
423  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
424 #ifdef VIENNACL_WITH_CUDA
425  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
426 #endif
427 #ifdef VIENNACL_WITH_OPENCL
428  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
429 #endif
430  );
431 
432  std::cout << " -> C: row, A: row, B: col" << std::endl;
433  test_blas(my_backend,
434  eps_float, eps_double,
435  C_float, C_double, A_float, A_double, B_float, B_double,
437  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
438 #ifdef VIENNACL_WITH_CUDA
439  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
440 #endif
441 #ifdef VIENNACL_WITH_OPENCL
442  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
443 #endif
444  );
445 
446  std::cout << " -> C: row, A: col, B: row" << std::endl;
447  test_blas(my_backend,
448  eps_float, eps_double,
449  C_float, C_double, A_float, A_double, B_float, B_double,
451  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
452 #ifdef VIENNACL_WITH_CUDA
453  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
454 #endif
455 #ifdef VIENNACL_WITH_OPENCL
456  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
457 #endif
458  );
459 
460  std::cout << " -> C: row, A: col, B: col" << std::endl;
461  test_blas(my_backend,
462  eps_float, eps_double,
463  C_float, C_double, A_float, A_double, B_float, B_double,
465  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
466 #ifdef VIENNACL_WITH_CUDA
467  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
468 #endif
469 #ifdef VIENNACL_WITH_OPENCL
470  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
471 #endif
472  );
473 
474 
475  std::cout << " -> C: col, A: row, B: row" << std::endl;
476  test_blas(my_backend,
477  eps_float, eps_double,
478  C_float, C_double, A_float, A_double, B_float, B_double,
480  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
481 #ifdef VIENNACL_WITH_CUDA
482  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
483 #endif
484 #ifdef VIENNACL_WITH_OPENCL
485  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
486 #endif
487  );
488 
489  std::cout << " -> C: col, A: row, B: col" << std::endl;
490  test_blas(my_backend,
491  eps_float, eps_double,
492  C_float, C_double, A_float, A_double, B_float, B_double,
494  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
495 #ifdef VIENNACL_WITH_CUDA
496  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
497 #endif
498 #ifdef VIENNACL_WITH_OPENCL
499  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
500 #endif
501  );
502 
503  std::cout << " -> C: col, A: col, B: row" << std::endl;
504  test_blas(my_backend,
505  eps_float, eps_double,
506  C_float, C_double, A_float, A_double, B_float, B_double,
508  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
509 #ifdef VIENNACL_WITH_CUDA
510  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
511 #endif
512 #ifdef VIENNACL_WITH_OPENCL
513  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
514 #endif
515  );
516 
517  std::cout << " -> C: col, A: col, B: col" << std::endl;
518  test_blas(my_backend,
519  eps_float, eps_double,
520  C_float, C_double, A_float, A_double, B_float, B_double,
522  host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double
523 #ifdef VIENNACL_WITH_CUDA
524  , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double
525 #endif
526 #ifdef VIENNACL_WITH_OPENCL
527  , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double
528 #endif
529  );
530 
531 }
532 
533 
534 
535 
536 int main()
537 {
540 
541  std::size_t size = 500*500;
542  float eps_float = 1e-5f;
543  double eps_double = 1e-12;
544 
545  std::vector<float> C_float(size);
546  std::vector<float> A_float(size);
547  std::vector<float> B_float(size);
548 
549  std::vector<double> C_double(size);
550  std::vector<double> A_double(size);
551  std::vector<double> B_double(size);
552 
553  // fill with random data:
554 
555  for (std::size_t i = 0; i < size; ++i)
556  {
557  C_float[i] = 0.5f + 0.1f * randomFloat();
558  A_float[i] = 0.5f + 0.1f * randomFloat();
559  B_float[i] = 0.5f + 0.1f * randomFloat();
560 
561  C_double[i] = 0.5 + 0.2 * randomDouble();
562  A_double[i] = 0.5 + 0.2 * randomDouble();
563  B_double[i] = 0.5 + 0.2 * randomDouble();
564  }
565 
566 
567  // Host setup
568  ViennaCLBackend my_backend;
569  ViennaCLBackendCreate(&my_backend);
570 
571  viennacl::vector<float> host_C_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_float, host_C_float);
572  viennacl::vector<float> host_A_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_float, host_A_float);
573  viennacl::vector<float> host_B_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_float, host_B_float);
574 
575  viennacl::vector<double> host_C_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_double, host_C_double);
576  viennacl::vector<double> host_A_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_double, host_A_double);
577  viennacl::vector<double> host_B_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_double, host_B_double);
578 
579  // CUDA setup
580 #ifdef VIENNACL_WITH_CUDA
581  viennacl::vector<float> cuda_C_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_float, cuda_C_float);
582  viennacl::vector<float> cuda_A_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_float, cuda_A_float);
583  viennacl::vector<float> cuda_B_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_float, cuda_B_float);
584 
585  viennacl::vector<double> cuda_C_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_double, cuda_C_double);
586  viennacl::vector<double> cuda_A_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_double, cuda_A_double);
587  viennacl::vector<double> cuda_B_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_double, cuda_B_double);
588 #endif
589 
590  // OpenCL setup
591 #ifdef VIENNACL_WITH_OPENCL
592  ViennaCLInt context_id = 0;
593  viennacl::vector<float> opencl_C_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_float, opencl_C_float);
594  viennacl::vector<float> opencl_A_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_float, opencl_A_float);
595  viennacl::vector<float> opencl_B_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_float, opencl_B_float);
596 
597  viennacl::vector<double> *opencl_C_double = NULL;
598  viennacl::vector<double> *opencl_A_double = NULL;
599  viennacl::vector<double> *opencl_B_double = NULL;
600 
601  if ( viennacl::ocl::current_device().double_support() )
602  {
603  opencl_C_double = new viennacl::vector<double>(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_double, *opencl_C_double);
604  opencl_A_double = new viennacl::vector<double>(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_double, *opencl_A_double);
605  opencl_B_double = new viennacl::vector<double>(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_double, *opencl_B_double);
606  }
607 
608  ViennaCLBackendSetOpenCLContextID(my_backend, context_id);
609 #endif
610 
611  // consistency checks:
612  check(C_float, host_C_float, eps_float);
613  check(A_float, host_A_float, eps_float);
614  check(B_float, host_B_float, eps_float);
615 
616  check(C_double, host_C_double, eps_double);
617  check(A_double, host_A_double, eps_double);
618  check(B_double, host_B_double, eps_double);
619 
620 #ifdef VIENNACL_WITH_CUDA
621  check(C_float, cuda_C_float, eps_float);
622  check(A_float, cuda_A_float, eps_float);
623  check(B_float, cuda_B_float, eps_float);
624 
625  check(C_double, cuda_C_double, eps_double);
626  check(A_double, cuda_A_double, eps_double);
627  check(B_double, cuda_B_double, eps_double);
628 #endif
629 #ifdef VIENNACL_WITH_OPENCL
630  check(C_float, opencl_C_float, eps_float);
631  check(A_float, opencl_A_float, eps_float);
632  check(B_float, opencl_B_float, eps_float);
633 
634  if ( viennacl::ocl::current_device().double_support() )
635  {
636  check(C_double, *opencl_C_double, eps_double);
637  check(A_double, *opencl_A_double, eps_double);
638  check(B_double, *opencl_B_double, eps_double);
639  }
640 #endif
641 
642  std::cout << std::endl;
643 
644  test_blas(my_backend,
645  eps_float, eps_double,
646  C_float, C_double,
647  A_float, A_double,
648  B_float, B_double,
649  host_C_float, host_C_double,
650  host_A_float, host_A_double,
651  host_B_float, host_B_double
652 #ifdef VIENNACL_WITH_CUDA
653  , cuda_C_float, cuda_C_double
654  , cuda_A_float, cuda_A_double
655  , cuda_B_float, cuda_B_double
656 #endif
658  , opencl_C_float, opencl_C_double
659  , opencl_A_float, opencl_A_double
660  , opencl_B_float, opencl_B_double
661 #endif
662  );
663 
664 
665 #ifdef VIENNACL_WITH_OPENCL
666  //cleanup
667  if ( viennacl::ocl::current_device().double_support() )
668  {
669  delete opencl_C_double;
670  delete opencl_A_double;
671  delete opencl_B_double;
672  }
673 
674 #endif
675 
676  ViennaCLBackendDestroy(&my_backend);
677 
678  //
679  // That's it.
680  //
681  std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl;
682 
683  return EXIT_SUCCESS;
684 }
685 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend *backend)
Definition: backend.cpp:25
ViennaCLTranspose
Definition: viennacl.hpp:68
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
Generic backend for CUDA, OpenCL, host-based stuff.
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id)
Definition: backend.cpp:32
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
Definition: blas3_host.cpp:108
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
Random number generator for returning uniformly distributed values in the closed interval [0...
Definition: random.hpp:44
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend *backend)
Definition: backend.cpp:39
ViennaCLOrder
Definition: viennacl.hpp:61
void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector< float > &C_float, std::vector< double > &C_double, std::vector< float > &A_float, std::vector< double > &A_double, std::vector< float > &B_float, std::vector< double > &B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, ViennaCLTranspose trans_A, ViennaCLTranspose trans_B, viennacl::vector< float > &host_C_float, viennacl::vector< double > &host_C_double, viennacl::vector< float > &host_A_float, viennacl::vector< double > &host_A_double, viennacl::vector< float > &host_B_float, viennacl::vector< double > &host_B_double)
void check(T const &t, U const &u, EpsilonT eps)
int ViennaCLInt
Definition: viennacl.hpp:48
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
float ScalarType
Definition: fft_1d.cpp:42
int main()
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39
#define VIENNACL_WITH_OPENCL
Definition: opencl.cpp:30
T get_value(std::vector< T > &array, ViennaCLInt i, ViennaCLInt j, ViennaCLInt start1, ViennaCLInt start2, ViennaCLInt stride1, ViennaCLInt stride2, ViennaCLInt rows, ViennaCLInt cols, ViennaCLOrder order, ViennaCLTranspose trans)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
Definition: blas3_host.cpp:85
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.
Definition: backend.hpp:225