ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_float_double.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 #include <iomanip>
28 #include <cmath>
29 
30 //
31 // *** ViennaCL
32 //
33 //#define VIENNACL_DEBUG_ALL
34 
35 #include "viennacl/vector.hpp"
42 #include "viennacl/linalg/sum.hpp"
43 
45 
46 
/**
 * Strided, non-owning view onto a caller-provided array.
 *
 * Logical element i of the view maps to values[start + i * increment] of the
 * underlying array. The class stores only the raw pointer and the three
 * layout parameters; it never allocates, copies or frees the array, so the
 * caller must keep the storage alive for as long as the proxy is used.
 * No bounds checking is performed.
 *
 * @tparam NumericT  Element type of the underlying array.
 */
template<typename NumericT>
class vector_proxy
{
public:
  /**
   * @param p_values      Pointer to the first element of the underlying array.
   * @param start_idx     Offset (in elements) of logical element 0.
   * @param increment     Distance (in elements) between consecutive logical elements.
   * @param num_elements  Number of logical elements reported by size().
   */
  vector_proxy(NumericT * p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
   : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}

  /** Read-only access to logical element `index` (unchecked). */
  NumericT const & operator[](std::size_t index) const { return values_[start_ + index * inc_]; }

  /** Mutable access to logical element `index` (unchecked). */
  NumericT       & operator[](std::size_t index)       { return values_[start_ + index * inc_]; }

  /** Number of logical elements in the view, as passed to the constructor. */
  std::size_t size() const { return size_; }

private:
  NumericT * values_;   // underlying array (not owned)
  std::size_t start_;   // offset of logical element 0
  std::size_t inc_;     // stride between logical elements
  std::size_t size_;    // number of logical elements
};
65 
66 template<typename NumericT>
68 {
69  std::vector<NumericT> std_vec(host_vec.size());
70 
71  for (std::size_t i=0; i<host_vec.size(); ++i)
72  std_vec[i] = host_vec[i];
73 
74  viennacl::copy(std_vec.begin(), std_vec.end(), vcl_vec.begin());
75 }
76 
77 template<typename NumericT>
79 {
80  std::vector<NumericT> std_vec(vcl_vec.size());
81 
82  viennacl::copy(vcl_vec.begin(), vcl_vec.end(), std_vec.begin());
83 
84  for (std::size_t i=0; i<host_vec.size(); ++i)
85  host_vec[i] = std_vec[i];
86 }
87 
88 
89 //
90 // -------------------------------------------------------------
91 //
92 template<typename ScalarType>
94 {
96  if (std::fabs(s1 - s2) > 0 )
97  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
98  return 0;
99 }
100 //
101 // -------------------------------------------------------------
102 //
103 template<typename ScalarType>
105 {
107  if (std::fabs(s1 - s2) > 0 )
108  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
109  return 0;
110 }
111 //
112 // -------------------------------------------------------------
113 //
114 template<typename ScalarType>
116 {
118  if (std::fabs(s1 - s2) > 0 )
119  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
120  return 0;
121 }
122 //
123 // -------------------------------------------------------------
124 //
125 template<typename ScalarType, typename ViennaCLVectorType>
126 ScalarType diff(vector_proxy<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
127 {
128  std::vector<ScalarType> v2_cpu(vcl_vec.size());
130  viennacl::copy(vcl_vec, v2_cpu);
131 
132  for (unsigned int i=0;i<v1.size(); ++i)
133  {
134  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
135  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
136  else
137  v2_cpu[i] = 0.0;
138  }
139 
140  ScalarType ret = 0;
141  for (std::size_t i=0; i<v2_cpu.size(); ++i)
142  ret = std::max(ret, std::fabs(v2_cpu[i]));
143  return ret;
144 }
145 
146 
/**
 * Checks that two operands agree to within a relative tolerance.
 *
 * The relative difference is obtained from diff(t1, t2); if it is not within
 * epsilon, an error line is printed to std::cout.
 *
 * @param t1       First operand (passed as first argument to diff()).
 * @param t2       Second operand (passed as second argument to diff()).
 * @param epsilon  Largest relative difference still accepted.
 * @return EXIT_SUCCESS if the difference is within epsilon, EXIT_FAILURE otherwise.
 */
template<typename T1, typename T2>
int check(T1 const & t1, T2 const & t2, double epsilon)
{
  double const rel_error = std::fabs(diff(t1, t2));

  // Deliberately written as a negated "<=": a NaN relative difference compares
  // false against every threshold, so "rel_error > epsilon" would silently
  // accept it. With this form NaN is treated as a failure.
  if (!(rel_error <= epsilon))
  {
    std::cout << "# Error! Relative difference: " << rel_error << std::endl;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
160 
161 
162 //
163 // -------------------------------------------------------------
164 //
165 template< typename NumericT, typename Epsilon, typename HostVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 >
166 int test(Epsilon const& epsilon,
167  HostVectorType & host_v1, HostVectorType & host_v2,
168  ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
169 {
170  int retval = EXIT_SUCCESS;
171 
173 
174  NumericT cpu_result = NumericT(42.0);
175  viennacl::scalar<NumericT> gpu_result = NumericT(43.0);
176 
177  //
178  // Initializer:
179  //
180  std::cout << "Checking for zero_vector initializer..." << std::endl;
181  for (std::size_t i=0; i<host_v1.size(); ++i)
182  host_v1[i] = NumericT(0);
183  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
184  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
185  return EXIT_FAILURE;
186 
187  std::cout << "Checking for scalar_vector initializer..." << std::endl;
188  for (std::size_t i=0; i<host_v1.size(); ++i)
189  host_v1[i] = NumericT(cpu_result);
190  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
191  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
192  return EXIT_FAILURE;
193 
194  for (std::size_t i=0; i<host_v1.size(); ++i)
195  host_v1[i] = NumericT(gpu_result);
196  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
197  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
198  return EXIT_FAILURE;
199 
200  std::cout << "Checking for unit_vector initializer..." << std::endl;
201  for (std::size_t i=0; i<host_v1.size(); ++i)
202  host_v1[i] = NumericT(0);
203  host_v1[5] = NumericT(1);
204  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
205  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
206  return EXIT_FAILURE;
207 
208 
209  for (std::size_t i=0; i<host_v1.size(); ++i)
210  {
211  host_v1[i] = NumericT(1.0) + randomNumber();
212  host_v2[i] = NumericT(1.0) + randomNumber();
213  }
214 
215  proxy_copy(host_v1, vcl_v1); //resync
216  proxy_copy(host_v2, vcl_v2);
217 
218  std::cout << "Checking for successful copy..." << std::endl;
219  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
220  return EXIT_FAILURE;
221  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
222  return EXIT_FAILURE;
223 
224  //
225  // Part 1: Norms and inner product
226  //
227 
228  // --------------------------------------------------------------------------
229  std::cout << "Testing inner_prod..." << std::endl;
230  cpu_result = 0;
231  for (std::size_t i=0; i<host_v1.size(); ++i)
232  cpu_result += host_v1[i] * host_v2[i];
233  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
234  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
235 
236  std::cout << "Reference: " << cpu_result << std::endl;
237  std::cout << cpu_result2 << std::endl;
238  std::cout << gpu_result << std::endl;
239  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
240  return EXIT_FAILURE;
241  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
242  return EXIT_FAILURE;
243 
244  cpu_result = 0;
245  for (std::size_t i=0; i<host_v1.size(); ++i)
246  cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
247  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
248  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
249 
250  std::cout << "Reference: " << cpu_result << std::endl;
251  std::cout << cpu_result3 << std::endl;
252  std::cout << gpu_result << std::endl;
253  if (check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
254  return EXIT_FAILURE;
255  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
256  return EXIT_FAILURE;
257 
258  // --------------------------------------------------------------------------
259  std::cout << "Testing norm_1..." << std::endl;
260  cpu_result = 0;
261  for (std::size_t i=0; i<host_v1.size(); ++i)
262  cpu_result += std::fabs(host_v1[i]);
263  gpu_result = viennacl::linalg::norm_1(vcl_v1);
264 
265  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
266  return EXIT_FAILURE;
267 
268  gpu_result = 2 * cpu_result; //reset
269  cpu_result = 0;
270  for (std::size_t i=0; i<host_v1.size(); ++i)
271  cpu_result += std::fabs(host_v1[i]);
272  gpu_result = cpu_result;
273  cpu_result = 0;
274  cpu_result = viennacl::linalg::norm_1(vcl_v1);
275 
276  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
277  return EXIT_FAILURE;
278 
279  cpu_result = 0;
280  for (std::size_t i=0; i<host_v1.size(); ++i)
281  cpu_result += std::fabs(host_v1[i] + host_v2[i]);
282  gpu_result = cpu_result;
283  cpu_result = 0;
284  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);
285 
286  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
287  return EXIT_FAILURE;
288 
289  // --------------------------------------------------------------------------
290  std::cout << "Testing norm_2..." << std::endl;
291  cpu_result = 0;
292  for (std::size_t i=0; i<host_v1.size(); ++i)
293  cpu_result += host_v1[i] * host_v1[i];
294  cpu_result = std::sqrt(cpu_result);
295  gpu_result = viennacl::linalg::norm_2(vcl_v1);
296 
297  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
298  return EXIT_FAILURE;
299 
300  gpu_result = 2 * cpu_result; //reset
301  cpu_result = 0;
302  for (std::size_t i=0; i<host_v1.size(); ++i)
303  cpu_result += host_v1[i] * host_v1[i];
304  gpu_result = std::sqrt(cpu_result);
305  cpu_result = viennacl::linalg::norm_2(vcl_v1);
306 
307  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
308  return EXIT_FAILURE;
309 
310  cpu_result = 0;
311  for (std::size_t i=0; i<host_v1.size(); ++i)
312  cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
313  gpu_result = std::sqrt(cpu_result);
314  cpu_result = viennacl::linalg::norm_2(vcl_v1 + vcl_v2);
315 
316  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
317  return EXIT_FAILURE;
318 
319  // --------------------------------------------------------------------------
320  std::cout << "Testing norm_inf..." << std::endl;
321  cpu_result = std::fabs(host_v1[0]);
322  for (std::size_t i=0; i<host_v1.size(); ++i)
323  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
324  gpu_result = viennacl::linalg::norm_inf(vcl_v1);
325 
326  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
327  return EXIT_FAILURE;
328 
329  gpu_result = 2 * cpu_result; //reset
330  cpu_result = std::fabs(host_v1[0]);
331  for (std::size_t i=0; i<host_v1.size(); ++i)
332  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
333  gpu_result = cpu_result;
334  cpu_result = 0;
335  cpu_result = viennacl::linalg::norm_inf(vcl_v1);
336 
337  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
338  return EXIT_FAILURE;
339 
340  cpu_result = std::fabs(host_v1[0]);
341  for (std::size_t i=0; i<host_v1.size(); ++i)
342  cpu_result = std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
343  gpu_result = cpu_result;
344  cpu_result = 0;
345  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);
346 
347  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
348  return EXIT_FAILURE;
349 
350  // --------------------------------------------------------------------------
351  std::cout << "Testing index_norm_inf..." << std::endl;
352  std::size_t cpu_index = 0;
353  cpu_result = std::fabs(host_v1[0]);
354  for (std::size_t i=0; i<host_v1.size(); ++i)
355  {
356  if (std::fabs(host_v1[i]) > cpu_result)
357  {
358  cpu_result = std::fabs(host_v1[i]);
359  cpu_index = i;
360  }
361  }
362  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);
363 
364  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
365  return EXIT_FAILURE;
366  // --------------------------------------------------------------------------
367  cpu_result = host_v1[cpu_index];
368  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];
369 
370  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
371  return EXIT_FAILURE;
372 
373  cpu_result = std::fabs(host_v1[0] + host_v2[0]);
374  for (std::size_t i=0; i<host_v1.size(); ++i)
375  {
376  if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
377  {
378  cpu_result = std::fabs(host_v1[i] + host_v2[i]);
379  cpu_index = i;
380  }
381  }
382  cpu_result = host_v1[cpu_index];
383  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];
384 
385  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
386  return EXIT_FAILURE;
387 
388 
389  // --------------------------------------------------------------------------
390  std::cout << "Testing max..." << std::endl;
391  cpu_result = host_v1[0];
392  for (std::size_t i=0; i<host_v1.size(); ++i)
393  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
394  gpu_result = viennacl::linalg::max(vcl_v1);
395 
396  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
397  return EXIT_FAILURE;
398 
399  cpu_result = host_v1[0];
400  for (std::size_t i=0; i<host_v1.size(); ++i)
401  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
402  gpu_result = cpu_result;
403  cpu_result *= 2; //reset
404  cpu_result = viennacl::linalg::max(vcl_v1);
405 
406  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
407  return EXIT_FAILURE;
408 
409  cpu_result = host_v1[0] + host_v2[0];
410  for (std::size_t i=0; i<host_v1.size(); ++i)
411  cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
412  gpu_result = cpu_result;
413  cpu_result *= 2; //reset
414  cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2);
415 
416  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
417  return EXIT_FAILURE;
418 
419 
420  // --------------------------------------------------------------------------
421  std::cout << "Testing min..." << std::endl;
422  cpu_result = host_v1[0];
423  for (std::size_t i=0; i<host_v1.size(); ++i)
424  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
425  gpu_result = viennacl::linalg::min(vcl_v1);
426 
427  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
428  return EXIT_FAILURE;
429 
430  cpu_result = host_v1[0];
431  for (std::size_t i=0; i<host_v1.size(); ++i)
432  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
433  gpu_result = cpu_result;
434  cpu_result *= 2; //reset
435  cpu_result = viennacl::linalg::min(vcl_v1);
436 
437  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
438  return EXIT_FAILURE;
439 
440  cpu_result = host_v1[0] + host_v2[0];
441  for (std::size_t i=0; i<host_v1.size(); ++i)
442  cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
443  gpu_result = cpu_result;
444  cpu_result *= 2; //reset
445  cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2);
446 
447  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
448  return EXIT_FAILURE;
449 
450  // --------------------------------------------------------------------------
451  std::cout << "Testing sum..." << std::endl;
452  cpu_result = 0;
453  for (std::size_t i=0; i<host_v1.size(); ++i)
454  cpu_result += host_v1[i];
455  cpu_result2 = viennacl::linalg::sum(vcl_v1);
456  gpu_result = viennacl::linalg::sum(vcl_v1);
457 
458  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
459  return EXIT_FAILURE;
460  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
461  return EXIT_FAILURE;
462 
463  cpu_result = 0;
464  for (std::size_t i=0; i<host_v1.size(); ++i)
465  cpu_result += host_v1[i] + host_v2[i];
466  cpu_result3 = viennacl::linalg::sum(vcl_v1 + vcl_v2);
467  gpu_result = viennacl::linalg::sum(vcl_v1 + vcl_v2);
468 
469  if (check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
470  return EXIT_FAILURE;
471  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
472  return EXIT_FAILURE;
473 
474 
475  //
476  // Plane rotation and assignments
477  //
478 
479  // --------------------------------------------------------------------------
480 
481  for (std::size_t i=0; i<host_v1.size(); ++i)
482  {
483  NumericT temp = NumericT(1.1) * host_v1[i] + NumericT(2.3) * host_v2[i];
484  host_v2[i] = - NumericT(2.3) * host_v1[i] + NumericT(1.1) * host_v2[i];
485  host_v1[i] = temp;
486  }
487  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3));
488 
489  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
490  return EXIT_FAILURE;
491  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
492  return EXIT_FAILURE;
493 
494  // --------------------------------------------------------------------------
495 
496  std::cout << "Testing assignments..." << std::endl;
497  NumericT val = static_cast<NumericT>(1e-1);
498  for (size_t i=0; i < host_v1.size(); ++i)
499  host_v1[i] = val;
500 
501  for (size_t i=0; i < vcl_v1.size(); ++i)
502  vcl_v1(i) = val;
503 
504  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
505  return EXIT_FAILURE;
506 
507  std::cout << "Testing assignments via iterators..." << std::endl;
508 
509  host_v1[2] = static_cast<NumericT>(1.9);
510  vcl_v1[2] = static_cast<NumericT>(1.9);
511 
512  host_v1[2] = static_cast<NumericT>(1.5);
513  typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
514  ++vcl_v1_it;
515  ++vcl_v1_it;
516  *vcl_v1_it = static_cast<NumericT>(1.5);
517 
518  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
519  return EXIT_FAILURE;
520 
521  //
522  // multiplication and division of vectors by scalars
523  //
524  for (std::size_t i=0; i < host_v1.size(); ++i)
525  {
526  host_v1[i] = NumericT(1.0) + randomNumber();
527  host_v2[i] = NumericT(3.1415) * host_v1[i];
528  }
529  proxy_copy(host_v1, vcl_v1); //resync
530  proxy_copy(host_v2, vcl_v2);
531 
532  std::cout << "Testing scaling with CPU scalar..." << std::endl;
533  NumericT alpha = static_cast<NumericT>(1.7182);
534  viennacl::scalar<NumericT> gpu_alpha = alpha;
535 
536  for (std::size_t i=0; i < host_v1.size(); ++i)
537  host_v1[i] *= NumericT(long(alpha));
538  vcl_v1 *= long(alpha);
539 
540  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
541  return EXIT_FAILURE;
542 
543  for (std::size_t i=0; i < host_v1.size(); ++i)
544  host_v1[i] *= NumericT(float(alpha));
545  vcl_v1 *= float(alpha);
546 
547  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
548  return EXIT_FAILURE;
549 
550  for (std::size_t i=0; i < host_v1.size(); ++i)
551  host_v1[i] *= NumericT(double(alpha));
552  vcl_v1 *= double(alpha);
553 
554  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
555  return EXIT_FAILURE;
556 
557 
558  std::cout << "Testing scaling with GPU scalar..." << std::endl;
559  for (std::size_t i=0; i < host_v1.size(); ++i)
560  host_v1[i] *= alpha;
561  vcl_v1 *= gpu_alpha;
562 
563  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
564  return EXIT_FAILURE;
565 
566  std::cout << "Testing scaling with scalar expression..." << std::endl;
567  cpu_result = 0;
568  for (std::size_t i=0; i < host_v1.size(); ++i)
569  cpu_result += host_v1[i] * host_v2[i];
570  for (std::size_t i=0; i < host_v1.size(); ++i)
571  host_v1[i] *= cpu_result;
572  vcl_v1 *= viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
573 
574  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
575  return EXIT_FAILURE;
576 
577  NumericT beta = static_cast<NumericT>(1.4153);
578  viennacl::scalar<NumericT> gpu_beta = beta;
579 
580  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
581  for (std::size_t i=0; i < host_v1.size(); ++i)
582  host_v1[i] /= NumericT(long(beta));
583  vcl_v1 /= long(beta);
584 
585  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
586  return EXIT_FAILURE;
587 
588  for (std::size_t i=0; i < host_v1.size(); ++i)
589  host_v1[i] /= NumericT(float(beta));
590  vcl_v1 /= float(beta);
591 
592  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
593  return EXIT_FAILURE;
594 
595  for (std::size_t i=0; i < host_v1.size(); ++i)
596  host_v1[i] /= NumericT(double(beta));
597  vcl_v1 /= double(beta);
598 
599  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
600  return EXIT_FAILURE;
601 
602 
603  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
604  for (std::size_t i=0; i < host_v1.size(); ++i)
605  host_v1[i] /= beta;
606  vcl_v1 /= gpu_beta;
607 
608  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
609  return EXIT_FAILURE;
610 
611 
612 
613  //
614  // add and inplace_add of vectors
615  //
616  for (size_t i=0; i < host_v1.size(); ++i)
617  {
618  host_v1[i] = NumericT(1.0) + randomNumber();
619  host_v2[i] = NumericT(3.1415) * host_v1[i];
620  }
621  proxy_copy(host_v1, vcl_v1); //resync
622  proxy_copy(host_v2, vcl_v2);
623 
624  std::cout << "Testing add on vector..." << std::endl;
625 
626  std::cout << "Checking for successful copy..." << std::endl;
627  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
628  return EXIT_FAILURE;
629  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
630  return EXIT_FAILURE;
631 
632  for (size_t i=0; i < host_v1.size(); ++i)
633  host_v1[i] = host_v1[i] + host_v2[i];
634  vcl_v1 = vcl_v1 + vcl_v2;
635 
636  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
637  return EXIT_FAILURE;
638 
639  std::cout << "Testing add on vector with flipsign..." << std::endl;
640  for (size_t i=0; i < host_v1.size(); ++i)
641  host_v1[i] = - host_v1[i] + host_v2[i];
642  vcl_v1 = - vcl_v1 + vcl_v2;
643 
644  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
645  return EXIT_FAILURE;
646 
647  std::cout << "Testing inplace-add on vector..." << std::endl;
648  for (size_t i=0; i < host_v1.size(); ++i)
649  host_v1[i] += host_v2[i];
650  vcl_v1 += vcl_v2;
651 
652  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
653  return EXIT_FAILURE;
654 
655  std::cout << "Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
656  cpu_result = 0;
657  for (std::size_t i=0; i < host_v1.size(); ++i)
658  cpu_result += host_v1[i] * host_v2[i];
659  for (std::size_t i=0; i < host_v1.size(); ++i)
660  host_v1[i] = cpu_result * host_v2[i];
661  //host_v1 = inner_prod(host_v1, host_v2) * host_v2;
662  vcl_v1 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2;
663 
664  //
665  // subtract and inplace_subtract of vectors
666  //
667  std::cout << "Testing sub on vector..." << std::endl;
668  for (std::size_t i=0; i < host_v1.size(); ++i)
669  host_v2[i] = NumericT(3.1415) * host_v1[i];
670  proxy_copy(host_v1, vcl_v1);
671  proxy_copy(host_v2, vcl_v2);
672 
673  for (std::size_t i=0; i < host_v1.size(); ++i)
674  host_v1[i] = host_v1[i] - host_v2[i];
675  vcl_v1 = vcl_v1 - vcl_v2;
676 
677  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
678  return EXIT_FAILURE;
679 
680  std::cout << "Testing inplace-sub on vector..." << std::endl;
681  for (std::size_t i=0; i < host_v1.size(); ++i)
682  host_v1[i] -= host_v2[i];
683  vcl_v1 -= vcl_v2;
684 
685  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
686  return EXIT_FAILURE;
687 
688 
689 
690  //
691  // multiply-add
692  //
693  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
694  for (size_t i=0; i < host_v1.size(); ++i)
695  {
696  host_v1[i] = NumericT(1.0) + randomNumber();
697  host_v2[i] = NumericT(3.1415) * host_v1[i];
698  }
699  proxy_copy(host_v1, vcl_v1);
700  proxy_copy(host_v2, vcl_v2);
701 
702  for (std::size_t i=0; i < host_v1.size(); ++i)
703  host_v1[i] = host_v1[i] + host_v2[i] * NumericT(float(alpha));
704  vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
705 
706  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
707  return EXIT_FAILURE;
708 
709  for (std::size_t i=0; i < host_v1.size(); ++i)
710  host_v1[i] = host_v1[i] + host_v2[i] * NumericT(double(alpha));
711  vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
712 
713  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
714  return EXIT_FAILURE;
715 
716 
717  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
718  for (std::size_t i=0; i < host_v1.size(); ++i)
719  host_v2[i] = NumericT(3.1415) * host_v1[i];
720  proxy_copy(host_v1, vcl_v1);
721  proxy_copy(host_v2, vcl_v2);
722 
723  for (std::size_t i=0; i < host_v1.size(); ++i)
724  host_v1[i] = NumericT(long(alpha)) * host_v1[i] + host_v2[i];
725  vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
726 
727  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
728  return EXIT_FAILURE;
729 
730  for (std::size_t i=0; i < host_v1.size(); ++i)
731  host_v1[i] = NumericT(float(alpha)) * host_v1[i] + host_v2[i];
732  vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
733 
734  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
735  return EXIT_FAILURE;
736 
737  for (std::size_t i=0; i < host_v1.size(); ++i)
738  host_v1[i] = NumericT(double(alpha)) * host_v1[i] + host_v2[i];
739  vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
740 
741  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
742  return EXIT_FAILURE;
743 
744 
745  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
746  for (std::size_t i=0; i < host_v1.size(); ++i)
747  host_v2[i] = NumericT(3.1415) * host_v1[i];
748  proxy_copy(host_v1, vcl_v1);
749  proxy_copy(host_v2, vcl_v2);
750 
751  for (std::size_t i=0; i < host_v1.size(); ++i)
752  host_v1[i] = NumericT(long(alpha)) * host_v1[i] + NumericT(long(beta)) * host_v2[i];
753  vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
754 
755  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
756  return EXIT_FAILURE;
757 
758  for (std::size_t i=0; i < host_v1.size(); ++i)
759  host_v1[i] = NumericT(float(alpha)) * host_v1[i] + NumericT(float(beta)) * host_v2[i];
760  vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
761 
762  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
763  return EXIT_FAILURE;
764 
765  for (std::size_t i=0; i < host_v1.size(); ++i)
766  host_v1[i] = NumericT(double(alpha)) * host_v1[i] + NumericT(double(beta)) * host_v2[i];
767  vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
768 
769  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
770  return EXIT_FAILURE;
771 
772 
773  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
774  for (std::size_t i=0; i < host_v1.size(); ++i)
775  host_v2[i] = NumericT(3.1415) * host_v1[i];
776  proxy_copy(host_v1, vcl_v1);
777  proxy_copy(host_v2, vcl_v2);
778 
779  for (std::size_t i=0; i < host_v1.size(); ++i)
780  host_v1[i] += host_v2[i] * NumericT(long(alpha));
781  vcl_v1 += vcl_v2 * long(alpha);
782 
783  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
784  return EXIT_FAILURE;
785 
786  for (std::size_t i=0; i < host_v1.size(); ++i)
787  host_v1[i] += host_v2[i] * NumericT(float(alpha));
788  vcl_v1 += vcl_v2 * float(alpha);
789 
790  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
791  return EXIT_FAILURE;
792 
793  for (std::size_t i=0; i < host_v1.size(); ++i)
794  host_v1[i] += NumericT(double(alpha)) * host_v2[i];
795  vcl_v1 += double(alpha) * vcl_v2;
796 
797  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
798  return EXIT_FAILURE;
799 
800 
801  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
802  for (std::size_t i=0; i < host_v1.size(); ++i)
803  host_v2[i] = NumericT(3.1415) * host_v1[i];
804  proxy_copy(host_v1, vcl_v1);
805  proxy_copy(host_v2, vcl_v2);
806 
807  for (std::size_t i=0; i < host_v1.size(); ++i)
808  host_v1[i] = host_v1[i] + alpha * host_v2[i];
809  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
810 
811  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
812  return EXIT_FAILURE;
813 
814  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
815  for (std::size_t i=0; i < host_v1.size(); ++i)
816  host_v2[i] = NumericT(3.1415) * host_v1[i];
817  proxy_copy(host_v1, vcl_v1);
818  proxy_copy(host_v2, vcl_v2);
819 
820  for (std::size_t i=0; i < host_v1.size(); ++i)
821  host_v1[i] = host_v1[i] + alpha * host_v2[i];
822  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
823 
824  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
825  return EXIT_FAILURE;
826 
827  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
828  for (std::size_t i=0; i < host_v1.size(); ++i)
829  host_v2[i] = NumericT(3.1415) * host_v1[i];
830  proxy_copy(host_v1, vcl_v1);
831  proxy_copy(host_v2, vcl_v2);
832 
833  for (std::size_t i=0; i < host_v1.size(); ++i)
834  host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
835  vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
836 
837  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
838  return EXIT_FAILURE;
839 
840 
841  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
842  for (std::size_t i=0; i < host_v1.size(); ++i)
843  host_v2[i] = NumericT(3.1415) * host_v1[i];
844  proxy_copy(host_v1, vcl_v1);
845  proxy_copy(host_v2, vcl_v2);
846 
847  for (std::size_t i=0; i < host_v1.size(); ++i)
848  host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
849  vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
850 
851  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
852  return EXIT_FAILURE;
853 
854  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
855  for (std::size_t i=0; i < host_v1.size(); ++i)
856  host_v2[i] = NumericT(3.1415) * host_v1[i];
857  proxy_copy(host_v1, vcl_v1);
858  proxy_copy(host_v2, vcl_v2);
859 
860  for (std::size_t i=0; i < host_v1.size(); ++i)
861  host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
862  vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
863 
864  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
865  return EXIT_FAILURE;
866 
867 
868 
869  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
870  for (std::size_t i=0; i < host_v1.size(); ++i)
871  host_v2[i] = NumericT(3.1415) * host_v1[i];
872  proxy_copy(host_v1, vcl_v1);
873  proxy_copy(host_v2, vcl_v2);
874 
875  for (std::size_t i=0; i < host_v1.size(); ++i)
876  host_v1[i] += alpha * host_v2[i];
877  vcl_v1 += gpu_alpha * vcl_v2;
878 
879  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
880  return EXIT_FAILURE;
881 
882 
883  //
884  // division-add
885  //
886  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
887  for (size_t i=0; i < host_v1.size(); ++i)
888  {
889  host_v1[i] = NumericT(1.0) + randomNumber();
890  host_v2[i] = NumericT(3.1415) * host_v1[i];
891  }
892  proxy_copy(host_v1, vcl_v1);
893  proxy_copy(host_v2, vcl_v2);
894 
895  for (std::size_t i=0; i < host_v1.size(); ++i)
896  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(long(alpha));
897  vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
898 
899  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
900  return EXIT_FAILURE;
901 
902  for (std::size_t i=0; i < host_v1.size(); ++i)
903  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(float(alpha));
904  vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
905 
906  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
907  return EXIT_FAILURE;
908 
909  for (std::size_t i=0; i < host_v1.size(); ++i)
910  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(double(alpha));
911  vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
912 
913  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
914  return EXIT_FAILURE;
915 
916 
917  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
918  for (std::size_t i=0; i < host_v1.size(); ++i)
919  host_v2[i] = NumericT(3.1415) * host_v1[i];
920  proxy_copy(host_v1, vcl_v1);
921  proxy_copy(host_v2, vcl_v2);
922 
923  for (std::size_t i=0; i < host_v1.size(); ++i)
924  host_v1[i] = host_v1[i] / NumericT(float(alpha)) + host_v2[i];
925  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
926 
927  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
928  return EXIT_FAILURE;
929 
930  for (std::size_t i=0; i < host_v1.size(); ++i)
931  host_v1[i] = host_v1[i] / NumericT(double(alpha)) + host_v2[i];
932  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
933 
934  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
935  return EXIT_FAILURE;
936 
937 
938  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
939  for (std::size_t i=0; i < host_v1.size(); ++i)
940  host_v2[i] = NumericT(3.1415) * host_v1[i];
941  proxy_copy(host_v1, vcl_v1);
942  proxy_copy(host_v2, vcl_v2);
943 
944  for (std::size_t i=0; i < host_v1.size(); ++i)
945  host_v1[i] = host_v1[i] / NumericT(float(alpha)) + host_v2[i] / NumericT(float(beta));
946  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
947 
948  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
949  return EXIT_FAILURE;
950 
951  for (std::size_t i=0; i < host_v1.size(); ++i)
952  host_v1[i] = host_v1[i] / NumericT(double(alpha)) + host_v2[i] / NumericT(double(beta));
953  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
954 
955  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
956  return EXIT_FAILURE;
957 
958  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
959  for (std::size_t i=0; i < host_v1.size(); ++i)
960  host_v2[i] = NumericT(3.1415) * host_v1[i];
961  proxy_copy(host_v1, vcl_v1);
962  proxy_copy(host_v2, vcl_v2);
963 
964  for (std::size_t i=0; i < host_v1.size(); ++i)
965  host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
966  vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
967 
968  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
969  return EXIT_FAILURE;
970 
971 
972  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
973  for (std::size_t i=0; i < host_v1.size(); ++i)
974  host_v2[i] = NumericT(3.1415) * host_v1[i];
975  proxy_copy(host_v1, vcl_v1);
976  proxy_copy(host_v2, vcl_v2);
977 
978  for (std::size_t i=0; i < host_v1.size(); ++i)
979  host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
980  vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
981 
982  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
983  return EXIT_FAILURE;
984 
985 
986 
987  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
988  for (std::size_t i=0; i < host_v1.size(); ++i)
989  host_v2[i] = NumericT(3.1415) * host_v1[i];
990  proxy_copy(host_v1, vcl_v1);
991  proxy_copy(host_v2, vcl_v2);
992 
993  for (std::size_t i=0; i < host_v1.size(); ++i)
994  host_v1[i] += host_v2[i] / alpha;
995  vcl_v1 += vcl_v2 / alpha;
996 
997  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
998  return EXIT_FAILURE;
999 
1000 
1001  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
1002  for (std::size_t i=0; i < host_v1.size(); ++i)
1003  host_v2[i] = NumericT(3.1415) * host_v1[i];
1004  proxy_copy(host_v1, vcl_v1);
1005  proxy_copy(host_v2, vcl_v2);
1006 
1007  for (std::size_t i=0; i < host_v1.size(); ++i)
1008  host_v1[i] = host_v1[i] + host_v2[i] / alpha;
1009  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
1010 
1011  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1012  return EXIT_FAILURE;
1013 
1014  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl;
1015  for (std::size_t i=0; i < host_v1.size(); ++i)
1016  host_v2[i] = NumericT(3.1415) * host_v1[i];
1017  proxy_copy(host_v1, vcl_v1);
1018  proxy_copy(host_v2, vcl_v2);
1019 
1020  for (std::size_t i=0; i < host_v1.size(); ++i)
1021  host_v1[i] = host_v1[i] + host_v2[i] / alpha;
1022  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
1023 
1024  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1025  return EXIT_FAILURE;
1026 
1027  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
1028  for (std::size_t i=0; i < host_v1.size(); ++i)
1029  host_v2[i] = NumericT(3.1415) * host_v1[i];
1030  proxy_copy(host_v1, vcl_v1);
1031  proxy_copy(host_v2, vcl_v2);
1032 
1033  for (std::size_t i=0; i < host_v1.size(); ++i)
1034  host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1035  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1036 
1037  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1038  return EXIT_FAILURE;
1039 
1040 
1041  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1042  for (std::size_t i=0; i < host_v1.size(); ++i)
1043  host_v2[i] = NumericT(3.1415) * host_v1[i];
1044  proxy_copy(host_v1, vcl_v1);
1045  proxy_copy(host_v2, vcl_v2);
1046 
1047  for (std::size_t i=0; i < host_v1.size(); ++i)
1048  host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1049  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1050 
1051  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1052  return EXIT_FAILURE;
1053 
1054  std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1055  for (std::size_t i=0; i < host_v1.size(); ++i)
1056  host_v2[i] = NumericT(3.1415) * host_v1[i];
1057  proxy_copy(host_v1, vcl_v1);
1058  proxy_copy(host_v2, vcl_v2);
1059 
1060  for (std::size_t i=0; i < host_v1.size(); ++i)
1061  host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1062  vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1063 
1064  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1065  return EXIT_FAILURE;
1066 
1067  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1068  for (std::size_t i=0; i < host_v1.size(); ++i)
1069  host_v2[i] = NumericT(3.1415) * host_v1[i];
1070  proxy_copy(host_v1, vcl_v1);
1071  proxy_copy(host_v2, vcl_v2);
1072 
1073  for (std::size_t i=0; i < host_v1.size(); ++i)
1074  host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1075  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1076 
1077  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1078  return EXIT_FAILURE;
1079 
1080  std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1081  for (std::size_t i=0; i < host_v1.size(); ++i)
1082  host_v2[i] = NumericT(3.1415) * host_v1[i];
1083  proxy_copy(host_v1, vcl_v1);
1084  proxy_copy(host_v2, vcl_v2);
1085 
1086  for (std::size_t i=0; i < host_v1.size(); ++i)
1087  host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1088  vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1089 
1090  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1091  return EXIT_FAILURE;
1092 
1093 
1094 
1095  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl;
1096  for (std::size_t i=0; i < host_v1.size(); ++i)
1097  host_v2[i] = NumericT(3.1415) * host_v1[i];
1098  proxy_copy(host_v1, vcl_v1);
1099  proxy_copy(host_v2, vcl_v2);
1100 
1101  for (std::size_t i=0; i < host_v1.size(); ++i)
1102  host_v1[i] += host_v2[i] * alpha;
1103  vcl_v1 += vcl_v2 * gpu_alpha;
1104 
1105  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1106  return EXIT_FAILURE;
1107 
1108 
1109 
1110  //
1111  // multiply-subtract
1112  //
1113  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
1114  for (size_t i=0; i < host_v1.size(); ++i)
1115  {
1116  host_v1[i] = NumericT(1.0) + randomNumber();
1117  host_v2[i] = NumericT(3.1415) * host_v1[i];
1118  }
1119  proxy_copy(host_v1, vcl_v1);
1120  proxy_copy(host_v2, vcl_v2);
1121 
1122  for (std::size_t i=0; i < host_v1.size(); ++i)
1123  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1124  vcl_v1 = vcl_v1 - alpha * vcl_v2;
1125 
1126  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1127  return EXIT_FAILURE;
1128 
1129 
1130  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1131  for (std::size_t i=0; i < host_v1.size(); ++i)
1132  host_v2[i] = NumericT(3.1415) * host_v1[i];
1133  proxy_copy(host_v1, vcl_v1);
1134  proxy_copy(host_v2, vcl_v2);
1135 
1136  for (std::size_t i=0; i < host_v1.size(); ++i)
1137  host_v1[i] = alpha * host_v1[i] - host_v2[i];
1138  vcl_v1 = alpha * vcl_v1 - vcl_v2;
1139 
1140  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1141  return EXIT_FAILURE;
1142 
1143  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1144  for (std::size_t i=0; i < host_v1.size(); ++i)
1145  host_v2[i] = NumericT(3.1415) * host_v1[i];
1146  proxy_copy(host_v1, vcl_v1);
1147  proxy_copy(host_v2, vcl_v2);
1148 
1149  for (std::size_t i=0; i < host_v1.size(); ++i)
1150  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1151  vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1152 
1153  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1154  return EXIT_FAILURE;
1155 
1156 
1157  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1158  for (std::size_t i=0; i < host_v1.size(); ++i)
1159  host_v2[i] = NumericT(3.1415) * host_v1[i];
1160  proxy_copy(host_v1, vcl_v1);
1161  proxy_copy(host_v2, vcl_v2);
1162 
1163  for (std::size_t i=0; i < host_v1.size(); ++i)
1164  host_v1[i] -= alpha * host_v2[i];
1165  vcl_v1 -= alpha * vcl_v2;
1166 
1167  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1168  return EXIT_FAILURE;
1169 
1170 
1171  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1172  for (std::size_t i=0; i < host_v1.size(); ++i)
1173  host_v2[i] = NumericT(3.1415) * host_v1[i];
1174  proxy_copy(host_v1, vcl_v1);
1175  proxy_copy(host_v2, vcl_v2);
1176 
1177  for (std::size_t i=0; i < host_v1.size(); ++i)
1178  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1179  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1180 
1181  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1182  return EXIT_FAILURE;
1183 
1184  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
1185  for (std::size_t i=0; i < host_v1.size(); ++i)
1186  host_v2[i] = NumericT(3.1415) * host_v1[i];
1187  proxy_copy(host_v1, vcl_v1);
1188  proxy_copy(host_v2, vcl_v2);
1189 
1190  for (std::size_t i=0; i < host_v1.size(); ++i)
1191  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1192  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1193 
1194  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1195  return EXIT_FAILURE;
1196 
1197  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1198  for (std::size_t i=0; i < host_v1.size(); ++i)
1199  host_v2[i] = NumericT(3.1415) * host_v1[i];
1200  proxy_copy(host_v1, vcl_v1);
1201  proxy_copy(host_v2, vcl_v2);
1202 
1203  for (std::size_t i=0; i < host_v1.size(); ++i)
1204  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1205  vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1206 
1207  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1208  return EXIT_FAILURE;
1209 
1210  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1211  for (std::size_t i=0; i < host_v1.size(); ++i)
1212  host_v2[i] = NumericT(3.1415) * host_v1[i];
1213  proxy_copy(host_v1, vcl_v1);
1214  proxy_copy(host_v2, vcl_v2);
1215 
1216  for (std::size_t i=0; i < host_v1.size(); ++i)
1217  host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1218  vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1219 
1220  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1221  return EXIT_FAILURE;
1222 
1223  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1224  for (std::size_t i=0; i < host_v1.size(); ++i)
1225  host_v2[i] = NumericT(3.1415) * host_v1[i];
1226  proxy_copy(host_v1, vcl_v1);
1227  proxy_copy(host_v2, vcl_v2);
1228 
1229  for (std::size_t i=0; i < host_v1.size(); ++i)
1230  host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1231  vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1232 
1233  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1234  return EXIT_FAILURE;
1235 
1236 
1237  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1238  for (std::size_t i=0; i < host_v1.size(); ++i)
1239  host_v2[i] = NumericT(3.1415) * host_v1[i];
1240  proxy_copy(host_v1, vcl_v1);
1241  proxy_copy(host_v2, vcl_v2);
1242 
1243  for (std::size_t i=0; i < host_v1.size(); ++i)
1244  host_v1[i] -= alpha * host_v2[i];
1245  vcl_v1 -= gpu_alpha * vcl_v2;
1246 
1247  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1248  return EXIT_FAILURE;
1249 
1250 
1251 
1252  //
1253  // division-subtract
1254  //
1255  std::cout << "Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
1256  for (size_t i=0; i < host_v1.size(); ++i)
1257  {
1258  host_v1[i] = NumericT(1.0) + randomNumber();
1259  host_v2[i] = NumericT(3.1415) * host_v1[i];
1260  }
1261  proxy_copy(host_v1, vcl_v1);
1262  proxy_copy(host_v2, vcl_v2);
1263 
1264  for (std::size_t i=0; i < host_v1.size(); ++i)
1265  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1266  vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1267 
1268  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1269  return EXIT_FAILURE;
1270 
1271 
1272  std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1273  for (std::size_t i=0; i < host_v1.size(); ++i)
1274  host_v2[i] = NumericT(3.1415) * host_v1[i];
1275  proxy_copy(host_v1, vcl_v1);
1276  proxy_copy(host_v2, vcl_v2);
1277 
1278  for (std::size_t i=0; i < host_v1.size(); ++i)
1279  host_v1[i] = host_v1[i] / alpha - host_v2[i];
1280  vcl_v1 = vcl_v1 / alpha - vcl_v2;
1281 
1282  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1283  return EXIT_FAILURE;
1284 
1285  std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1286  for (std::size_t i=0; i < host_v1.size(); ++i)
1287  host_v2[i] = NumericT(3.1415) * host_v1[i];
1288  proxy_copy(host_v1, vcl_v1);
1289  proxy_copy(host_v2, vcl_v2);
1290 
1291  for (std::size_t i=0; i < host_v1.size(); ++i)
1292  host_v1[i] = host_v1[i] / alpha - host_v2[i] / alpha;
1293  vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1294 
1295  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1296  return EXIT_FAILURE;
1297 
1298 
1299  std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1300  for (std::size_t i=0; i < host_v1.size(); ++i)
1301  host_v2[i] = NumericT(3.1415) * host_v1[i];
1302  proxy_copy(host_v1, vcl_v1);
1303  proxy_copy(host_v2, vcl_v2);
1304 
1305  for (std::size_t i=0; i < host_v1.size(); ++i)
1306  host_v1[i] -= host_v2[i] / alpha;
1307  vcl_v1 -= vcl_v2 / alpha;
1308 
1309  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1310  return EXIT_FAILURE;
1311 
1312  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1313  for (std::size_t i=0; i < host_v1.size(); ++i)
1314  host_v2[i] = NumericT(3.1415) * host_v1[i];
1315  proxy_copy(host_v1, vcl_v1);
1316  proxy_copy(host_v2, vcl_v2);
1317 
1318  for (std::size_t i=0; i < host_v1.size(); ++i)
1319  host_v1[i] -= host_v2[i] / alpha;
1320  vcl_v1 -= vcl_v2 / gpu_alpha;
1321 
1322  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1323  return EXIT_FAILURE;
1324 
1325 
1326  std::cout << "Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1327  for (std::size_t i=0; i < host_v1.size(); ++i)
1328  host_v2[i] = NumericT(3.1415) * host_v1[i];
1329  proxy_copy(host_v1, vcl_v1);
1330  proxy_copy(host_v2, vcl_v2);
1331 
1332  for (std::size_t i=0; i < host_v1.size(); ++i)
1333  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1334  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1335 
1336  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1337  return EXIT_FAILURE;
1338 
1339  std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1340  for (std::size_t i=0; i < host_v1.size(); ++i)
1341  host_v2[i] = NumericT(3.1415) * host_v1[i];
1342  proxy_copy(host_v1, vcl_v1);
1343  proxy_copy(host_v2, vcl_v2);
1344 
1345  for (std::size_t i=0; i < host_v1.size(); ++i)
1346  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1347  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1348 
1349  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1350  return EXIT_FAILURE;
1351 
1352  std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1353  for (std::size_t i=0; i < host_v1.size(); ++i)
1354  host_v2[i] = NumericT(3.1415) * host_v1[i];
1355  proxy_copy(host_v1, vcl_v1);
1356  proxy_copy(host_v2, vcl_v2);
1357 
1358  for (std::size_t i=0; i < host_v1.size(); ++i)
1359  host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1360  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1361 
1362  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1363  return EXIT_FAILURE;
1364 
1365  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1366  for (std::size_t i=0; i < host_v1.size(); ++i)
1367  host_v2[i] = NumericT(3.1415) * host_v1[i];
1368  proxy_copy(host_v1, vcl_v1);
1369  proxy_copy(host_v2, vcl_v2);
1370 
1371  for (std::size_t i=0; i < host_v1.size(); ++i)
1372  host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1373  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1374 
1375  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1376  return EXIT_FAILURE;
1377 
1378  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1379  for (std::size_t i=0; i < host_v1.size(); ++i)
1380  host_v2[i] = NumericT(3.1415) * host_v1[i];
1381  proxy_copy(host_v1, vcl_v1);
1382  proxy_copy(host_v2, vcl_v2);
1383 
1384  for (std::size_t i=0; i < host_v1.size(); ++i)
1385  host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1386  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1387 
1388  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1389  return EXIT_FAILURE;
1390 
1391  std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1392  for (std::size_t i=0; i < host_v1.size(); ++i)
1393  host_v2[i] = NumericT(3.1415) * host_v1[i];
1394  proxy_copy(host_v1, vcl_v1);
1395  proxy_copy(host_v2, vcl_v2);
1396 
1397  for (std::size_t i=0; i < host_v1.size(); ++i)
1398  host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1399  vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1400 
1401  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1402  return EXIT_FAILURE;
1403 
1404  std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1405  for (std::size_t i=0; i < host_v1.size(); ++i)
1406  host_v2[i] = NumericT(3.1415) * host_v1[i];
1407  proxy_copy(host_v1, vcl_v1);
1408  proxy_copy(host_v2, vcl_v2);
1409 
1410  for (std::size_t i=0; i < host_v1.size(); ++i)
1411  host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1412  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1413 
1414  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1415  return EXIT_FAILURE;
1416 
1417  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1418  for (std::size_t i=0; i < host_v1.size(); ++i)
1419  host_v2[i] = NumericT(3.1415) * host_v1[i];
1420  proxy_copy(host_v1, vcl_v1);
1421  proxy_copy(host_v2, vcl_v2);
1422 
1423  for (std::size_t i=0; i < host_v1.size(); ++i)
1424  host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1425  vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1426 
1427  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1428  return EXIT_FAILURE;
1429 
1430  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1431  for (std::size_t i=0; i < host_v1.size(); ++i)
1432  host_v2[i] = NumericT(3.1415) * host_v1[i];
1433  proxy_copy(host_v1, vcl_v1);
1434  proxy_copy(host_v2, vcl_v2);
1435 
1436  for (std::size_t i=0; i < host_v1.size(); ++i)
1437  host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1438  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1439 
1440  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1441  return EXIT_FAILURE;
1442 
1443  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1444  for (std::size_t i=0; i < host_v1.size(); ++i)
1445  host_v2[i] = NumericT(3.1415) * host_v1[i];
1446  proxy_copy(host_v1, vcl_v1);
1447  proxy_copy(host_v2, vcl_v2);
1448 
1449  for (std::size_t i=0; i < host_v1.size(); ++i)
1450  host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1451  vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1452 
1453  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1454  return EXIT_FAILURE;
1455 
1456  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1457  for (std::size_t i=0; i < host_v1.size(); ++i)
1458  host_v2[i] = NumericT(3.1415) * host_v1[i];
1459  proxy_copy(host_v1, vcl_v1);
1460  proxy_copy(host_v2, vcl_v2);
1461 
1462  for (std::size_t i=0; i < host_v1.size(); ++i)
1463  host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1464  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1465 
1466  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1467  return EXIT_FAILURE;
1468 
1469 
1470  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1471  for (std::size_t i=0; i < host_v1.size(); ++i)
1472  host_v2[i] = NumericT(3.1415) * host_v1[i];
1473  proxy_copy(host_v1, vcl_v1);
1474  proxy_copy(host_v2, vcl_v2);
1475 
1476  for (std::size_t i=0; i < host_v1.size(); ++i)
1477  host_v1[i] -= alpha * host_v2[i];
1478  vcl_v1 -= gpu_alpha * vcl_v2;
1479 
1480  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1481  return EXIT_FAILURE;
1482 
1483 
1484 
1485  //
1486  // More complicated expressions (for ensuring the operator overloads work correctly)
1487  //
1488  for (std::size_t i=0; i < host_v1.size(); ++i)
1489  {
1490  host_v1[i] = NumericT(1.0) + randomNumber();
1491  host_v2[i] = NumericT(3.1415) * host_v1[i];
1492  }
1493  proxy_copy(host_v1, vcl_v1);
1494  proxy_copy(host_v2, vcl_v2);
1495 
1496  std::cout << "Testing three vector additions..." << std::endl;
1497  for (std::size_t i=0; i < host_v1.size(); ++i)
1498  host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1499  vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1500 
1501  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1502  return EXIT_FAILURE;
1503 
1504 
1505  for (std::size_t i=0; i < host_v1.size(); ++i)
1506  host_v2[i] = NumericT(3.1415) * host_v1[i];
1507  proxy_copy(host_v1, vcl_v1);
1508  proxy_copy(host_v2, vcl_v2);
1509 
1510  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
1511  for (std::size_t i=0; i < host_v1.size(); ++i)
1512  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1513  vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1514 
1515  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1516  return EXIT_FAILURE;
1517 
1518  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
1519  for (std::size_t i=0; i < host_v1.size(); ++i)
1520  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1521  vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1522 
1523  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1524  return EXIT_FAILURE;
1525 
1526  // --------------------------------------------------------------------------
1527  for (std::size_t i=0; i < host_v1.size(); ++i)
1528  host_v2[i] = NumericT(3.1415) * host_v1[i];
1529  proxy_copy(host_v1, vcl_v1);
1530  proxy_copy(host_v2, vcl_v2);
1531 
1532  std::cout << "Testing swap..." << std::endl;
1533  for (std::size_t i=0; i < host_v1.size(); ++i)
1534  {
1535  NumericT temp = host_v1[i];
1536  host_v1[i] = host_v2[i];
1537  host_v2[i] = temp;
1538  }
1539  swap(vcl_v1, vcl_v2);
1540 
1541  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1542  return EXIT_FAILURE;
1543 
1544  // --------------------------------------------------------------------------
1545  for (std::size_t i=0; i<host_v1.size(); ++i)
1546  {
1547  host_v1[i] = NumericT(1.0) + randomNumber();
1548  host_v2[i] = NumericT(5.0) + randomNumber();
1549  }
1550 
1551  proxy_copy(host_v1, vcl_v1);
1552  proxy_copy(host_v2, vcl_v2);
1553 
1554  std::cout << "Testing unary operator-..." << std::endl;
1555  for (std::size_t i=0; i < host_v1.size(); ++i)
1556  host_v1[i] = - host_v2[i];
1557  vcl_v1 = - vcl_v2;
1558 
1559  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1560  return EXIT_FAILURE;
1561 
1562 
1563  std::cout << "Testing elementwise multiplication..." << std::endl;
1564  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
1565  for (std::size_t i=0; i < host_v1.size(); ++i)
1566  host_v1[i] = host_v1[i] * host_v2[i];
1567  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1568 
1569  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1570  return EXIT_FAILURE;
1571 
1572  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
1573  for (std::size_t i=0; i < host_v1.size(); ++i)
1574  host_v1[i] += host_v1[i] * host_v2[i];
1575  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1576 
1577  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1578  return EXIT_FAILURE;
1579 
1580  std::cout << " v1 -= element_prod(v1, v2);" << std::endl;
1581  for (std::size_t i=0; i < host_v1.size(); ++i)
1582  host_v1[i] -= host_v1[i] * host_v2[i];
1583  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1584 
1585  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1586  return EXIT_FAILURE;
1587 
1589  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
1590  for (std::size_t i=0; i < host_v1.size(); ++i)
1591  host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1592  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1593 
1594  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1595  return EXIT_FAILURE;
1596 
1597  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
1598  for (std::size_t i=0; i < host_v1.size(); ++i)
1599  host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1600  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1601 
1602  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1603  return EXIT_FAILURE;
1604 
1605  std::cout << " v1 -= element_prod(v1 + v2, v2);" << std::endl;
1606  for (std::size_t i=0; i < host_v1.size(); ++i)
1607  host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1608  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1609 
1610  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1611  return EXIT_FAILURE;
1612 
1614  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
1615  for (std::size_t i=0; i < host_v1.size(); ++i)
1616  host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1617  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1618 
1619  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1620  return EXIT_FAILURE;
1621 
1622  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
1623  for (std::size_t i=0; i < host_v1.size(); ++i)
1624  host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1625  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1626 
1627  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1628  return EXIT_FAILURE;
1629 
1630  std::cout << " v1 -= element_prod(v1, v2 + v1);" << std::endl;
1631  for (std::size_t i=0; i < host_v1.size(); ++i)
1632  host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1633  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1634 
1635  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1636  return EXIT_FAILURE;
1637 
1639  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1640  for (std::size_t i=0; i < host_v1.size(); ++i)
1641  host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1642  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1643 
1644  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1645  return EXIT_FAILURE;
1646 
1647  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1648  for (std::size_t i=0; i < host_v1.size(); ++i)
1649  host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1650  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1651 
1652  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1653  return EXIT_FAILURE;
1654 
1655  std::cout << " v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1656  for (std::size_t i=0; i < host_v1.size(); ++i)
1657  host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1658  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1659 
1660  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1661  return EXIT_FAILURE;
1662 
1663 
1664  std::cout << "Testing elementwise division..." << std::endl;
1665  for (std::size_t i=0; i<host_v1.size(); ++i)
1666  {
1667  host_v1[i] = NumericT(1.0) + randomNumber();
1668  host_v2[i] = NumericT(5.0) + randomNumber();
1669  }
1670 
1671  proxy_copy(host_v1, vcl_v1);
1672  proxy_copy(host_v2, vcl_v2);
1673 
1674  for (std::size_t i=0; i < host_v1.size(); ++i)
1675  host_v1[i] = host_v1[i] / host_v2[i];
1676  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);
1677 
1678  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1679  return EXIT_FAILURE;
1680 
1681  for (std::size_t i=0; i < host_v1.size(); ++i)
1682  host_v1[i] += host_v1[i] / host_v2[i];
1683  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);
1684 
1685  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1686  return EXIT_FAILURE;
1687 
1688  for (std::size_t i=0; i < host_v1.size(); ++i)
1689  host_v1[i] -= host_v1[i] / host_v2[i];
1690  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2);
1691 
1692  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1693  return EXIT_FAILURE;
1694 
1696  for (std::size_t i=0; i < host_v1.size(); ++i)
1697  host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1698  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1699 
1700  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1701  return EXIT_FAILURE;
1702 
1703  for (std::size_t i=0; i < host_v1.size(); ++i)
1704  host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1705  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1706 
1707  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1708  return EXIT_FAILURE;
1709 
1710  for (std::size_t i=0; i < host_v1.size(); ++i)
1711  host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1712  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1713 
1714  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1715  return EXIT_FAILURE;
1716 
1718  for (std::size_t i=0; i < host_v1.size(); ++i)
1719  host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1720  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1721 
1722  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1723  return EXIT_FAILURE;
1724 
1725  for (std::size_t i=0; i < host_v1.size(); ++i)
1726  host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1727  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1728 
1729  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1730  return EXIT_FAILURE;
1731 
1732  for (std::size_t i=0; i < host_v1.size(); ++i)
1733  host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1734  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1735 
1736  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1737  return EXIT_FAILURE;
1738 
1740  for (std::size_t i=0; i < host_v1.size(); ++i)
1741  host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1742  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1743 
1744  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1745  return EXIT_FAILURE;
1746 
1747  for (std::size_t i=0; i < host_v1.size(); ++i)
1748  host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1749  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1750 
1751  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1752  return EXIT_FAILURE;
1753 
1754  for (std::size_t i=0; i < host_v1.size(); ++i)
1755  host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1756  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1757 
1758  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1759  return EXIT_FAILURE;
1760 
1761 
1762  std::cout << "Testing elementwise power function..." << std::endl;
1763  for (std::size_t i=0; i<host_v1.size(); ++i)
1764  {
1765  host_v1[i] = NumericT(1.1) + NumericT(0.5) * randomNumber();
1766  host_v2[i] = NumericT(1.1) + NumericT(0.5) * randomNumber();
1767  }
1768  std::vector<NumericT> std_v3(host_v1.size());
1769  vector_proxy<NumericT> host_v3(&std_v3[0], 0, 1, host_v1.size());
1770 
1771  proxy_copy(host_v1, vcl_v1);
1772  proxy_copy(host_v2, vcl_v2);
1773 
1774  for (std::size_t i=0; i<host_v3.size(); ++i)
1775  host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1776  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1777 
1778  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1779  {
1780  std::cerr << "** Failure in v1 = pow(v1, v2);" << std::endl;
1781  return EXIT_FAILURE;
1782  }
1783 
1784  proxy_copy(host_v1, vcl_v1);
1785  for (std::size_t i=0; i<host_v3.size(); ++i)
1786  host_v3[i] = host_v1[i];
1787  for (std::size_t i=0; i<host_v3.size(); ++i)
1788  host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1789  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1790 
1791  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1792  {
1793  std::cerr << "** Failure in v1 += pow(v1, v2);" << std::endl;
1794  return EXIT_FAILURE;
1795  }
1796 
1797  proxy_copy(host_v1, vcl_v1);
1798  for (std::size_t i=0; i<host_v3.size(); ++i)
1799  host_v3[i] = host_v1[i];
1800  for (std::size_t i=0; i<host_v3.size(); ++i)
1801  host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1802  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1803 
1804  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1805  {
1806  std::cerr << "** Failure in v1 -= pow(v1, v2);" << std::endl;
1807  return EXIT_FAILURE;
1808  }
1809 
1811  proxy_copy(host_v1, vcl_v1);
1812  for (std::size_t i=0; i<host_v3.size(); ++i)
1813  host_v3[i] = host_v1[i];
1814  for (std::size_t i=0; i<host_v3.size(); ++i)
1815  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1816  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1817 
1818  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1819  {
1820  std::cerr << "** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1821  return EXIT_FAILURE;
1822  }
1823 
1824  proxy_copy(host_v1, vcl_v1);
1825  for (std::size_t i=0; i<host_v3.size(); ++i)
1826  host_v3[i] = host_v1[i];
1827  for (std::size_t i=0; i<host_v3.size(); ++i)
1828  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1829  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1830 
1831  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1832  {
1833  std::cerr << "** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1834  return EXIT_FAILURE;
1835  }
1836 
1837  proxy_copy(host_v1, vcl_v1);
1838  for (std::size_t i=0; i<host_v3.size(); ++i)
1839  host_v3[i] = host_v1[i];
1840  for (std::size_t i=0; i<host_v3.size(); ++i)
1841  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1842  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1843 
1844  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1845  {
1846  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1847  return EXIT_FAILURE;
1848  }
1849 
1851  proxy_copy(host_v1, vcl_v1);
1852  for (std::size_t i=0; i<host_v3.size(); ++i)
1853  host_v3[i] = host_v1[i];
1854  for (std::size_t i=0; i<host_v3.size(); ++i)
1855  host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1856  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1857 
1858  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1859  {
1860  std::cerr << "** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1861  return EXIT_FAILURE;
1862  }
1863 
1864  proxy_copy(host_v1, vcl_v1);
1865  for (std::size_t i=0; i<host_v3.size(); ++i)
1866  host_v3[i] = host_v1[i];
1867  for (std::size_t i=0; i<host_v3.size(); ++i)
1868  host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1869  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1870 
1871  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1872  {
1873  std::cerr << "** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1874  return EXIT_FAILURE;
1875  }
1876 
1877  proxy_copy(host_v1, vcl_v1);
1878  for (std::size_t i=0; i<host_v3.size(); ++i)
1879  host_v3[i] = host_v1[i];
1880  for (std::size_t i=0; i<host_v3.size(); ++i)
1881  host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1882  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1883 
1884  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1885  {
1886  std::cerr << "** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1887  return EXIT_FAILURE;
1888  }
1889 
1891  proxy_copy(host_v1, vcl_v1);
1892  for (std::size_t i=0; i<host_v3.size(); ++i)
1893  host_v3[i] = host_v1[i];
1894  for (std::size_t i=0; i<host_v3.size(); ++i)
1895  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1896  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1897 
1898  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1899  {
1900  std::cerr << "** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1901  return EXIT_FAILURE;
1902  }
1903 
1904  proxy_copy(host_v1, vcl_v1);
1905  for (std::size_t i=0; i<host_v3.size(); ++i)
1906  host_v3[i] = host_v1[i];
1907  for (std::size_t i=0; i<host_v3.size(); ++i)
1908  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1909  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1910 
1911  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1912  {
1913  std::cerr << "** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1914  return EXIT_FAILURE;
1915  }
1916 
1917  proxy_copy(host_v1, vcl_v1);
1918  for (std::size_t i=0; i<host_v3.size(); ++i)
1919  host_v3[i] = host_v1[i];
1920  for (std::size_t i=0; i<host_v3.size(); ++i)
1921  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1922  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1923 
1924  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1925  {
1926  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1927  return EXIT_FAILURE;
1928  }
1929 
1930  std::cout << "Testing unary elementwise operations..." << std::endl;
1931  for (size_t i=0; i < host_v1.size(); ++i)
1932  host_v1[i] = randomNumber() / NumericT(4);
1933 
1934 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
1935  for (std::size_t i=0; i<host_v1.size(); ++i) \
1936  host_v2[i] = NumericT(3.1415) * host_v1[i]; \
1937  proxy_copy(host_v1, vcl_v1); \
1938  proxy_copy(host_v2, vcl_v2); \
1939  \
1940  for (std::size_t i=0; i<host_v1.size(); ++i) \
1941  host_v1[i] = std::FUNCNAME(host_v2[i]); \
1942  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1943  \
1944  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1945  { \
1946  std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
1947  return EXIT_FAILURE; \
1948  } \
1949  \
1950  for (std::size_t i=0; i<host_v1.size(); ++i) \
1951  host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
1952  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1953  \
1954  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1955  { \
1956  std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1957  return EXIT_FAILURE; \
1958  } \
1959  \
1960  for (std::size_t i=0; i<host_v1.size(); ++i) \
1961  host_v1[i] += std::FUNCNAME(host_v1[i]); \
1962  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
1963  \
1964  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1965  { \
1966  std::cout << "Failure at v1 += " << #FUNCNAME << "(v2)" << std::endl; \
1967  return EXIT_FAILURE; \
1968  } \
1969  \
1970  for (std::size_t i=0; i<host_v1.size(); ++i) \
1971  host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
1972  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1973  \
1974  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1975  { \
1976  std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1977  return EXIT_FAILURE; \
1978  } \
1979  \
1980  for (std::size_t i=0; i<host_v1.size(); ++i) \
1981  host_v1[i] -= std::FUNCNAME(host_v2[i]); \
1982  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1983  \
1984  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1985  { \
1986  std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
1987  return EXIT_FAILURE; \
1988  } \
1989  \
1990  for (std::size_t i=0; i<host_v1.size(); ++i) \
1991  host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
1992  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1993  \
1994  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1995  { \
1996  std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1997  return EXIT_FAILURE; \
1998  } \
1999 
2001  GENERATE_UNARY_OP_TEST(cosh);
2002  for (std::size_t i=0; i < host_v1.size(); ++i)
2003  host_v1[i] = randomNumber() / NumericT(4);
2005  GENERATE_UNARY_OP_TEST(floor);
2006  GENERATE_UNARY_OP_TEST(fabs);
2008  GENERATE_UNARY_OP_TEST(log10);
2010  GENERATE_UNARY_OP_TEST(sinh);
2011  GENERATE_UNARY_OP_TEST(fabs);
2012  //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only
2013  GENERATE_UNARY_OP_TEST(sqrt);
2015  GENERATE_UNARY_OP_TEST(tanh);
2016 
2017  // --------------------------------------------------------------------------
2018  for (std::size_t i=0; i<host_v1.size(); ++i)
2019  host_v2[i] = NumericT(3.1415) * host_v1[i];
2020  proxy_copy(host_v1, vcl_v1);
2021  proxy_copy(host_v2, vcl_v2);
2022 
2023  std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl;
2024  for (std::size_t i=0; i<host_v1.size(); ++i)
2025  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2026  vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
2027 
2028  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2029  return EXIT_FAILURE;
2030 
2031  std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl;
2032  for (std::size_t i=0; i<host_v1.size(); ++i)
2033  host_v2[i] = NumericT(3.1415) * host_v1[i];
2034  proxy_copy(host_v1, vcl_v1);
2035  proxy_copy(host_v2, vcl_v2);
2036 
2037  for (std::size_t i=0; i<host_v1.size(); ++i)
2038  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2039  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2040 
2041  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2042  return EXIT_FAILURE;
2043 
2044 
2045  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
2046  for (std::size_t i=0; i<host_v1.size(); ++i)
2047  host_v2[i] = NumericT(3.1415) * host_v1[i];
2048  proxy_copy(host_v1, vcl_v1);
2049  proxy_copy(host_v2, vcl_v2);
2050 
2051  for (std::size_t i=0; i<host_v1.size(); ++i)
2052  host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2053  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2054 
2055  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2056  return EXIT_FAILURE;
2057 
2058  // --------------------------------------------------------------------------
2059  return retval;
2060 }
2061 
2062 
2063 template< typename NumericT, typename Epsilon >
2064 int test(Epsilon const& epsilon)
2065 {
2066  int retval = EXIT_SUCCESS;
2067  std::size_t size = 24656;
2068 
2070 
2071  std::cout << "Running tests for vector of size " << size << std::endl;
2072 
2073  //
2074  // Set up host objects
2075  //
2076  std::vector<NumericT> std_full_vec(size);
2077  std::vector<NumericT> std_full_vec2(std_full_vec.size());
2078 
2079  for (std::size_t i=0; i<std_full_vec.size(); ++i)
2080  {
2081  std_full_vec[i] = NumericT(1.0) + randomNumber();
2082  std_full_vec2[i] = NumericT(1.0) + randomNumber();
2083  }
2084 
2085  std::size_t r1_start = std_full_vec.size() / 4;
2086  std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2087  std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2088  std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
2089  vector_proxy<NumericT> host_range_vec (&std_full_vec[0], r1_start, 1, r1_stop - r1_start);
2090  vector_proxy<NumericT> host_range_vec2(&std_full_vec2[0], r2_start, 1, r2_stop - r2_start);
2091 
2092  std::size_t s1_start = std_full_vec.size() / 4;
2093  std::size_t s1_inc = 3;
2094  std::size_t s1_size = std_full_vec.size() / 4;
2095  std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2096  std::size_t s2_inc = 2;
2097  std::size_t s2_size = std_full_vec2.size() / 4;
2098  vector_proxy<NumericT> host_slice_vec (&std_full_vec[0], s1_start, s1_inc, s1_size);
2099  vector_proxy<NumericT> host_slice_vec2(&std_full_vec2[0], s2_start, s2_inc, s2_size);
2100 
2101  //
2102  // Set up ViennaCL objects
2103  //
2104  viennacl::vector<NumericT> vcl_full_vec(std_full_vec.size());
2105  viennacl::vector<NumericT> vcl_full_vec2(std_full_vec2.size());
2106 
2107  viennacl::fast_copy(std_full_vec.begin(), std_full_vec.end(), vcl_full_vec.begin());
2108  viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
2109 
2110  viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2111  viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
2112  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec(vcl_full_vec, vcl_r1);
2113  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec2(vcl_full_vec2, vcl_r2);
2114 
2115  {
2116  viennacl::vector<NumericT> vcl_short_vec(vcl_range_vec);
2117  viennacl::vector<NumericT> vcl_short_vec2 = vcl_range_vec2;
2118 
2119  std::vector<NumericT> std_short_vec(host_range_vec.size());
2120  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2121  std_short_vec[i] = host_range_vec[i];
2122  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2123 
2124  std::vector<NumericT> std_short_vec2(host_range_vec2.size());
2125  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2126  std_short_vec2[i] = host_range_vec2[i];
2127  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2128 
2129  std::cout << "Testing creation of vectors from range..." << std::endl;
2130  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2131  return EXIT_FAILURE;
2132  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2133  return EXIT_FAILURE;
2134  }
2135 
2136  viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2137  viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
2138  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec(vcl_full_vec, vcl_s1);
2139  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec2(vcl_full_vec2, vcl_s2);
2140 
2141  viennacl::vector<NumericT> vcl_short_vec(vcl_slice_vec);
2142  viennacl::vector<NumericT> vcl_short_vec2 = vcl_slice_vec2;
2143 
2144  std::vector<NumericT> std_short_vec(host_slice_vec.size());
2145  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2146  std_short_vec[i] = host_slice_vec[i];
2147  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2148 
2149  std::vector<NumericT> std_short_vec2(host_slice_vec2.size());
2150  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2151  std_short_vec2[i] = host_slice_vec2[i];
2152  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2153 
2154  std::cout << "Testing creation of vectors from slice..." << std::endl;
2155  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2156  return EXIT_FAILURE;
2157  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2158  return EXIT_FAILURE;
2159 
2160 
2161  //
2162  // Now start running tests for vectors, ranges and slices:
2163  //
2164 
2165  std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2166  retval = test<NumericT>(epsilon,
2167  host_short_vec, host_short_vec2,
2168  vcl_short_vec, vcl_short_vec2);
2169  if (retval != EXIT_SUCCESS)
2170  return EXIT_FAILURE;
2171 
2172  std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2173  retval = test<NumericT>(epsilon,
2174  host_short_vec, host_short_vec2,
2175  vcl_short_vec, vcl_range_vec2);
2176  if (retval != EXIT_SUCCESS)
2177  return EXIT_FAILURE;
2178 
2179  std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2180  retval = test<NumericT>(epsilon,
2181  host_short_vec, host_short_vec2,
2182  vcl_short_vec, vcl_slice_vec2);
2183  if (retval != EXIT_SUCCESS)
2184  return EXIT_FAILURE;
2185 
2187 
2188  std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2189  retval = test<NumericT>(epsilon,
2190  host_short_vec, host_short_vec2,
2191  vcl_range_vec, vcl_short_vec2);
2192  if (retval != EXIT_SUCCESS)
2193  return EXIT_FAILURE;
2194 
2195  std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2196  retval = test<NumericT>(epsilon,
2197  host_short_vec, host_short_vec2,
2198  vcl_range_vec, vcl_range_vec2);
2199  if (retval != EXIT_SUCCESS)
2200  return EXIT_FAILURE;
2201 
2202  std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2203  retval = test<NumericT>(epsilon,
2204  host_short_vec, host_short_vec2,
2205  vcl_range_vec, vcl_slice_vec2);
2206  if (retval != EXIT_SUCCESS)
2207  return EXIT_FAILURE;
2208 
2210 
2211  std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2212  retval = test<NumericT>(epsilon,
2213  host_short_vec, host_short_vec2,
2214  vcl_slice_vec, vcl_short_vec2);
2215  if (retval != EXIT_SUCCESS)
2216  return EXIT_FAILURE;
2217 
2218  std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2219  retval = test<NumericT>(epsilon,
2220  host_short_vec, host_short_vec2,
2221  vcl_slice_vec, vcl_range_vec2);
2222  if (retval != EXIT_SUCCESS)
2223  return EXIT_FAILURE;
2224 
2225  std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2226  retval = test<NumericT>(epsilon,
2227  host_short_vec, host_short_vec2,
2228  vcl_slice_vec, vcl_slice_vec2);
2229  if (retval != EXIT_SUCCESS)
2230  return EXIT_FAILURE;
2231 
2232  return EXIT_SUCCESS;
2233 }
2234 
2235 
2236 //
2237 // -------------------------------------------------------------
2238 //
/**
 * Entry point of the vector test program. Prints a banner, runs test<float>() with tolerance
 * 1.0E-2 and then test<double>() with tolerance 1.0E-10, and stops at the first run that
 * does not return EXIT_SUCCESS.
 *
 * @return EXIT_SUCCESS when every executed run passes; otherwise the value returned by the
 *         failing test<>() call.
 */
2239 int main()
2240 {
2241  std::cout << std::endl;
2242  std::cout << "----------------------------------------------" << std::endl;
2243  std::cout << "----------------------------------------------" << std::endl;
2244  std::cout << "## Test :: Vector" << std::endl;
2245  std::cout << "----------------------------------------------" << std::endl;
2246  std::cout << "----------------------------------------------" << std::endl;
2247  std::cout << std::endl;
2248 
2249  int retval = EXIT_SUCCESS;
2250 
2251  std::cout << std::endl;
2252  std::cout << "----------------------------------------------" << std::endl;
2253  std::cout << std::endl;
      // Single-precision run.
2254  {
2255  typedef float NumericT;
2256  NumericT epsilon = static_cast<NumericT>(1.0E-2);
2257  std::cout << "# Testing setup:" << std::endl;
2258  std::cout << " eps: " << epsilon << std::endl;
2259  std::cout << " numeric: float" << std::endl;
2260  retval = test<NumericT>(epsilon);
2261  if ( retval == EXIT_SUCCESS )
2262  std::cout << "# Test passed" << std::endl;
2263  else
2264  return retval;
2265  }
2266  std::cout << std::endl;
2267  std::cout << "----------------------------------------------" << std::endl;
2268  std::cout << std::endl;
      // NOTE(review): listing line 2270, between the #ifdef and #endif below, is not present in this
      // listing; presumably it holds a condition guarding the double-precision block that follows
      // when OpenCL is enabled -- confirm against the original file.
2269  #ifdef VIENNACL_WITH_OPENCL
2271  #endif
2272  {
      // Double-precision run with a much tighter tolerance.
2273  {
2274  typedef double NumericT;
2275  NumericT epsilon = 1.0E-10;
2276  std::cout << "# Testing setup:" << std::endl;
2277  std::cout << " eps: " << epsilon << std::endl;
2278  std::cout << " numeric: double" << std::endl;
2279  retval = test<NumericT>(epsilon);
2280  if ( retval == EXIT_SUCCESS )
2281  std::cout << "# Test passed" << std::endl;
2282  else
2283  return retval;
2284  }
2285  std::cout << std::endl;
2286  std::cout << "----------------------------------------------" << std::endl;
2287  std::cout << std::endl;
2288  }
2289 
2290  std::cout << std::endl;
2291  std::cout << "------- Test completed --------" << std::endl;
2292  std::cout << std::endl;
2293 
2294 
2295  return retval;
2296 }
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:96
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:100
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
float NumericT
Definition: bisect.cpp:40
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
Random number generator for returning uniformly distributed values in the closed interval [0...
Definition: random.hpp:44
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:434
std::size_t size() const
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:841
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:437
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
int main()
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Definition: vector_def.hpp:76
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
NumericT max(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:47
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
T norm_1(std::vector< T, A > const &v1)
Definition: norm_1.hpp:61
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:424
float ScalarType
Definition: fft_1d.cpp:42
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:848
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:429
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:233
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:91
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)