ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
scheduler.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 /*
19 *
20 * Benchmark: Scheduler overhead for vector operations (scheduler.cpp and scheduler.cu are identical, the latter being required for compilation using CUDA nvcc)
21 *
22 */
23 
24 
25 //#define VIENNACL_DEBUG_ALL
26 #ifndef NDEBUG
27  #define NDEBUG
28 #endif
29 
30 #include "viennacl/scalar.hpp"
31 #include "viennacl/vector.hpp"
32 #include "viennacl/matrix.hpp"
36 #include "viennacl/tools/timer.hpp"
37 
38 #include <iostream>
39 #include <vector>
40 
41 using std::cout;
42 using std::cin;
43 using std::endl;
44 
45 
46 #define BENCHMARK_VECTOR_SIZE 2
47 #define BENCHMARK_RUNS 1000
48 
49 
50 template<typename ScalarType>
52 {
53 
55  double exec_time;
56 
57  std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE);
58  std::vector<ScalarType> std_vec2(BENCHMARK_VECTOR_SIZE);
61  ScalarType alpha = ScalarType(1.1415);
62  ScalarType beta = ScalarType(0.97172);
63 
64 
66 
67  std_vec1[0] = 1.0;
68  std_vec2[0] = 1.0;
69  for (std::size_t i=1; i<BENCHMARK_VECTOR_SIZE; ++i)
70  {
71  std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);
72  std_vec2[i] = std_vec1[i-1] * ScalarType(0.999999);
73  }
74 
75  viennacl::copy(std_vec1, vcl_vec1);
76  viennacl::fast_copy(std_vec1, vcl_vec1);
77  viennacl::copy(std_vec2, vcl_vec2);
78 
80  vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
82  timer.start();
83  for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
84  {
85  vcl_vec2 = alpha * vcl_vec1 + beta * vcl_vec2;
86  }
88  exec_time = timer.get();
89  std::cout << "Execution time per operation, no scheduler: " << exec_time / BENCHMARK_RUNS << " sec" << std::endl;
90  std::cout << "Result: " << vcl_vec2[0] << std::endl;
91 
93  timer.start();
94  for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
95  {
96  viennacl::scheduler::statement my_statement(vcl_vec2, viennacl::op_assign(), alpha * vcl_vec1 + beta * vcl_vec2); // same as vcl_v1 = alpha * vcl_vec1 + beta * vcl_vec2;
97  viennacl::scheduler::execute(my_statement);
98  }
100  exec_time = timer.get();
101  std::cout << "Execution time per operation, with scheduler including statement generation: " << exec_time / BENCHMARK_RUNS << " sec" << std::endl;
102  std::cout << "Result: " << vcl_vec2[0] << std::endl;
103 
104  viennacl::scheduler::statement my_statement(vcl_vec2, viennacl::op_assign(), alpha * vcl_vec1 + beta * vcl_vec2); // same as vcl_v1 = alpha * vcl_vec1 + beta * vcl_vec2;
106  timer.start();
107  for (std::size_t runs=0; runs<BENCHMARK_RUNS; ++runs)
108  {
109  viennacl::scheduler::execute(my_statement);
110  }
112  exec_time = timer.get();
113  std::cout << "Execution time per operation, only execution: " << exec_time / BENCHMARK_RUNS << " sec" << std::endl;
114  std::cout << "Result: " << vcl_vec2[0] << std::endl;
115 
116  return 0;
117 }
118 
119 int main()
120 {
121  std::cout << std::endl;
122  std::cout << "----------------------------------------------" << std::endl;
123  std::cout << " Device Info" << std::endl;
124  std::cout << "----------------------------------------------" << std::endl;
125 
126 #ifdef VIENNACL_WITH_OPENCL
127  std::cout << viennacl::ocl::current_device().info() << std::endl;
128 #endif
129 
130  std::cout << std::endl;
131  std::cout << "----------------------------------------------" << std::endl;
132  std::cout << "----------------------------------------------" << std::endl;
133  std::cout << "## Benchmark :: Vector" << std::endl;
134  std::cout << "----------------------------------------------" << std::endl;
135  std::cout << std::endl;
136  std::cout << " -------------------------------" << std::endl;
137  std::cout << " # benchmarking single-precision" << std::endl;
138  std::cout << " -------------------------------" << std::endl;
139  run_benchmark<float>();
140 #ifdef VIENNACL_WITH_OPENCL
142 #endif
143  {
144  std::cout << std::endl;
145  std::cout << " -------------------------------" << std::endl;
146  std::cout << " # benchmarking double-precision" << std::endl;
147  std::cout << " -------------------------------" << std::endl;
148  run_benchmark<double>();
149  }
150  return 0;
151 }
152 
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:90
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
int run_benchmark()
Definition: scheduler.cpp:51
Implementation of the dense matrix class.
A tag class representing assignment.
Definition: forwards.h:81
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
void execute(statement const &s)
Definition: execute.hpp:279
#define BENCHMARK_RUNS
Definition: scheduler.cpp:47
#define BENCHMARK_VECTOR_SIZE
Definition: scheduler.cpp:46
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
int main()
Definition: scheduler.cpp:119
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
float ScalarType
Definition: fft_1d.cpp:42
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
double get() const
Definition: timer.hpp:104
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
Implementation of the ViennaCL scalar class.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)