ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
matrix-free.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
31 //
32 // include necessary system headers
33 //
34 #include <iostream>
35 
36 //
37 // ViennaCL includes
38 //
39 #include "viennacl/scalar.hpp"
40 #include "viennacl/vector.hpp"
41 #include "viennacl/linalg/prod.hpp"
42 #include "viennacl/linalg/cg.hpp"
45 
57 template<typename NumericT>
58 class MyOperator
59 {
60 public:
61  MyOperator(std::size_t N) : N_(N) {}
62 
63  // Dispatcher for y = Ax
65  {
66 #if defined(VIENNACL_WITH_CUDA)
68  apply_cuda(x, y);
69 #endif
70 
71 #if defined(VIENNACL_WITH_OPENCL)
73  apply_opencl(x, y);
74 #endif
75 
77  apply_host(x, y);
78  }
79 
80  std::size_t size1() const { return N_ * N_; }
81 
82 private:
83 
84 #if defined(VIENNACL_WITH_CUDA)
85  void apply_cuda(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;
86 #endif
87 
88 #if defined(VIENNACL_WITH_OPENCL)
89  void apply_opencl(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;
90 #endif
91 
92  void apply_host(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const;
93 
94  std::size_t N_;
95 };
96 
97 
103 int main()
104 {
105  typedef float ScalarType; // feel free to change to double (and change OpenCL kernel argument types accordingly)
106 
107  std::size_t N = 10;
109  MyOperator<ScalarType> op(N);
110 
116 
121  std::cout.precision(3);
122  std::cout << std::fixed;
123  std::cout << "Result value map: " << std::endl;
124  std::cout << std::endl << "^ y " << std::endl;
125  for (std::size_t i=0; i<N; ++i)
126  {
127  std::cout << "| ";
128  for (std::size_t j=0; j<N; ++j)
129  std::cout << result[i * N + j] << " ";
130  std::cout << std::endl;
131  }
132  std::cout << "*---------------------------------------------> x" << std::endl;
133 
137  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
138 
139  return EXIT_SUCCESS;
140 }
141 
142 
143 
144 
155 template<typename NumericT>
156 void MyOperator<NumericT>::apply_host(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
157 {
158  NumericT const * values_x = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(x.handle());
159  NumericT * values_y = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(y.handle());
160 
161  NumericT dx = NumericT(1) / NumericT(N_ + 1);
162  NumericT dy = NumericT(1) / NumericT(N_ + 1);
163 
172  // feel free to use
173  // #pragma omp parallel for
174  // here
175  for (std::size_t i=0; i<N_; ++i)
176  for (std::size_t j=0; j<N_; ++j)
177  {
178  NumericT value_right = (j < N_ - 1) ? values_x[ i *N_ + j + 1] : 0;
179  NumericT value_left = (j > 0 ) ? values_x[ i *N_ + j - 1] : 0;
180  NumericT value_top = (i < N_ - 1) ? values_x[(i+1)*N_ + j ] : 0;
181  NumericT value_bottom = (i > 0 ) ? values_x[(i-1)*N_ + j ] : 0;
182  NumericT value_center = values_x[i*N_ + j];
183 
184  values_y[i*N_ + j] = ((value_right - value_center) / dx - (value_center - value_left) / dx) / dx
185  + ((value_top - value_center) / dy - (value_center - value_bottom) / dy) / dy;
186  }
187 }
188 
189 
201 #if defined(VIENNACL_WITH_CUDA)
202 template<typename NumericT>
203 __global__ void apply_cuda_kernel(NumericT const * values_x,
204  NumericT * values_y,
205  std::size_t N)
206 {
207  NumericT dx = NumericT(1) / (N + 1);
208  NumericT dy = NumericT(1) / (N + 1);
209 
210  for (std::size_t i = blockIdx.x; i < N; i += gridDim.x)
211  for (std::size_t j = threadIdx.x; j < N; j += blockDim.x)
212  {
213  NumericT value_right = (j < N - 1) ? values_x[ i *N + j + 1] : 0;
214  NumericT value_left = (j > 0 ) ? values_x[ i *N + j - 1] : 0;
215  NumericT value_top = (i < N - 1) ? values_x[(i+1)*N + j ] : 0;
216  NumericT value_bottom = (i > 0 ) ? values_x[(i-1)*N + j ] : 0;
217  NumericT value_center = values_x[i*N + j];
218 
219  values_y[i*N + j] = ((value_right - value_center) / dx - (value_center - value_left) / dx) / dx
220  + ((value_top - value_center) / dy - (value_center - value_bottom) / dy) / dy;
221  }
222 }
223 #endif
224 
225 
226 #if defined(VIENNACL_WITH_CUDA)
227 template<typename NumericT>
228 void MyOperator<NumericT>::apply_cuda(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
229 {
230  apply_cuda_kernel<<<128, 128>>>(viennacl::cuda_arg(x), viennacl::cuda_arg(y), N_);
231 }
232 #endif
233 
234 
235 
236 
244 #if defined(VIENNACL_WITH_OPENCL)
245 static const char * my_compute_program =
246 "typedef float NumericT; \n"
247 "__kernel void apply_opencl_kernel(__global NumericT const * values_x, \n"
248 " __global NumericT * values_y, \n"
249 " unsigned int N) {\n"
250 
251 " NumericT dx = (NumericT)1 / (N + 1); \n"
252 " NumericT dy = (NumericT)1 / (N + 1); \n"
253 
254 " for (unsigned int i = get_group_id(0); i < N; i += get_num_groups(0)) \n"
255 " for (unsigned int j = get_local_id(0); j < N; j += get_local_size(0)) { \n"
256 
257 " NumericT value_right = (j < N - 1) ? values_x[ i *N + j + 1] : 0; \n"
258 " NumericT value_left = (j > 0 ) ? values_x[ i *N + j - 1] : 0; \n"
259 " NumericT value_top = (i < N - 1) ? values_x[(i+1)*N + j ] : 0; \n"
260 " NumericT value_bottom = (i > 0 ) ? values_x[(i-1)*N + j ] : 0; \n"
261 " NumericT value_center = values_x[i*N + j]; \n"
262 
263 " values_y[i*N + j] = ((value_right - value_center) / dx - (value_center - value_left) / dx) / dx \n"
264 " + ((value_top - value_center) / dy - (value_center - value_bottom) / dy) / dy; \n"
265 " } \n"
266 " } \n";
267 #endif
268 
276 #if defined(VIENNACL_WITH_OPENCL)
277 template<typename NumericT>
278 void MyOperator<NumericT>::apply_opencl(viennacl::vector_base<NumericT> const & x, viennacl::vector_base<NumericT> & y) const
279  {
280  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());
281  static bool first_run = true;
282  if (first_run) {
283  ctx.add_program(my_compute_program, "my_compute_program");
284  first_run = false;
285  }
286  viennacl::ocl::kernel & my_kernel = ctx.get_kernel("my_compute_program", "apply_opencl_kernel");
287 
288  viennacl::ocl::enqueue(my_kernel(x, y, static_cast<cl_uint>(N_)));
289  }
290 #endif
291 
292 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
int main()
Definition: bisect.cpp:91
The stabilized bi-conjugate gradient method is implemented here.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
float NumericT
Definition: bisect.cpp:40
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:368
Implementations of the generalized minimum residual method are in this file.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
The conjugate gradient method is implemented here.
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:218
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
float ScalarType
Definition: fft_1d.cpp:42
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version.
Definition: common.hpp:39
Implementation of the ViennaCL scalar class.
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128