ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
amg_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP
2 #define VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cstdlib>
26 #include <cmath>
27 #include <map>
28 
32 
33 
34 namespace viennacl
35 {
36 namespace linalg
37 {
38 namespace opencl
39 {
40 namespace amg
41 {
42 
43 
45 
47 template<typename NumericT>
51 {
52  (void)tag;
53 
54  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
56  viennacl::ocl::kernel & influence_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_influence_trivial");
57 
58  viennacl::ocl::enqueue(influence_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(),
59  cl_uint(A.size1()),
60  cl_uint(A.nnz()),
61  viennacl::traits::opencl_handle(amg_context.influence_jumper_),
62  viennacl::traits::opencl_handle(amg_context.influence_ids_),
63  viennacl::traits::opencl_handle(amg_context.influence_values_)
64  )
65  );
66 }
67 
68 
70 template<typename NumericT>
74 {
75  (void)A; (void)amg_context; (void)tag;
76  throw std::runtime_error("amg_influence_advanced() not implemented for OpenCL yet");
77 }
78 
79 
81 template<typename NumericT>
85 {
86  // TODO: dispatch based on influence tolerance provided
87  amg_influence_trivial(A, amg_context, tag);
88 }
89 
90 
91 
97 {
100  viennacl::backend::memory_read(amg_context.point_types_.handle(), 0, point_types.raw_size(), point_types.get());
101  viennacl::backend::memory_read(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get());
102 
103  unsigned int coarse_id = 0;
104  for (std::size_t i=0; i<amg_context.point_types_.size(); ++i)
105  {
106  coarse_ids.set(i, coarse_id);
108  ++coarse_id;
109  }
110 
111  amg_context.num_coarse_ = coarse_id;
112 
113  viennacl::backend::memory_write(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get());
114 }
115 
116 
118 
119 
120 
127 template<typename NumericT>
131 {
132  (void)tag;
133  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
135 
137  unsigned int *random_weights_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(random_weights.handle());
138  for (std::size_t i=0; i<random_weights.size(); ++i)
139  random_weights_ptr[i] = static_cast<unsigned int>(rand()) % static_cast<unsigned int>(A.size1());
141 
142  // work vectors:
146 
150 
151  unsigned int num_undecided = static_cast<unsigned int>(A.size1());
153  viennacl::backend::typesafe_host_array<unsigned int> undecided_buffer_host(undecided_buffer.handle(), undecided_buffer.size());
154 
155  viennacl::ocl::kernel & init_workdata_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_init_workdata");
156  viennacl::ocl::kernel & max_neighborhood_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_max_neighborhood");
157  viennacl::ocl::kernel & mark_mis_nodes_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_mark_mis_nodes");
158  viennacl::ocl::kernel & reset_state_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_reset_state");
159 
160  unsigned int pmis_iters = 0;
161  while (num_undecided > 0)
162  {
163  ++pmis_iters;
164 
165  //
166  // init temporary work data:
167  //
168  viennacl::ocl::enqueue(init_workdata_kernel(work_state, work_random, work_index,
169  amg_context.point_types_,
170  random_weights,
171  cl_uint(A.size1())
172  )
173  );
174 
175  //
176  // Propagate maximum tuple twice
177  //
178  for (unsigned int r = 0; r < 2; ++r)
179  {
180  // max operation
181  viennacl::ocl::enqueue(max_neighborhood_kernel(work_state, work_random, work_index,
182  work_state2, work_random2, work_index2,
183  amg_context.influence_jumper_, amg_context.influence_ids_,
184  cl_uint(A.size1())
185  )
186  );
187 
188  // copy work array (can be fused into a single kernel if needed. Previous kernel is in most cases sufficiently heavy)
189  work_state = work_state2;
190  work_random = work_random2;
191  work_index = work_index2;
192  }
193 
194  //
195  // mark MIS and non-MIS nodes:
196  //
197  viennacl::ocl::enqueue(mark_mis_nodes_kernel(work_state, work_index,
198  amg_context.point_types_,
199  undecided_buffer,
200  cl_uint(A.size1())
201  )
202  );
203 
204  // get number of undecided points on host:
205  viennacl::backend::memory_read(undecided_buffer.handle(), 0, undecided_buffer_host.raw_size(), undecided_buffer_host.get());
206  num_undecided = 0;
207  for (std::size_t i=0; i<undecided_buffer.size(); ++i)
208  num_undecided += undecided_buffer_host[i];
209 
210  } //while
211 
212  viennacl::ocl::enqueue(reset_state_kernel(amg_context.point_types_, cl_uint(amg_context.point_types_.size()) ) );
213 }
214 
215 
216 
223 template<typename NumericT>
227 {
228  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
230 
231  amg_influence_trivial(A, amg_context, tag);
232 
233  //
234  // Stage 1: Build aggregates:
235  //
237  amg_coarse_ag_stage1_mis2(A, amg_context, tag);
238  else
239  throw std::runtime_error("Only MIS2 coarsening implemented. Selected coarsening not available with OpenCL backend!");
240 
242 
243  //
244  // Stage 2: Propagate coarse aggregate indices to neighbors:
245  //
246  viennacl::ocl::kernel & propagate_coarse_indices = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_propagate_coarse_indices");
247  viennacl::ocl::enqueue(propagate_coarse_indices(amg_context.point_types_,
248  amg_context.coarse_id_,
249  amg_context.influence_jumper_,
250  amg_context.influence_ids_,
251  cl_uint(A.size1())
252  )
253  );
254 
255  //
256  // Stage 3: Merge remaining undecided points (merging to first aggregate found when cycling over the hierarchy
257  //
258  viennacl::ocl::kernel & merge_undecided = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided");
259  viennacl::ocl::enqueue(merge_undecided(amg_context.point_types_,
260  amg_context.coarse_id_,
261  amg_context.influence_jumper_,
262  amg_context.influence_ids_,
263  cl_uint(A.size1())
264  )
265  );
266 
267  //
268  // Stage 4: Set undecided points to fine points (coarse ID already set in Stage 3)
269  // Note: Stage 3 and Stage 4 were initially fused, but are now split in order to avoid race conditions (or a fallback to sequential execution).
270  //
271  viennacl::ocl::kernel & merge_undecided_2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided_2");
272  viennacl::ocl::enqueue(merge_undecided_2(amg_context.point_types_, cl_uint(A.size1()) ) );
273 
274 }
275 
276 
277 
278 
285 template<typename InternalT1>
286 void amg_coarse(InternalT1 & A,
289 {
290  switch (tag.get_coarsening_method())
291  {
293  default: throw std::runtime_error("not implemented yet");
294  }
295 }
296 
297 
298 
299 
301 
302 
310 template<typename NumericT>
315 {
316  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
318 
319  (void)tag;
321 
322  // build matrix here
324  viennacl::ocl::enqueue(interpolate_ag(P.handle1().opencl_handle(),
325  P.handle2().opencl_handle(),
326  P.handle().opencl_handle(),
327  amg_context.coarse_id_,
328  cl_uint(A.size1())
329  )
330  );
331 
333 }
334 
342 template<typename NumericT>
347 {
348  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
350 
351  (void)tag;
353 
354  // form tentative operator:
355  amg_interpol_ag(A, P_tentative, amg_context, tag);
356 
358 
360  viennacl::ocl::enqueue(interpol_sa(A.handle1().opencl_handle(),
361  A.handle2().opencl_handle(),
362  A.handle().opencl_handle(),
363  cl_uint(A.size1()),
364  cl_uint(A.nnz()),
365  Jacobi.handle1().opencl_handle(),
366  Jacobi.handle2().opencl_handle(),
367  Jacobi.handle().opencl_handle(),
369  )
370  );
371 
372  P = viennacl::linalg::prod(Jacobi, P_tentative);
373 
374  P.generate_row_block_information();
375 }
376 
384 template<typename MatrixT>
385 void amg_interpol(MatrixT const & A,
386  MatrixT & P,
389 {
390  switch (tag.get_interpolation_method())
391  {
392  case viennacl::linalg::AMG_INTERPOLATION_METHOD_AGGREGATION: amg_interpol_ag (A, P, amg_context, tag); break;
394  default: throw std::runtime_error("Not implemented yet!");
395  }
396 }
397 
399 template<typename NumericT, unsigned int AlignmentV>
402 {
403  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
406  "assign_to_dense");
407 
408  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
409  viennacl::traits::opencl_handle(B),
410  cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
411  cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)),
412  cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
414 
415 }
416 
426 template<typename NumericT>
427 void smooth_jacobi(unsigned int iterations,
428  compressed_matrix<NumericT> const & A,
429  vector<NumericT> & x,
430  vector<NumericT> & x_backup,
431  vector<NumericT> const & rhs_smooth,
432  NumericT weight)
433 {
434  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());
437 
438  for (unsigned int i=0; i<iterations; ++i)
439  {
440  x_backup = x;
441 
442  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
443  static_cast<NumericT>(weight),
444  viennacl::traits::opencl_handle(x_backup),
445  viennacl::traits::opencl_handle(x),
446  viennacl::traits::opencl_handle(rhs_smooth),
447  static_cast<cl_uint>(rhs_smooth.size())));
448 
449  }
450 }
451 
452 
453 } //namespace amg
454 } //namespace host_based
455 } //namespace linalg
456 } //namespace viennacl
457 
458 #endif
Main kernel class for generating OpenCL kernels for compressed_matrix.
Definition: amg.hpp:346
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92
void amg_influence(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Dispatcher for influence processing.
amg_coarsening_method get_coarsening_method() const
Returns the current coarsening strategy.
Definition: amg_base.hpp:88
void amg_influence_trivial(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Routine for taking all connections in the matrix as strong.
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
void amg_coarse_ag(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) coarsening. Partially single-threaded version (VIENNACL_AMG_COARSE_AG) ...
const vcl_size_t & size1() const
Returns the number of rows.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:382
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
double get_jacobi_weight() const
Returns the Jacobi smoother weight (damping).
Definition: amg_base.hpp:113
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:390
OpenCL kernel file for operations related to algebraic multigrid.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
void amg_interpol_ag(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) interpolation. Multi-Threaded! (VIENNACL_INTERPOL_SA)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Common implementations shared by OpenCL-based operations.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void assign_to_dense(viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::matrix_base< NumericT > &B)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications.
void enumerate_coarse_points(viennacl::linalg::detail::amg::amg_level_context &amg_context)
Assign IDs to coarse points.
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
Main kernel class for generating OpenCL kernels for compressed_matrix.
void amg_interpol(MatrixT const &A, MatrixT &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Dispatcher for building the interpolation matrix.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
void amg_influence_advanced(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Routine for extracting strongly connected points considering a user-provided threshold value...
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
static void init(viennacl::ocl::context &ctx)
Definition: amg.hpp:353
A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementatio...
Definition: amg_base.hpp:64
void smooth_jacobi(unsigned int iterations, compressed_matrix< NumericT > const &A, vector< NumericT > &x, vector< NumericT > &x_backup, vector< NumericT > const &rhs_smooth, NumericT weight)
Jacobi Smoother (OpenCL version)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
static void init(viennacl::ocl::context &ctx)
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
viennacl::vector< unsigned int > point_types_
Definition: amg_base.hpp:197
void switch_memory_context(viennacl::context new_ctx)
Definition: vector.hpp:1064
amg_interpolation_method get_interpolation_method() const
Returns the current interpolation method.
Definition: amg_base.hpp:93
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
void amg_coarse(InternalT1 &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Calls the right coarsening procedure.
Helper classes and functions for the AMG preconditioner. Experimental.
viennacl::vector< unsigned int > coarse_id_
Definition: amg_base.hpp:198
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128
viennacl::vector< unsigned int > influence_values_
Definition: amg_base.hpp:196
void amg_coarse_ag_stage1_mis2(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) coarsening, single-threaded version of stage 1.
void amg_interpol_sa(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Smoothed aggregation interpolation. (VIENNACL_INTERPOL_SA)
viennacl::vector< unsigned int > influence_ids_
Definition: amg_base.hpp:195
viennacl::vector< unsigned int > influence_jumper_
Definition: amg_base.hpp:194