ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
amg_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_AMG_OPERATIONS_HPP
2 #define VIENNACL_LINALG_HOST_BASED_AMG_OPERATIONS_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cstdlib>
26 #include <cmath>
28 
29 #include <map>
30 #include <set>
31 #include <functional>
32 #ifdef VIENNACL_WITH_OPENMP
33 #include <omp.h>
34 #endif
35 
36 namespace viennacl
37 {
38 namespace linalg
39 {
40 namespace host_based
41 {
42 namespace amg
43 {
44 
45 
47 
49 template<typename NumericT>
53 {
54  (void)tag;
55 
56  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
57  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
58 
59  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
60  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
61  unsigned int *influences_values_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_values_.handle());
62 
63 #ifdef VIENNACL_WITH_OPENMP
64  #pragma omp parallel for
65 #endif
66  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
67  {
68  vcl_size_t i = vcl_size_t(i2);
69  influences_row_ptr[i] = A_row_buffer[i];
70  influences_values_ptr[i] = A_row_buffer[i+1] - A_row_buffer[i];
71  }
72  influences_row_ptr[A.size1()] = A_row_buffer[A.size1()];
73 
74 #ifdef VIENNACL_WITH_OPENMP
75  #pragma omp parallel for
76 #endif
77  for (long i=0; i<long(A.nnz()); ++i)
78  influences_id_ptr[i] = A_col_buffer[i];
79 }
80 
81 
83 template<typename NumericT>
87 {
88  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
89  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
90  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
91 
92  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
93  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
94 
95  //
96  // Step 1: Scan influences in order to allocate the necessary memory
97  //
98 #ifdef VIENNACL_WITH_OPENMP
99  #pragma omp parallel for
100 #endif
101  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
102  {
103  vcl_size_t i = vcl_size_t(i2);
104  unsigned int row_start = A_row_buffer[i];
105  unsigned int row_stop = A_row_buffer[i+1];
106  NumericT diag = 0;
107  NumericT largest_positive = 0;
108  NumericT largest_negative = 0;
109  unsigned int num_influences = 0;
110 
111  // obtain diagonal element as well as maximum positive and negative off-diagonal entries
112  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
113  {
114  unsigned int col = A_col_buffer[nnz_index];
115  NumericT value = A_elements[nnz_index];
116 
117  if (col == i)
118  diag = value;
119  else if (value > largest_positive)
120  largest_positive = value;
121  else if (value < largest_negative)
122  largest_negative = value;
123  }
124 
125  if (largest_positive <= 0 && largest_negative >= 0) // no offdiagonal entries
126  {
127  influences_row_ptr[i] = 0;
128  continue;
129  }
130 
131  // Find all points that strongly influence current point (Yang, p.5)
132  //std::cout << "Looking for strongly influencing points for point " << i << std::endl;
133  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
134  {
135  unsigned int col = A_col_buffer[nnz_index];
136 
137  if (i == col)
138  continue;
139 
140  NumericT value = A_elements[nnz_index];
141 
142  if ( (diag > 0 && diag * value <= tag.get_strong_connection_threshold() * diag * largest_negative)
143  || (diag < 0 && diag * value <= tag.get_strong_connection_threshold() * diag * largest_positive))
144  {
145  ++num_influences;
146  }
147  }
148 
149  influences_row_ptr[i] = num_influences;
150  }
151 
152  //
153  // Step 2: Exclusive scan on number of influences to obtain CSR-like datastructure
154  //
155  unsigned int current_entry = 0;
156  for (std::size_t i=0; i<A.size1(); ++i)
157  {
158  unsigned int tmp = influences_row_ptr[i];
159  influences_row_ptr[i] = current_entry;
160  current_entry += tmp;
161  }
162  influences_row_ptr[A.size1()] = current_entry;
163 
164 
165  //
166  // Step 3: Write actual influences
167  //
168 #ifdef VIENNACL_WITH_OPENMP
169  #pragma omp parallel for
170 #endif
171  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
172  {
173  unsigned int i = static_cast<unsigned int>(i2);
174  unsigned int row_start = A_row_buffer[i];
175  unsigned int row_stop = A_row_buffer[i+1];
176  NumericT diag = 0;
177  NumericT largest_positive = 0;
178  NumericT largest_negative = 0;
179 
180  // obtain diagonal element as well as maximum positive and negative off-diagonal entries
181  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
182  {
183  unsigned int col = A_col_buffer[nnz_index];
184  NumericT value = A_elements[nnz_index];
185 
186  if (col == i)
187  diag = value;
188  else if (value > largest_positive)
189  largest_positive = value;
190  else if (value < largest_negative)
191  largest_negative = value;
192  }
193 
194  if (largest_positive <= 0 && largest_negative >= 0) // no offdiagonal entries
195  continue;
196 
197  // Find all points that strongly influence current point (Yang, p.5)
198  //std::cout << "Looking for strongly influencing points for point " << i << std::endl;
199  unsigned int *influences_id_write_ptr = influences_id_ptr + influences_row_ptr[i];
200  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
201  {
202  unsigned int col = A_col_buffer[nnz_index];
203 
204  if (i == col)
205  continue;
206 
207  NumericT value = A_elements[nnz_index];
208 
209  if ( (diag > 0 && diag * value <= tag.get_strong_connection_threshold() * diag * largest_negative)
210  || (diag < 0 && diag * value <= tag.get_strong_connection_threshold() * diag * largest_positive))
211  {
212  //std::cout << " - Adding influence from point " << col << std::endl;
213  *influences_id_write_ptr = col;
214  ++influences_id_write_ptr;
215  }
216  }
217  }
218 
219 }
220 
221 
223 template<typename NumericT>
227 {
228  // TODO: dispatch based on influence tolerance provided
229  amg_influence_trivial(A, amg_context, tag);
230 }
231 
232 
233 
236 {
237  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
238  unsigned int *coarse_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.coarse_id_.handle());
239 
240  unsigned int coarse_id = 0;
241  for (vcl_size_t i=0; i<amg_context.coarse_id_.size(); ++i)
242  {
243  //assert(point_types_ptr[i] != viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED && bool("Logic error in enumerate_coarse_points(): Undecided points detected!"));
244 
246  coarse_id_ptr[i] = coarse_id++;
247  }
248 
249  //std::cout << "Coarse nodes after enumerate_coarse_points(): " << coarse_id << std::endl;
250  amg_context.num_coarse_ = coarse_id;
251 }
252 
253 
254 
255 
257 
258 
261 {
262  amg_id_influence(std::size_t id2, std::size_t influences2) : id(static_cast<unsigned int>(id2)), influences(static_cast<unsigned int>(influences2)) {}
263 
264  unsigned int id;
265  unsigned int influences;
266 };
267 
268 inline bool operator>(amg_id_influence const & a, amg_id_influence const & b)
269 {
270  if (a.influences > b.influences)
271  return true;
272  if (a.influences == b.influences)
273  return a.id > b.id;
274  return false;
275 }
276 
283 template<typename NumericT>
287 {
288  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
289  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
290  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
291  unsigned int *influences_values_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_values_.handle());
292 
293  std::set<amg_id_influence, std::greater<amg_id_influence> > points_by_influences;
294 
295  amg_influence_advanced(A, amg_context, tag);
296 
297  for (std::size_t i=0; i<A.size1(); ++i)
298  points_by_influences.insert(amg_id_influence(i, influences_values_ptr[i]));
299 
300  //std::cout << "Starting coarsening process..." << std::endl;
301 
302  while (!points_by_influences.empty())
303  {
304  amg_id_influence point = *(points_by_influences.begin());
305 
306  // remove point from queue:
307  points_by_influences.erase(points_by_influences.begin());
308 
309  //std::cout << "Working on point " << point.id << std::endl;
310 
311  // point is already coarse or fine point, continue;
313  continue;
314 
315  //std::cout << " Setting point " << point.id << " to a coarse point." << std::endl;
316  // make this a coarse point:
318 
319  // Set strongly influenced points to fine points:
320  unsigned int j_stop = influences_row_ptr[point.id + 1];
321  for (unsigned int j = influences_row_ptr[point.id]; j < j_stop; ++j)
322  {
323  unsigned int influenced_point_id = influences_id_ptr[j];
324 
325  //std::cout << "Checking point " << influenced_point_id << std::endl;
326  if (point_types_ptr[influenced_point_id] != viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED)
327  continue;
328 
329  //std::cout << " Setting point " << influenced_point_id << " to a fine point." << std::endl;
330  point_types_ptr[influenced_point_id] = viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE;
331 
332  // add one to influence measure for all undecided points strongly influencing this fine point.
333  unsigned int k_stop = influences_row_ptr[influenced_point_id + 1];
334  for (unsigned int k = influences_row_ptr[influenced_point_id]; k < k_stop; ++k)
335  {
336  unsigned int influenced_influenced_point_id = influences_id_ptr[k];
337  if (point_types_ptr[influenced_influenced_point_id] == viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED)
338  {
339  // grab and remove from set, increase influence counter, store back:
340  amg_id_influence point_to_find(influenced_influenced_point_id, influences_values_ptr[influenced_influenced_point_id]);
341  points_by_influences.erase(point_to_find);
342 
343  point_to_find.influences += 1;
344  influences_values_ptr[influenced_influenced_point_id] += 1; // for consistency
345 
346  points_by_influences.insert(point_to_find);
347  }
348  } //for
349  } // for
350 
351  } // while
352 
354 }
355 
356 
358 
359 
366 template<typename NumericT>
370 {
371  (void)tag;
372  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
373  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
374  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
375 
376  for (unsigned int i=0; i<static_cast<unsigned int>(A.size1()); ++i)
377  {
378  // check if node has no aggregates next to it (MIS-2)
379  bool is_new_coarse_node = true;
380 
381  // Set strongly influenced points to fine points:
382  unsigned int j_stop = influences_row_ptr[i + 1];
383  for (unsigned int j = influences_row_ptr[i]; j < j_stop; ++j)
384  {
385  unsigned int influenced_point_id = influences_id_ptr[j];
386  if (point_types_ptr[influenced_point_id] != viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED) // either coarse or fine point
387  {
388  is_new_coarse_node = false;
389  break;
390  }
391  }
392 
393  if (is_new_coarse_node)
394  {
395  // make all strongly influenced neighbors fine points:
396  for (unsigned int j = influences_row_ptr[i]; j < j_stop; ++j)
397  {
398  unsigned int influenced_point_id = influences_id_ptr[j];
399  point_types_ptr[influenced_point_id] = viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE;
400  }
401 
402  //std::cout << "Setting new coarse node: " << i << std::endl;
403  // Note: influences may include diagonal element, so it's important to *first* set fine points above before setting the coarse information here
405  }
406  }
407 }
408 
409 
410 
417 template<typename NumericT>
421 {
422  (void)tag;
423  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
424  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
425  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
426 
427  std::vector<unsigned int> random_weights(A.size1());
428  for (std::size_t i=0; i<random_weights.size(); ++i)
429  random_weights[i] = static_cast<unsigned int>(rand()) % static_cast<unsigned int>(A.size1());
430 
431  std::size_t num_threads = 1;
432 #ifdef VIENNACL_WITH_OPENMP
433  num_threads = omp_get_max_threads();
434 #endif
435 
439 
440  unsigned int *work_state_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_state.handle());
441  unsigned int *work_random_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_random.handle());
442  unsigned int *work_index_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_index.handle());
443 
447 
448  unsigned int *work_state2_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_state2.handle());
449  unsigned int *work_random2_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_random2.handle());
450  unsigned int *work_index2_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(work_index2.handle());
451 
452 
453  unsigned int num_undecided = static_cast<unsigned int>(A.size1());
454  unsigned int pmis_iters = 0;
455  while (num_undecided > 0)
456  {
457  ++pmis_iters;
458 
459  //
460  // init temporary work data:
461  //
462 #ifdef VIENNACL_WITH_OPENMP
463  #pragma omp parallel for
464 #endif
465  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
466  {
467  unsigned int i = static_cast<unsigned int>(i2);
468  switch (point_types_ptr[i])
469  {
473  default:
474  throw std::runtime_error("Unexpected state encountered in MIS2 setup for AMG.");
475  }
476 
477  work_random_ptr[i] = random_weights[i];
478  work_index_ptr[i] = i;
479  }
480 
481 
482  //
483  // Propagate maximum tuple twice
484  //
485  for (unsigned int r = 0; r < 2; ++r)
486  {
487  // max operation
488 #ifdef VIENNACL_WITH_OPENMP
489  #pragma omp parallel for
490 #endif
491  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
492  {
493  unsigned int i = static_cast<unsigned int>(i2);
494  // load
495  unsigned int state = work_state_ptr[i];
496  unsigned int random = work_random_ptr[i];
497  unsigned int index = work_index_ptr[i];
498 
499  // max
500  unsigned int j_stop = influences_row_ptr[i + 1];
501  for (unsigned int j = influences_row_ptr[i]; j < j_stop; ++j)
502  {
503  unsigned int influenced_point_id = influences_id_ptr[j];
504 
505  // lexigraphical triple-max (not particularly pretty, but does the job):
506  if (state < work_state_ptr[influenced_point_id])
507  {
508  state = work_state_ptr[influenced_point_id];
509  random = work_random_ptr[influenced_point_id];
510  index = work_index_ptr[influenced_point_id];
511  }
512  else if (state == work_state_ptr[influenced_point_id])
513  {
514  if (random < work_random_ptr[influenced_point_id])
515  {
516  state = work_state_ptr[influenced_point_id];
517  random = work_random_ptr[influenced_point_id];
518  index = work_index_ptr[influenced_point_id];
519  }
520  else if (random == work_random_ptr[influenced_point_id])
521  {
522  if (index < work_index_ptr[influenced_point_id])
523  {
524  state = work_state_ptr[influenced_point_id];
525  random = work_random_ptr[influenced_point_id];
526  index = work_index_ptr[influenced_point_id];
527  }
528  } // max(random)
529  } // max(state)
530  } // for
531 
532  // store
533  work_state2_ptr[i] = state;
534  work_random2_ptr[i] = random;
535  work_index2_ptr[i] = index;
536  }
537 
538  // copy work array
539 #ifdef VIENNACL_WITH_OPENMP
540  #pragma omp parallel for
541 #endif
542  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
543  {
544  unsigned int i = static_cast<unsigned int>(i2);
545  work_state_ptr[i] = work_state2_ptr[i];
546  work_random_ptr[i] = work_random2_ptr[i];
547  work_index_ptr[i] = work_index2_ptr[i];
548  }
549  }
550 
551  //
552  // mark MIS and non-MIS nodes:
553  //
554  std::vector<unsigned int> thread_buffer(num_threads);
555 
556 #ifdef VIENNACL_WITH_OPENMP
557  #pragma omp parallel for
558 #endif
559  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
560  {
561  unsigned int i = static_cast<unsigned int>(i2);
562  unsigned int max_state = work_state_ptr[i];
563  unsigned int max_index = work_index_ptr[i];
564 
566  {
567  if (i == max_index) // make this a MIS node
569  else if (max_state == 2) // mind the mapping of viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE above!
571  else
572 #ifdef VIENNACL_WITH_OPENMP
573  thread_buffer[omp_get_thread_num()] += 1;
574 #else
575  thread_buffer[0] += 1;
576 #endif
577  }
578  }
579 
580  num_undecided = 0;
581  for (std::size_t i=0; i<thread_buffer.size(); ++i)
582  num_undecided += thread_buffer[i];
583  } // while
584 
585  // consistency with sequential MIS: reset state for non-coarse points, so that coarse indices are correctly picked up later
586 #ifdef VIENNACL_WITH_OPENMP
587  #pragma omp parallel for
588 #endif
589  for (long i=0; i<static_cast<long>(A.size1()); ++i)
592 
593 }
594 
595 
596 
603 template<typename NumericT>
607 {
608  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
609  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
610  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
611  unsigned int *coarse_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.coarse_id_.handle());
612 
613  amg_influence_trivial(A, amg_context, tag);
614 
615  //
616  // Stage 1: Build aggregates:
617  //
620 
622 
623  //
624  // Stage 2: Propagate coarse aggregate indices to neighbors:
625  //
626 #ifdef VIENNACL_WITH_OPENMP
627  #pragma omp parallel for
628 #endif
629  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
630  {
631  unsigned int i = static_cast<unsigned int>(i2);
633  {
634  unsigned int coarse_index = coarse_id_ptr[i];
635 
636  unsigned int j_stop = influences_row_ptr[i + 1];
637  for (unsigned int j = influences_row_ptr[i]; j < j_stop; ++j)
638  {
639  unsigned int influenced_point_id = influences_id_ptr[j];
640  coarse_id_ptr[influenced_point_id] = coarse_index; // Set aggregate index for fine point
641 
642  if (influenced_point_id != i) // Note: Any write races between threads are harmless here
643  point_types_ptr[influenced_point_id] = viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_FINE;
644  }
645  }
646  }
647 
648 
649  //
650  // Stage 3: Merge remaining undecided points (merging to first aggregate found when cycling over the hierarchy
651  //
652 #ifdef VIENNACL_WITH_OPENMP
653  #pragma omp parallel for
654 #endif
655  for (long i2=0; i2<static_cast<long>(A.size1()); ++i2)
656  {
657  unsigned int i = static_cast<unsigned int>(i2);
659  {
660  unsigned int j_stop = influences_row_ptr[i + 1];
661  for (unsigned int j = influences_row_ptr[i]; j < j_stop; ++j)
662  {
663  unsigned int influenced_point_id = influences_id_ptr[j];
664  if (point_types_ptr[influenced_point_id] != viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_UNDECIDED) // either coarse or fine point
665  {
666  //std::cout << "Setting fine node " << i << " to be aggregated with node " << *influence_iter << "/" << pointvector.get_coarse_index(*influence_iter) << std::endl;
667  coarse_id_ptr[i] = coarse_id_ptr[influenced_point_id];
668  break;
669  }
670  }
671  }
672  }
673 
674  //
675  // Stage 4: Set undecided points to fine points (coarse ID already set in Stage 3)
676  // Note: Stage 3 and Stage 4 were initially fused, but are now split in order to avoid race conditions (or a fallback to sequential execution).
677  //
678 #ifdef VIENNACL_WITH_OPENMP
679  #pragma omp parallel for
680 #endif
681  for (long i=0; i<static_cast<long>(A.size1()); ++i)
684 
685 }
686 
687 
688 
689 
696 template<typename MatrixT>
697 void amg_coarse(MatrixT & A,
700 {
701  switch (tag.get_coarsening_method())
702  {
706  //default: throw std::runtime_error("not implemented yet");
707  }
708 }
709 
710 
711 
712 
714 
715 
723 template<typename NumericT>
728 {
729  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
730  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
731  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
732 
733  unsigned int *point_types_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.point_types_.handle());
734  unsigned int *influences_row_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_jumper_.handle());
735  unsigned int *influences_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.influence_ids_.handle());
736  unsigned int *coarse_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.coarse_id_.handle());
737 
738  P.resize(A.size1(), amg_context.num_coarse_, false);
739 
740  std::vector<std::map<unsigned int, NumericT> > P_setup(A.size1());
741 
742  // Iterate over all points to build the interpolation matrix row-by-row
743  // Interpolation for coarse points is immediate using '1'.
744  // Interpolation for fine points is set up via corresponding row weights (cf. Yang paper, p. 14)
745 #ifdef VIENNACL_WITH_OPENMP
746  #pragma omp parallel for
747 #endif
748  for (long row2=0; row2<static_cast<long>(A.size1()); ++row2)
749  {
750  unsigned int row = static_cast<unsigned int>(row2);
751  std::map<unsigned int, NumericT> & P_setup_row = P_setup[row];
752  //std::cout << "Row " << row << ": " << std::endl;
754  {
755  //std::cout << " Setting value 1.0 at " << coarse_id_ptr[row] << std::endl;
756  P_setup_row[coarse_id_ptr[row]] = NumericT(1);
757  }
759  {
760  //std::cout << "Building interpolant for fine point " << row << std::endl;
761 
762  NumericT row_sum = 0;
763  NumericT row_coarse_sum = 0;
764  NumericT diag = 0;
765 
766  // Row sum of coefficients (without diagonal) and sum of influencing coarse point coefficients has to be computed
767  unsigned int row_A_start = A_row_buffer[row];
768  unsigned int row_A_end = A_row_buffer[row + 1];
769  unsigned int const *influence_iter = influences_id_ptr + influences_row_ptr[row];
770  unsigned int const *influence_end = influences_id_ptr + influences_row_ptr[row + 1];
771  for (unsigned int index = row_A_start; index < row_A_end; ++index)
772  {
773  unsigned int col = A_col_buffer[index];
774  NumericT value = A_elements[index];
775 
776  if (col == row)
777  {
778  diag = value;
779  continue;
780  }
782  {
783  // Note: One increment is sufficient, because influence_iter traverses an ordered subset of the column indices in this row
784  while (influence_iter != influence_end && *influence_iter < col)
785  ++influence_iter;
786 
787  if (influence_iter != influence_end && *influence_iter == col)
788  row_coarse_sum += value;
789  }
790 
791  row_sum += value;
792  }
793 
794  NumericT temp_res = -row_sum/(row_coarse_sum*diag);
795  //std::cout << "row_sum: " << row_sum << ", row_coarse_sum: " << row_coarse_sum << ", diag: " << diag << std::endl;
796 
797  if (std::fabs(temp_res) > 1e-2 * std::fabs(diag))
798  {
799  // Iterate over all strongly influencing points to build the interpolant
800  influence_iter = influences_id_ptr + influences_row_ptr[row];
801  for (unsigned int index = row_A_start; index < row_A_end; ++index)
802  {
803  unsigned int col = A_col_buffer[index];
805  continue;
806  NumericT value = A_elements[index];
807 
808  // Advance to correct influence metric:
809  while (influence_iter != influence_end && *influence_iter < col)
810  ++influence_iter;
811 
812  if (influence_iter != influence_end && *influence_iter == col)
813  {
814  //std::cout << " Setting entry " << temp_res * value << " at " << coarse_id_ptr[col] << " for point " << col << std::endl;
815  P_setup_row[coarse_id_ptr[col]] = temp_res * value;
816  }
817  }
818  }
819 
820  // TODO truncate interpolation if specified by the user.
821  (void)tag;
822  }
823  else
824  throw std::runtime_error("Logic error in direct interpolation: Point is neither coarse-point nor fine-point!");
825  }
826 
827  // TODO: P_setup can be avoided without sacrificing parallelism.
828  viennacl::tools::sparse_matrix_adapter<NumericT> P_adapter(P_setup, P.size1(), P.size2());
829  viennacl::copy(P_adapter, P);
830 }
831 
832 
840 template<typename NumericT>
845 {
846  (void)tag;
848 
849  NumericT * P_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(P.handle());
850  unsigned int * P_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(P.handle1());
851  unsigned int * P_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(P.handle2());
852 
853  unsigned int *coarse_id_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(amg_context.coarse_id_.handle());
854 
855  // Build interpolation matrix:
856 #ifdef VIENNACL_WITH_OPENMP
857  #pragma omp parallel for
858 #endif
859  for (long row2 = 0; row2 < long(A.size1()); ++row2)
860  {
861  unsigned int row = static_cast<unsigned int>(row2);
862  P_elements[row] = NumericT(1);
863  P_row_buffer[row] = row;
864  P_col_buffer[row] = coarse_id_ptr[row];
865  }
866  P_row_buffer[A.size1()] = static_cast<unsigned int>(A.size1()); // don't forget finalizer
867 
869 }
870 
871 
879 template<typename NumericT>
884 {
885  (void)tag;
887 
888  // form tentative operator:
889  amg_interpol_ag(A, P_tentative, amg_context, tag);
890 
891  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
892  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
893  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
894 
896  unsigned int * Jacobi_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(Jacobi.handle1());
897  unsigned int * Jacobi_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(Jacobi.handle2());
898  NumericT * Jacobi_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(Jacobi.handle());
899 
900 
901  // Build Jacobi matrix:
902 #ifdef VIENNACL_WITH_OPENMP
903  #pragma omp parallel for
904 #endif
905  for (long row2=0; row2<static_cast<long>(A.size1()); ++row2)
906  {
907  unsigned int row = static_cast<unsigned int>(row2);
908  unsigned int row_begin = A_row_buffer[row];
909  unsigned int row_end = A_row_buffer[row+1];
910 
911  Jacobi_row_buffer[row] = row_begin;
912 
913  // Step 1: Extract diagonal:
914  NumericT diag = 0;
915  for (unsigned int j = row_begin; j < row_end; ++j)
916  {
917  if (A_col_buffer[j] == row)
918  {
919  diag = A_elements[j];
920  break;
921  }
922  }
923 
924  // Step 2: Write entries:
925  for (unsigned int j = row_begin; j < row_end; ++j)
926  {
927  unsigned int col_index = A_col_buffer[j];
928  Jacobi_col_buffer[j] = col_index;
929 
930  if (col_index == row)
931  Jacobi_elements[j] = NumericT(1) - NumericT(tag.get_jacobi_weight());
932  else
933  Jacobi_elements[j] = - NumericT(tag.get_jacobi_weight()) * A_elements[j] / diag;
934  }
935  }
936  Jacobi_row_buffer[A.size1()] = static_cast<unsigned int>(Jacobi.nnz()); // don't forget finalizer
937 
938  P = viennacl::linalg::prod(Jacobi, P_tentative);
939 
941 }
942 
943 
951 template<typename MatrixT>
952 void amg_interpol(MatrixT const & A,
953  MatrixT & P,
956 {
957  switch (tag.get_interpolation_method())
958  {
959  case viennacl::linalg::AMG_INTERPOLATION_METHOD_DIRECT: amg_interpol_direct (A, P, amg_context, tag); break;
960  case viennacl::linalg::AMG_INTERPOLATION_METHOD_AGGREGATION: amg_interpol_ag (A, P, amg_context, tag); break;
962  default: throw std::runtime_error("Not implemented yet!");
963  }
964 }
965 
966 
971 template<typename NumericT>
974 {
975  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
976  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
977  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
978 
979  // initialize datastructures for B:
981 
982  NumericT * B_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(B.handle());
983  unsigned int * B_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle1());
984  unsigned int * B_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle2());
985 
986  // prepare uninitialized B_row_buffer:
987  for (std::size_t i = 0; i < B.size1(); ++i)
988  B_row_buffer[i] = 0;
989 
990  //
991  // Stage 1: Compute pattern for B
992  //
993  for (std::size_t row = 0; row < A.size1(); ++row)
994  {
995  unsigned int row_start = A_row_buffer[row];
996  unsigned int row_stop = A_row_buffer[row+1];
997 
998  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
999  B_row_buffer[A_col_buffer[nnz_index]] += 1;
1000  }
1001 
1002  // Bring row-start array in place using inclusive-scan:
1003  unsigned int offset = B_row_buffer[0];
1004  B_row_buffer[0] = 0;
1005  for (std::size_t row = 1; row < B.size1(); ++row)
1006  {
1007  unsigned int tmp = B_row_buffer[row];
1008  B_row_buffer[row] = offset;
1009  offset += tmp;
1010  }
1011  B_row_buffer[B.size1()] = offset;
1012 
1013  //
1014  // Stage 2: Fill with data
1015  //
1016 
1017  std::vector<unsigned int> B_row_offsets(B.size1()); //number of elements already written per row
1018 
1019  for (std::size_t row = 0; row < A.size1(); ++row)
1020  {
1021  //std::cout << "Row " << row << ": ";
1022  unsigned int row_start = A_row_buffer[row];
1023  unsigned int row_stop = A_row_buffer[row+1];
1024 
1025  for (unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
1026  {
1027  unsigned int col_in_A = A_col_buffer[nnz_index];
1028  unsigned int B_nnz_index = B_row_buffer[col_in_A] + B_row_offsets[col_in_A];
1029  B_col_buffer[B_nnz_index] = static_cast<unsigned int>(row);
1030  B_elements[B_nnz_index] = A_elements[nnz_index];
1031  ++B_row_offsets[col_in_A];
1032  //B_temp.at(A_col_buffer[nnz_index])[row] = A_elements[nnz_index];
1033  }
1034  }
1035 
1036  // Step 3: Make datastructure consistent (row blocks!)
1038 }
1039 
1041 template<typename NumericT, unsigned int AlignmentV>
1044 {
1045  NumericT const * A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
1046  unsigned int const * A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
1047  unsigned int const * A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());
1048 
1049  NumericT * B_elements = detail::extract_raw_pointer<NumericT>(B.handle());
1050 
1051 #ifdef VIENNACL_WITH_OPENMP
1052  #pragma omp parallel for
1053 #endif
1054  for (long row = 0; row < static_cast<long>(A.size1()); ++row)
1055  {
1056  unsigned int row_stop = A_row_buffer[row+1];
1057 
1058  for (unsigned int nnz_index = A_row_buffer[row]; nnz_index < row_stop; ++nnz_index)
1059  B_elements[static_cast<unsigned int>(row) * static_cast<unsigned int>(B.internal_size2()) + A_col_buffer[nnz_index]] = A_elements[nnz_index];
1060  }
1061 
1062 }
1063 
1073 template<typename NumericT>
1074 void smooth_jacobi(unsigned int iterations,
1075  compressed_matrix<NumericT> const & A,
1076  vector<NumericT> & x,
1077  vector<NumericT> & x_backup,
1078  vector<NumericT> const & rhs_smooth,
1079  NumericT weight)
1080 {
1081 
1082  NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
1083  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
1084  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
1085  NumericT const * rhs_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(rhs_smooth.handle());
1086 
1087  NumericT * x_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(x.handle());
1088  NumericT const * x_old_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(x_backup.handle());
1089 
1090  for (unsigned int i=0; i<iterations; ++i)
1091  {
1092  x_backup = x;
1093 
1094  #ifdef VIENNACL_WITH_OPENMP
1095  #pragma omp parallel for
1096  #endif
1097  for (long row2 = 0; row2 < static_cast<long>(A.size1()); ++row2)
1098  {
1099  unsigned int row = static_cast<unsigned int>(row2);
1100  unsigned int col_end = A_row_buffer[row+1];
1101 
1102  NumericT sum = NumericT(0);
1103  NumericT diag = NumericT(1);
1104  for (unsigned int index = A_row_buffer[row]; index != col_end; ++index)
1105  {
1106  unsigned int col = A_col_buffer[index];
1107  if (col == row)
1108  diag = A_elements[index];
1109  else
1110  sum += A_elements[index] * x_old_elements[col];
1111  }
1112 
1113  x_elements[row] = weight * (rhs_elements[row] - sum) / diag + (NumericT(1) - weight) * x_old_elements[row];
1114  }
1115  }
1116 }
1117 
1118 } //namespace amg
1119 } //namespace host_based
1120 } //namespace linalg
1121 } //namespace viennacl
1122 
1123 #endif
const vcl_size_t & size2() const
Returns the number of columns.
Helper struct for sequential classical one-pass coarsening.
amg_coarsening_method get_coarsening_method() const
Returns the current coarsening strategy.
Definition: amg_base.hpp:88
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector.
Definition: sum.hpp:45
const vcl_size_t & size1() const
Returns the number of rows.
viennacl::vector_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_row_sum > row_sum(viennacl::matrix_base< NumericT > const &A)
User interface function for computing the sum of all elements of each row of a matrix.
Definition: sum.hpp:77
void amg_coarse_ag(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) coarsening. Partially single-threaded version (VIENNACL_AMG_COARSE_AG) ...
double get_jacobi_weight() const
Returns the Jacobi smoother weight (damping).
Definition: amg_base.hpp:113
void amg_interpol_ag(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) interpolation. Multi-Threaded! (VIENNACL_INTERPOL_AG)
void amg_influence_advanced(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Routine for extracting strongly connected points considering a user-provided threshold value...
void amg_coarse_classic_onepass(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Classical (RS) one-pass coarsening. Single-Threaded! (VIENNACL_AMG_COARSE_CLASSIC_ONEPASS) ...
amg_id_influence(std::size_t id2, std::size_t influences2)
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
float NumericT
Definition: bisect.cpp:40
void amg_coarse_ag_stage1_sequential(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) coarsening, single-threaded version of stage 1.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications.
void amg_transpose(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &B)
Computes B = trans(A).
bool operator>(amg_id_influence const &a, amg_id_influence const &b)
void assign_to_dense(viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::matrix_base< NumericT > &B)
double get_strong_connection_threshold() const
Returns the strong connection threshold parameter.
Definition: amg_base.hpp:105
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
std::size_t vcl_size_t
Definition: forwards.h:75
void enumerate_coarse_points(viennacl::linalg::detail::amg::amg_level_context &amg_context)
Assign IDs to coarse points.
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:244
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:885
void smooth_jacobi(unsigned int iterations, compressed_matrix< NumericT > const &A, vector< NumericT > &x, vector< NumericT > &x_backup, vector< NumericT > const &rhs_smooth, NumericT weight)
Damped Jacobi Smoother (CUDA version)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900
void amg_coarse_ag_stage1_mis2(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
AG (aggregation based) coarsening, multi-threaded version of stage 1 using parallel maximum independe...
void amg_interpol_sa(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Smoothed aggregation interpolation. Multi-Threaded! (VIENNACL_INTERPOL_SA)
A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementatio...
Definition: amg_base.hpp:64
void amg_influence(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Dispatcher for influence processing.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void amg_influence_trivial(compressed_matrix< NumericT > const &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Routine for taking all connections in the matrix as strong.
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void amg_coarse(MatrixT &A, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Entry point and dispatcher for coarsening procedures.
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
viennacl::vector< unsigned int > point_types_
Definition: amg_base.hpp:197
amg_interpolation_method get_interpolation_method() const
Returns the current interpolation method.
Definition: amg_base.hpp:93
Adapts a non-const sparse matrix type made up from std::vector > to basic u...
Definition: adapter.hpp:357
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
Helper classes and functions for the AMG preconditioner. Experimental.
void amg_interpol(MatrixT const &A, MatrixT &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Dispatcher for building the interpolation matrix.
void amg_interpol_direct(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &P, viennacl::linalg::detail::amg::amg_level_context &amg_context, viennacl::linalg::amg_tag &tag)
Direct interpolation. Multi-threaded! (VIENNACL_AMG_INTERPOL_DIRECT)
viennacl::vector< unsigned int > coarse_id_
Definition: amg_base.hpp:198
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix.
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128
viennacl::vector< unsigned int > influence_values_
Definition: amg_base.hpp:196
viennacl::vector< unsigned int > influence_ids_
Definition: amg_base.hpp:195
viennacl::vector< unsigned int > influence_jumper_
Definition: amg_base.hpp:194