ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
solver.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 /*
19 *
20 * Benchmark: Iterative solver tests (solver.cpp and solver.cu are identical, the latter being required for compilation using CUDA nvcc)
21 *
22 */
23 
24 
25 #ifndef NDEBUG
26  #define NDEBUG
27 #endif
28 
29 #include <boost/numeric/ublas/matrix_sparse.hpp>
30 #include <boost/numeric/ublas/io.hpp>
31 #include <boost/numeric/ublas/operation_sparse.hpp>
32 
33 #define VIENNACL_WITH_UBLAS 1
34 
35 #include "viennacl/scalar.hpp"
36 #include "viennacl/vector.hpp"
39 #include "viennacl/ell_matrix.hpp"
41 #include "viennacl/hyb_matrix.hpp"
42 #include "viennacl/context.hpp"
43 
44 #include "viennacl/linalg/cg.hpp"
48 
49 #include "viennacl/linalg/ilu.hpp"
53 
55 #include "viennacl/tools/timer.hpp"
56 
57 
58 #include <iostream>
59 #include <vector>
60 
61 
62 using namespace boost::numeric;
63 
64 #define BENCHMARK_RUNS 1
65 
66 
67 inline void printOps(double num_ops, double exec_time)
68 {
69  std::cout << "GFLOPs: " << num_ops / (1000000 * exec_time * 1000) << std::endl;
70 }
71 
72 
73 template<typename ScalarType>
74 ScalarType diff_inf(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
75 {
76  ublas::vector<ScalarType> v2_cpu(v2.size());
77  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
78 
79  for (unsigned int i=0;i<v1.size(); ++i)
80  {
81  if ( std::max( fabs(v2_cpu[i]), fabs(v1[i]) ) > 0 )
82  v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) );
83  else
84  v2_cpu[i] = 0.0;
85  }
86 
87  return norm_inf(v2_cpu);
88 }
89 
90 template<typename ScalarType>
91 ScalarType diff_2(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
92 {
93  ublas::vector<ScalarType> v2_cpu(v2.size());
94  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
95 
96  return norm_2(v1 - v2_cpu) / norm_2(v1);
97 }
98 
99 
100 template<typename MatrixType, typename VectorType, typename SolverTag, typename PrecondTag>
101 void run_solver(MatrixType const & matrix, VectorType const & rhs, VectorType const & ref_result, SolverTag const & solver, PrecondTag const & precond, long ops)
102 {
104  VectorType result(rhs);
105  VectorType residual(rhs);
107 
108  timer.start();
109  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
110  {
111  result = viennacl::linalg::solve(matrix, rhs, solver, precond);
112  }
114  double exec_time = timer.get();
115  std::cout << "Exec. time: " << exec_time << std::endl;
116  std::cout << "Est. "; printOps(static_cast<double>(ops), exec_time / BENCHMARK_RUNS);
117  residual -= viennacl::linalg::prod(matrix, result);
118  std::cout << "Relative residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;
119  std::cout << "Estimated rel. residual: " << solver.error() << std::endl;
120  std::cout << "Iterations: " << solver.iters() << std::endl;
121  result -= ref_result;
122  std::cout << "Relative deviation from result: " << viennacl::linalg::norm_2(result) / viennacl::linalg::norm_2(ref_result) << std::endl;
123 }
124 
125 
126 template<typename ScalarType>
128 {
130  double exec_time;
131 
132  ublas::vector<ScalarType> ublas_vec1;
133  ublas::vector<ScalarType> ublas_vec2;
134  ublas::vector<ScalarType> ublas_result;
135  unsigned int solver_iters = 100;
136  unsigned int solver_krylov_dim = 20;
137  double solver_tolerance = 1e-6;
138 
139  ublas::compressed_matrix<ScalarType> ublas_matrix;
140  if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx"))
141  {
142  std::cout << "Error reading Matrix file" << std::endl;
143  return EXIT_FAILURE;
144  }
145  std::cout << "done reading matrix" << std::endl;
146 
147  ublas_result = ublas::scalar_vector<ScalarType>(ublas_matrix.size1(), ScalarType(1.0));
148  ublas_vec1 = ublas::prod(ublas_matrix, ublas_result);
149  ublas_vec2 = ublas_vec1;
150 
151  viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
152  viennacl::coordinate_matrix<ScalarType> vcl_coordinate_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
153  viennacl::ell_matrix<ScalarType> vcl_ell_matrix(ctx);
154  viennacl::sliced_ell_matrix<ScalarType> vcl_sliced_ell_matrix(ctx);
155  viennacl::hyb_matrix<ScalarType> vcl_hyb_matrix(ctx);
156 
157  viennacl::vector<ScalarType> vcl_vec1(ublas_vec1.size(), ctx);
158  viennacl::vector<ScalarType> vcl_vec2(ublas_vec1.size(), ctx);
159  viennacl::vector<ScalarType> vcl_result(ublas_vec1.size(), ctx);
160 
161 
162  //cpu to gpu:
163  viennacl::copy(ublas_matrix, vcl_compressed_matrix);
164  viennacl::copy(ublas_matrix, vcl_coordinate_matrix);
165  viennacl::copy(ublas_matrix, vcl_ell_matrix);
166  viennacl::copy(ublas_matrix, vcl_sliced_ell_matrix);
167  viennacl::copy(ublas_matrix, vcl_hyb_matrix);
168  viennacl::copy(ublas_vec1, vcl_vec1);
169  viennacl::copy(ublas_vec2, vcl_vec2);
170  viennacl::copy(ublas_result, vcl_result);
171 
172 
173  std::cout << "------- Jacobi preconditioner ----------" << std::endl;
177 
178  std::cout << "------- Row-Scaling preconditioner ----------" << std::endl;
182 
186  std::cout << "------- ICHOL0 on CPU (ublas) ----------" << std::endl;
187 
188  timer.start();
190  exec_time = timer.get();
191  std::cout << "Setup time: " << exec_time << std::endl;
192 
193  timer.start();
194  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
195  ublas_ichol0.apply(ublas_vec1);
196  exec_time = timer.get();
197  std::cout << "ublas time: " << exec_time << std::endl;
198 
199  std::cout << "------- ICHOL0 with ViennaCL ----------" << std::endl;
200 
201  timer.start();
203  exec_time = timer.get();
204  std::cout << "Setup time: " << exec_time << std::endl;
205 
207  timer.start();
208  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
209  vcl_ichol0.apply(vcl_vec1);
211  exec_time = timer.get();
212  std::cout << "ViennaCL time: " << exec_time << std::endl;
213 
214  std::cout << "------- Chow-Patel parallel ICC with ViennaCL ----------" << std::endl;
215 
216  timer.start();
219  std::cout << "Setup time: " << timer.get() << std::endl;
220 
221  timer.start();
222  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
223  vcl_chow_patel_icc.apply(vcl_vec1);
225  std::cout << "ViennaCL Chow-Patel-ILU substitution time: " << timer.get() << std::endl;
226 
227 
231  std::cout << "------- ILU0 on with ublas ----------" << std::endl;
232 
233  timer.start();
235  exec_time = timer.get();
236  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
237  timer.start();
238  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
239  ublas_ilu0.apply(ublas_vec1);
240  exec_time = timer.get();
241  std::cout << "ublas ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
242 
243  std::cout << "------- ILU0 with ViennaCL ----------" << std::endl;
244 
245  timer.start();
247  exec_time = timer.get();
248  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
249 
251  timer.start();
252  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
253  vcl_ilu0.apply(vcl_vec1);
255  exec_time = timer.get();
256  std::cout << "ViennaCL ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
257 
258  timer.start();
259  viennacl::linalg::ilu0_tag ilu0_with_level_scheduling; ilu0_with_level_scheduling.use_level_scheduling(true);
260  viennacl::linalg::ilu0_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilu0_level_scheduling(vcl_compressed_matrix, ilu0_with_level_scheduling);
261  exec_time = timer.get();
262  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
263 
265  timer.start();
266  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
267  vcl_ilu0_level_scheduling.apply(vcl_vec1);
269  exec_time = timer.get();
270  std::cout << "ViennaCL ILU0 substitution time (with level scheduling): " << exec_time << std::endl;
271 
272 
273 
275 
276  std::cout << "------- Block-ILU0 with ublas ----------" << std::endl;
277 
278  ublas_vec1 = ublas_vec2;
279  viennacl::copy(ublas_vec1, vcl_vec1);
280 
281  timer.start();
283  viennacl::linalg::ilu0_tag> ublas_block_ilu0(ublas_matrix, viennacl::linalg::ilu0_tag());
284  exec_time = timer.get();
285  std::cout << "Setup time: " << exec_time << std::endl;
286 
287  timer.start();
288  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
289  ublas_block_ilu0.apply(ublas_vec1);
290  exec_time = timer.get();
291  std::cout << "ublas time: " << exec_time << std::endl;
292 
293  std::cout << "------- Block-ILU0 with ViennaCL ----------" << std::endl;
294 
295  timer.start();
297  viennacl::linalg::ilu0_tag> vcl_block_ilu0(vcl_compressed_matrix, viennacl::linalg::ilu0_tag());
298  exec_time = timer.get();
299  std::cout << "Setup time: " << exec_time << std::endl;
300 
301  //vcl_block_ilu0.apply(vcl_vec1); //warm-up
303  timer.start();
304  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
305  vcl_block_ilu0.apply(vcl_vec1);
307  exec_time = timer.get();
308  std::cout << "ViennaCL time: " << exec_time << std::endl;
309 
311 
312  std::cout << "------- ILUT with ublas ----------" << std::endl;
313 
314  ublas_vec1 = ublas_vec2;
315  viennacl::copy(ublas_vec1, vcl_vec1);
316 
317  timer.start();
319  exec_time = timer.get();
320  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
321  timer.start();
322  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
323  ublas_ilut.apply(ublas_vec1);
324  exec_time = timer.get();
325  std::cout << "ublas ILUT substitution time (no level scheduling): " << exec_time << std::endl;
326 
327 
328  std::cout << "------- ILUT with ViennaCL ----------" << std::endl;
329 
330  timer.start();
332  exec_time = timer.get();
333  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
334 
336  timer.start();
337  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
338  vcl_ilut.apply(vcl_vec1);
340  exec_time = timer.get();
341  std::cout << "ViennaCL ILUT substitution time (no level scheduling): " << exec_time << std::endl;
342 
343  timer.start();
344  viennacl::linalg::ilut_tag ilut_with_level_scheduling; ilut_with_level_scheduling.use_level_scheduling(true);
345  viennacl::linalg::ilut_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilut_level_scheduling(vcl_compressed_matrix, ilut_with_level_scheduling);
346  exec_time = timer.get();
347  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
348 
350  timer.start();
351  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
352  vcl_ilut_level_scheduling.apply(vcl_vec1);
354  exec_time = timer.get();
355  std::cout << "ViennaCL ILUT substitution time (with level scheduling): " << exec_time << std::endl;
356 
357 
359 
360  std::cout << "------- Block-ILUT with ublas ----------" << std::endl;
361 
362  ublas_vec1 = ublas_vec2;
363  viennacl::copy(ublas_vec1, vcl_vec1);
364 
365  timer.start();
367  viennacl::linalg::ilut_tag> ublas_block_ilut(ublas_matrix, viennacl::linalg::ilut_tag());
368  exec_time = timer.get();
369  std::cout << "Setup time: " << exec_time << std::endl;
370 
371  //ublas_block_ilut.apply(ublas_vec1);
372  timer.start();
373  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
374  ublas_block_ilut.apply(ublas_vec1);
375  exec_time = timer.get();
376  std::cout << "ublas time: " << exec_time << std::endl;
377 
378  std::cout << "------- Block-ILUT with ViennaCL ----------" << std::endl;
379 
380  timer.start();
382  viennacl::linalg::ilut_tag> vcl_block_ilut(vcl_compressed_matrix, viennacl::linalg::ilut_tag());
383  exec_time = timer.get();
384  std::cout << "Setup time: " << exec_time << std::endl;
385 
386  //vcl_block_ilut.apply(vcl_vec1); //warm-up
388  timer.start();
389  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
390  vcl_block_ilut.apply(vcl_vec1);
392  exec_time = timer.get();
393  std::cout << "ViennaCL time: " << exec_time << std::endl;
394 
395  std::cout << "------- Chow-Patel parallel ILU with ViennaCL ----------" << std::endl;
396 
397  timer.start();
400  std::cout << "Setup time: " << timer.get() << std::endl;
401 
402  timer.start();
403  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
404  vcl_chow_patel_ilu.apply(vcl_vec1);
406  std::cout << "ViennaCL Chow-Patel-ILU substitution time: " << timer.get() << std::endl;
407 
411  long cg_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + 6 * ublas_vec2.size()));
412 
413  viennacl::linalg::cg_tag cg_solver(solver_tolerance, solver_iters);
414 
415  std::cout << "------- CG solver (no preconditioner) using ublas ----------" << std::endl;
416  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
417 
418  std::cout << "------- CG solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
419  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
420 
421  bool is_double = (sizeof(ScalarType) == sizeof(double));
422  if (is_double)
423  {
424  std::cout << "------- CG solver, mixed precision (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
425  viennacl::linalg::mixed_precision_cg_tag mixed_precision_cg_solver(solver_tolerance, solver_iters);
426 
427  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, mixed_precision_cg_solver, viennacl::linalg::no_precond(), cg_ops);
428  }
429 
430  std::cout << "------- CG solver (no preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
431  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
432 
433  std::cout << "------- CG solver (no preconditioner) via ViennaCL, ell_matrix ----------" << std::endl;
434  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
435 
436  std::cout << "------- CG solver (no preconditioner) via ViennaCL, sliced_ell_matrix ----------" << std::endl;
437  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
438 
439  std::cout << "------- CG solver (no preconditioner) via ViennaCL, hyb_matrix ----------" << std::endl;
440  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
441 
442  std::cout << "------- CG solver (ICHOL0 preconditioner) using ublas ----------" << std::endl;
443  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ichol0, cg_ops);
444 
445  std::cout << "------- CG solver (ICHOL0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
446  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ichol0, cg_ops);
447 
448  std::cout << "------- CG solver (Chow-Patel ICHOL0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
449  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_chow_patel_icc, cg_ops);
450 
451  std::cout << "------- CG solver (ILU0 preconditioner) using ublas ----------" << std::endl;
452  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilu0, cg_ops);
453 
454  std::cout << "------- CG solver (ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
455  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilu0, cg_ops);
456 
457 
458  std::cout << "------- CG solver (Block-ILU0 preconditioner) using ublas ----------" << std::endl;
459  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilu0, cg_ops);
460 
461  std::cout << "------- CG solver (Block-ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
462  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilu0, cg_ops);
463 
464  std::cout << "------- CG solver (ILUT preconditioner) using ublas ----------" << std::endl;
465  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilut, cg_ops);
466 
467  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
468  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
469 
470  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
471  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
472 
473  std::cout << "------- CG solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
474  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilut, cg_ops);
475 
476  std::cout << "------- CG solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
477  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilut, cg_ops);
478 
479  std::cout << "------- CG solver (Jacobi preconditioner) using ublas ----------" << std::endl;
480  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_jacobi, cg_ops);
481 
482  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
483  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_csr, cg_ops);
484 
485  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
486  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_coo, cg_ops);
487 
488 
489  std::cout << "------- CG solver (row scaling preconditioner) using ublas ----------" << std::endl;
490  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_row_scaling, cg_ops);
491 
492  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
493  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_csr, cg_ops);
494 
495  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
496  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_coo, cg_ops);
497 
501 
502  long bicgstab_ops = static_cast<long>(solver_iters * (2 * ublas_matrix.nnz() + 13 * ublas_vec2.size()));
503 
504  viennacl::linalg::bicgstab_tag bicgstab_solver(solver_tolerance, solver_iters);
505 
506  std::cout << "------- BiCGStab solver (no preconditioner) using ublas ----------" << std::endl;
507  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
508 
509  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
510  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
511 
512  std::cout << "------- BiCGStab solver (ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
513  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilu0, bicgstab_ops);
514 
515  std::cout << "------- BiCGStab solver (Chow-Patel-ILU preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
516  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_chow_patel_ilu, bicgstab_ops);
517 
518  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
519  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
520 
521  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, ell_matrix ----------" << std::endl;
522  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
523 
524  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, sliced_ell_matrix ----------" << std::endl;
525  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
526 
527  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, hyb_matrix ----------" << std::endl;
528  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
529 
530  std::cout << "------- BiCGStab solver (ILUT preconditioner) using ublas ----------" << std::endl;
531  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_ilut, bicgstab_ops);
532 
533  std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
534  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
535 
536  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
537  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_block_ilut, bicgstab_ops);
538 
539 #ifdef VIENNACL_WITH_OPENCL
540  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
541  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_block_ilut, bicgstab_ops);
542 #endif
543 
544 // std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
545 // run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
546 
547  std::cout << "------- BiCGStab solver (Jacobi preconditioner) using ublas ----------" << std::endl;
548  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_jacobi, bicgstab_ops);
549 
550  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
551  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_csr, bicgstab_ops);
552 
553  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
554  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_coo, bicgstab_ops);
555 
556 
557  std::cout << "------- BiCGStab solver (row scaling preconditioner) using ublas ----------" << std::endl;
558  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_row_scaling, bicgstab_ops);
559 
560  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
561  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_csr, bicgstab_ops);
562 
563  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
564  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_coo, bicgstab_ops);
565 
566 
570 
571  long gmres_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + (solver_iters * 2 + 7) * ublas_vec2.size()));
572 
573  viennacl::linalg::gmres_tag gmres_solver(solver_tolerance, solver_iters, solver_krylov_dim);
574 
575  std::cout << "------- GMRES solver (no preconditioner) using ublas ----------" << std::endl;
576  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
577 
578  std::cout << "------- GMRES solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
579  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
580 
581  std::cout << "------- GMRES solver (no preconditioner) on GPU, coordinate_matrix ----------" << std::endl;
582  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
583 
584  std::cout << "------- GMRES solver (no preconditioner) on GPU, ell_matrix ----------" << std::endl;
585  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
586 
587  std::cout << "------- GMRES solver (no preconditioner) on GPU, sliced_ell_matrix ----------" << std::endl;
588  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
589 
590  std::cout << "------- GMRES solver (no preconditioner) on GPU, hyb_matrix ----------" << std::endl;
591  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
592 
593  std::cout << "------- GMRES solver (ILUT preconditioner) using ublas ----------" << std::endl;
594  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_ilut, gmres_ops);
595 
596  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
597  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
598 
599  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
600  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
601 
602 
603  std::cout << "------- GMRES solver (Jacobi preconditioner) using ublas ----------" << std::endl;
604  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_jacobi, gmres_ops);
605 
606  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
607  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_csr, gmres_ops);
608 
609  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
610  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_coo, gmres_ops);
611 
612 
613  std::cout << "------- GMRES solver (row scaling preconditioner) using ublas ----------" << std::endl;
614  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_row_scaling, gmres_ops);
615 
616  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
617  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_csr, gmres_ops);
618 
619  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
620  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_coo, gmres_ops);
621 
622  return EXIT_SUCCESS;
623 }
624 
625 int main()
626 {
627  std::cout << std::endl;
628  std::cout << "----------------------------------------------" << std::endl;
629  std::cout << " Device Info" << std::endl;
630  std::cout << "----------------------------------------------" << std::endl;
631 
632 #ifdef VIENNACL_WITH_OPENCL
634  std::vector<viennacl::ocl::device> const & devices = pf.devices();
635 
636  // Set first device to first context:
637  viennacl::ocl::setup_context(0, devices[0]);
638 
639  // Set second device for second context (use the same device for the second context if only one device available):
640  if (devices.size() > 1)
641  viennacl::ocl::setup_context(1, devices[1]);
642  else
643  viennacl::ocl::setup_context(1, devices[0]);
644 
645  std::cout << viennacl::ocl::current_device().info() << std::endl;
647 #else
648  viennacl::context ctx;
649 #endif
650 
651  std::cout << "---------------------------------------------------------------------------" << std::endl;
652  std::cout << "---------------------------------------------------------------------------" << std::endl;
653  std::cout << " Benchmark for Execution Times of Iterative Solvers provided with ViennaCL " << std::endl;
654  std::cout << "---------------------------------------------------------------------------" << std::endl;
655  std::cout << " Note that the purpose of this benchmark is not to run solvers until" << std::endl;
656  std::cout << " convergence. Instead, only the execution times of a few iterations are" << std::endl;
657  std::cout << " recorded. Residual errors are only printed for information." << std::endl << std::endl;
658 
659 
660  std::cout << std::endl;
661  std::cout << "----------------------------------------------" << std::endl;
662  std::cout << "----------------------------------------------" << std::endl;
663  std::cout << "## Benchmark :: Solver" << std::endl;
664  std::cout << "----------------------------------------------" << std::endl;
665  std::cout << std::endl;
666  std::cout << " -------------------------------" << std::endl;
667  std::cout << " # benchmarking single-precision" << std::endl;
668  std::cout << " -------------------------------" << std::endl;
669  run_benchmark<float>(ctx);
670 #ifdef VIENNACL_WITH_OPENCL
672 #endif
673  {
674  std::cout << std::endl;
675  std::cout << " -------------------------------" << std::endl;
676  std::cout << " # benchmarking double-precision" << std::endl;
677  std::cout << " -------------------------------" << std::endl;
678  run_benchmark<double>(ctx);
679  }
680  return 0;
681 }
682 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:406
Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows).
Definition: timer.hpp:90
A tag for incomplete LU and incomplete Cholesky factorization with static pattern (Parallel-ILU0...
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:96
A reader and writer for the matrix market format is implemented here.
std::vector< platform > get_platforms()
Definition: platform.hpp:124
ILU0 preconditioner class, can be supplied to solve()-routines.
Definition: ilu0.hpp:146
Incomplete Cholesky preconditioner class with static pattern (ICHOL0), can be supplied to solve()-rou...
Definition: ichol.hpp:127
int run_benchmark(viennacl::context ctx)
Definition: solver.cpp:127
A tag for incomplete Cholesky factorization with static pattern (ILU0)
Definition: ichol.hpp:43
Jacobi preconditioner class, can be supplied to solve()-routines. Generic version for non-ViennaCL ma...
Wrapper class for an OpenCL platform.
Definition: platform.hpp:45
A tag for incomplete LU factorization with static pattern (ILU0)
Definition: ilu0.hpp:58
std::vector< device > devices(cl_device_type dtype=CL_DEVICE_TYPE_DEFAULT)
Returns the available devices of the supplied device type.
Definition: platform.hpp:91
The stabilized bi-conjugate gradient method is implemented here.
ScalarType diff_2(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:91
Implementations of incomplete Cholesky factorization preconditioners with static nonzero pattern...
ScalarType diff_inf(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:74
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
bool use_level_scheduling() const
Definition: ilut.hpp:76
A tag for a jacobi preconditioner.
Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines.
A block ILU preconditioner class, can be supplied to solve()-routines.
Definition: block_ilu.hpp:132
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
Implementation of a simple Jacobi preconditioner.
Jacobi-type preconditioner class, can be supplied to solve()-routines. This is a diagonal preconditio...
Definition: row_scaling.hpp:87
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:49
Implementation of the coordinate_matrix class.
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
Definition: bicgstab.hpp:496
Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
void apply(VectorT &vec) const
Definition: ilut.hpp:366
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
Implementations of the generalized minimum residual method are in this file.
#define BENCHMARK_RUNS
Definition: solver.cpp:64
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:841
A tag class representing the use of no preconditioner.
Definition: forwards.h:873
Implementations of incomplete factorization preconditioners. Convenience header file.
A tag for incomplete LU factorization with threshold (ILUT)
Definition: ilut.hpp:45
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:403
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
A tag for a row scaling preconditioner which merely normalizes the equation system such that each row...
Definition: row_scaling.hpp:40
void run_solver(MatrixType const &matrix, VectorType const &rhs, VectorType const &ref_result, SolverTag const &solver, PrecondTag const &precond, long ops)
Definition: solver.cpp:101
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
ILUT preconditioner class, can be supplied to solve()-routines.
Definition: ilut.hpp:352
int main()
Definition: solver.cpp:625
The conjugate gradient method is implemented here.
void printOps(double num_ops, double exec_time)
Definition: solver.cpp:67
void apply(VectorT &vec) const
Definition: ilu0.hpp:160
Implementation of the ell_matrix class.
void apply(VectorT &vec) const
Definition: block_ilu.hpp:174
A row normalization preconditioner is implemented here.
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
void apply(VectorT &vec) const
Definition: ichol.hpp:141
viennacl::vector< int > v2
bool use_level_scheduling() const
Definition: ilu0.hpp:63
void prod(std::vector< std::map< IndexT, NumericT > > const &stl_A, std::vector< std::map< IndexT, NumericT > > const &stl_B, std::vector< std::map< IndexT, NumericT > > &stl_C)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
float ScalarType
Definition: fft_1d.cpp:42
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48
A sparse square matrix in compressed sparse rows format.
double get() const
Definition: timer.hpp:104
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
Definition: bicgstab.hpp:47
long read_matrix_market_file(MatrixT &mat, const char *file, long index_base=1)
Reads a sparse matrix from a file (MatrixMarket format)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:848
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.
Definition: backend.hpp:225
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Implementation of the ViennaCL scalar class.
void setup_context(long i, std::vector< cl_device_id > const &devices)
Convenience function for setting devices for a context.
Definition: backend.hpp:231
The conjugate gradient method using mixed precision is implemented here. Experimental.
Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...