ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
spai.cpp

This tutorial shows how to use the sparse approximate inverse (SPAI) preconditioner.

Warning
SPAI is currently only available with the OpenCL backend and is experimental. API-changes may happen any time in the future.

We start with including the necessary headers:

// enable Boost.uBLAS support
#define VIENNACL_WITH_UBLAS
#ifndef NDEBUG
#define BOOST_UBLAS_NDEBUG
#endif
// System headers:
#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
#include <algorithm>
#include <stdio.h>
// ViennaCL headers:
// Boost headers:
#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/io.hpp"
// auxiliary functionality:
#include "vector-io.hpp"

The following helper routine is used to run a solver with the provided preconditioner and to print the resulting residual norm.

template<typename MatrixT, typename VectorT, typename SolverTagT, typename PreconditionerT>
void run_solver(MatrixT const & A, VectorT const & b, SolverTagT const & solver_tag, PreconditionerT const & precond)
{
VectorT result = viennacl::linalg::solve(A, b, solver_tag, precond);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
VectorT residual = viennacl::linalg::prod(A, result);
residual -= b;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(b) << std::endl;
}

The main steps in this tutorial are the following:

int main (int, const char **)
{
typedef float ScalarType;
typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType;
typedef boost::numeric::ublas::vector<ScalarType> VectorType;
typedef viennacl::vector<ScalarType> GPUVectorType;

If you have multiple OpenCL-capable devices in your system, we pick the second device for this tutorial.

#ifdef VIENNACL_WITH_OPENCL
// Optional: Customize OpenCL backend
std::vector<viennacl::ocl::device> const & devices = pf.devices();
// Optional: Set first device to first context:
// Optional: Set second device for second context (use the same device for the second context if only one device available):
if (devices.size() > 1)
else
std::cout << viennacl::ocl::current_device().info() << std::endl;
#else
#endif

Create uBLAS-based sparse matrix and read system matrix from file

MatrixType M;
if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx"))
{
std::cerr<<"ERROR: Could not read matrix file " << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "Size of matrix: " << M.size1() << std::endl;
std::cout << "Avg. Entries per row: " << double(M.nnz()) / static_cast<double>(M.size1()) << std::endl;

Use a constant load vector for simplicity

VectorType rhs(M.size2());
for (std::size_t i=0; i<rhs.size(); ++i)
rhs(i) = ScalarType(1);

Create the ViennaCL matrix and vector and initialize with uBLAS data:

GPUMatrixType gpu_M(M.size1(), M.size2(), ctx);
GPUVectorType gpu_rhs(M.size1(), ctx);
viennacl::copy(M, gpu_M);
viennacl::copy(rhs, gpu_rhs);

Solver Runs

We use a relative tolerance of $ 10^{-10} $ with a maximum of 50 iterations for each use case. Usually more than 50 solver iterations are required for convergence, but this choice ensures shorter execution times and suffices for this tutorial.

viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here

The first reference is to use no preconditioner (CPU and GPU):

std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl;
VectorType result = viennacl::linalg::solve(M, rhs, solver_tag);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
VectorType residual = viennacl::linalg::prod(M, result) - rhs;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;
std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl;
GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result);
gpu_residual -= gpu_rhs;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl;

The second reference is a standard ILUT preconditioner (only CPU):

std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, ilut);

Step 1: SPAI with CPU

std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, spai_cpu);

Step 2: FSPAI with CPU

std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, fspai_cpu);

Step 3: SPAI with GPU

std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu);

Step 4: FSPAI with GPU

std::cout << "--- Test 4: GPU-based FSPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(gpu_M, gpu_rhs, solver_tag, fspai_gpu);

That's it! Print success message and exit.

std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
return EXIT_SUCCESS;
}

Full Example Code

/* =========================================================================
Copyright (c) 2010-2015, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
// enable Boost.uBLAS support
#define VIENNACL_WITH_UBLAS
#ifndef NDEBUG
#define BOOST_UBLAS_NDEBUG
#endif
// System headers:
#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
#include <algorithm>
#include <stdio.h>
// ViennaCL headers:
// Boost headers:
#include "boost/numeric/ublas/vector.hpp"
#include "boost/numeric/ublas/matrix.hpp"
#include "boost/numeric/ublas/io.hpp"
// auxiliary functionality:
#include "vector-io.hpp"
template<typename MatrixT, typename VectorT, typename SolverTagT, typename PreconditionerT>
void run_solver(MatrixT const & A, VectorT const & b, SolverTagT const & solver_tag, PreconditionerT const & precond)
{
VectorT result = viennacl::linalg::solve(A, b, solver_tag, precond);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
VectorT residual = viennacl::linalg::prod(A, result);
residual -= b;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(b) << std::endl;
}
int main (int, const char **)
{
typedef float ScalarType;
typedef boost::numeric::ublas::compressed_matrix<ScalarType> MatrixType;
typedef boost::numeric::ublas::vector<ScalarType> VectorType;
typedef viennacl::vector<ScalarType> GPUVectorType;
#ifdef VIENNACL_WITH_OPENCL
// Optional: Customize OpenCL backend
std::vector<viennacl::ocl::device> const & devices = pf.devices();
// Optional: Set first device to first context:
// Optional: Set second device for second context (use the same device for the second context if only one device available):
if (devices.size() > 1)
else
std::cout << viennacl::ocl::current_device().info() << std::endl;
#else
#endif
MatrixType M;
if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx"))
{
std::cerr<<"ERROR: Could not read matrix file " << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "Size of matrix: " << M.size1() << std::endl;
std::cout << "Avg. Entries per row: " << double(M.nnz()) / static_cast<double>(M.size1()) << std::endl;
VectorType rhs(M.size2());
for (std::size_t i=0; i<rhs.size(); ++i)
rhs(i) = ScalarType(1);
GPUMatrixType gpu_M(M.size1(), M.size2(), ctx);
GPUVectorType gpu_rhs(M.size1(), ctx);
viennacl::copy(M, gpu_M);
viennacl::copy(rhs, gpu_rhs);
viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here
std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl;
VectorType result = viennacl::linalg::solve(M, rhs, solver_tag);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
VectorType residual = viennacl::linalg::prod(M, result) - rhs;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;
std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl;
GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag);
std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result);
gpu_residual -= gpu_rhs;
std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl;
std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, ilut);
std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, spai_cpu);
std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(M, rhs, solver_tag, fspai_cpu);
std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu);
std::cout << "--- Test 4: GPU-based FSPAI ---" << std::endl;
std::cout << " * Preconditioner setup..." << std::endl;
std::cout << " * Iterative solver run..." << std::endl;
run_solver(gpu_M, gpu_rhs, solver_tag, fspai_gpu);
std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
return EXIT_SUCCESS;
}