ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
bandwidth-reduction.cpp

This tutorial shows how the bandwidth of the nonzero pattern of a sparse matrix can be reduced by renumbering the unknowns (i.e. rows and columns). Such a reordering can significantly improve cache reuse for algorithms such as sparse matrix-vector products and may also reduce the iteration required in iterative solvers.

There are two parts: The first part defines a couple of helper function. Feel free to directly go to the second part, which shows the actual tutorial code.

#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <map>
#include <vector>
#include <deque>
#include <cmath>

Part 1: Helper functions

We start by defining a couple of helper functions for sparse matrices represented by C++ STL containers, i.e. std::vector<std::map<T,U> >

Reorder Matrix

Reorders a matrix according to a previously generated node number permutation vector r

inline std::vector< std::map<int, double> > reorder_matrix(std::vector< std::map<int, double> > const & matrix, std::vector<int> const & r)
{
std::vector< std::map<int, double> > matrix2(r.size());
for (std::size_t i = 0; i < r.size(); i++)
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
matrix2[static_cast<std::size_t>(r[i])][r[static_cast<std::size_t>(it->first)]] = it->second;
return matrix2;
}

Bandwidth Calculation

Calculates the bandwidth of a matrix

inline int calc_bw(std::vector< std::map<int, double> > const & matrix)
{
int bw = 0;
for (std::size_t i = 0; i < matrix.size(); i++)
{
int min_index = static_cast<int>(matrix.size());
int max_index = 0;
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
{
if (it->first > max_index)
max_index = it->first;
if (it->first < min_index)
min_index = it->first;
}
if (max_index > min_index) //row isn't empty
bw = std::max(bw, max_index - min_index);
}
return bw;
}

Bandwidth Calculation

Calculate the bandwidth of a reordered matrix

template<typename IndexT>
int calc_reordered_bw(std::vector< std::map<int, double> > const & matrix, std::vector<IndexT> const & r)
{
int bw = 0;
for (std::size_t i = 0; i < r.size(); i++)
{
int min_index = static_cast<int>(matrix.size());
int max_index = 0;
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
{
std::size_t col_idx = static_cast<std::size_t>(it->first);
if (r[col_idx] > max_index)
max_index = r[col_idx];
if (r[col_idx] < min_index)
min_index = r[col_idx];
}
if (max_index > min_index)
bw = std::max(bw, max_index - min_index);
}
return bw;
}

Generate Random Permutation

Generates a random permutation by Knuth shuffle algorithm reference: http://en.wikipedia.org/wiki/Knuth_shuffle (URL taken on July 2nd, 2011)

inline std::vector<int> generate_random_reordering(std::size_t n)
{
std::vector<int> r(n);
int tmp;
for (std::size_t i = 0; i < n; i++)
r[i] = static_cast<int>(i);
for (std::size_t i = 0; i < n - 1; i++)
{
std::size_t j = i + static_cast<std::size_t>((static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * static_cast<double>(n - 1 - i));
if (j != i)
{
tmp = r[i];
r[i] = r[j];
r[j] = tmp;
}
}
return r;
}

Matrix Generation for 3D Meshes

This function generates an incidence matrix from of an underlying three-dimensional mesh l: x dimension m: y dimension n: z dimension tri: true for tetrahedral mesh, false for cubic mesh return value: matrix of size l * m * n

inline std::vector< std::map<int, double> > gen_3d_mesh_matrix(std::size_t l, std::size_t m, std::size_t n, bool tri)
{
std::vector< std::map<int, double> > matrix;
std::size_t s = l * m * n;
std::size_t ind;
std::size_t ind1;
std::size_t ind2;
matrix.resize(s);
for (std::size_t i = 0; i < l; i++)
{
for (std::size_t j = 0; j < m; j++)
{
for (std::size_t k = 0; k < n; k++)
{
ind = i + l * j + l * m * k;
matrix[ind][static_cast<int>(ind)] = 1.0;
if (i > 0)
{
ind2 = ind - 1;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (j > 0)
{
ind2 = ind - l;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (k > 0)
{
ind2 = ind - l * m;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (tri)
{
if (i < l - 1 && j < m - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + 1 + l;
}
else
{
ind1 = ind + 1;
ind2 = ind + l;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
if (i < l - 1 && k < n - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + 1 + l * m;
}
else
{
ind1 = ind + 1;
ind2 = ind + l * m;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
if (j < m - 1 && k < n - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + l + l * m;
}
else
{
ind1 = ind + l;
ind2 = ind + l * m;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
}
}
}
}
return matrix;
}

Part 2: Tutorial code

After all the helper functions have been defined, we are now ready to see how reordering routines are called.

int main(int, char **)
{
srand(42);
std::cout << "-- Generating matrix --" << std::endl;
std::size_t dof_per_dim = 64; //number of grid points per coordinate direction
std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; //total number of unknowns
std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false); //If last parameter is 'true', a tetrahedral grid instead of a hexahedral grid is used.

Shuffle the generated matrix

std::vector<int> r = generate_random_reordering(n);
std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r);

Print some statistics about the generated matrix:

std::cout << " * Unknowns: " << n << std::endl;
std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl;
std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl;

Reorder using Cuthill-McKee algorithm and print new bandwidth:

std::cout << "-- Cuthill-McKee algorithm --" << std::endl;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

Reorder using advanced Cuthill-McKee algorithm and print new bandwidth:

std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl;
double a = 0.0;
std::size_t gmax = 1;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

Reorder using Gibbs-Poole-Stockmeyer algorithm and print new bandwidth:

std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

That's it.

std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
return EXIT_SUCCESS;
}

Full Example Code

/* =========================================================================
Copyright (c) 2010-2015, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <map>
#include <vector>
#include <deque>
#include <cmath>
inline std::vector< std::map<int, double> > reorder_matrix(std::vector< std::map<int, double> > const & matrix, std::vector<int> const & r)
{
std::vector< std::map<int, double> > matrix2(r.size());
for (std::size_t i = 0; i < r.size(); i++)
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
matrix2[static_cast<std::size_t>(r[i])][r[static_cast<std::size_t>(it->first)]] = it->second;
return matrix2;
}
inline int calc_bw(std::vector< std::map<int, double> > const & matrix)
{
int bw = 0;
for (std::size_t i = 0; i < matrix.size(); i++)
{
int min_index = static_cast<int>(matrix.size());
int max_index = 0;
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
{
if (it->first > max_index)
max_index = it->first;
if (it->first < min_index)
min_index = it->first;
}
if (max_index > min_index) //row isn't empty
bw = std::max(bw, max_index - min_index);
}
return bw;
}
template<typename IndexT>
int calc_reordered_bw(std::vector< std::map<int, double> > const & matrix, std::vector<IndexT> const & r)
{
int bw = 0;
for (std::size_t i = 0; i < r.size(); i++)
{
int min_index = static_cast<int>(matrix.size());
int max_index = 0;
for (std::map<int, double>::const_iterator it = matrix[i].begin(); it != matrix[i].end(); it++)
{
std::size_t col_idx = static_cast<std::size_t>(it->first);
if (r[col_idx] > max_index)
max_index = r[col_idx];
if (r[col_idx] < min_index)
min_index = r[col_idx];
}
if (max_index > min_index)
bw = std::max(bw, max_index - min_index);
}
return bw;
}
inline std::vector<int> generate_random_reordering(std::size_t n)
{
std::vector<int> r(n);
int tmp;
for (std::size_t i = 0; i < n; i++)
r[i] = static_cast<int>(i);
for (std::size_t i = 0; i < n - 1; i++)
{
std::size_t j = i + static_cast<std::size_t>((static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * static_cast<double>(n - 1 - i));
if (j != i)
{
tmp = r[i];
r[i] = r[j];
r[j] = tmp;
}
}
return r;
}
inline std::vector< std::map<int, double> > gen_3d_mesh_matrix(std::size_t l, std::size_t m, std::size_t n, bool tri)
{
std::vector< std::map<int, double> > matrix;
std::size_t s = l * m * n;
std::size_t ind;
std::size_t ind1;
std::size_t ind2;
matrix.resize(s);
for (std::size_t i = 0; i < l; i++)
{
for (std::size_t j = 0; j < m; j++)
{
for (std::size_t k = 0; k < n; k++)
{
ind = i + l * j + l * m * k;
matrix[ind][static_cast<int>(ind)] = 1.0;
if (i > 0)
{
ind2 = ind - 1;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (j > 0)
{
ind2 = ind - l;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (k > 0)
{
ind2 = ind - l * m;
matrix[ind][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind)] = 1.0;
}
if (tri)
{
if (i < l - 1 && j < m - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + 1 + l;
}
else
{
ind1 = ind + 1;
ind2 = ind + l;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
if (i < l - 1 && k < n - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + 1 + l * m;
}
else
{
ind1 = ind + 1;
ind2 = ind + l * m;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
if (j < m - 1 && k < n - 1)
{
if ((i + j + k) % 2 == 0)
{
ind1 = ind;
ind2 = ind + l + l * m;
}
else
{
ind1 = ind + l;
ind2 = ind + l * m;
}
matrix[ind1][static_cast<int>(ind2)] = 1.0;
matrix[ind2][static_cast<int>(ind1)] = 1.0;
}
}
}
}
}
return matrix;
}
int main(int, char **)
{
srand(42);
std::cout << "-- Generating matrix --" << std::endl;
std::size_t dof_per_dim = 64; //number of grid points per coordinate direction
std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; //total number of unknowns
std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false); //If last parameter is 'true', a tetrahedral grid instead of a hexahedral grid is used.
std::vector<int> r = generate_random_reordering(n);
std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r);
std::cout << " * Unknowns: " << n << std::endl;
std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl;
std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl;
std::cout << "-- Cuthill-McKee algorithm --" << std::endl;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;
std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl;
double a = 0.0;
std::size_t gmax = 1;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;
std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl;
std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;
std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
return EXIT_SUCCESS;
}