doc/html/sliced__ell__matrix_8hpp_source.html

 #ifndef VIENNACL_SLICED_ELL_MATRIX_HPP_

 #define VIENNACL_SLICED_ELL_MATRIX_HPP_


 /* =========================================================================

    Copyright (c) 2010-2015, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include "viennacl/forwards.h"

 #include "viennacl/vector.hpp"


 #include "viennacl/tools/tools.hpp"


 #include "viennacl/linalg/sparse_matrix_operations.hpp"


 namespace viennacl

 {

 template<typename ScalarT, typename IndexT /* see forwards.h = unsigned int */>

 class sliced_ell_matrix

 {

 public:

   typedef viennacl::backend::mem_handle                                                           handle_type;

   typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<ScalarT>::ResultType>   value_type;

   typedef vcl_size_t                                                                              size_type;


   explicit sliced_ell_matrix() : rows_(0), cols_(0), rows_per_block_(0) {}


   sliced_ell_matrix(size_type num_rows,

                     size_type num_cols,

                     size_type num_rows_per_block_ = 0)

     : rows_(num_rows),

       cols_(num_cols),

       rows_per_block_(num_rows_per_block_) {}


   explicit sliced_ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), rows_per_block_(0)

   {

     columns_per_block_.switch_active_handle_id(ctx.memory_type());

     column_indices_.switch_active_handle_id(ctx.memory_type());

     block_start_.switch_active_handle_id(ctx.memory_type());

     elements_.switch_active_handle_id(ctx.memory_type());


 #ifdef VIENNACL_WITH_OPENCL

     if (ctx.memory_type() == OPENCL_MEMORY)

     {

       columns_per_block_.opencl_handle().context(ctx.opencl_context());

       column_indices_.opencl_handle().context(ctx.opencl_context());

       block_start_.opencl_handle().context(ctx.opencl_context());

       elements_.opencl_handle().context(ctx.opencl_context());

     }

 #endif

   }


   void clear()

   {

     viennacl::backend::typesafe_host_array<IndexT> host_columns_per_block_buffer(columns_per_block_, rows_ / rows_per_block_ + 1);

     viennacl::backend::typesafe_host_array<IndexT> host_column_buffer(column_indices_, internal_size1());

     viennacl::backend::typesafe_host_array<IndexT> host_block_start_buffer(block_start_, (rows_ - 1) / rows_per_block_ + 1);

     std::vector<ScalarT> host_elements(1);


     viennacl::backend::memory_create(columns_per_block_, host_columns_per_block_buffer.element_size() * (rows_ / rows_per_block_ + 1), viennacl::traits::context(columns_per_block_), host_columns_per_block_buffer.get());

     viennacl::backend::memory_create(column_indices_,    host_column_buffer.element_size() * internal_size1(),                         viennacl::traits::context(column_indices_),    host_column_buffer.get());

     viennacl::backend::memory_create(block_start_,       host_block_start_buffer.element_size() * ((rows_ - 1) / rows_per_block_ + 1), viennacl::traits::context(block_start_),       host_block_start_buffer.get());

     viennacl::backend::memory_create(elements_,          sizeof(ScalarT) * 1,                                                          viennacl::traits::context(elements_),          &(host_elements[0]));

   }


   vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, rows_per_block_); }

   vcl_size_t internal_size2() const { return cols_; }


   vcl_size_t size1() const { return rows_; }

   vcl_size_t size2() const { return cols_; }


   vcl_size_t rows_per_block() const { return rows_per_block_; }


   //vcl_size_t nnz() const { return rows_ * maxnnz_; }

   //vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); }


   handle_type & handle1()       { return columns_per_block_; }

   const handle_type & handle1() const { return columns_per_block_; }


   handle_type & handle2()       { return column_indices_; }

   const handle_type & handle2() const { return column_indices_; }


   handle_type & handle3()       { return block_start_; }

   const handle_type & handle3() const { return block_start_; }


   handle_type & handle()       { return elements_; }

   const handle_type & handle() const { return elements_; }


 #if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment

   template<typename CPUMatrixT>

   friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix & gpu_matrix );

 #else

   template<typename CPUMatrixT, typename ScalarT2, typename IndexT2>

   friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT2, IndexT2> & gpu_matrix );

 #endif


 private:

   vcl_size_t rows_;

   vcl_size_t cols_;

   vcl_size_t rows_per_block_; //parameter C in the paper by Kreutzer et al.


   handle_type columns_per_block_;

   handle_type column_indices_;

   handle_type block_start_;

   handle_type elements_;

 };


 template<typename CPUMatrixT, typename ScalarT, typename IndexT>

 void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT, IndexT> & gpu_matrix )

 {

   assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );

   assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );


   if (gpu_matrix.rows_per_block() == 0) // not yet initialized by user. Set default: 32 is perfect for NVIDIA GPUs and older AMD GPUs. Still okay for newer AMD GPUs.

     gpu_matrix.rows_per_block_ = 32;


   if (viennacl::traits::size1(cpu_matrix) > 0 && viennacl::traits::size2(cpu_matrix) > 0)

   {

     //determine max capacity for row

     IndexT columns_in_current_block = 0;

     vcl_size_t total_element_buffer_size = 0;

     viennacl::backend::typesafe_host_array<IndexT> columns_in_block_buffer(gpu_matrix.handle1(), (viennacl::traits::size1(cpu_matrix) - 1) / gpu_matrix.rows_per_block() + 1);

     for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)

     {

       vcl_size_t entries_in_row = 0;

       for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)

         ++entries_in_row;


       columns_in_current_block = std::max(columns_in_current_block, static_cast<IndexT>(entries_in_row));


       // check for end of block

       if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)

            || row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1)

       {

         total_element_buffer_size += columns_in_current_block * gpu_matrix.rows_per_block();

         columns_in_block_buffer.set(row_it.index1() / gpu_matrix.rows_per_block(), columns_in_current_block);

         columns_in_current_block = 0;

       }

     }


     //setup GPU matrix

     gpu_matrix.rows_ = cpu_matrix.size1();

     gpu_matrix.cols_ = cpu_matrix.size2();


     viennacl::backend::typesafe_host_array<IndexT> coords(gpu_matrix.handle2(), total_element_buffer_size);

     viennacl::backend::typesafe_host_array<IndexT> block_start(gpu_matrix.handle3(), (viennacl::traits::size1(cpu_matrix) - 1) / gpu_matrix.rows_per_block() + 1);

     std::vector<ScalarT> elements(total_element_buffer_size, 0);


     vcl_size_t block_offset = 0;

     vcl_size_t block_index  = 0;

     vcl_size_t row_in_block = 0;

     for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)

     {

       vcl_size_t entry_in_row = 0;


       for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)

       {

         vcl_size_t buffer_index = block_offset + entry_in_row * gpu_matrix.rows_per_block() + row_in_block;

         coords.set(buffer_index, col_it.index2());

         elements[buffer_index] = *col_it;

         entry_in_row++;

       }


       ++row_in_block;


       // check for end of block:

       if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)

            || row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1)

       {

         block_start.set(block_index, static_cast<IndexT>(block_offset));

         block_offset += columns_in_block_buffer[block_index] * gpu_matrix.rows_per_block();

         ++block_index;

         row_in_block = 0;

       }

     }


     viennacl::backend::memory_create(gpu_matrix.handle1(), columns_in_block_buffer.raw_size(), traits::context(gpu_matrix.handle1()), columns_in_block_buffer.get());

     viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(),                  traits::context(gpu_matrix.handle2()), coords.get());

     viennacl::backend::memory_create(gpu_matrix.handle3(), block_start.raw_size(),             traits::context(gpu_matrix.handle3()), block_start.get());

     viennacl::backend::memory_create(gpu_matrix.handle(),  sizeof(ScalarT) * elements.size(),  traits::context(gpu_matrix.handle()), &(elements[0]));

   }

 }


 template<typename IndexT, typename NumericT, typename IndexT2>

 void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,

           sliced_ell_matrix<NumericT, IndexT2> & gpu_matrix)

 {

   tools::const_sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size());

   viennacl::copy(temp, gpu_matrix);

 }


 /*

 template<typename CPUMatrixT, typename ScalarT, typename IndexT>

 void copy(sliced_ell_matrix<ScalarT, IndexT> const & gpu_matrix, CPUMatrixT & cpu_matrix )

 {

   assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );

   assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );


   if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)

   {

     std::vector<NumericT> elements(gpu_matrix.internal_nnz());

     viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz());


     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * elements.size(), &(elements[0]));

     viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get());


     for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)

     {

       for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++)

       {

         vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;


         if (elements[offset] == static_cast<NumericT>(0.0))

             continue;


         if (coords[offset] >= gpu_matrix.size2())

         {

             std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl;

             return;

         }


         cpu_matrix(row, coords[offset]) = elements[offset];

       }

     }

   }

 } */


 //

 // Specify available operations:

 //


 namespace linalg

 {

 namespace detail

 {

   // x = A * y
   /** @brief Executes  lhs = A * y  for a sliced_ell_matrix A and a vector y. */
   template<typename ScalarT, typename IndexT>
   struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
     {
       // Common case: result and operand use different memory handles, so write directly into lhs.
       if (!(viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
         return;
       }

       // Special case x = A * x: compute into a separate vector first, then assign.
       viennacl::vector<ScalarT> result(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), result);
       lhs = result;
     }
   };


   // x += A * y
   /** @brief Executes  lhs += A * y  for a sliced_ell_matrix A and a vector y. */
   template<typename ScalarT, typename IndexT>
   struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
     {
       // The product is evaluated into a vector constructed from lhs and only then added to lhs.
       viennacl::vector<ScalarT> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), product);

       lhs += product;
     }
   };


   // x -= A * y
   /** @brief Executes  lhs -= A * y  for a sliced_ell_matrix A and a vector y. */
   template<typename ScalarT, typename IndexT>
   struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
     {
       // The product is evaluated into a vector constructed from lhs and only then subtracted from lhs.
       viennacl::vector<ScalarT> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), product);

       lhs -= product;
     }
   };


   // x = A * vec_op
   /** @brief Executes  lhs = A * (vector expression)  for a sliced_ell_matrix A. */
   template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
   struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
     {
       // Step 1: evaluate the vector expression into a concrete vector in the context of rhs.
       viennacl::vector<ScalarT> evaluated_operand(rhs.rhs(), viennacl::traits::context(rhs));

       // Step 2: multiply, writing the result straight into lhs.
       viennacl::linalg::prod_impl(rhs.lhs(), evaluated_operand, lhs);
     }
   };


   // x += A * vec_op
   /** @brief Executes  lhs += A * (vector expression)  for a sliced_ell_matrix A. */
   template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
   struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
     {
       // Step 1: evaluate the vector expression into a concrete vector in the context of rhs.
       viennacl::vector<ScalarT> evaluated_operand(rhs.rhs(), viennacl::traits::context(rhs));

       // Step 2: compute the product into a vector constructed from lhs.
       viennacl::vector<ScalarT> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), evaluated_operand, product);

       // Step 3: accumulate.
       lhs += product;
     }
   };


   // x -= A * vec_op
   /** @brief Executes  lhs -= A * (vector expression)  for a sliced_ell_matrix A. */
   template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
   struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
   {
     static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
     {
       // Step 1: evaluate the vector expression into a concrete vector in the context of rhs.
       viennacl::vector<ScalarT> evaluated_operand(rhs.rhs(), viennacl::traits::context(rhs));

       // Step 2: compute the product into a vector constructed from lhs.
       viennacl::vector<ScalarT> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), evaluated_operand, product);

       // Step 3: subtract.
       lhs -= product;
     }
   };


 } // namespace detail

 } // namespace linalg


 }


 #endif


viennacl::sliced_ell_matrix::handle2
handle_type & handle2()
Definition: sliced_ell_matrix.hpp:112

viennacl::sliced_ell_matrix::clear
void clear()
Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity p...
Definition: sliced_ell_matrix.hpp:85

viennacl::sliced_ell_matrix::handle3
const handle_type & handle3() const
Definition: sliced_ell_matrix.hpp:116

viennacl::backend::typesafe_host_array
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92

viennacl::backend::typesafe_host_array::element_size
vcl_size_t element_size() const
Definition: util.hpp:112

viennacl::sliced_ell_matrix::sliced_ell_matrix
sliced_ell_matrix()
Definition: sliced_ell_matrix.hpp:53

viennacl::scalar
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:227

viennacl::sliced_ell_matrix::size_type
vcl_size_t size_type
Definition: sliced_ell_matrix.hpp:51

viennacl::backend::typesafe_host_array::size
vcl_size_t size() const
Definition: util.hpp:113

viennacl::sliced_ell_matrix::handle1
const handle_type & handle1() const
Definition: sliced_ell_matrix.hpp:110

tools.hpp
Various little tools used here and there in ViennaCL.

viennacl::traits::size1
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163

viennacl::OPENCL_MEMORY
Definition: forwards.h:349

viennacl::sliced_ell_matrix::handle2
const handle_type & handle2() const
Definition: sliced_ell_matrix.hpp:113

forwards.h
This file provides the forward declarations for the main types used within ViennaCL.

viennacl::backend::typesafe_host_array::get
void * get()
Definition: util.hpp:110

viennacl::linalg::detail::max
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59

viennacl::sliced_ell_matrix::rows_per_block
vcl_size_t rows_per_block() const
Definition: sliced_ell_matrix.hpp:104

viennacl::sliced_ell_matrix::copy
friend void copy(CPUMatrixT const &cpu_matrix, sliced_ell_matrix< ScalarT2, IndexT2 > &gpu_matrix)

viennacl::sliced_ell_matrix::value_type
scalar< typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT< ScalarT >::ResultType > value_type
Definition: sliced_ell_matrix.hpp:50

viennacl::traits::size2
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201

viennacl::sliced_ell_matrix::handle
handle_type & handle()
Definition: sliced_ell_matrix.hpp:118

viennacl::context
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39

viennacl
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34

viennacl::sliced_ell_matrix::sliced_ell_matrix
sliced_ell_matrix(viennacl::context ctx)
Definition: sliced_ell_matrix.hpp:66

detail
Definition: blas3.hpp:36

viennacl::sliced_ell_matrix
Sparse matrix class using the sliced ELLPACK format with parameters C, $\sigma$.
Definition: forwards.h:403

sparse_matrix_operations.hpp
Implementations of operations using sparse matrices.

viennacl::tools::const_sparse_matrix_adapter
Adapts a constant sparse matrix type made up from std::vector<std::map<SizeT, NumericT> > to basic ublas-compatibility.
Definition: adapter.hpp:183

viennacl::vcl_size_t
std::size_t vcl_size_t
Definition: forwards.h:75

viennacl::sliced_ell_matrix::size1
vcl_size_t size1() const
Definition: sliced_ell_matrix.hpp:101

viennacl::sliced_ell_matrix::sliced_ell_matrix
sliced_ell_matrix(size_type num_rows, size_type num_cols, size_type num_rows_per_block_=0)
Standard constructor for setting the row and column sizes as well as the block size.
Definition: sliced_ell_matrix.hpp:59

viennacl::vector
Definition: forwards.h:266

viennacl::context::memory_type
viennacl::memory_types memory_type() const
Definition: context.hpp:76

viennacl::sliced_ell_matrix::handle
const handle_type & handle() const
Definition: sliced_ell_matrix.hpp:119

viennacl::sliced_ell_matrix::internal_size2
vcl_size_t internal_size2() const
Definition: sliced_ell_matrix.hpp:99

viennacl::backend::mem_handle::switch_active_handle_id
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121

viennacl::traits::context
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40

vector.hpp
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...

viennacl::sliced_ell_matrix::handle3
handle_type & handle3()
Definition: sliced_ell_matrix.hpp:115

viennacl::copy
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150

viennacl::backend::typesafe_host_array::set
void set(vcl_size_t index, U value)
Definition: util.hpp:115

viennacl::backend::mem_handle
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89

viennacl::backend::memory_create
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87

viennacl::linalg::prod_impl
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:438

viennacl::traits::handle
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41

viennacl::sliced_ell_matrix::size2
vcl_size_t size2() const
Definition: sliced_ell_matrix.hpp:102

viennacl::sliced_ell_matrix::internal_size1
vcl_size_t internal_size1() const
Definition: sliced_ell_matrix.hpp:98

viennacl::sliced_ell_matrix::handle1
handle_type & handle1()
Definition: sliced_ell_matrix.hpp:109

viennacl::sliced_ell_matrix::handle_type
viennacl::backend::mem_handle handle_type
Definition: sliced_ell_matrix.hpp:49

op_assign
Definition: self_assign.cpp:132