#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
#define VIENNACL_COMPRESSED_MATRIX_HPP_
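
/** @file compressed_matrix.hpp
    @brief Implementation of the compressed_matrix class (sparse matrix in compressed sparse row format)
*/
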
#include <cassert>
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/compressed_matrix_operations.hpp"

#include "viennacl/tools/tools.hpp"

namespace viennacl
{
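
/** @brief Copies a sparse host matrix to the OpenCL device.
*
* CPU_MATRIX has to provide size1(), size2(), and row-wise traversal via const_iterator1/const_iterator2
* (as fulfilled by, e.g., boost::numeric::ublas::compressed_matrix). Each row is padded to a multiple of
* ALIGNMENT in the device-side layout.
*/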
template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const CPU_MATRIX & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
  {
    // First pass: count the entries in each row, rounded up to a multiple of ALIGNMENT
    long num_entries = 0;
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      std::size_t entries_per_row = 0;
      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        ++entries_per_row;
      }
      num_entries += viennacl::tools::roundUpToNextMultiple<std::size_t>(entries_per_row, ALIGNMENT);
    }

    if (num_entries == 0)   // guard: never allocate zero-sized device buffers
      num_entries = 1;

    // Set up the host-side CSR arrays:
    std::vector<cl_uint> row_buffer(cpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(num_entries);
    std::vector<SCALARTYPE> elements(num_entries);

    std::size_t row_index = 0;
    std::size_t data_index = 0;

    // Second pass: fill the CSR arrays, padding each row to a multiple of ALIGNMENT
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      row_buffer[row_index] = static_cast<cl_uint>(data_index);
      ++row_index;

      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        col_buffer[data_index] = static_cast<cl_uint>(col_it.index2());
        elements[data_index] = *col_it;
        ++data_index;
      }
      data_index = viennacl::tools::roundUpToNextMultiple<std::size_t>(data_index, ALIGNMENT);
    }
    row_buffer[row_index] = static_cast<cl_uint>(data_index);

    gpu_matrix.set(&row_buffer[0],
                   &col_buffer[0],
                   &elements[0],
                   cpu_matrix.size1(),
                   cpu_matrix.size2(),
                   num_entries);
  }
}
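
/** @brief Copies a sparse matrix given as a vector of row maps (column index to value) to the OpenCL device. */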
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
}

#ifdef VIENNACL_HAVE_EIGEN
/** @brief Copies an Eigen sparse matrix to the OpenCL device, staging it in an STL-based sparse matrix. */
template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

  for (int k = 0; k < eigen_matrix.outerSize(); ++k)
    for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
      stl_matrix[it.row()][it.col()] = it.value();

  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
}
#endif


#ifdef VIENNACL_HAVE_MTL4
/** @brief Copies an MTL4 compressed matrix to the OpenCL device, staging it in an STL-based sparse matrix. */
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  typedef mtl::compressed2D<SCALARTYPE> MatrixType;

  std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

  using mtl::traits::range_generator;
  using mtl::traits::range::min;

  // Use the more efficient of row-wise and column-wise traversal for this matrix type:
  typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                       range_generator<mtl::tag::col, MatrixType> >::type range_type;
  range_type my_range;

  // Cursor over the major dimension:
  typedef typename range_type::type c_type;
  // Cursor over the nonzeros within the major dimension:
  typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;

  // Property maps for extracting row index, column index, and value of each nonzero:
  typename mtl::traits::row<MatrixType>::type row(cpu_matrix);
  typename mtl::traits::col<MatrixType>::type col(cpu_matrix);
  typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix);

  for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
    for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
      stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
}
#endif
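

/** @brief Copies the sparse matrix from the OpenCL device back to the host.
*
* CPU_MATRIX has to provide resize(rows, cols, preserve) and write access via operator()(row, col)
* (as fulfilled by, e.g., boost::numeric::ublas::compressed_matrix).
*/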
template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          CPU_MATRIX & cpu_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    // Blocking reads from the device:
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(), CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(), CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    // Walk the CSR arrays row by row; padding entries carry a zero value and are skipped.
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        if (col_buffer[data_index] >= gpu_matrix.size2())
        {
          std::cerr << "ViennaCL encountered invalid data at col_buffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
          return;
        }

        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          cpu_matrix(row - 1, col_buffer[data_index]) = elements[data_index];
        ++data_index;
      }
    }
  }
}
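
/** @brief Copies the sparse matrix from the OpenCL device to a vector of row maps (column index to value) on the host. */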
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
{
  tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
  copy(gpu_matrix, temp);
}


#ifdef VIENNACL_HAVE_EIGEN
/** @brief Copies the sparse matrix from the OpenCL device to an Eigen sparse matrix (using the Eigen 2.x fill API). */
template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    assert(static_cast<unsigned int>(eigen_matrix.rows()) >= gpu_matrix.size1()
           && static_cast<unsigned int>(eigen_matrix.cols()) >= gpu_matrix.size2()
           && "Provided Eigen compressed matrix is too small!");

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                              CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                              CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                              CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    eigen_matrix.setZero();
    eigen_matrix.startFill();
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          eigen_matrix.fill(row - 1, col_buffer[data_index]) = elements[data_index];
        ++data_index;
      }
    }
    eigen_matrix.endFill();
  }
}
#endif


#ifdef VIENNACL_HAVE_MTL4
/** @brief Copies the sparse matrix from the OpenCL device to an MTL4 compressed matrix via an inserter. */
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    assert(mtl4_matrix.num_rows() >= gpu_matrix.size1()
           && mtl4_matrix.num_cols() >= gpu_matrix.size2()
           && "Provided MTL4 compressed matrix is too small!");

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                              CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                              CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                              CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    // The inserter buffers the writes and commits them to mtl4_matrix when it goes out of scope.
    mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          ins(row - 1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
        ++data_index;
      }
    }
  }
}
#endif
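

/** @brief A sparse matrix class in compressed sparse row (CSR) format, with the entries of each row padded to a multiple of ALIGNMENT.
*
* @tparam SCALARTYPE  The underlying floating point type (either float or double)
* @tparam ALIGNMENT   The entries of each row are padded to a multiple of this value
*
* A minimal usage sketch (the host matrix type and sizes here are illustrative, not mandated by the class):
* @code
*   std::vector< std::map<unsigned int, float> > host_A(100);   // 100 x 100 sparse matrix on the host
*   host_A[0][0] = 1.0f;                                        // set A(0, 0) = 1
*
*   viennacl::compressed_matrix<float> gpu_A(100, 100);
*   viennacl::copy(host_A, gpu_A);   // host -> device
*   viennacl::copy(gpu_A, host_A);   // device -> host
* @endcode
*/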
template <class SCALARTYPE, unsigned int ALIGNMENT>
class compressed_matrix
{
public:
  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;

  /** @brief Default constructor: creates an empty matrix and makes sure the OpenCL kernels are initialized. */
  compressed_matrix() : _rows(0), _cols(0), _nonzeros(0) { viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init(); }

  /** @brief Creates an empty rows x cols matrix, optionally reserving memory for the given number of nonzeros.
  *
  * @param rows      Number of rows
  * @param cols      Number of columns
  * @param nonzeros  Optional number of nonzeros for memory preallocation
  */
  explicit compressed_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
    _rows(rows), _cols(cols), _nonzeros(nonzeros)
  {
    viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init();

    if (rows > 0)
      _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));  // CSR stores rows + 1 row pointers (cf. set())
    if (nonzeros > 0)
    {
      _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros);
      _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros);
    }
  }

  /** @brief Wraps existing OpenCL buffers holding a CSR matrix; the reference counts of the handles are increased. */
  explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
                             std::size_t rows, std::size_t cols, std::size_t nonzeros) :
    _rows(rows), _cols(cols), _nonzeros(nonzeros)
  {
    _row_buffer = mem_row_buffer;
    _row_buffer.inc();   // prevents the handle from releasing the user-provided memory
    _col_buffer = mem_col_buffer;
    _col_buffer.inc();
    _elements = mem_elements;
    _elements.inc();
  }

  /** @brief Sets the matrix from existing host-side CSR arrays by copying them to the device.
  *
  * @param row_jumper  Array of rows + 1 indices into col_buffer/elements (one start index per row, plus the end marker)
  * @param col_buffer  Column index of each entry
  * @param elements    Value of each entry
  * @param rows        Number of rows
  * @param cols        Number of columns
  * @param nonzeros    Number of entries (including alignment padding)
  */
  void set(cl_uint * row_jumper,
           cl_uint * col_buffer,
           SCALARTYPE * elements,
           std::size_t rows,
           std::size_t cols,
           std::size_t nonzeros)
  {
    assert(rows > 0);
    assert(cols > 0);
    assert(nonzeros > 0);

    _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1), row_jumper);
    _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros, col_buffer);
    _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros, elements);
    _nonzeros = nonzeros;
    _rows = rows;
    _cols = cols;
  }
00435
00437 void reserve(std::size_t new_nonzeros)
00438 {
00439 if (new_nonzeros > _nonzeros)
00440 {
00441 viennacl::ocl::handle<cl_mem> _col_buffer_old = _col_buffer;
00442 viennacl::ocl::handle<cl_mem> _elements_old = _elements;
00443 _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * new_nonzeros);
00444 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * new_nonzeros);
00445
00446 cl_int err;
00447 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _col_buffer_old, _col_buffer, 0, 0, sizeof(cl_uint)*_nonzeros, 0, NULL, NULL);
00448 VIENNACL_ERR_CHECK(err);
00449 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _elements_old, _elements, 0, 0, sizeof(SCALARTYPE)*_nonzeros, 0, NULL, NULL);
00450 VIENNACL_ERR_CHECK(err);
00451
00452 _nonzeros = new_nonzeros;
00453 }
00454 }

  /** @brief Resizes the matrix.
  *
  * @param new_size1  New number of rows
  * @param new_size2  New number of columns
  * @param preserve   If true, the existing entries are preserved (entries outside the new bounds are dropped)
  */
  void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
  {
    assert(new_size1 > 0 && new_size2 > 0);

    if (new_size1 != _rows || new_size2 != _cols)
    {
      // Resizing is carried out on the host: copy back, adjust, then copy to the device again.
      std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
      if (_rows > 0)
        stl_sparse_matrix.resize(_rows);

      if (preserve && _rows > 0)
        viennacl::copy(*this, stl_sparse_matrix);

      stl_sparse_matrix.resize(new_size1);

      // Drop all entries beyond the new column bound:
      if (new_size2 < _cols && _rows > 0)
      {
        for (std::size_t i = 0; i < stl_sparse_matrix.size(); ++i)
        {
          std::list<unsigned int> to_delete;
          for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
               it != stl_sparse_matrix[i].end();
               ++it)
          {
            if (it->first >= new_size2)
              to_delete.push_back(it->first);
          }

          for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
            stl_sparse_matrix[i].erase(*it);
        }
      }

      copy(stl_sparse_matrix, *this);

      _rows = new_size1;
      _cols = new_size2;
    }
  }

  /** @brief Returns the number of rows */
  const std::size_t & size1() const { return _rows; }
  /** @brief Returns the number of columns */
  const std::size_t & size2() const { return _cols; }
  /** @brief Returns the number of nonzero entries (including alignment padding) */
  const std::size_t & nnz() const { return _nonzeros; }

  /** @brief Returns the OpenCL handle to the row index array */
  const viennacl::ocl::handle<cl_mem> & handle1() const { return _row_buffer; }
  /** @brief Returns the OpenCL handle to the column index array */
  const viennacl::ocl::handle<cl_mem> & handle2() const { return _col_buffer; }
  /** @brief Returns the OpenCL handle to the matrix entries */
  const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

private:
  // Assignment is not available (declared private and left unimplemented):
  compressed_matrix & operator=(compressed_matrix const &);

  std::size_t _rows;
  std::size_t _cols;
  std::size_t _nonzeros;
  viennacl::ocl::handle<cl_mem> _row_buffer;
  viennacl::ocl::handle<cl_mem> _col_buffer;
  viennacl::ocl::handle<cl_mem> _elements;
};

} // namespace viennacl

#endif