
/data/development/ViennaCL/dev/viennacl/coordinate_matrix.hpp

#ifndef VIENNACL_COORDINATE_MATRIX_HPP_
#define VIENNACL_COORDINATE_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file coordinate_matrix.hpp
    @brief Implementation of the coordinate_matrix class
*/

#include <map>
#include <vector>
#include <list>

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/coordinate_matrix_operations.hpp"

namespace viennacl
{

    //provide copy-operation:
    /** @brief Copies a sparse host matrix into a coordinate_matrix on the OpenCL device.
    *
    * The host type CPU_MATRIX must provide the uBLAS-style row/column iterator interface used below.
    *
    * @param cpu_matrix   The sparse matrix on the host
    * @param gpu_matrix   The coordinate_matrix to be filled
    */
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const CPU_MATRIX & cpu_matrix,
              coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
    {
      std::size_t group_num = 64;

      // Step 1: Determine the number of nonzeros:
      if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
      {
        std::size_t num_entries = 0;
        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
              row_it != cpu_matrix.end1();
              ++row_it)
        {
          for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
                col_it != row_it.end();
                ++col_it)
          {
            ++num_entries;
          }
        }

        // Step 2: Set up matrix data:
        std::cout << "Number of entries: " << num_entries << std::endl;
        gpu_matrix.nonzeros_ = num_entries;
        gpu_matrix.rows_ = cpu_matrix.size1();
        gpu_matrix.cols_ = cpu_matrix.size2();

        std::vector<cl_uint> coord_buffer(2*gpu_matrix.internal_nnz());
        std::vector<cl_uint> group_boundaries(group_num + 1);
        std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());

        std::size_t data_index = 0;
        std::size_t current_fraction = 0;

        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
              row_it != cpu_matrix.end1();
              ++row_it)
        {
          for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
                col_it != row_it.end();
                ++col_it)
          {
            coord_buffer[2*data_index] = static_cast<cl_uint>(col_it.index1());
            coord_buffer[2*data_index + 1] = static_cast<cl_uint>(col_it.index2());
            elements[data_index] = *col_it;
            ++data_index;
          }

          if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)    //split the data (roughly) equally over the work groups
            group_boundaries[++current_fraction] = data_index;
        }

        //write end of last group:
        group_boundaries[group_num] = data_index;
        //group_boundaries[1] = data_index; //for one compute unit

        /*std::cout << "Group boundaries: " << std::endl;
        for (std::size_t i=0; i<group_boundaries.size(); ++i)
          std::cout << group_boundaries[i] << std::endl;*/

        gpu_matrix.coord_buffer_     = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer);
        gpu_matrix.elements_         = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements);
        gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries);
      }
    }
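    /* Device data layout produced by copy() above (a sketch with illustrative numbers, not
     * taken from actual output): entry i occupies coord_buffer[2*i] (row index),
     * coord_buffer[2*i+1] (column index), and elements[i] (value). With num_entries = 10
     * and group_num = 4, the loop could yield group_boundaries = {0, 3, 5, 8, 10} (the
     * exact split depends on where rows end), so that work group g later processes the
     * entries with indices in [group_boundaries[g], group_boundaries[g+1]).
     */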

    /** @brief Copies a sparse matrix given as a vector of row maps (column index -> value) from the host to the OpenCL device.
    *
    * @param cpu_matrix   The sparse host matrix: cpu_matrix[i][j] holds the entry in row i, column j
    * @param gpu_matrix   The coordinate_matrix to be filled
    */
    template <typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
              coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
    {
      copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
    }
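    /* A minimal host-to-device usage sketch for the overload above, assuming double
     * precision is available on the device (all names are illustrative):
     *
     *   std::vector< std::map<unsigned int, double> > stl_matrix(3);
     *   stl_matrix[0][0] =  1.0;
     *   stl_matrix[1][2] =  2.5;
     *   stl_matrix[2][1] = -1.0;
     *
     *   viennacl::coordinate_matrix<double> vcl_matrix(3, 3);
     *   viennacl::copy(stl_matrix, vcl_matrix);   //calls the overload above
     */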

    //gpu to cpu:
    /** @brief Copies a coordinate_matrix from the OpenCL device back to a sparse matrix on the host.
    *
    * The host type CPU_MATRIX must provide resize(rows, cols, preserve) and operator()(i, j) for write access.
    *
    * @param gpu_matrix   The coordinate_matrix on the device
    * @param cpu_matrix   The sparse host matrix to be filled
    */
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
              CPU_MATRIX & cpu_matrix )
    {
      if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
      {
        cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

        //get raw data from device memory:
        std::vector<cl_uint> coord_buffer(2*gpu_matrix.nnz());
        std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

        //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl;

        cl_int err;
        err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(cl_uint) * 2 * gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        viennacl::ocl::get_queue().finish();

        //fill the cpu_matrix:
        for (std::size_t index = 0; index < gpu_matrix.nnz(); ++index)
        {
          cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
        }
      }
    }

    /** @brief Copies a coordinate_matrix from the OpenCL device into a host matrix given as a vector of row maps (column index -> value).
    *
    * @param gpu_matrix   The coordinate_matrix on the device
    * @param cpu_matrix   The sparse host matrix to be filled
    */
    template <typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
              std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
    {
      tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
      copy(gpu_matrix, temp);
    }
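    /* Usage sketch for the device-to-host direction, reusing the illustrative vcl_matrix
     * from the sketch above:
     *
     *   std::vector< std::map<unsigned int, double> > stl_result(vcl_matrix.size1());
     *   viennacl::copy(vcl_matrix, stl_result);   //blocking read of the COO data
     */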


    /** @brief A sparse matrix in coordinate (COO) format: each nonzero entry is stored as a
    * (row index, column index, value) triple. The entries are additionally partitioned into
    * groups for parallel processing on the OpenCL device.
    *
    * @tparam SCALARTYPE   The underlying floating point type
    * @tparam ALIGNMENT    The internal memory alignment of the number of nonzeros
    */
    template<class SCALARTYPE, unsigned int ALIGNMENT /* see viennacl/forwards.h */ >
    class coordinate_matrix
    {
    public:
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

      /** @brief Default construction of an empty matrix; the OpenCL kernels for coordinate_matrix are initialized. */
      coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); }

      /** @brief Construction with the given dimensions. Device memory is allocated only if nonzeros > 0.
      *
      * @param rows       The number of rows
      * @param cols       The number of columns
      * @param nonzeros   Optional number of nonzeros for memory preallocation
      */
      coordinate_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
        rows_(rows), cols_(cols), nonzeros_(nonzeros)
      {
        viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init();
        if (nonzeros > 0)
        {
          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
          group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));
        }
      }

      /** @brief Allocates device memory for the given number of nonzeros; existing entries are preserved, new entries are zero-initialized. */
      void reserve(std::size_t new_nonzeros)
      {
        if (new_nonzeros > nonzeros_)
        {
          std::size_t old_nonzeros = nonzeros_;
          nonzeros_ = new_nonzeros;   //internal_nnz() below now refers to the enlarged size

          viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_;
          viennacl::ocl::handle<cl_mem> elements_old = elements_;
          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());

          //copy the old entries into the enlarged buffers:
          cl_int err;
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE) * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          //the new memory beyond the old entries must be padded with zeros:
          std::vector<cl_uint> zero_coords(2 * (internal_nnz() - old_nonzeros), 0);
          std::vector<SCALARTYPE> zero_elements(internal_nnz() - old_nonzeros, SCALARTYPE(0));
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_, CL_TRUE, sizeof(cl_uint) * 2 * old_nonzeros, sizeof(cl_uint) * zero_coords.size(), &(zero_coords[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), elements_, CL_TRUE, sizeof(SCALARTYPE) * old_nonzeros, sizeof(SCALARTYPE) * zero_elements.size(), &(zero_elements[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
      }

      /** @brief Resizes the matrix. Entries that fall outside the new dimensions are dropped.
      *
      * @param new_size1   New number of rows
      * @param new_size2   New number of columns
      * @param preserve    If true, the existing entries (within the new bounds) are preserved
      */
      void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
      {
        assert (new_size1 > 0 && new_size2 > 0);

        if (new_size1 < rows_ || new_size2 < cols_) //shrinking requires cropping of entries
        {
          std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
          if (rows_ > 0)
            stl_sparse_matrix.resize(rows_);

          if (preserve && rows_ > 0)
            viennacl::copy(*this, stl_sparse_matrix);

          stl_sparse_matrix.resize(new_size1);

          std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
          if (new_size2 < cols_ && rows_ > 0)
          {
            for (std::size_t i=0; i<stl_sparse_matrix.size(); ++i)
            {
              std::list<unsigned int> to_delete;
              for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                   it != stl_sparse_matrix[i].end();
                  ++it)
              {
                if (it->first >= new_size2)
                  to_delete.push_back(it->first);
              }

              for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
                stl_sparse_matrix[i].erase(*it);
            }
            std::cout << "Cropping done..." << std::endl;
          }

          rows_ = new_size1;
          cols_ = new_size2;
          viennacl::copy(stl_sparse_matrix, *this);
        }

        rows_ = new_size1;
        cols_ = new_size2;
      }
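      /* Resize sketch (illustrative, continuing the vcl_matrix from the copy sketches above):
       *
       *   vcl_matrix.resize(4, 4);   //enlarge: only the dimensions change, existing entries are kept
       *   vcl_matrix.resize(2, 2);   //shrink: entries with row or column index >= 2 are dropped
       */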


      /** @brief Returns the number of rows */
      std::size_t size1() const { return rows_; }
      /** @brief Returns the number of columns */
      std::size_t size2() const { return cols_; }
      /** @brief Returns the number of nonzero entries */
      std::size_t nnz() const { return nonzeros_; }
      /** @brief Returns the padded number of nonzeros used for the device buffers (a multiple of ALIGNMENT) */
      std::size_t internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<std::size_t>(nonzeros_, ALIGNMENT); }

      /** @brief Returns the OpenCL handle of the (row, column) index buffer */
      const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; }
      /** @brief Returns the OpenCL handle of the buffer holding the nonzero values */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
      /** @brief Returns the OpenCL handle of the work group boundary buffer */
      const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; }

      #if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
      #else
      template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
      #endif

    private:
      //copy construction and assignment are intentionally not implemented (no copying of device matrices):
      coordinate_matrix(coordinate_matrix const &);
      coordinate_matrix & operator=(coordinate_matrix const &);

      std::size_t rows_;
      std::size_t cols_;
      std::size_t nonzeros_;
      viennacl::ocl::handle<cl_mem> coord_buffer_;
      viennacl::ocl::handle<cl_mem> elements_;
      viennacl::ocl::handle<cl_mem> group_boundaries_;
    };
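    /* End-to-end usage sketch (illustrative; assumes double precision support on the device
     * and the sparse matrix-vector product viennacl::linalg::prod() provided via the
     * coordinate_matrix_operations.hpp include above):
     *
     *   std::vector< std::map<unsigned int, double> > stl_matrix(3);
     *   stl_matrix[0][0] = 2.0;  stl_matrix[1][1] = 3.0;  stl_matrix[2][0] = -1.0;
     *
     *   viennacl::coordinate_matrix<double> A(3, 3);
     *   viennacl::vector<double> x(3), y(3);
     *   viennacl::copy(stl_matrix, A);
     *   // ... fill x ...
     *   y = viennacl::linalg::prod(A, x);   //sparse matrix-vector product on the device
     */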


}

#endif
