#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
#define VIENNACL_COMPRESSED_MATRIX_HPP_
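
/** @file compressed_matrix.hpp
    @brief Implementation of the compressed_matrix class (sparse matrix in compressed sparse row format)
*/
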
#include <cassert>
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/compressed_matrix_operations.hpp"

#include "viennacl/tools/tools.hpp"

namespace viennacl
{
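
/** @brief Copies a sparse host matrix to the OpenCL device.
*
* CPU_MATRIX has to provide size1(), size2(), and row-wise traversal via const_iterator1/const_iterator2
* (as fulfilled by, e.g., boost::numeric::ublas::compressed_matrix). Each row is padded to a multiple of
* ALIGNMENT in the device-side layout.
*/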
template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const CPU_MATRIX & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
  {
    // First pass: count the entries in each row, rounded up to a multiple of ALIGNMENT
    long num_entries = 0;
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      std::size_t entries_per_row = 0;
      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        ++entries_per_row;
      }
      num_entries += viennacl::tools::roundUpToNextMultiple<std::size_t>(entries_per_row, ALIGNMENT);
    }

    if (num_entries == 0)   // guard: never allocate zero-sized device buffers
      num_entries = 1;

    // Set up the host-side CSR arrays:
    std::vector<cl_uint> row_buffer(cpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(num_entries);
    std::vector<SCALARTYPE> elements(num_entries);

    std::size_t row_index = 0;
    std::size_t data_index = 0;

    // Second pass: fill the CSR arrays, padding each row to a multiple of ALIGNMENT
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      row_buffer[row_index] = static_cast<cl_uint>(data_index);
      ++row_index;

      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        col_buffer[data_index] = static_cast<cl_uint>(col_it.index2());
        elements[data_index] = *col_it;
        ++data_index;
      }
      data_index = viennacl::tools::roundUpToNextMultiple<std::size_t>(data_index, ALIGNMENT);
    }
    row_buffer[row_index] = static_cast<cl_uint>(data_index);

    gpu_matrix.set(&row_buffer[0],
                   &col_buffer[0],
                   &elements[0],
                   cpu_matrix.size1(),
                   cpu_matrix.size2(),
                   num_entries);
  }
}
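
/** @brief Copies a sparse matrix given as a vector of row maps (column index to value) to the OpenCL device. */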
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
}

#ifdef VIENNACL_HAVE_EIGEN
/** @brief Copies an Eigen sparse matrix to the OpenCL device, staging it in an STL-based sparse matrix. */
template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

  for (int k = 0; k < eigen_matrix.outerSize(); ++k)
    for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
      stl_matrix[it.row()][it.col()] = it.value();

  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
}
#endif


#ifdef VIENNACL_HAVE_MTL4
/** @brief Copies an MTL4 compressed matrix to the OpenCL device, staging it in an STL-based sparse matrix. */
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  typedef mtl::compressed2D<SCALARTYPE> MatrixType;

  std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

  using mtl::traits::range_generator;
  using mtl::traits::range::min;

  // Use the more efficient of row-wise and column-wise traversal for this matrix type:
  typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                       range_generator<mtl::tag::col, MatrixType> >::type range_type;
  range_type my_range;

  // Cursor over the major dimension:
  typedef typename range_type::type c_type;
  // Cursor over the nonzeros within the major dimension:
  typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;

  // Property maps for extracting row index, column index, and value of each nonzero:
  typename mtl::traits::row<MatrixType>::type row(cpu_matrix);
  typename mtl::traits::col<MatrixType>::type col(cpu_matrix);
  typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix);

  for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
    for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
      stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

  copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
}
#endif
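

/** @brief Copies the sparse matrix from the OpenCL device back to the host.
*
* CPU_MATRIX has to provide resize(rows, cols, preserve) and write access via operator()(row, col)
* (as fulfilled by, e.g., boost::numeric::ublas::compressed_matrix).
*/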
template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          CPU_MATRIX & cpu_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    // Blocking reads from the device:
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(), CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(), CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    // Walk the CSR arrays row by row; padding entries carry a zero value and are skipped.
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        if (col_buffer[data_index] >= gpu_matrix.size2())
        {
          std::cerr << "ViennaCL encountered invalid data at col_buffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
          return;
        }

        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          cpu_matrix(row - 1, col_buffer[data_index]) = elements[data_index];
        ++data_index;
      }
    }
  }
}
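
/** @brief Copies the sparse matrix from the OpenCL device to a vector of row maps (column index to value) on the host. */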
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
{
  tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
  copy(gpu_matrix, temp);
}


#ifdef VIENNACL_HAVE_EIGEN
/** @brief Copies the sparse matrix from the OpenCL device to an Eigen sparse matrix (using the Eigen 2.x fill API). */
template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    assert(static_cast<unsigned int>(eigen_matrix.rows()) >= gpu_matrix.size1()
           && static_cast<unsigned int>(eigen_matrix.cols()) >= gpu_matrix.size2()
           && "Provided Eigen compressed matrix is too small!");

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                              CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                              CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                              CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    eigen_matrix.setZero();
    eigen_matrix.startFill();
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          eigen_matrix.fill(row - 1, col_buffer[data_index]) = elements[data_index];
        ++data_index;
      }
    }
    eigen_matrix.endFill();
  }
}
#endif


#ifdef VIENNACL_HAVE_MTL4
/** @brief Copies the sparse matrix from the OpenCL device to an MTL4 compressed matrix via an inserter. */
template <typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
          mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
{
  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    assert(mtl4_matrix.num_rows() >= gpu_matrix.size1()
           && mtl4_matrix.num_cols() >= gpu_matrix.size2()
           && "Provided MTL4 compressed matrix is too small!");

    // Host-side staging buffers for the three CSR arrays:
    std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
    std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
    std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                              CL_TRUE, 0, sizeof(cl_uint) * (gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                              CL_TRUE, 0, sizeof(cl_uint) * gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                              CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    // The inserter buffers the writes and commits them to mtl4_matrix when it goes out of scope.
    mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
    std::size_t data_index = 0;
    for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
    {
      while (data_index < row_buffer[row])
      {
        assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
        if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
          ins(row - 1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
        ++data_index;
      }
    }
  }
}
#endif
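

/** @brief A sparse matrix class in compressed sparse row (CSR) format, with the entries of each row padded to a multiple of ALIGNMENT.
*
* @tparam SCALARTYPE  The underlying floating point type (either float or double)
* @tparam ALIGNMENT   The entries of each row are padded to a multiple of this value
*
* A minimal usage sketch (the host matrix type and sizes here are illustrative, not mandated by the class):
* @code
*   std::vector< std::map<unsigned int, float> > host_A(100);   // 100 x 100 sparse matrix on the host
*   host_A[0][0] = 1.0f;                                        // set A(0, 0) = 1
*
*   viennacl::compressed_matrix<float> gpu_A(100, 100);
*   viennacl::copy(host_A, gpu_A);   // host -> device
*   viennacl::copy(gpu_A, host_A);   // device -> host
* @endcode
*/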
template <class SCALARTYPE, unsigned int ALIGNMENT>
class compressed_matrix
{
public:
  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;

  /** @brief Default constructor: creates an empty matrix and makes sure the OpenCL kernels are initialized. */
  compressed_matrix() : _rows(0), _cols(0), _nonzeros(0) { viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init(); }

  /** @brief Creates an empty rows x cols matrix, optionally reserving memory for the given number of nonzeros.
  *
  * @param rows      Number of rows
  * @param cols      Number of columns
  * @param nonzeros  Optional number of nonzeros for memory preallocation
  */
  explicit compressed_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
    _rows(rows), _cols(cols), _nonzeros(nonzeros)
  {
    viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init();

    if (rows > 0)
      _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));  // CSR stores rows + 1 row pointers (cf. set())
    if (nonzeros > 0)
    {
      _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros);
      _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros);
    }
  }

  /** @brief Wraps existing OpenCL buffers holding a CSR matrix; the reference counts of the handles are increased. */
  explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
                             std::size_t rows, std::size_t cols, std::size_t nonzeros) :
    _rows(rows), _cols(cols), _nonzeros(nonzeros)
  {
    _row_buffer = mem_row_buffer;
    _row_buffer.inc();   // prevents the handle from releasing the user-provided memory
    _col_buffer = mem_col_buffer;
    _col_buffer.inc();
    _elements = mem_elements;
    _elements.inc();
  }

  /** @brief Sets the matrix from existing host-side CSR arrays by copying them to the device.
  *
  * @param row_jumper  Array of rows + 1 indices into col_buffer/elements (one start index per row, plus the end marker)
  * @param col_buffer  Column index of each entry
  * @param elements    Value of each entry
  * @param rows        Number of rows
  * @param cols        Number of columns
  * @param nonzeros    Number of entries (including alignment padding)
  */
  void set(cl_uint * row_jumper,
           cl_uint * col_buffer,
           SCALARTYPE * elements,
           std::size_t rows,
           std::size_t cols,
           std::size_t nonzeros)
  {
    assert(rows > 0);
    assert(cols > 0);
    assert(nonzeros > 0);

    _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1), row_jumper);
    _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros, col_buffer);
    _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros, elements);
    _nonzeros = nonzeros;
    _rows = rows;
    _cols = cols;
  }
00435
00437 void reserve(std::size_t new_nonzeros)
00438 {
00439 if (new_nonzeros > _nonzeros)
00440 {
00441 viennacl::ocl::handle<cl_mem> _col_buffer_old = _col_buffer;
00442 viennacl::ocl::handle<cl_mem> _elements_old = _elements;
00443 _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * new_nonzeros);
00444 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * new_nonzeros);
00445
00446 cl_int err;
00447 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _col_buffer_old, _col_buffer, 0, 0, sizeof(cl_uint)*_nonzeros, 0, NULL, NULL);
00448 VIENNACL_ERR_CHECK(err);
00449 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _elements_old, _elements, 0, 0, sizeof(SCALARTYPE)*_nonzeros, 0, NULL, NULL);
00450 VIENNACL_ERR_CHECK(err);
00451
00452 _nonzeros = new_nonzeros;
00453 }
00454 }

  /** @brief Resizes the matrix.
  *
  * @param new_size1  New number of rows
  * @param new_size2  New number of columns
  * @param preserve   If true, the existing entries are preserved (entries outside the new bounds are dropped)
  */
  void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
  {
    assert(new_size1 > 0 && new_size2 > 0);

    if (new_size1 != _rows || new_size2 != _cols)
    {
      // Resizing is carried out on the host: copy back, adjust, then copy to the device again.
      std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
      if (_rows > 0)
        stl_sparse_matrix.resize(_rows);

      if (preserve && _rows > 0)
        viennacl::copy(*this, stl_sparse_matrix);

      stl_sparse_matrix.resize(new_size1);

      // Drop all entries beyond the new column bound:
      if (new_size2 < _cols && _rows > 0)
      {
        for (std::size_t i = 0; i < stl_sparse_matrix.size(); ++i)
        {
          std::list<unsigned int> to_delete;
          for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
               it != stl_sparse_matrix[i].end();
               ++it)
          {
            if (it->first >= new_size2)
              to_delete.push_back(it->first);
          }

          for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
            stl_sparse_matrix[i].erase(*it);
        }
      }

      copy(stl_sparse_matrix, *this);

      _rows = new_size1;
      _cols = new_size2;
    }
  }

  /** @brief Returns the number of rows */
  const std::size_t & size1() const { return _rows; }
  /** @brief Returns the number of columns */
  const std::size_t & size2() const { return _cols; }
  /** @brief Returns the number of nonzero entries (including alignment padding) */
  const std::size_t & nnz() const { return _nonzeros; }

  /** @brief Returns the OpenCL handle to the row index array */
  const viennacl::ocl::handle<cl_mem> & handle1() const { return _row_buffer; }
  /** @brief Returns the OpenCL handle to the column index array */
  const viennacl::ocl::handle<cl_mem> & handle2() const { return _col_buffer; }
  /** @brief Returns the OpenCL handle to the matrix entries */
  const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

private:
  // Assignment is not available (declared private and left unimplemented):
  compressed_matrix & operator=(compressed_matrix const &);

  std::size_t _rows;
  std::size_t _cols;
  std::size_t _nonzeros;
  viennacl::ocl::handle<cl_mem> _row_buffer;
  viennacl::ocl::handle<cl_mem> _col_buffer;
  viennacl::ocl::handle<cl_mem> _elements;
};

} // namespace viennacl

#endif