#ifndef VIENNACL_COORDINATE_MATRIX_HPP_
#define VIENNACL_COORDINATE_MATRIX_HPP_
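
/** @file coordinate_matrix.hpp
    @brief Implementation of the coordinate_matrix class
*/
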
#include <cassert>
#include <iostream>
#include <map>
#include <vector>
#include <list>

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/tools/tools.hpp"   //for viennacl::tools::roundUpToNextMultiple() and the sparse matrix adapters
#include "viennacl/vector.hpp"

#include "viennacl/linalg/coordinate_matrix_operations.hpp"

namespace viennacl
{
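
  /** @brief Copies a sparse matrix from the host to the compute device in coordinate (COO) format.
  *
  * CPU_MATRIX has to provide size1(), size2() and const iterators over the rows (begin1(), end1())
  * and over the entries within a row (begin(), end()), as e.g. offered by
  * boost::numeric::ublas::compressed_matrix.
  *
  * @param cpu_matrix   The sparse matrix on the host
  * @param gpu_matrix   The coordinate_matrix to be filled on the compute device
  */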
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    std::size_t group_num = 64;   //number of chunks the nonzeros are partitioned into (one chunk per OpenCL work-group)

    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      //determine the number of nonzero entries first:
      std::size_t num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++num_entries;
        }
      }

#ifdef VIENNACL_DEBUG_ALL
      std::cout << "Number of entries: " << num_entries << std::endl;
#endif
      gpu_matrix.nonzeros_ = num_entries;
      gpu_matrix.rows_ = cpu_matrix.size1();
      gpu_matrix.cols_ = cpu_matrix.size2();

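      //Device storage scheme (as set up below):
      // - coord_buffer holds interleaved (row, column) index pairs, i.e. 2 * nnz entries,
      // - elements holds the nonzero values in the same order,
      // - group_boundaries splits the nonzeros into group_num chunks of roughly equal size,
      //   so that each OpenCL work-group later processes one contiguous chunk.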
      std::vector<cl_uint> coord_buffer(2*gpu_matrix.internal_nnz());
      std::vector<cl_uint> group_boundaries(group_num + 1);
      std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());

      std::size_t data_index = 0;
      std::size_t current_fraction = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          coord_buffer[2*data_index]     = static_cast<cl_uint>(col_it.index1());
          coord_buffer[2*data_index + 1] = static_cast<cl_uint>(col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }

        //close the current chunk as soon as its share of the nonzeros has been written
        //(chunks can only end at row boundaries, hence their sizes are only approximately equal):
        if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)
          group_boundaries[++current_fraction] = data_index;
      }

      //chunks not closed within the loop above are empty; let them all end at the last entry:
      for (std::size_t i = current_fraction + 1; i <= group_num; ++i)
        group_boundaries[i] = data_index;

      gpu_matrix.coord_buffer_     = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer);
      gpu_matrix.elements_         = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements);
      gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries);
    }
  }
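
  /** @brief Copies a sparse matrix in STL format (std::vector of std::map) from the host to the compute device.
  *
  * @param cpu_matrix   The sparse matrix on the host, where cpu_matrix[i][j] holds the entry in row i, column j
  * @param gpu_matrix   The coordinate_matrix to be filled on the compute device
  */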
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
  }
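
  /** @brief Copies a sparse coordinate_matrix from the compute device to the host.
  *
  * CPU_MATRIX has to provide resize(rows, cols, preserve) and write access via operator()(i, j),
  * as e.g. offered by boost::numeric::ublas::compressed_matrix.
  *
  * @param gpu_matrix   The coordinate_matrix on the compute device
  * @param cpu_matrix   The sparse matrix on the host to be filled
  */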
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

      //read the (row, column) index pairs and the nonzero values back from the device:
      std::vector<cl_uint> coord_buffer(2*gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(cl_uint) * 2 * gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //scatter the entries into the host matrix:
      for (std::size_t index = 0; index < gpu_matrix.nnz(); ++index)
      {
        cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
      }
    }
  }
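
  /** @brief Copies a sparse coordinate_matrix from the compute device to the host, using STL format (std::vector of std::map).
  *
  * @param gpu_matrix   The coordinate_matrix on the compute device
  * @param cpu_matrix   The sparse matrix on the host, where cpu_matrix[i][j] holds the entry in row i, column j
  */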
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
    copy(gpu_matrix, temp);
  }
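
  /** @brief A sparse matrix class using the coordinate (COO) format: each entry is stored as a (row index, column index, value) triplet.
  *
  * A minimal usage sketch (assuming an OpenCL context has already been set up by ViennaCL):
  * \code
  *   std::vector< std::map<unsigned int, float> > host_matrix(4);   //a 4x4 sparse matrix in STL format
  *   host_matrix[0][0] = 1.0f;
  *   host_matrix[1][2] = 2.0f;
  *
  *   viennacl::coordinate_matrix<float> device_matrix(4, 4);
  *   viennacl::copy(host_matrix, device_matrix);   //host -> device
  *   viennacl::copy(device_matrix, host_matrix);   //device -> host
  * \endcode
  *
  * @tparam SCALARTYPE   The underlying floating point type (typically 'float' or 'double')
  * @tparam ALIGNMENT    The internal number of entries is rounded up to a multiple of ALIGNMENT, padded with zeros
  */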
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  class coordinate_matrix
  {
    public:
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

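      /** @brief Default constructor: creates an empty matrix and makes sure the OpenCL kernels for this SCALARTYPE/ALIGNMENT are compiled. */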
      coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); }

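      /** @brief Creates the device matrix with the given dimensions and optionally pre-allocates memory for the given number of nonzeros.
      *
      * @param rows       Number of rows
      * @param cols       Number of columns
      * @param nonzeros   Optional number of nonzeros for which device memory is allocated immediately
      */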
      coordinate_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
        rows_(rows), cols_(cols), nonzeros_(nonzeros)
      {
        viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init();
        if (nonzeros > 0)
        {
          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
          group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));   //re-created with the proper chunk count when data is copied in
        }
      }

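      /** @brief Allocates device memory for the given number of nonzeros. Old entries are preserved, the newly allocated tail is padded with zeros. */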
      void reserve(std::size_t new_nonzeros)
      {
        if (new_nonzeros > nonzeros_)
        {
          viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_;
          viennacl::ocl::handle<cl_mem> elements_old = elements_;

          std::size_t old_nonzeros = nonzeros_;
          nonzeros_ = new_nonzeros;   //internal_nnz() from here on refers to the enlarged size

          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());

          //copy the old entries into the enlarged buffers (offsets and sizes are in bytes):
          cl_int err;
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE) * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          //pad the newly allocated tail of both buffers with zeros:
          std::vector<cl_uint> zero_coords(2 * (internal_nnz() - old_nonzeros), 0);
          std::vector<SCALARTYPE> zero_elements(internal_nnz() - old_nonzeros, SCALARTYPE(0));
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_, CL_TRUE, sizeof(cl_uint) * 2 * old_nonzeros, sizeof(cl_uint) * zero_coords.size(), &(zero_coords[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), elements_, CL_TRUE, sizeof(SCALARTYPE) * old_nonzeros, sizeof(SCALARTYPE) * zero_elements.size(), &(zero_elements[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
      }
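
      /** @brief Resizes the matrix. Entries outside the new dimensions are dropped.
      *
      * @param new_size1   New number of rows
      * @param new_size2   New number of columns
      * @param preserve    If true (default), the existing entries are preserved; otherwise the content is discarded when shrinking
      */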
      void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
      {
        assert (new_size1 > 0 && new_size2 > 0);

        if (new_size1 < rows_ || new_size2 < cols_)   //enlarging the matrix does not require any entries to be moved
        {
          //shrink by transferring to the host, cropping there, and copying back:
          std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
          if (rows_ > 0)
            stl_sparse_matrix.resize(rows_);

          if (preserve && rows_ > 0)
            viennacl::copy(*this, stl_sparse_matrix);

          stl_sparse_matrix.resize(new_size1);   //drops all rows beyond the new row count

#ifdef VIENNACL_DEBUG_ALL
          std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
#endif
          if (new_size2 < cols_ && rows_ > 0)
          {
            //collect and erase all entries in columns beyond the new column count:
            for (std::size_t i=0; i<stl_sparse_matrix.size(); ++i)
            {
              std::list<unsigned int> to_delete;
              for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                   it != stl_sparse_matrix[i].end();
                   ++it)
              {
                if (it->first >= new_size2)
                  to_delete.push_back(it->first);
              }

              for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
                stl_sparse_matrix[i].erase(*it);
            }
#ifdef VIENNACL_DEBUG_ALL
            std::cout << "Cropping done..." << std::endl;
#endif
          }

          rows_ = new_size1;
          cols_ = new_size2;
          viennacl::copy(stl_sparse_matrix, *this);
        }

        rows_ = new_size1;
        cols_ = new_size2;
      }

      /** @brief Returns the number of rows */
      std::size_t size1() const { return rows_; }
      /** @brief Returns the number of columns */
      std::size_t size2() const { return cols_; }
      /** @brief Returns the number of nonzero entries */
      std::size_t nnz() const { return nonzeros_; }
      /** @brief Returns the number of internally stored entries, i.e. nnz() rounded up to a multiple of ALIGNMENT */
      std::size_t internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<std::size_t>(nonzeros_, ALIGNMENT); }

      /** @brief Returns the OpenCL handle holding the interleaved (row, column) index pairs */
      const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; }
      /** @brief Returns the OpenCL handle holding the nonzero values */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
      /** @brief Returns the OpenCL handle holding the work-group boundaries */
      const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; }

#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix);
#else
      template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix);
#endif

    private:
      /** @brief Copy constructor is not available at present */
      coordinate_matrix(coordinate_matrix const &);

      /** @brief Assignment is not available at present */
      coordinate_matrix & operator=(coordinate_matrix const &);

      std::size_t rows_;
      std::size_t cols_;
      std::size_t nonzeros_;
      viennacl::ocl::handle<cl_mem> coord_buffer_;       //interleaved (row, column) index pairs of the nonzeros
      viennacl::ocl::handle<cl_mem> elements_;           //the nonzero values
      viennacl::ocl::handle<cl_mem> group_boundaries_;   //start offsets of the per-work-group chunks
  };

}

#endif