#ifndef VIENNACL_MATRIX_HPP_
#define VIENNACL_MATRIX_HPP_

/** @file matrix.hpp
    @brief Implementation of the dense matrix class
*/

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/tools/matrix_size_deducer.hpp"
#include "viennacl/tools/matrix_kernel_class_deducer.hpp"
#include "viennacl/meta/result_of.hpp"
namespace viennacl
{
  /** @brief Tag class for row-major storage of a dense matrix */
  struct row_major
  {
    /** @brief Returns the memory offset of entry (i, j), given the internal (padded) matrix dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i * num_cols + j;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };

  /** @brief Tag class for column-major storage of a dense matrix */
  struct column_major
  {
    /** @brief Returns the memory offset of entry (i, j), given the internal (padded) matrix dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i + j * num_rows;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };
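
  // A worked example of the two layouts above (note that mem_index() takes the
  // *internal*, i.e. padded, dimensions, not size1()/size2()): for a matrix
  // padded to 16x16 internal storage, entry (1, 2) lives at offset
  // 1*16 + 2 = 18 with row_major, but at 1 + 2*16 = 33 with column_major.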

  /** @brief A proxy class for a binary matrix expression: holds the two operands and the operation tag, and is evaluated only upon assignment to a matrix */
  template <typename LHS, typename RHS, typename OP>
  class matrix_expression
  {
    public:
      matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}

      /** @brief Returns the left-hand side operand */
      LHS & lhs() const { return _lhs; }
      /** @brief Returns the right-hand side operand */
      RHS & rhs() const { return _rhs; }

      /** @brief Returns the number of rows of the resulting matrix */
      std::size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); }
      /** @brief Returns the number of columns of the resulting matrix */
      std::size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); }

    private:
      typename result_of::matrix_expression_internal_storage<LHS>::type _lhs;
      typename result_of::matrix_expression_internal_storage<RHS>::type _rhs;
  };
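
  // Expression-template sketch: operator+(A, B) further below does not compute
  // anything; it merely returns a matrix_expression<const matrix<...>, const
  // matrix<...>, op_add> node. The actual kernel launch happens when the node
  // is assigned to a matrix (see the operator= overloads of the matrix class):
  //
  //   C = A + B;   // dispatches to viennacl::linalg::add(A, B, C), no host temporary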

  /** @brief A tag indicating iteration along increasing row index of a matrix */
  struct row_iteration {};

  /** @brief A tag indicating iteration along increasing column index of a matrix */
  struct col_iteration {};

  /** @brief An STL-like iterator over the entries of a dense matrix; the traversal order is selected by the ROWCOL tag */
  template <typename ROWCOL, typename MATRIXTYPE>
  class matrix_iterator
  {
    typedef matrix_iterator<ROWCOL, MATRIXTYPE> self_type;
    public:
      typedef typename MATRIXTYPE::value_type value_type;

      matrix_iterator(MATRIXTYPE & mat,
                      std::size_t start_row,
                      std::size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}

      value_type operator*(void) { return mat_(row_, col_); }
      self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(mat_, row_, col_); return *this; }
      // Post-increment must return by value: returning a reference to the local
      // copy would hand the caller a dangling reference.
      self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }

      bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
      bool operator!=(self_type const & other) { return !(*this == other); }

      vcl_size_t index1() { return row_; }
      vcl_size_t index2() { return col_; }

      MATRIXTYPE & operator()(void) const { return mat_; }

    private:
      MATRIXTYPE & mat_;
      vcl_size_t row_;
      vcl_size_t col_;
  };
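
  // Usage sketch (hypothetical; the increment policy is delegated to
  // viennacl::tools::MATRIX_ITERATOR_INCREMENTER, so the exact end position
  // depends on that traits class):
  //
  //   viennacl::matrix<float>::iterator1 it(M, 0, 0), end(M, M.size1(), 0);
  //   for (; it != end; ++it)
  //     std::cout << *it << std::endl;   // each dereference reads one GPU entry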

  /** @brief A dense matrix class
  *
  * @tparam SCALARTYPE  The underlying scalar type (typically float or double)
  * @tparam F           Storage layout: row_major or column_major
  * @tparam ALIGNMENT   The internal memory alignment (padding) of rows and columns
  */
  template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  class matrix
  {
    public:

      typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator1;
      typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator2;
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;
      typedef vcl_size_t size_type;

      /** @brief Creates the matrix with zero rows and columns; no memory is allocated */
      matrix() : rows_(0), columns_(0)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
      }

      /** @brief Creates the matrix with the given dimensions
      *
      * @param rows     Number of rows
      * @param columns  Number of columns
      */
      explicit matrix(size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
      }

      /** @brief Wraps an existing OpenCL buffer (the reference count of 'mem' is increased) */
      explicit matrix(cl_mem mem, size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = mem;
        elements_.inc();
      }

      /** @brief Creates a matrix from an expression template and triggers its evaluation */
      template <typename LHS, typename RHS, typename OP>
      matrix(matrix_expression< LHS, RHS, OP> const & proxy) : rows_(proxy.size1()), columns_(proxy.size2())
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());

        *this = proxy;
      }

      /** @brief Copy constructor: performs a device-to-device buffer copy */
      matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) :
        rows_(mat.size1()), columns_(mat.size2()),
        elements_(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()))
      {
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
      {
        resize(mat.size1(), mat.size2(), false);
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        return *this;
      }

      /** @brief Assigns the transpose of another matrix. The transposition is carried out on the host: the operand is read to a temporary buffer, transposed, and written back to the device. */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            op_trans> & proxy)
      {
        assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
        assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
        assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");

        resize(proxy.lhs().size2(), proxy.lhs().size1(), false);

        // read the operand to the host:
        std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());

        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         proxy.lhs().handle(), CL_TRUE, 0,
                                         sizeof(SCALARTYPE)*proxy.lhs().internal_size(),
                                         &(temp[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        viennacl::ocl::get_queue().finish();

        // transpose on the host:
        std::vector<SCALARTYPE> temp_trans(internal_size());

        for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i)
          for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j)
            temp_trans[F::mem_index(j, i, internal_size1(), internal_size2())]
              = temp[F::mem_index(i, j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];

        // write the result back to a fresh device buffer:
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                   sizeof(SCALARTYPE)*internal_size(),
                                                                   &(temp_trans[0]));

        return *this;
      }
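
      // Usage sketch for the transpose assignment above, via the trans() helper
      // defined near the end of this file; the destination must already have the
      // transposed dimensions:
      //
      //   viennacl::matrix<float> A(4, 6);
      //   viennacl::matrix<float> B(6, 4);
      //   B = viennacl::trans(A);   // host round-trip, see implementation above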

      /** @brief Resizes the matrix
      *
      * @param rows      New number of rows
      * @param columns   New number of columns
      * @param preserve  If true, the existing entries are carried over; otherwise the matrix is reset to zero
      */
      void resize(size_type rows, size_type columns, bool preserve = true)
      {
        assert(rows > 0 && columns > 0);
        if (preserve)
        {
          // read the existing entries to the host:
          std::vector< SCALARTYPE > old_entries(internal_size());
          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                           handle(),
                                           CL_TRUE,
                                           0,
                                           sizeof(SCALARTYPE)*internal_size(),
                                           &(old_entries[0]),
                                           0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          // copy the old entries (as far as they fit) into a buffer with the new layout:
          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          for (size_type i=0; i<rows; ++i)
          {
            if (i >= rows_)
              continue;

            for (size_type j=0; j<columns; ++j)
            {
              if (j >= columns_)
                continue;
              new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))]
                = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
            }
          }

          // allocate the new device buffer:
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
          rows_ = rows;
          columns_ = columns;
        }
        else
        {
          rows_ = rows;
          columns_ = columns;

          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
        }
      }
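
      // Usage sketch: growing preserves existing entries, while preserve==false
      // discards them and yields a zero-initialized buffer (new_entries above is
      // value-initialized by std::vector):
      //
      //   viennacl::matrix<float> M(4, 4);
      //   M.resize(8, 8);          // old 4x4 block is preserved
      //   M.resize(2, 2, false);   // contents reset to zero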

      /** @brief Writable access to a single entry (note: each access triggers a separate transfer to the device) */
      entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index)
      {
        return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), elements_);
      }

      /** @brief Read access to a single entry (note: each access triggers a separate device-to-device copy) */
      scalar<SCALARTYPE> operator()(size_type row_index, size_type col_index) const
      {
        scalar<SCALARTYPE> tmp;
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
                                  elements_,
                                  tmp.handle(),
                                  sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()),
                                  0,
                                  sizeof(SCALARTYPE),
                                  0,
                                  NULL,
                                  NULL);
        VIENNACL_ERR_CHECK(err);
        return tmp;
      }
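
      // Usage sketch: element-wise access is convenient for debugging, but each
      // access launches its own transfer, so prefer viennacl::copy() for bulk data:
      //
      //   viennacl::matrix<float> M(3, 3);
      //   M(0, 0) = 1.0f;   // write through entry_proxy<float>
      //   // on a const matrix, M(0, 0) returns a scalar<float> living on the device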

      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_add >
      operator + (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_add > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_range< matrix<SCALARTYPE, F, ALIGNMENT> > & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_sub >
      operator - (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_sub > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_sub(*this, other);
        return *this;
      }

      // rank-1 updates from vector outer products (A += u * v^T and scaled variants):
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }
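
      // Usage sketch for the rank-1 overloads above, assuming the vector-vector
      // op_prod expression is obtained via viennacl::linalg::outer_prod()
      // (declared elsewhere in viennacl/linalg/):
      //
      //   viennacl::matrix<float> A(100, 100);
      //   viennacl::vector<float> u(100), v(100);
      //   A += viennacl::linalg::outer_prod(u, v);          // rank_1_update
      //   A -= 2.0f * viennacl::linalg::outer_prod(u, v);   // scaled_rank_1_update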

      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_divide(*this, val);
        return *this;
      }

      /** @brief Assigns a matrix-matrix product expression to this matrix */
      template <typename MatrixType1, typename MatrixType2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< MatrixType1,
                                                                              MatrixType2,
                                                                              op_prod > & proxy)
      {
        viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }
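
      // Usage sketch: viennacl::linalg::prod() (from viennacl/linalg/prod.hpp,
      // not included by this header) builds the op_prod expression consumed here:
      //
      //   viennacl::matrix<float> A(64, 32), B(32, 16), C(64, 16);
      //   C = viennacl::linalg::prod(A, B);   // dispatches to prod_impl(A, B, C)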

      /** @brief Assigns the sum of two matrices */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_add > & proxy)
      {
        viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Assigns the difference of two matrices */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_sub > & proxy)
      {
        viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Returns the number of rows */
      const size_type & size1() const { return rows_; }
      /** @brief Returns the number of columns */
      const size_type & size2() const { return columns_; }

      /** @brief Resets all entries to zero (does not change the matrix dimensions) */
      void clear()
      {
        std::size_t internal_size = internal_size1() * internal_size2();

        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;

        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear");
        viennacl::ocl::enqueue(k(elements_, static_cast<cl_uint>(internal_size)));
      }

      /** @brief Returns the internal (padded) number of rows */
      size_type internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); }
      /** @brief Returns the internal (padded) number of columns */
      size_type internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); }
      /** @brief Returns the total number of internally allocated entries */
      size_type internal_size() const { return internal_size1() * internal_size2(); }

      /** @brief Returns the OpenCL memory handle */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }

#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix & gpu_matrix);

#ifdef VIENNACL_HAVE_EIGEN
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix & gpu_matrix);

      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix & gpu_matrix);
#endif

#ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix & gpu_matrix);
#endif
#else
      template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);

#ifdef VIENNACL_HAVE_EIGEN
      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix<float, F2, ALIGNMENT2> & gpu_matrix);

      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix<double, F2, ALIGNMENT2> & gpu_matrix);
#endif

#ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
#endif
#endif

    private:
      size_type rows_;
      size_type columns_;
      viennacl::ocl::handle<cl_mem> elements_;
  };

  /** @brief Prints the matrix to an output stream in the format [rows,cols]((row0),(row1),...) */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";

    s << "(";
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      s << "(";
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      {
        s << tmp[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
        if (j < gpu_matrix.size2() - 1)
          s << ",";
      }
      s << ")";
      if (i < gpu_matrix.size1() - 1)
        s << ",";
    }
    s << ")";
    return s;
  }
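
  // Example: a 2x2 matrix with entries (1, 2; 3, 4) prints as
  //
  //   [2,2]((1,2),(3,4))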

  /** @brief Prints a matrix expression by evaluating it into a temporary matrix first */
  template<typename LHS, typename RHS, typename OP>
  std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
  {
    typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType ScalarType;

    matrix<ScalarType> temp = expr;
    s << temp;
    return s;
  }

  /** @brief Returns an expression template representing the transpose of the given matrix */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                     const matrix<SCALARTYPE, F, ALIGNMENT>,
                     op_trans> trans(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
  {
    return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                              op_trans>(mat, mat);
  }


  /////////////////////// transfer operations, cpu to gpu: //////////////////////////////////////

  /** @brief Copies a dense CPU matrix (anything providing size1(), size2() and operator()(i, j)) to the GPU
  *
  * @param cpu_matrix  The source matrix on the host
  * @param gpu_matrix  The destination matrix on the device
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    // an empty target is resized to fit; otherwise the dimensions must match:
    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size1(),
                        cpu_matrix.size2(), false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size1())
             && (gpu_matrix.size2() == cpu_matrix.size2())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense STL-type matrix (std::vector of std::vector) to the GPU
  *
  * @param cpu_matrix  The source matrix on the host (all rows must have equal length)
  * @param gpu_matrix  The destination matrix on the device
  */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size(),
                        cpu_matrix[0].size(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size())
             && (gpu_matrix.size2() == cpu_matrix[0].size())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
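
  // Usage sketch for the two copy() overloads above:
  //
  //   std::vector< std::vector<float> > cpu(4, std::vector<float>(4, 1.0f));
  //   viennacl::matrix<float> gpu;   // empty: copy() resizes it to 4x4
  //   viennacl::copy(cpu, gpu);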

  /** @brief Copies a dense matrix from the host directly to the GPU without any index translation
  *
  * The memory at [cpu_matrix_begin, cpu_matrix_end) must already use the internal (padded) layout of the target matrix.
  */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(SCALARTYPE * cpu_matrix_begin,
                 SCALARTYPE * cpu_matrix_end,
                 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                          sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
                                                                          cpu_matrix_begin);
  }
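
  // Usage sketch: the host buffer has to provide internal_size() entries, laid
  // out according to the layout tag F (including the alignment padding):
  //
  //   viennacl::matrix<float> gpu(4, 4);
  //   std::vector<float> host(gpu.internal_size());
  //   viennacl::fast_copy(&host[0], &host[0] + host.size(), gpu);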

#ifdef VIENNACL_HAVE_EIGEN
  /** @brief Copies a dense Eigen matrix (single precision) to the GPU */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXf & cpu_matrix,
            matrix<float, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
             && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<float> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense Eigen matrix (double precision) to the GPU */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXd & cpu_matrix,
            matrix<double, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<double, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
             && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<double> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
  /** @brief Copies a dense MTL4 matrix to the GPU */
  template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
  void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.num_rows(),
                        cpu_matrix.num_cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
             && (gpu_matrix.size2() == cpu_matrix.num_cols())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

  /////////////////////// transfer operations, gpu to cpu: //////////////////////////////////////

  /** @brief Copies a dense matrix from the GPU to a CPU matrix type providing operator()(i, j)
  *
  * @param gpu_matrix  The source matrix on the device
  * @param cpu_matrix  The destination matrix on the host
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies a dense matrix from the GPU to a dense STL-type matrix (std::vector of std::vector) */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0)
       && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies a dense matrix from the GPU directly to host memory without index translation */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
                 SCALARTYPE * cpu_matrix_begin)
  {
    cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                     gpu_matrix.handle(),
                                     CL_TRUE, 0,
                                     sizeof(SCALARTYPE)*gpu_matrix.internal_size(),
                                     cpu_matrix_begin, 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
  }
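
  // Note: as in the host-to-device direction, gpu_matrix.internal_size() (padded)
  // entries are written, so cpu_matrix_begin must point to a buffer of at least
  // that many elements.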

  /** @brief Scales a vector-vector outer product expression by a CPU scalar from the right: (u * v^T) * alpha */
  template<typename CPU_SCALAR, typename SCALARTYPE, unsigned int VECTOR_ALIGNMENT>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     op_prod> & proxy,
                                                  CPU_SCALAR const & val)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

  /** @brief Scales a vector-vector outer product expression by a CPU scalar from the left: alpha * (u * v^T) */
  template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                  const viennacl::vector<SCALARTYPE, VA2>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(CPU_SCALAR const & val,
                                                  viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                               const viennacl::vector<SCALARTYPE, VA2>,
                                                                               op_prod> const & proxy)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                           const viennacl::vector<SCALARTYPE, VA2>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

} // namespace viennacl

#endif