#ifndef VIENNACL_MATRIX_HPP_
#define VIENNACL_MATRIX_HPP_

/** @file matrix.hpp
    @brief Implementation of the dense matrix class
*/

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/tools/matrix_size_deducer.hpp"
#include "viennacl/tools/matrix_kernel_class_deducer.hpp"
#include "viennacl/meta/result_of.hpp"
namespace viennacl
{
  /** @brief Tag class for row-major storage of a dense matrix */
  struct row_major
  {
    /** @brief Returns the memory offset of entry (i, j), given the internal (padded) matrix dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i * num_cols + j;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };

  /** @brief Tag class for column-major storage of a dense matrix */
  struct column_major
  {
    /** @brief Returns the memory offset of entry (i, j), given the internal (padded) matrix dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i + j * num_rows;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };
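
  // A worked example of the two layouts above (note that mem_index() takes the
  // *internal*, i.e. padded, dimensions, not size1()/size2()): for a matrix
  // padded to 16x16 internal storage, entry (1, 2) lives at offset
  // 1*16 + 2 = 18 with row_major, but at 1 + 2*16 = 33 with column_major.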

  /** @brief A proxy class for a binary matrix expression: holds the two operands and the operation tag, and is evaluated only upon assignment to a matrix */
  template <typename LHS, typename RHS, typename OP>
  class matrix_expression
  {
    public:
      matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}

      /** @brief Returns the left-hand side operand */
      LHS & lhs() const { return _lhs; }
      /** @brief Returns the right-hand side operand */
      RHS & rhs() const { return _rhs; }

      /** @brief Returns the number of rows of the resulting matrix */
      std::size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); }
      /** @brief Returns the number of columns of the resulting matrix */
      std::size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); }

    private:
      typename result_of::matrix_expression_internal_storage<LHS>::type _lhs;
      typename result_of::matrix_expression_internal_storage<RHS>::type _rhs;
  };
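
  // Expression-template sketch: operator+(A, B) further below does not compute
  // anything; it merely returns a matrix_expression<const matrix<...>, const
  // matrix<...>, op_add> node. The actual kernel launch happens when the node
  // is assigned to a matrix (see the operator= overloads of the matrix class):
  //
  //   C = A + B;   // dispatches to viennacl::linalg::add(A, B, C), no host temporary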

  /** @brief A tag indicating iteration along increasing row index of a matrix */
  struct row_iteration {};

  /** @brief A tag indicating iteration along increasing column index of a matrix */
  struct col_iteration {};

  /** @brief An STL-like iterator over the entries of a dense matrix; the traversal order is selected by the ROWCOL tag */
  template <typename ROWCOL, typename MATRIXTYPE>
  class matrix_iterator
  {
    typedef matrix_iterator<ROWCOL, MATRIXTYPE> self_type;
    public:
      typedef typename MATRIXTYPE::value_type value_type;

      matrix_iterator(MATRIXTYPE & mat,
                      std::size_t start_row,
                      std::size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}

      value_type operator*(void) { return mat_(row_, col_); }
      self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(mat_, row_, col_); return *this; }
      // Post-increment must return by value: returning a reference to the local
      // copy would hand the caller a dangling reference.
      self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }

      bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
      bool operator!=(self_type const & other) { return !(*this == other); }

      vcl_size_t index1() { return row_; }
      vcl_size_t index2() { return col_; }

      MATRIXTYPE & operator()(void) const { return mat_; }

    private:
      MATRIXTYPE & mat_;
      vcl_size_t row_;
      vcl_size_t col_;
  };
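
  // Usage sketch (hypothetical; the increment policy is delegated to
  // viennacl::tools::MATRIX_ITERATOR_INCREMENTER, so the exact end position
  // depends on that traits class):
  //
  //   viennacl::matrix<float>::iterator1 it(M, 0, 0), end(M, M.size1(), 0);
  //   for (; it != end; ++it)
  //     std::cout << *it << std::endl;   // each dereference reads one GPU entry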

  /** @brief A dense matrix class
  *
  * @tparam SCALARTYPE  The underlying scalar type (typically float or double)
  * @tparam F           Storage layout: row_major or column_major
  * @tparam ALIGNMENT   The internal memory alignment (padding) of rows and columns
  */
  template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  class matrix
  {
    public:

      typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator1;
      typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator2;
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;
      typedef vcl_size_t size_type;

      /** @brief Creates the matrix with zero rows and columns; no memory is allocated */
      matrix() : rows_(0), columns_(0)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
      }

      /** @brief Creates the matrix with the given dimensions
      *
      * @param rows     Number of rows
      * @param columns  Number of columns
      */
      explicit matrix(size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
      }

      /** @brief Wraps an existing OpenCL buffer (the reference count of 'mem' is increased) */
      explicit matrix(cl_mem mem, size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = mem;
        elements_.inc();
      }

      /** @brief Creates a matrix from an expression template and triggers its evaluation */
      template <typename LHS, typename RHS, typename OP>
      matrix(matrix_expression< LHS, RHS, OP> const & proxy) : rows_(proxy.size1()), columns_(proxy.size2())
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());

        *this = proxy;
      }

      /** @brief Copy constructor: performs a device-to-device buffer copy */
      matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) :
        rows_(mat.size1()), columns_(mat.size2()),
        elements_(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()))
      {
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
      {
        resize(mat.size1(), mat.size2(), false);
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        return *this;
      }

      /** @brief Assigns the transpose of another matrix. The transposition is carried out on the host: the operand is read to a temporary buffer, transposed, and written back to the device. */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            op_trans> & proxy)
      {
        assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
        assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
        assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");

        resize(proxy.lhs().size2(), proxy.lhs().size1(), false);

        // read the operand to the host:
        std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());

        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         proxy.lhs().handle(), CL_TRUE, 0,
                                         sizeof(SCALARTYPE)*proxy.lhs().internal_size(),
                                         &(temp[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        viennacl::ocl::get_queue().finish();

        // transpose on the host:
        std::vector<SCALARTYPE> temp_trans(internal_size());

        for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i)
          for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j)
            temp_trans[F::mem_index(j, i, internal_size1(), internal_size2())]
              = temp[F::mem_index(i, j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];

        // write the result back to a fresh device buffer:
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                   sizeof(SCALARTYPE)*internal_size(),
                                                                   &(temp_trans[0]));

        return *this;
      }
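
      // Usage sketch for the transpose assignment above, via the trans() helper
      // defined near the end of this file; the destination must already have the
      // transposed dimensions:
      //
      //   viennacl::matrix<float> A(4, 6);
      //   viennacl::matrix<float> B(6, 4);
      //   B = viennacl::trans(A);   // host round-trip, see implementation above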

      /** @brief Resizes the matrix
      *
      * @param rows      New number of rows
      * @param columns   New number of columns
      * @param preserve  If true, the existing entries are carried over; otherwise the matrix is reset to zero
      */
      void resize(size_type rows, size_type columns, bool preserve = true)
      {
        assert(rows > 0 && columns > 0);
        if (preserve)
        {
          // read the existing entries to the host:
          std::vector< SCALARTYPE > old_entries(internal_size());
          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                           handle(),
                                           CL_TRUE,
                                           0,
                                           sizeof(SCALARTYPE)*internal_size(),
                                           &(old_entries[0]),
                                           0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          // copy the old entries (as far as they fit) into a buffer with the new layout:
          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          for (size_type i=0; i<rows; ++i)
          {
            if (i >= rows_)
              continue;

            for (size_type j=0; j<columns; ++j)
            {
              if (j >= columns_)
                continue;
              new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))]
                = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
            }
          }

          // allocate the new device buffer:
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
          rows_ = rows;
          columns_ = columns;
        }
        else
        {
          rows_ = rows;
          columns_ = columns;

          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
        }
      }
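
      // Usage sketch: growing preserves existing entries, while preserve==false
      // discards them and yields a zero-initialized buffer (new_entries above is
      // value-initialized by std::vector):
      //
      //   viennacl::matrix<float> M(4, 4);
      //   M.resize(8, 8);          // old 4x4 block is preserved
      //   M.resize(2, 2, false);   // contents reset to zero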

      /** @brief Writable access to a single entry (note: each access triggers a separate transfer to the device) */
      entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index)
      {
        return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), elements_);
      }

      /** @brief Read access to a single entry (note: each access triggers a separate device-to-device copy) */
      scalar<SCALARTYPE> operator()(size_type row_index, size_type col_index) const
      {
        scalar<SCALARTYPE> tmp;
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
                                  elements_,
                                  tmp.handle(),
                                  sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()),
                                  0,
                                  sizeof(SCALARTYPE),
                                  0,
                                  NULL,
                                  NULL);
        VIENNACL_ERR_CHECK(err);
        return tmp;
      }
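
      // Usage sketch: element-wise access is convenient for debugging, but each
      // access launches its own transfer, so prefer viennacl::copy() for bulk data:
      //
      //   viennacl::matrix<float> M(3, 3);
      //   M(0, 0) = 1.0f;   // write through entry_proxy<float>
      //   // on a const matrix, M(0, 0) returns a scalar<float> living on the device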

      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_add >
      operator + (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_add > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_range< matrix<SCALARTYPE, F, ALIGNMENT> > & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_sub >
      operator - (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_sub > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_sub(*this, other);
        return *this;
      }

      // rank-1 updates from vector outer products (A += u * v^T and scaled variants):
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }
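
      // Usage sketch for the rank-1 overloads above, assuming the vector-vector
      // op_prod expression is obtained via viennacl::linalg::outer_prod()
      // (declared elsewhere in viennacl/linalg/):
      //
      //   viennacl::matrix<float> A(100, 100);
      //   viennacl::vector<float> u(100), v(100);
      //   A += viennacl::linalg::outer_prod(u, v);          // rank_1_update
      //   A -= 2.0f * viennacl::linalg::outer_prod(u, v);   // scaled_rank_1_update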

      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_divide(*this, val);
        return *this;
      }

      /** @brief Assigns a matrix-matrix product expression to this matrix */
      template <typename MatrixType1, typename MatrixType2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< MatrixType1,
                                                                              MatrixType2,
                                                                              op_prod > & proxy)
      {
        viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }
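
      // Usage sketch: viennacl::linalg::prod() (from viennacl/linalg/prod.hpp,
      // not included by this header) builds the op_prod expression consumed here:
      //
      //   viennacl::matrix<float> A(64, 32), B(32, 16), C(64, 16);
      //   C = viennacl::linalg::prod(A, B);   // dispatches to prod_impl(A, B, C)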

      /** @brief Assigns the sum of two matrices */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_add > & proxy)
      {
        viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Assigns the difference of two matrices */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_sub > & proxy)
      {
        viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Returns the number of rows */
      const size_type & size1() const { return rows_; }
      /** @brief Returns the number of columns */
      const size_type & size2() const { return columns_; }

      /** @brief Resets all entries to zero (does not change the matrix dimensions) */
      void clear()
      {
        std::size_t internal_size = internal_size1() * internal_size2();

        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass;

        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear");
        viennacl::ocl::enqueue(k(elements_, static_cast<cl_uint>(internal_size)));
      }

      /** @brief Returns the internal (padded) number of rows */
      size_type internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); }
      /** @brief Returns the internal (padded) number of columns */
      size_type internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); }
      /** @brief Returns the total number of internally allocated entries */
      size_type internal_size() const { return internal_size1() * internal_size2(); }

      /** @brief Returns the OpenCL memory handle */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }

#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix & gpu_matrix);

#ifdef VIENNACL_HAVE_EIGEN
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix & gpu_matrix);

      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix & gpu_matrix);
#endif

#ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix & gpu_matrix);
#endif
#else
      template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);

#ifdef VIENNACL_HAVE_EIGEN
      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix<float, F2, ALIGNMENT2> & gpu_matrix);

      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix<double, F2, ALIGNMENT2> & gpu_matrix);
#endif

#ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
#endif
#endif

    private:
      size_type rows_;
      size_type columns_;
      viennacl::ocl::handle<cl_mem> elements_;
  };

  /** @brief Prints the matrix to an output stream in the format [rows,cols]((row0),(row1),...) */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";

    s << "(";
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      s << "(";
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      {
        s << tmp[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
        if (j < gpu_matrix.size2() - 1)
          s << ",";
      }
      s << ")";
      if (i < gpu_matrix.size1() - 1)
        s << ",";
    }
    s << ")";
    return s;
  }
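
  // Example: a 2x2 matrix with entries (1, 2; 3, 4) prints as
  //
  //   [2,2]((1,2),(3,4))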

  /** @brief Prints a matrix expression by evaluating it into a temporary matrix first */
  template<typename LHS, typename RHS, typename OP>
  std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
  {
    typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType ScalarType;

    matrix<ScalarType> temp = expr;
    s << temp;
    return s;
  }

  /** @brief Returns an expression template representing the transpose of the given matrix */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                     const matrix<SCALARTYPE, F, ALIGNMENT>,
                     op_trans> trans(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
  {
    return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                              op_trans>(mat, mat);
  }


  /////////////////////// transfer operations, cpu to gpu: //////////////////////////////////////

  /** @brief Copies a dense CPU matrix (anything providing size1(), size2() and operator()(i, j)) to the GPU
  *
  * @param cpu_matrix  The source matrix on the host
  * @param gpu_matrix  The destination matrix on the device
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    // an empty target is resized to fit; otherwise the dimensions must match:
    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size1(),
                        cpu_matrix.size2(), false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size1())
             && (gpu_matrix.size2() == cpu_matrix.size2())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense STL-type matrix (std::vector of std::vector) to the GPU
  *
  * @param cpu_matrix  The source matrix on the host (all rows must have equal length)
  * @param gpu_matrix  The destination matrix on the device
  */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size(),
                        cpu_matrix[0].size(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size())
             && (gpu_matrix.size2() == cpu_matrix[0].size())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
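
  // Usage sketch for the two copy() overloads above:
  //
  //   std::vector< std::vector<float> > cpu(4, std::vector<float>(4, 1.0f));
  //   viennacl::matrix<float> gpu;   // empty: copy() resizes it to 4x4
  //   viennacl::copy(cpu, gpu);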

  /** @brief Copies a dense matrix from the host directly to the GPU without any index translation
  *
  * The memory at [cpu_matrix_begin, cpu_matrix_end) must already use the internal (padded) layout of the target matrix.
  */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(SCALARTYPE * cpu_matrix_begin,
                 SCALARTYPE * cpu_matrix_end,
                 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                          sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
                                                                          cpu_matrix_begin);
  }
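
  // Usage sketch: the host buffer has to provide internal_size() entries, laid
  // out according to the layout tag F (including the alignment padding):
  //
  //   viennacl::matrix<float> gpu(4, 4);
  //   std::vector<float> host(gpu.internal_size());
  //   viennacl::fast_copy(&host[0], &host[0] + host.size(), gpu);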

#ifdef VIENNACL_HAVE_EIGEN
  /** @brief Copies a dense Eigen matrix (single precision) to the GPU */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXf & cpu_matrix,
            matrix<float, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
             && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<float> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense Eigen matrix (double precision) to the GPU */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXd & cpu_matrix,
            matrix<double, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<double, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
             && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<double> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
  /** @brief Copies a dense MTL4 matrix to the GPU */
  template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
  void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.num_rows(),
                        cpu_matrix.num_cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
             && (gpu_matrix.size2() == cpu_matrix.num_cols())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

  /////////////////////// transfer operations, gpu to cpu: //////////////////////////////////////

  /** @brief Copies a dense matrix from the GPU to a CPU matrix type providing operator()(i, j)
  *
  * @param gpu_matrix  The source matrix on the device
  * @param cpu_matrix  The destination matrix on the host
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies a dense matrix from the GPU to a dense STL-type matrix (std::vector of std::vector) */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0)
       && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies a dense matrix from the GPU directly to host memory without index translation */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
                 SCALARTYPE * cpu_matrix_begin)
  {
    cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                     gpu_matrix.handle(),
                                     CL_TRUE, 0,
                                     sizeof(SCALARTYPE)*gpu_matrix.internal_size(),
                                     cpu_matrix_begin, 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
  }
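
  // Note: as in the host-to-device direction, gpu_matrix.internal_size() (padded)
  // entries are written, so cpu_matrix_begin must point to a buffer of at least
  // that many elements.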

  /** @brief Scales a vector-vector outer product expression by a CPU scalar from the right: (u * v^T) * alpha */
  template<typename CPU_SCALAR, typename SCALARTYPE, unsigned int VECTOR_ALIGNMENT>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     op_prod> & proxy,
                                                  CPU_SCALAR const & val)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

  /** @brief Scales a vector-vector outer product expression by a CPU scalar from the left: alpha * (u * v^T) */
  template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                  const viennacl::vector<SCALARTYPE, VA2>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(CPU_SCALAR const & val,
                                                  viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                               const viennacl::vector<SCALARTYPE, VA2>,
                                                                               op_prod> const & proxy)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                           const viennacl::vector<SCALARTYPE, VA2>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

} // namespace viennacl

#endif