ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/matrix

Go to the documentation of this file.
00001 #ifndef VIENNACL_MATRIX_PROXY_HPP_
00002 #define VIENNACL_MATRIX_PROXY_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00024 #include "viennacl/forwards.h"
00025 #include "viennacl/range.hpp"
00026 #include "viennacl/matrix.hpp"
00027 #include "viennacl/linalg/matrix_operations.hpp"
00028 
00029 namespace viennacl
00030 {
00031 
00032   template <typename MatrixType>
00033   class matrix_range
00034   {
00035     public:
00036       typedef typename MatrixType::value_type     value_type;
00037       typedef range::size_type                    size_type;
00038       typedef range::difference_type              difference_type;
00039       typedef value_type                          reference;
00040       typedef const value_type &                  const_reference;
00041       
00042       matrix_range(MatrixType & A, 
00043                    range const & row_range,
00044                    range const & col_range) : A_(A), row_range_(row_range), col_range_(col_range) {}
00045                    
00046       size_type start1() const { return row_range_.start(); }
00047       size_type size1() const { return row_range_.size(); }
00048 
00049       size_type start2() const { return col_range_.start(); }
00050       size_type size2() const { return col_range_.size(); }
00051       
00052       template <typename MatrixType1, typename MatrixType2>
00053       matrix_range<MatrixType> & operator = (const matrix_expression< MatrixType1,
00054                                                                       MatrixType2,
00055                                                                       op_prod > & proxy) 
00056       {
00057         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
00058         return *this;
00059       }
00060       
00061       
00062       matrix_range<MatrixType> & operator += (matrix_range<MatrixType> const & other)
00063       {
00064         viennacl::linalg::inplace_add(*this, other);
00065         return *this;
00066       }
00067       
00068       template <typename MatrixType1, typename MatrixType2>
00069       matrix_range<MatrixType> & operator += (const matrix_expression< MatrixType1,
00070                                                                        MatrixType2,
00071                                                                        op_prod > & proxy)
00072       {
00073         MatrixType1 temp = proxy;
00074         viennacl::range r1(0, temp.size1());
00075         viennacl::range r2(0, temp.size2());
00076         viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
00077         viennacl::linalg::inplace_add(*this, temp2);
00078         return *this;
00079       }
00080       
00081       template <typename MatrixType1, typename MatrixType2>
00082       matrix_range<MatrixType> & operator += (const matrix_expression< const matrix_range<MatrixType1>,
00083                                                                        const matrix_range<MatrixType2>,
00084                                                                        op_prod > & proxy)
00085       {
00086         MatrixType1 temp(proxy.size1(), proxy.size2());
00087         viennacl::range r1(0, temp.size1());
00088         viennacl::range r2(0, temp.size2());
00089         viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
00090         temp2 = proxy;
00091         viennacl::linalg::inplace_add(*this, temp2);
00092         return *this;
00093       }
00094 
00095       //const_reference operator()(size_type i, size_type j) const { return A_(start1() + i, start2() + i); }
00096       //reference operator()(size_type i, size_type j) { return A_(start1() + i, start2() + i); }
00097 
00098       MatrixType & get() { return A_; }
00099       const MatrixType & get() const { return A_; }
00100 
00101     private:
00102       MatrixType & A_;
00103       range row_range_;
00104       range col_range_;
00105   };
00106 
00107   
00109   template <typename MatrixType>
00110   matrix_expression< const matrix_range<MatrixType>,
00111                      const matrix_range<MatrixType>,
00112                      op_trans> trans(const matrix_range<MatrixType> & mat)
00113   {
00114     return matrix_expression< const matrix_range<MatrixType>,
00115                               const matrix_range<MatrixType>,
00116                               op_trans>(mat, mat);
00117   }
00118   
00119   
00120   
00121   
00125   
00126   //row_major:
00127   template <typename CPU_MATRIX, typename SCALARTYPE>
00128   void copy(const CPU_MATRIX & cpu_matrix,
00129             matrix_range<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_range )
00130   {
00131     assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00132            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00133     
00134      if ( gpu_matrix_range.start2() != 0 ||  gpu_matrix_range.size2() !=  gpu_matrix_range.get().size2())
00135      {
00136        std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
00137        
00138        //copy each stride separately:
00139        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00140        {
00141          for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00142            entries[j] = cpu_matrix(i,j);
00143          
00144          size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
00145          size_t num_entries = gpu_matrix_range.size2();
00146          cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00147                                           gpu_matrix_range.get().handle(), CL_TRUE, 
00148                                           sizeof(SCALARTYPE)*start_offset,
00149                                           sizeof(SCALARTYPE)*num_entries,
00150                                           &(entries[0]), 0, NULL, NULL);
00151         VIENNACL_ERR_CHECK(err);
00152         //std::cout << "Strided copy worked!" << std::endl;
00153        }
00154      }
00155      else
00156      {
00157        //full block can be copied: 
00158        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00159        
00160        //copy each stride separately:
00161        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00162          for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00163            entries[i*gpu_matrix_range.get().internal_size2() + j] = cpu_matrix(i,j);
00164        
00165        size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
00166        size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00167        //std::cout << "start_offset: " << start_offset << std::endl;
00168        cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00169                                          gpu_matrix_range.get().handle(), CL_TRUE, 
00170                                          sizeof(SCALARTYPE)*start_offset,
00171                                          sizeof(SCALARTYPE)*num_entries,
00172                                          &(entries[0]), 0, NULL, NULL);
00173        VIENNACL_ERR_CHECK(err);
00174        //std::cout << "Block copy worked!" << std::endl;
00175      }
00176   }
00177   
00178   //column_major:
00179   template <typename CPU_MATRIX, typename SCALARTYPE>
00180   void copy(const CPU_MATRIX & cpu_matrix,
00181             matrix_range<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_range )
00182   {
00183     assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00184            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00185     
00186      if ( gpu_matrix_range.start1() != 0 ||  gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
00187      {
00188        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
00189        
00190        //copy each stride separately:
00191        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00192        {
00193          for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00194            entries[i] = cpu_matrix(i,j);
00195          
00196          size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
00197          size_t num_entries = gpu_matrix_range.size1();
00198          cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00199                                           gpu_matrix_range.get().handle(), CL_TRUE, 
00200                                           sizeof(SCALARTYPE)*start_offset,
00201                                           sizeof(SCALARTYPE)*num_entries,
00202                                           &(entries[0]), 0, NULL, NULL);
00203         VIENNACL_ERR_CHECK(err);
00204         //std::cout << "Strided copy worked!" << std::endl;
00205        }
00206      }
00207      else
00208      {
00209        //full block can be copied: 
00210        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00211        
00212        //copy each stride separately:
00213        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00214          for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00215            entries[i + j*gpu_matrix_range.get().internal_size1()] = cpu_matrix(i,j);
00216        
00217        size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
00218        size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00219        //std::cout << "start_offset: " << start_offset << std::endl;
00220        cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00221                                          gpu_matrix_range.get().handle(), CL_TRUE, 
00222                                          sizeof(SCALARTYPE)*start_offset,
00223                                          sizeof(SCALARTYPE)*num_entries,
00224                                          &(entries[0]), 0, NULL, NULL);
00225        VIENNACL_ERR_CHECK(err);
00226        //std::cout << "Block copy worked!" << std::endl;
00227      }
00228     
00229   }
00230 
00231 
00235   
00236   
00237   //row_major:
00238   template <typename CPU_MATRIX, typename SCALARTYPE>
00239   void copy(matrix_range<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_range,
00240             CPU_MATRIX & cpu_matrix)
00241   {
00242     assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00243            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00244     
00245      if ( gpu_matrix_range.start2() != 0 ||  gpu_matrix_range.size2() !=  gpu_matrix_range.get().size2())
00246      {
00247        std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
00248        
00249        //copy each stride separately:
00250        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00251        {
00252          size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
00253          size_t num_entries = gpu_matrix_range.size2();
00254          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00255                                           gpu_matrix_range.get().handle(), CL_TRUE, 
00256                                           sizeof(SCALARTYPE)*start_offset,
00257                                           sizeof(SCALARTYPE)*num_entries,
00258                                           &(entries[0]), 0, NULL, NULL);
00259         VIENNACL_ERR_CHECK(err);
00260         //std::cout << "Strided copy worked!" << std::endl;
00261         
00262         for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00263           cpu_matrix(i,j) = entries[j];
00264          
00265        }
00266      }
00267      else
00268      {
00269        //full block can be copied: 
00270        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00271        
00272        size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
00273        size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00274        //std::cout << "start_offset: " << start_offset << std::endl;
00275        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00276                                          gpu_matrix_range.get().handle(), CL_TRUE, 
00277                                          sizeof(SCALARTYPE)*start_offset,
00278                                          sizeof(SCALARTYPE)*num_entries,
00279                                          &(entries[0]), 0, NULL, NULL);
00280        VIENNACL_ERR_CHECK(err);
00281        //std::cout << "Block copy worked!" << std::endl;
00282 
00283        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00284          for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00285            cpu_matrix(i,j) = entries[i*gpu_matrix_range.get().internal_size2() + j];
00286     }
00287     
00288   }
00289   
00290   
00291   //column_major:
00292   template <typename CPU_MATRIX, typename SCALARTYPE>
00293   void copy(matrix_range<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_range,
00294             CPU_MATRIX & cpu_matrix)
00295   {
00296     assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00297            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00298     
00299      if ( gpu_matrix_range.start1() != 0 ||  gpu_matrix_range.size1() !=  gpu_matrix_range.get().size1())
00300      {
00301        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
00302        
00303        //copy each stride separately:
00304        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00305        {
00306          size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
00307          size_t num_entries = gpu_matrix_range.size1();
00308          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00309                                           gpu_matrix_range.get().handle(), CL_TRUE, 
00310                                           sizeof(SCALARTYPE)*start_offset,
00311                                           sizeof(SCALARTYPE)*num_entries,
00312                                           &(entries[0]), 0, NULL, NULL);
00313         VIENNACL_ERR_CHECK(err);
00314         //std::cout << "Strided copy worked!" << std::endl;
00315         
00316         for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00317           cpu_matrix(i,j) = entries[i];
00318        }
00319      }
00320      else
00321      {
00322        //full block can be copied: 
00323        std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00324        
00325        //copy each stride separately:
00326        size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
00327        size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00328        //std::cout << "start_offset: " << start_offset << std::endl;
00329        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00330                                          gpu_matrix_range.get().handle(), CL_TRUE, 
00331                                          sizeof(SCALARTYPE)*start_offset,
00332                                          sizeof(SCALARTYPE)*num_entries,
00333                                          &(entries[0]), 0, NULL, NULL);
00334        VIENNACL_ERR_CHECK(err);
00335        //std::cout << "Block copy worked!" << std::endl;
00336        
00337        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00338          for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00339            cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.get().internal_size1()];
00340      }
00341     
00342   }
00343 
00344 
00345 /*
00346   template<typename MatrixType>
00347   std::ostream & operator<<(std::ostream & s, matrix_range<MatrixType> const & proxy)
00348   {
00349     MatrixType temp(proxy.size1(), proxy.size2());
00350     viennacl::range r1(0, proxy.size1());
00351     viennacl::range r2(0, proxy.size2());
00352     matrix_range<MatrixType> temp2(temp, r1, r2);
00353     viennacl::copy(proxy, temp2);
00354     s << temp;
00355     return s;
00356   }*/
00357 
00358 
00359 }
00360 
00361 #endif
/data/development/ViennaCL/dev/viennacl/matrix_proxy.hpp