00001 #ifndef VIENNACL_MATRIX_PROXY_HPP_
00002 #define VIENNACL_MATRIX_PROXY_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #include "viennacl/forwards.h"
00025 #include "viennacl/range.hpp"
00026 #include "viennacl/matrix.hpp"
00027 #include "viennacl/linalg/matrix_operations.hpp"
00028
00029 namespace viennacl
00030 {
00031
00032 template <typename MatrixType>
00033 class matrix_range
00034 {
00035 public:
00036 typedef typename MatrixType::value_type value_type;
00037 typedef range::size_type size_type;
00038 typedef range::difference_type difference_type;
00039 typedef value_type reference;
00040 typedef const value_type & const_reference;
00041
00042 matrix_range(MatrixType & A,
00043 range const & row_range,
00044 range const & col_range) : A_(A), row_range_(row_range), col_range_(col_range) {}
00045
00046 size_type start1() const { return row_range_.start(); }
00047 size_type size1() const { return row_range_.size(); }
00048
00049 size_type start2() const { return col_range_.start(); }
00050 size_type size2() const { return col_range_.size(); }
00051
00052 template <typename MatrixType1, typename MatrixType2>
00053 matrix_range<MatrixType> & operator = (const matrix_expression< MatrixType1,
00054 MatrixType2,
00055 op_prod > & proxy)
00056 {
00057 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
00058 return *this;
00059 }
00060
00061
00062 matrix_range<MatrixType> & operator += (matrix_range<MatrixType> const & other)
00063 {
00064 viennacl::linalg::inplace_add(*this, other);
00065 return *this;
00066 }
00067
00068 template <typename MatrixType1, typename MatrixType2>
00069 matrix_range<MatrixType> & operator += (const matrix_expression< MatrixType1,
00070 MatrixType2,
00071 op_prod > & proxy)
00072 {
00073 MatrixType1 temp = proxy;
00074 viennacl::range r1(0, temp.size1());
00075 viennacl::range r2(0, temp.size2());
00076 viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
00077 viennacl::linalg::inplace_add(*this, temp2);
00078 return *this;
00079 }
00080
00081 template <typename MatrixType1, typename MatrixType2>
00082 matrix_range<MatrixType> & operator += (const matrix_expression< const matrix_range<MatrixType1>,
00083 const matrix_range<MatrixType2>,
00084 op_prod > & proxy)
00085 {
00086 MatrixType1 temp(proxy.size1(), proxy.size2());
00087 viennacl::range r1(0, temp.size1());
00088 viennacl::range r2(0, temp.size2());
00089 viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
00090 temp2 = proxy;
00091 viennacl::linalg::inplace_add(*this, temp2);
00092 return *this;
00093 }
00094
00095
00096
00097
00098 MatrixType & get() { return A_; }
00099 const MatrixType & get() const { return A_; }
00100
00101 private:
00102 MatrixType & A_;
00103 range row_range_;
00104 range col_range_;
00105 };
00106
00107
00109 template <typename MatrixType>
00110 matrix_expression< const matrix_range<MatrixType>,
00111 const matrix_range<MatrixType>,
00112 op_trans> trans(const matrix_range<MatrixType> & mat)
00113 {
00114 return matrix_expression< const matrix_range<MatrixType>,
00115 const matrix_range<MatrixType>,
00116 op_trans>(mat, mat);
00117 }
00118
00119
00120
00121
00125
00126
00127 template <typename CPU_MATRIX, typename SCALARTYPE>
00128 void copy(const CPU_MATRIX & cpu_matrix,
00129 matrix_range<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_range )
00130 {
00131 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00132 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00133
00134 if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
00135 {
00136 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
00137
00138
00139 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00140 {
00141 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00142 entries[j] = cpu_matrix(i,j);
00143
00144 size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
00145 size_t num_entries = gpu_matrix_range.size2();
00146 cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00147 gpu_matrix_range.get().handle(), CL_TRUE,
00148 sizeof(SCALARTYPE)*start_offset,
00149 sizeof(SCALARTYPE)*num_entries,
00150 &(entries[0]), 0, NULL, NULL);
00151 VIENNACL_ERR_CHECK(err);
00152
00153 }
00154 }
00155 else
00156 {
00157
00158 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00159
00160
00161 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00162 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00163 entries[i*gpu_matrix_range.get().internal_size2() + j] = cpu_matrix(i,j);
00164
00165 size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
00166 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00167
00168 cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00169 gpu_matrix_range.get().handle(), CL_TRUE,
00170 sizeof(SCALARTYPE)*start_offset,
00171 sizeof(SCALARTYPE)*num_entries,
00172 &(entries[0]), 0, NULL, NULL);
00173 VIENNACL_ERR_CHECK(err);
00174
00175 }
00176 }
00177
00178
00179 template <typename CPU_MATRIX, typename SCALARTYPE>
00180 void copy(const CPU_MATRIX & cpu_matrix,
00181 matrix_range<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_range )
00182 {
00183 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00184 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00185
00186 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
00187 {
00188 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
00189
00190
00191 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00192 {
00193 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00194 entries[i] = cpu_matrix(i,j);
00195
00196 size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
00197 size_t num_entries = gpu_matrix_range.size1();
00198 cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00199 gpu_matrix_range.get().handle(), CL_TRUE,
00200 sizeof(SCALARTYPE)*start_offset,
00201 sizeof(SCALARTYPE)*num_entries,
00202 &(entries[0]), 0, NULL, NULL);
00203 VIENNACL_ERR_CHECK(err);
00204
00205 }
00206 }
00207 else
00208 {
00209
00210 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00211
00212
00213 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00214 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00215 entries[i + j*gpu_matrix_range.get().internal_size1()] = cpu_matrix(i,j);
00216
00217 size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
00218 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00219
00220 cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
00221 gpu_matrix_range.get().handle(), CL_TRUE,
00222 sizeof(SCALARTYPE)*start_offset,
00223 sizeof(SCALARTYPE)*num_entries,
00224 &(entries[0]), 0, NULL, NULL);
00225 VIENNACL_ERR_CHECK(err);
00226
00227 }
00228
00229 }
00230
00231
00235
00236
00237
00238 template <typename CPU_MATRIX, typename SCALARTYPE>
00239 void copy(matrix_range<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_range,
00240 CPU_MATRIX & cpu_matrix)
00241 {
00242 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00243 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00244
00245 if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
00246 {
00247 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
00248
00249
00250 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00251 {
00252 size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
00253 size_t num_entries = gpu_matrix_range.size2();
00254 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00255 gpu_matrix_range.get().handle(), CL_TRUE,
00256 sizeof(SCALARTYPE)*start_offset,
00257 sizeof(SCALARTYPE)*num_entries,
00258 &(entries[0]), 0, NULL, NULL);
00259 VIENNACL_ERR_CHECK(err);
00260
00261
00262 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00263 cpu_matrix(i,j) = entries[j];
00264
00265 }
00266 }
00267 else
00268 {
00269
00270 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00271
00272 size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
00273 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00274
00275 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00276 gpu_matrix_range.get().handle(), CL_TRUE,
00277 sizeof(SCALARTYPE)*start_offset,
00278 sizeof(SCALARTYPE)*num_entries,
00279 &(entries[0]), 0, NULL, NULL);
00280 VIENNACL_ERR_CHECK(err);
00281
00282
00283 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00284 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00285 cpu_matrix(i,j) = entries[i*gpu_matrix_range.get().internal_size2() + j];
00286 }
00287
00288 }
00289
00290
00291
00292 template <typename CPU_MATRIX, typename SCALARTYPE>
00293 void copy(matrix_range<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_range,
00294 CPU_MATRIX & cpu_matrix)
00295 {
00296 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
00297 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
00298
00299 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
00300 {
00301 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
00302
00303
00304 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00305 {
00306 size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
00307 size_t num_entries = gpu_matrix_range.size1();
00308 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00309 gpu_matrix_range.get().handle(), CL_TRUE,
00310 sizeof(SCALARTYPE)*start_offset,
00311 sizeof(SCALARTYPE)*num_entries,
00312 &(entries[0]), 0, NULL, NULL);
00313 VIENNACL_ERR_CHECK(err);
00314
00315
00316 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00317 cpu_matrix(i,j) = entries[i];
00318 }
00319 }
00320 else
00321 {
00322
00323 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
00324
00325
00326 size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
00327 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
00328
00329 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00330 gpu_matrix_range.get().handle(), CL_TRUE,
00331 sizeof(SCALARTYPE)*start_offset,
00332 sizeof(SCALARTYPE)*num_entries,
00333 &(entries[0]), 0, NULL, NULL);
00334 VIENNACL_ERR_CHECK(err);
00335
00336
00337 for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
00338 for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
00339 cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.get().internal_size1()];
00340 }
00341
00342 }
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 }
00360
00361 #endif