1 #ifndef VIENNACL_MATRIX_PROXY_HPP_
2 #define VIENNACL_MATRIX_PROXY_HPP_
37 template<
typename MatrixType>
38 class matrix_range :
public matrix_base<typename MatrixType::cpu_value_type>
40 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
41 typedef matrix_range<MatrixType> self_type;
54 range const & row_range,
55 range const & col_range) : base_type(const_cast<handle_type &>(A.
handle()),
61 range const & row_range,
62 range const & col_range) : base_type(const_cast<handle_type &>(A.
handle()),
73 using base_type::operator=;
76 template<
typename OtherNumericT,
typename F>
79 template<
typename OtherNumericT,
typename F>
82 template<
typename OtherNumericT,
typename F>
86 template<
typename MatrixType>
94 range const & row_range,
95 range const & col_range) : base_type(const_cast<handle_type &>(A.
handle()),
101 range const & row_range,
102 range const & col_range) : base_type(const_cast<handle_type &>(A.
handle()),
113 template<
typename CPUMatrixT,
typename NumericT>
114 void copy(
const CPUMatrixT & cpu_matrix,
117 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
118 && (cpu_matrix.size2() == gpu_matrix_range.size2())
119 &&
bool(
"Matrix size mismatch!"));
121 if ( gpu_matrix_range.start2() != 0)
123 std::vector<NumericT> entries(gpu_matrix_range.size2());
126 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
128 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
129 entries[j] = cpu_matrix(i,j);
131 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
132 vcl_size_t num_entries = gpu_matrix_range.size2();
140 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
143 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
144 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
145 entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j);
147 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
148 vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
155 template<
typename CPUMatrixT,
typename NumericT>
156 void copy(
const CPUMatrixT & cpu_matrix,
159 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
160 && (cpu_matrix.size2() == gpu_matrix_range.size2())
161 &&
bool(
"Matrix size mismatch!"));
163 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.internal_size1()) // column-by-column unless the range fills whole internal columns; the bulk path stages internal_size1()*size2() entries and would otherwise carry zeros for rows outside the range
165 std::vector<NumericT> entries(gpu_matrix_range.size1());
168 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
170 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
171 entries[i] = cpu_matrix(i,j);
173 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
174 vcl_size_t num_entries = gpu_matrix_range.size1();
182 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
185 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
186 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
187 entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j);
189 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
190 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
204 template<
typename CPUMatrixT,
typename NumericT>
206 CPUMatrixT & cpu_matrix)
208 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
209 && (cpu_matrix.size2() == gpu_matrix_range.size2())
210 &&
bool(
"Matrix size mismatch!"));
212 if ( gpu_matrix_range.start2() != 0)
214 std::vector<NumericT> entries(gpu_matrix_range.size2());
217 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
219 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
220 vcl_size_t num_entries = gpu_matrix_range.size2();
224 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
225 cpu_matrix(i,j) = entries[j];
231 std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
233 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
237 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
238 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
239 cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j];
246 template<
typename CPUMatrixT,
typename NumericT>
248 CPUMatrixT & cpu_matrix)
250 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
251 && (cpu_matrix.size2() == gpu_matrix_range.size2())
252 &&
bool(
"Matrix size mismatch!"));
254 if ( gpu_matrix_range.start1() != 0)
256 std::vector<NumericT> entries(gpu_matrix_range.size1());
259 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
261 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
262 vcl_size_t num_entries = gpu_matrix_range.size1();
266 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
267 cpu_matrix(i,j) = entries[i];
273 std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
276 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
277 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
281 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
282 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
283 cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()];
292 template<
typename MatrixType>
295 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
301 template<
typename MatrixType>
304 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
328 template<
typename MatrixType>
329 class matrix_slice :
public matrix_base<typename MatrixType::cpu_value_type>
331 typedef matrix_base<typename MatrixType::cpu_value_type> base_type;
332 typedef matrix_slice<MatrixType> self_type;
345 slice const & row_slice,
346 slice const & col_slice) : base_type(const_cast<handle_type &>(A.
handle()),
352 slice const & row_slice,
353 slice const & col_slice) : base_type(const_cast<handle_type &>(A.
handle()),
364 using base_type::operator=;
367 template<
typename OtherNumericT,
typename F>
370 template<
typename OtherNumericT,
typename F>
373 template<
typename OtherNumericT,
typename F>
377 template<
typename MatrixType>
385 slice const & row_slice,
386 slice const & col_slice) : base_type(const_cast<handle_type &>(A.
handle()),
392 slice const & row_slice,
393 slice const & col_slice) : base_type(const_cast<handle_type &>(A.
handle()),
405 template<
typename CPUMatrixT,
typename NumericT>
406 void copy(
const CPUMatrixT & cpu_matrix,
409 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
410 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
411 &&
bool(
"Matrix size mismatch!"));
413 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) ) // nothing to transfer when either extent is zero (second test previously repeated size1())
415 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
417 std::vector<NumericT> entries(num_entries);
420 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
422 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
425 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
426 entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j);
434 template<
typename CPUMatrixT,
typename NumericT>
435 void copy(
const CPUMatrixT & cpu_matrix,
438 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
439 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
440 &&
bool(
"Matrix size mismatch!"));
443 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) ) // nothing to transfer when either extent is zero (second test previously repeated size1())
445 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
447 std::vector<NumericT> entries(num_entries);
450 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
452 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
456 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
457 entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j);
472 template<
typename CPUMatrixT,
typename NumericT>
474 CPUMatrixT & cpu_matrix)
476 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
477 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
478 &&
bool(
"Matrix size mismatch!"));
480 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) ) // nothing to read back when either extent is zero (second test previously repeated size1())
482 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
484 std::vector<NumericT> entries(num_entries);
487 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
489 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
493 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
494 cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()];
502 template<
typename CPUMatrixT,
typename NumericT>
504 CPUMatrixT & cpu_matrix)
506 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
507 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
508 &&
bool(
"Matrix size mismatch!"));
510 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) ) // nothing to read back when either extent is zero (second test previously repeated size1())
512 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
514 std::vector<NumericT> entries(num_entries);
517 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
519 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
523 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
524 cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()];
534 template<
typename MatrixType>
537 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
542 template<
typename MatrixType>
545 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
550 template<
typename MatrixType>
553 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
viennacl::tools::shared_ptr< char > handle_type
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
MatrixType::handle_type handle_type
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_range(matrix_range< MatrixType > const &A, range const &row_range, range const &col_range)
Class for representing strided submatrices of a bigger matrix A.
self_type & operator=(const self_type &other)
range::size_type size_type
MatrixType::value_type value_type
MatrixType::handle_type handle_type
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
MatrixType::handle_type handle_type
size_type stride2() const
Returns the stride between consecutive columns of the proxy, measured in columns of the underlying matrix.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
range::difference_type difference_type
Forward declaration of dense matrix classes.
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
matrix_slice(self_type const &other)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
MatrixType::value_type value_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
range::size_type size_type
matrix_range(self_type const &A, range const &row_range, range const &col_range)
result_of::size_type< T >::type start(T const &obj)
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
range::difference_type difference_type
const value_type & const_reference
size_type stride1() const
Returns the stride between consecutive rows of the proxy, measured in rows of the underlying matrix.
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
matrix_range(self_type const &other)
size_type size2() const
Returns the number of columns.
handle_type & handle()
Returns the OpenCL handle, non-const-version.
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
size_type size1() const
Returns the number of rows.
MatrixType::handle_type handle_type
DistanceT difference_type
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_slice(self_type const &A, slice const &row_slice, slice const &col_slice)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementation of a slice object for use with proxy objects.
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
Implementation of a range object for use with proxy objects.
size_type start2() const
Returns the index of the first column of the proxy within the underlying matrix.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Class for representing non-strided submatrices of a bigger matrix A.
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
const value_type & const_reference
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
A tag for row-major storage of a dense matrix.
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
size_type start1() const
Returns the index of the first row of the proxy within the underlying matrix.
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
matrix_slice(matrix_slice< MatrixType > const &A, slice const &row_slice, slice const &col_slice)