ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_HPP_
2 #define VIENNACL_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
27 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
36 
37 namespace viennacl
38 {
39 
40 //#ifdef VIENNACL_WITH_OPENCL
41 // template<class NumericT, class DISTRIBUTION>
42 // rand::random_matrix_t<NumericT, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
43 // return rand::random_matrix_t<NumericT,DISTRIBUTION>(size1,size2,distribution);
44 // }
45 //#endif
46 
53 template<typename LHS, typename RHS, typename OP>
54 class matrix_expression
55 {
56  typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type;
57  typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type;
58 
59 public:
61 
62  matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
63 
66  LHS & lhs() const { return lhs_; }
69  RHS & rhs() const { return rhs_; }
70 
74 
75 private:
77  lhs_reference_type lhs_;
79  rhs_reference_type rhs_;
80 };
81 
82 
84 struct row_iteration {};
85 
87 struct col_iteration {};
88 
89 //STL-like iterator. TODO: STL-compliance...
91 template<typename ROWCOL, typename MatrixT>
92 class matrix_iterator
93 {
94  typedef matrix_iterator<ROWCOL, MatrixT> self_type;
95 public:
96  typedef typename MatrixT::value_type value_type;
97 
98  matrix_iterator(MatrixT & mat,
99  vcl_size_t start_row,
100  vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
101 
102  value_type operator*(void) { return mat_(row_, col_); }
103  self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MatrixT>::apply(mat_, row_, col_); return *this; }
104  self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
105 
106  bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
107  bool operator!=(self_type const & other) { return !(*this == other); }
108 
109  vcl_size_t index1() { return row_; }
110  vcl_size_t index2() { return col_; }
111 
112  MatrixT & operator()(void) const { return mat_; }
113 
114 private:
115  MatrixT & mat_;
116  vcl_size_t row_;
117  vcl_size_t col_;
118 };
119 
126 template<class NumericT, typename SizeT, typename DistanceT>
128  : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
129  internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, dense_padding_size)),
130  internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, dense_padding_size)),
131  row_major_fixed_(true), row_major_(is_row_major)
132 {
133  if (rows > 0 && columns > 0)
134  {
135  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx);
136  clear();
137  }
138 }
139 
142 template<class NumericT, typename SizeT, typename DistanceT>
143 template<typename LHS, typename RHS, typename OP>
145  size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
146  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
147  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
148  row_major_fixed_(true), row_major_(viennacl::traits::row_major(proxy))
149 {
151  if (internal_size() > 0)
152  {
154  clear();
155  self_type::operator=(proxy);
156  }
157 }
158 
159 // CUDA or host memory:
160 template<class NumericT, typename SizeT, typename DistanceT>
162  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
163  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
164  bool is_row_major)
165  : size1_(mat_size1), size2_(mat_size2),
166  start1_(mat_start1), start2_(mat_start2),
167  stride1_(mat_stride1), stride2_(mat_stride2),
168  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
169  row_major_fixed_(true), row_major_(is_row_major)
170 {
171  if (mem_type == viennacl::CUDA_MEMORY)
172  {
173 #ifdef VIENNACL_WITH_CUDA
175  elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
176  elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
177 #else
179 #endif
180  }
181  else if (mem_type == viennacl::MAIN_MEMORY)
182  {
184  elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
185  elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
186  }
187 
188  elements_.raw_size(sizeof(NumericT) * internal_size());
189 }
190 
191 #ifdef VIENNACL_WITH_OPENCL
192 template<class NumericT, typename SizeT, typename DistanceT>
193 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
194  : size1_(rows), size2_(columns),
195  start1_(0), start2_(0),
196  stride1_(1), stride2_(1),
197  internal_size1_(rows), internal_size2_(columns),
198  row_major_fixed_(true), row_major_(is_row_major)
199 {
201  elements_.opencl_handle() = mem;
202  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
203  elements_.opencl_handle().context(ctx.opencl_context());
204  elements_.raw_size(sizeof(NumericT)*internal_size());
205 }
206 
207 template<class NumericT, typename SizeT, typename DistanceT>
209  size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
210  size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
211  bool is_row_major)
212  : size1_(mat_size1), size2_(mat_size2),
213  start1_(mat_start1), start2_(mat_start2),
214  stride1_(mat_stride1), stride2_(mat_stride2),
215  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
216  row_major_fixed_(true), row_major_(is_row_major)
217 {
219  elements_.opencl_handle() = mem;
220  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
221  elements_.opencl_handle().context(ctx.opencl_context());
222  elements_.raw_size(sizeof(NumericT)*internal_size());
223 }
224 #endif
225 
226 // Copy CTOR
227 template<class NumericT, typename SizeT, typename DistanceT>
229  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
230  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
231  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
232  row_major_fixed_(true), row_major_(other.row_major())
233 {
235  if (internal_size() > 0)
236  {
238  clear();
239  self_type::operator=(other);
240  }
241 }
242 
243 // Conversion CTOR
244 template<typename NumericT, typename SizeT, typename DistanceT>
245 template<typename OtherNumericT>
247  size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1),
248  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
249  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
250  row_major_fixed_(true), row_major_(other.row_major())
251 {
253  if (internal_size() > 0)
254  {
256  clear();
257  self_type::operator=(other);
258  }
259 }
260 
261 template<class NumericT, typename SizeT, typename DistanceT>
263 {
264  if (&other==this)
265  return *this;
266 
267  if (internal_size() == 0)
268  {
269  if (other.internal_size() == 0)
270  return *this;
271  if (!row_major_fixed_)
272  row_major_ = other.row_major();
273  resize(other.size1(), other.size2(), false);
274  }
275 
276  viennacl::linalg::am(*this,
277  other, cpu_value_type(1.0), 1, false, false);
278  return *this;
279 }
280 
281 // Conversion assignment
282 template<class NumericT, typename SizeT, typename DistanceT>
283 template<typename OtherNumericT>
285 {
286  if (internal_size() == 0)
287  {
288  if (other.internal_size() == 0)
289  return *this;
290  if (!row_major_fixed_)
291  row_major_ = other.row_major();
292  resize(other.size1(), other.size2(), false);
293  }
294 
295  viennacl::linalg::convert(*this, other);
296  return *this;
297 }
298 
303 template<class NumericT, typename SizeT, typename DistanceT>
304 template<typename LHS, typename RHS, typename OP>
306 {
307  assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0)
308  && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
309  && bool("Incompatible matrix sizes!"));
310  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
311  {
312  size1_ = viennacl::traits::size1(proxy);
313  size2_ = viennacl::traits::size2(proxy);
314  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
315  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
316  if (!row_major_fixed_)
317  row_major_ = viennacl::traits::row_major(proxy);
319  if (size1_ != internal_size1_ || size2_ != internal_size2_)
320  clear();
321  }
322 
323  if (internal_size() > 0)
325 
326  return *this;
327 }
328 
329 
330 // A = trans(B)
331 template<class NumericT, typename SizeT, typename DistanceT>
333 {
334  if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 )
335  {
336  size1_ = viennacl::traits::size1(proxy);
337  size2_ = viennacl::traits::size2(proxy);
338  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
339  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
340  if (!row_major_fixed_)
341  row_major_ = viennacl::traits::row_major(proxy);
342  }
343 
344  if ( handle() == proxy.lhs().handle() )
345  {
346  viennacl::matrix_base<NumericT> temp(proxy.lhs().size2(), proxy.lhs().size1(),proxy.lhs().row_major());
347  viennacl::linalg::trans(proxy, temp);
348  if ( proxy.lhs().size1() != proxy.lhs().size2() )
349  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
350  elements_ = temp.handle();
351  }
352  else
353  {
354  if ( proxy.lhs().size1() != proxy.lhs().size2() )
355  this->resize(proxy.lhs().size2(), proxy.lhs().size1());
356  viennacl::linalg::trans(proxy, *this);
357  }
358  return *this;
359 }
360 
361 template<class NumericT, typename SizeT, typename DistanceT>
362 template<typename LHS, typename RHS, typename OP>
364 {
365  assert( (viennacl::traits::size1(proxy) == size1())
366  && (viennacl::traits::size2(proxy) == size2())
367  && bool("Incompatible matrix sizes!"));
368  assert( (size1() > 0) && bool("Vector not yet initialized!") );
369  assert( (size2() > 0) && bool("Vector not yet initialized!") );
370 
372 
373  return *this;
374 }
375 
376 template<class NumericT, typename SizeT, typename DistanceT>
377 template<typename LHS, typename RHS, typename OP>
379 {
380  assert( (viennacl::traits::size1(proxy) == size1())
381  && (viennacl::traits::size2(proxy) == size2())
382  && bool("Incompatible matrix sizes!"));
383  assert( (size1() > 0) && bool("Vector not yet initialized!") );
384  assert( (size2() > 0) && bool("Vector not yet initialized!") );
385 
387 
388  return *this;
389 }
390 
392 template<class NumericT, typename SizeT, typename DistanceT>
394 {
395  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
396  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
397 
398  if (internal_size() == 0)
399  {
400  size1_ = m.size1();
401  size2_ = m.size2();
402  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
403  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
404  if (internal_size() > 0)
405  {
407  clear();
408  }
409  }
410  else
412 
413  if (internal_size() > 0)
415 
416  return *this;
417 }
418 
420 template<class NumericT, typename SizeT, typename DistanceT>
422 {
423  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
424  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
425 
426  if (internal_size() == 0)
427  {
428  size1_ = m.size1();
429  size2_ = m.size2();
430  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
431  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
432  if (internal_size() > 0)
433  {
435  clear();
436  }
437  }
438  else
440 
441  return *this;
442 }
443 
445 template<class NumericT, typename SizeT, typename DistanceT>
447 {
448  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
449  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
450 
451  if (internal_size() == 0)
452  {
453  size1_ = m.size1();
454  size2_ = m.size2();
455  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
456  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
457  if (internal_size() > 0)
458  {
460  clear();
461  }
462  }
463 
464  if (internal_size() > 0)
465  {
466  viennacl::linalg::matrix_assign(*this, m(0,0));
467  }
468 
469  return *this;
470 }
471 
472 
473 //read-write access to an element of the matrix/matrix_range/matrix_slice
476 template<class NumericT, typename SizeT, typename DistanceT>
478 {
479  if (row_major_)
480  return entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
481  return entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
482 }
483 
486 template<class NumericT, typename SizeT, typename DistanceT>
488 {
489  if (row_major_)
490  return const_entry_proxy<NumericT>(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
491  return const_entry_proxy<NumericT>(column_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
492 }
493 
494 //
495 // Operator overloads for enabling implicit conversions:
496 //
497 template<class NumericT, typename SizeT, typename DistanceT>
499 {
501  *this, NumericT(1.0), 1, false, false,
502  other, NumericT(1.0), 1, false, false);
503  return *this;
504 }
505 
506 template<class NumericT, typename SizeT, typename DistanceT>
508 {
510  *this, NumericT(1.0), 1, false, false,
511  other, NumericT(1.0), 1, false, true);
512  return *this;
513 }
514 
516 template<class NumericT, typename SizeT, typename DistanceT>
518 {
519  viennacl::linalg::am(*this,
520  *this, NumericT(val), 1, false, false);
521  return *this;
522 }
523 
525 template<class NumericT, typename SizeT, typename DistanceT>
527 {
528  viennacl::linalg::am(*this,
529  *this, NumericT(val), 1, false, false);
530  return *this;
531 }
532 
534 template<class NumericT, typename SizeT, typename DistanceT>
536 {
537  viennacl::linalg::am(*this,
538  *this, NumericT(val), 1, false, false);
539  return *this;
540 }
541 
543 template<class NumericT, typename SizeT, typename DistanceT>
545 {
546  viennacl::linalg::am(*this,
547  *this, NumericT(val), 1, false, false);
548  return *this;
549 }
550 
552 template<class NumericT, typename SizeT, typename DistanceT>
554 {
555  viennacl::linalg::am(*this,
556  *this, NumericT(val), 1, false, false);
557  return *this;
558 }
559 
561 template<class NumericT, typename SizeT, typename DistanceT>
563 {
564  viennacl::linalg::am(*this,
565  *this, NumericT(val), 1, false, false);
566  return *this;
567 }
568 
569 
570 
572 template<class NumericT, typename SizeT, typename DistanceT>
574 {
575  viennacl::linalg::am(*this,
576  *this, NumericT(val), 1, true, false);
577  return *this;
578 }
579 
581 template<class NumericT, typename SizeT, typename DistanceT>
583 {
584  viennacl::linalg::am(*this,
585  *this, NumericT(val), 1, true, false);
586  return *this;
587 }
588 
590 template<class NumericT, typename SizeT, typename DistanceT>
592 {
593  viennacl::linalg::am(*this,
594  *this, NumericT(val), 1, true, false);
595  return *this;
596 }
597 
599 template<class NumericT, typename SizeT, typename DistanceT>
601 {
602  viennacl::linalg::am(*this,
603  *this, NumericT(val), 1, true, false);
604  return *this;
605 }
606 
608 template<class NumericT, typename SizeT, typename DistanceT>
610 {
611  viennacl::linalg::am(*this,
612  *this, NumericT(val), 1, true, false);
613  return *this;
614 }
615 
617 template<class NumericT, typename SizeT, typename DistanceT>
619 {
620  viennacl::linalg::am(*this,
621  *this, NumericT(val), 1, true, false);
622  return *this;
623 }
624 
625 
627 template<class NumericT, typename SizeT, typename DistanceT>
629 {
631 }
632 
633 template<class NumericT, typename SizeT, typename DistanceT>
635 
636 
637 template<class NumericT, typename SizeT, typename DistanceT>
639 {
640  assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
641 
642  if (preserve && internal_size() > 0)
643  {
644  //get old entries:
645  std::vector< NumericT > old_entries(internal_size());
646  viennacl::backend::memory_read(elements_, 0, sizeof(NumericT)*internal_size(), &(old_entries[0]));
647 
648  //set up entries of new matrix:
649  std::vector< NumericT > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size)
650  * viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size));
651  for (size_type i=0; i<rows; ++i)
652  {
653  if (i >= size1_)
654  continue;
655 
656  for (size_type j=0; j<columns; ++j)
657  {
658  if (j >= size2_)
659  continue;
660  if (row_major_)
661  new_entries[row_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
662  = old_entries[row_major::mem_index(i, j, internal_size1(), internal_size2())];
663  else
664  new_entries[column_major::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, dense_padding_size), viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size))]
665  = old_entries[column_major::mem_index(i, j, internal_size1(), internal_size2())];
666  }
667  }
668 
669  //copy new entries to GPU:
670  size1_ = rows;
671  size2_ = columns;
672  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
673  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
674  viennacl::backend::memory_create(elements_, sizeof(NumericT)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
675  }
676  else //discard old entries:
677  {
678  size1_ = rows;
679  size2_ = columns;
680  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
681  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
682 
683  viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(elements_));
684  clear();
685  }
686 }
687 
688 
695 template<class NumericT, typename F, unsigned int AlignmentV>
696 class matrix : public matrix_base<NumericT>
697 {
698  typedef matrix<NumericT, F, AlignmentV> self_type;
699  typedef matrix_base<NumericT> base_type;
700 public:
701  typedef typename base_type::size_type size_type;
702 
704  explicit matrix() : base_type(static_cast<bool>(viennacl::is_row_major<F>::value)) {}
705 
712  explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, viennacl::is_row_major<F>::value, ctx) {}
713 
721  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
722  : base_type(ptr_to_mem, mem_type,
723  rows, 0, 1, rows,
724  cols, 0, 1, cols,
725  viennacl::is_row_major<F>::value) {}
726 
727 
737  explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
738  size_type rows, size_type internal_row_count,
739  size_type cols, size_type internal_col_count)
740  : base_type(ptr_to_mem, mem_type,
741  rows, 0, 1, internal_row_count,
742  cols, 0, 1, internal_col_count,
743  true, viennacl::is_row_major<F>::value) {}
744 
745 #ifdef VIENNACL_WITH_OPENCL
746  explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns, viennacl::is_row_major<F>::value) {}
747 #endif
748 
749  template<typename LHS, typename RHS, typename OP>
750  matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {}
751 
753  matrix(identity_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
754  {
755  if (base_type::internal_size() > 0)
757  }
758 
760  matrix(zero_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
761  {
762  if (base_type::internal_size() > 0)
764  }
765 
767  matrix(scalar_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
768  {
769  if (base_type::internal_size() > 0)
771  }
772 
773  matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
774  {
775  base_type::operator=(other);
776  }
777 
778 
779  //copy constructor:
780  matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
781  {
782  base_type::operator=(other);
783  }
784 
785 
786  /*template<typename M1>
787  self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
788  {
789  self_type temp(proxy.lhs());
790  *this = trans(temp);
791  return *this;
792  }*/
793 
794  using base_type::operator=;
795 
796  // the following are needed for Visual Studio:
797  template<typename OtherNumericT, typename F2>
799 
800  template<typename OtherNumericT, typename F2>
802 
803  template<typename OtherNumericT, typename F2>
805 
813  void resize(size_type rows, size_type columns, bool preserve = true)
814  {
815  base_type::resize(rows, columns, preserve);
816  }
817 
818 }; //matrix
819 
820 
821 
827 template<class NumericT>
828 std::ostream & operator<<(std::ostream & s, const matrix_base<NumericT> & gpu_matrix)
829 {
830  typedef typename matrix_base<NumericT>::size_type size_type;
831 
832  std::vector<NumericT> tmp(gpu_matrix.internal_size());
833  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0]));
834 
835  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
836 
837  s << "(";
838  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
839  {
840  s << "(";
841  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
842  {
843  if (gpu_matrix.row_major())
844  s << tmp[row_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
845  else
846  s << tmp[column_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
847 
848  if (j < gpu_matrix.size2() - 1)
849  s << ",";
850  }
851  s << ")";
852  if (i < gpu_matrix.size1() - 1)
853  s << ",";
854  }
855  s << ")";
856  return s;
857 }
858 
864 template<typename LHS, typename RHS, typename OP>
865 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
866 {
868 
869  matrix<ScalarType> temp = expr;
870  s << temp;
871  return s;
872 }
873 
875 template<typename NumericT>
876 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>
877 trans(const matrix_base<NumericT> & mat)
878 {
879  return matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(mat, mat);
880 }
881 
882 //diag():
883 template<typename NumericT>
884 vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>
885 diag(const matrix_base<NumericT> & A, int k = 0)
886 {
888 }
889 
890 template<typename NumericT>
891 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
892 diag(const vector_base<NumericT> & v, int k = 0)
893 {
895 }
896 
897 // row():
898 template<typename NumericT, typename F>
899 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
900 row(const matrix_base<NumericT, F> & A, unsigned int i)
901 {
902  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
903 }
904 
905 // column():
906 template<typename NumericT, typename F>
907 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
908 column(const matrix_base<NumericT, F> & A, unsigned int j)
909 {
910  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
911 }
912 
914 
915 //
916 //cpu to gpu, generic type:
917 //
923 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
924 void copy(const CPUMatrixT & cpu_matrix,
925  matrix<NumericT, F, AlignmentV> & gpu_matrix )
926 {
927  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
928 
929  //std::cout << "Copying CPUMatrixT!" << std::endl;
930  //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
931  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
932  {
933  gpu_matrix.resize(cpu_matrix.size1(),
934  cpu_matrix.size2(), false);
935  }
936 
937  assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
938 
939  std::vector<NumericT> data(gpu_matrix.internal_size());
940  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
941  {
942  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
943  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
944  }
945 
946  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
947  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
948  //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
949 }
950 
951 //
952 //cpu to gpu, STL type:
953 //
959 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
960 void copy(const std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix,
961  matrix<NumericT, F, AlignmentV> & gpu_matrix )
962 {
963  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
964 
965  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
966  {
967  gpu_matrix.resize(cpu_matrix.size(),
968  cpu_matrix[0].size(),
969  false);
970  }
971 
972  assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
973 
974  std::vector<NumericT> data(gpu_matrix.internal_size());
975  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
976  {
977  assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
978 
979  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
980  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
981  }
982 
983  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
984  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
985 }
986 
987 
988 //
989 //cpu to gpu, another STL type:
990 //
999 template<typename NumericT, typename F, unsigned int AlignmentV>
1000 void fast_copy(NumericT * cpu_matrix_begin,
1001  NumericT * cpu_matrix_end,
1002  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1003 {
1004  if (gpu_matrix.internal_size() == 0)
1005  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
1006  else
1007  {
1008  assert( (gpu_matrix.internal_size() >= static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin)) && bool("fast_copy(): Matrix not large enough to fit data!"));
1009  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);
1010  }
1011 }
1012 
1013 #ifdef VIENNACL_WITH_ARMADILLO
1014 
1019 template<typename NumericT, typename F, unsigned int AlignmentV>
1020 void copy(arma::Mat<NumericT> const & arma_matrix,
1022 {
1023  typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type size_type;
1024 
1025  if (vcl_matrix.size1() == 0 || vcl_matrix.size2() == 0)
1026  {
1027  vcl_matrix.resize(arma_matrix.n_rows,
1028  arma_matrix.n_cols,
1029  false);
1030  }
1031  else
1032  {
1033  assert( (vcl_matrix.size1() == static_cast<vcl_size_t>(arma_matrix.n_rows))
1034  && (vcl_matrix.size2() == static_cast<vcl_size_t>(arma_matrix.n_cols))
1035  && bool("matrix size mismatch")
1036  );
1037  }
1038 
1039  // prepare buffer:
1041  for (size_type j = 0; j < vcl_matrix.size2(); ++j) // iterate along columns is certainly fast for arma_matrix
1042  for (size_type i = 0; i < vcl_matrix.size1(); ++i)
1043  data.set(F::mem_index(i, j, vcl_matrix.internal_size1(), vcl_matrix.internal_size2()), arma_matrix(i,j));
1044 
1045  // copy over:
1046  viennacl::backend::memory_write(vcl_matrix.handle(), 0, data.raw_size(), data.get());
1047 }
1048 #endif
1049 
1050 #ifdef VIENNACL_WITH_EIGEN
1051 namespace detail
1052 {
1053  template<typename EigenMatrixTypeT, typename NumericT, typename F, unsigned int AlignmentV>
1054  void copy_from_eigen_matrix(EigenMatrixTypeT const & cpu_matrix,
1056  {
1057  typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type size_type;
1058 
1059  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1060  {
1061  gpu_matrix.resize(cpu_matrix.rows(),
1062  cpu_matrix.cols(),
1063  false);
1064  }
1065  else
1066  {
1067  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1068  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1069  && bool("matrix size mismatch")
1070  );
1071  }
1072 
1073  std::vector<NumericT> data(gpu_matrix.internal_size());
1074  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1075  {
1076  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1077  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1078  }
1079 
1080  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1081  }
1082 
1083 }
1084 
1090 template<typename NumericT, int EigenOptions, typename F, unsigned int AlignmentV>
1091 void copy(Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions> const & cpu_matrix,
1093 {
1094  detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
1095 }
1096 
1102 template<typename NumericT, int EigenOptions, int EigenMatTypeV, typename EigenStrideT, typename F, unsigned int AlignmentV>
1103 void copy(Eigen::Map<Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions>, EigenMatTypeV, EigenStrideT> const & cpu_matrix,
1105 {
1106  detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
1107 }
1108 #endif
1109 
1110 #ifdef VIENNACL_WITH_MTL4
1111 
1116 template<typename NumericT, typename T, typename F, unsigned int AlignmentV>
1117 void copy(const mtl::dense2D<NumericT, T>& cpu_matrix,
1118  matrix<NumericT, F, AlignmentV> & gpu_matrix)
1119 {
1120  typedef typename matrix<NumericT, F, AlignmentV>::size_type size_type;
1121 
1122  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1123  {
1124  gpu_matrix.resize(cpu_matrix.num_rows(),
1125  cpu_matrix.num_cols(),
1126  false);
1127  }
1128  else
1129  {
1130  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
1131  && (gpu_matrix.size2() == cpu_matrix.num_cols())
1132  && bool("matrix size mismatch")
1133  );
1134  }
1135 
1136  std::vector<NumericT> data(gpu_matrix.internal_size());
1137  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1138  {
1139  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1140  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
1141  }
1142 
1143  viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));
1144  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1145 }
1146 #endif
1147 
1148 
1149 
1150 
1151 //
1152 //gpu to cpu, generic type
1153 //
1159 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
1160 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1161  CPUMatrixT & cpu_matrix )
1162 {
1163  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1164 
1165  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1166  {
1167  assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
1168 
1169  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1170  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1171 
1172  //now copy entries to cpu_matrix:
1173  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1174  {
1175  assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
1176  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1177  cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1178  }
1179  }
1180 }
1181 
1182 //gpu to cpu, STL type
1188 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
1189 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
1190  std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix)
1191 {
1192  typedef typename matrix<float, F, AlignmentV>::size_type size_type;
1193 
1194  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1195  {
1196  assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
1197 
1198  std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
1199  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1200 
1201  //now copy entries to cpu_matrix:
1202  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1203  {
1204  assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
1205 
1206  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1207  cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1208  }
1209  }
1210 }
1211 
1212 //gpu to cpu, STL type
1220 template<typename NumericT, typename F, unsigned int AlignmentV>
1222  NumericT * cpu_matrix_begin)
1223 {
1224  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin);
1225 }
1226 
1227 
1228 
1230 
1231 
1232 // operator +
1234 template<typename LHS1, typename RHS1, typename OP1,
1235  typename LHS2, typename RHS2, typename OP2>
1236 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1237 const matrix_expression<const LHS2, const RHS2, OP2>,
1238 op_add>
1240  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1241 {
1242  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1243  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1244  && bool("Incompatible matrix sizes!"));
1245  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1246  const matrix_expression<const LHS2, const RHS2, OP2>,
1247  op_add>(proxy1, proxy2);
1248 }
1249 
1250 template<typename LHS1, typename RHS1, typename OP1,
1251  typename NumericT>
1252 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1253 const matrix_base<NumericT>,
1254 op_add>
1256  matrix_base<NumericT> const & proxy2)
1257 {
1258  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1259  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1260  && bool("Incompatible matrix sizes!"));
1261  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1262  const matrix_base<NumericT>,
1263  op_add>(proxy1, proxy2);
1264 }
1265 
1266 template<typename NumericT,
1267  typename LHS2, typename RHS2, typename OP2>
1268 matrix_expression< const matrix_base<NumericT>,
1269 const matrix_expression<const LHS2, const RHS2, OP2>,
1270 op_add>
1271 operator + (matrix_base<NumericT> const & proxy1,
1272  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1273 {
1274  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1275  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1276  && bool("Incompatible matrix sizes!"));
1277  return matrix_expression< const matrix_base<NumericT>,
1278  const matrix_expression<const LHS2, const RHS2, OP2>,
1279  op_add>(proxy1, proxy2);
1280 }
1281 
1283 template<typename NumericT>
1284 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_add >
1285 operator + (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
1286 {
1287  return matrix_expression< const matrix_base<NumericT>,
1288  const matrix_base<NumericT>,
1289  op_add > (m1, m2);
1290 }
1291 
1292 
1293 // operator -
1294 template<typename LHS1, typename RHS1, typename OP1,
1295  typename LHS2, typename RHS2, typename OP2>
1296 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1297 const matrix_expression<const LHS2, const RHS2, OP2>,
1298 op_sub>
1300  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1301 {
1302  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1303  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1304  && bool("Incompatible matrix sizes!"));
1305  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1306  const matrix_expression<const LHS2, const RHS2, OP2>,
1307  op_sub>(proxy1, proxy2);
1308 }
1309 
1310 template<typename LHS1, typename RHS1, typename OP1,
1311  typename NumericT>
1312 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1313 const matrix_base<NumericT>,
1314 op_sub>
1316  matrix_base<NumericT> const & proxy2)
1317 {
1318  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1319  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1320  && bool("Incompatible matrix sizes!"));
1321  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1322  const matrix_base<NumericT>,
1323  op_sub>(proxy1, proxy2);
1324 }
1325 
1326 template<typename NumericT,
1327  typename LHS2, typename RHS2, typename OP2>
1328 matrix_expression< const matrix_base<NumericT>,
1329 const matrix_expression<const LHS2, const RHS2, OP2>,
1330 op_sub>
1331 operator - (matrix_base<NumericT> const & proxy1,
1332  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1333 {
1334  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1335  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1336  && bool("Incompatible matrix sizes!"));
1337  return matrix_expression< const matrix_base<NumericT>,
1338  const matrix_expression<const LHS2, const RHS2, OP2>,
1339  op_sub>(proxy1, proxy2);
1340 }
1341 
1343 template<typename NumericT>
1344 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_sub >
1345 operator - (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
1346 {
1347  return matrix_expression< const matrix_base<NumericT>,
1348  const matrix_base<NumericT>,
1349  op_sub > (m1, m2);
1350 }
1351 
1352 
1353 
1354 // operator *
1360 template<typename S1, typename NumericT>
1362 matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
1363 >::type
1364 operator * (S1 const & value, matrix_base<NumericT> const & m1)
1365 {
1366  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, value);
1367 }
1368 
1370 template<typename NumericT>
1371 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1372 operator * (char value, matrix_base<NumericT> const & m1)
1373 {
1374  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1375 }
1376 
1378 template<typename NumericT>
1379 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1380 operator * (short value, matrix_base<NumericT> const & m1)
1381 {
1382  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1383 }
1384 
1386 template<typename NumericT>
1387 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1388 operator * (int value, matrix_base<NumericT> const & m1)
1389 {
1390  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1391 }
1392 
1394 template<typename NumericT>
1395 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1396 operator * (long value, matrix_base<NumericT> const & m1)
1397 {
1398  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1399 }
1400 
1402 template<typename NumericT>
1403 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1404 operator * (float value, matrix_base<NumericT> const & m1)
1405 {
1406  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1407 }
1408 
1410 template<typename NumericT>
1411 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1412 operator * (double value, matrix_base<NumericT> const & m1)
1413 {
1414  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(value));
1415 }
1416 
1417 
1418 
1424 template<typename LHS, typename RHS, typename OP, typename S1>
1426 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1428  S1 const & val)
1429 {
1430  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1431 }
1432 
1433 
1439 template<typename S1, typename LHS, typename RHS, typename OP>
1441 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1442 operator * (S1 const & val,
1443  matrix_expression< LHS, RHS, OP> const & proxy)
1444 {
1445  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1446 }
1447 
1450 template<typename NumericT, typename S1>
1452 matrix_expression< const matrix_base<NumericT>, const S1, op_mult> >::type
1453 operator * (matrix_base<NumericT> const & m1, S1 const & s1)
1454 {
1455  return matrix_expression< const matrix_base<NumericT>, const S1, op_mult>(m1, s1);
1456 }
1457 
1459 template<typename NumericT>
1460 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1461 operator * (matrix_base<NumericT> const & m1, char s1)
1462 {
1463  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1464 }
1465 
1467 template<typename NumericT>
1468 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1469 operator * (matrix_base<NumericT> const & m1, short s1)
1470 {
1471  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1472 }
1473 
1475 template<typename NumericT>
1476 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1477 operator * (matrix_base<NumericT> const & m1, int s1)
1478 {
1479  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1480 }
1481 
1483 template<typename NumericT>
1484 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1485 operator * (matrix_base<NumericT> const & m1, long s1)
1486 {
1487  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1488 }
1489 
1491 template<typename NumericT>
1492 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1493 operator * (matrix_base<NumericT> const & m1, float s1)
1494 {
1495  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1496 }
1497 
1499 template<typename NumericT>
1500 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
1501 operator * (matrix_base<NumericT> const & m1, double s1)
1502 {
1503  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>(m1, NumericT(s1));
1504 }
1505 
1506 
1507 // operator *=
1508 
1510 template<typename NumericT, typename S1>
1511 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1512 operator *= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1513 {
1514  bool is_sign_flip = viennacl::is_flip_sign_scalar<S1>::value;
1516  m1, gpu_val, 1, false, is_sign_flip ? true : false);
1517  return m1;
1518 }
1519 
1521 template<typename NumericT>
1522 matrix_base<NumericT> &
1523 operator *= (matrix_base<NumericT> & m1, char gpu_val)
1524 {
1526  m1, NumericT(gpu_val), 1, false, false);
1527  return m1;
1528 }
1529 
1531 template<typename NumericT>
1532 matrix_base<NumericT> &
1533 operator *= (matrix_base<NumericT> & m1, short gpu_val)
1534 {
1536  m1, NumericT(gpu_val), 1, false, false);
1537  return m1;
1538 }
1539 
1541 template<typename NumericT>
1542 matrix_base<NumericT> &
1543 operator *= (matrix_base<NumericT> & m1, int gpu_val)
1544 {
1546  m1, NumericT(gpu_val), 1, false, false);
1547  return m1;
1548 }
1549 
1551 template<typename NumericT>
1552 matrix_base<NumericT> &
1553 operator *= (matrix_base<NumericT> & m1, long gpu_val)
1554 {
1556  m1, NumericT(gpu_val), 1, false, false);
1557  return m1;
1558 }
1559 
1561 template<typename NumericT>
1562 matrix_base<NumericT> &
1563 operator *= (matrix_base<NumericT> & m1, float gpu_val)
1564 {
1566  m1, NumericT(gpu_val), 1, false, false);
1567  return m1;
1568 }
1569 
1571 template<typename NumericT>
1572 matrix_base<NumericT> &
1573 operator *= (matrix_base<NumericT> & m1, double gpu_val)
1574 {
1576  m1, NumericT(gpu_val), 1, false, false);
1577  return m1;
1578 }
1579 
1580 
1581 
1582 // operator /
1583 
1584 
1590 template<typename LHS, typename RHS, typename OP, typename S1>
1592 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
1594  S1 const & val)
1595 {
1596  return matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>(proxy, val);
1597 }
1598 
1599 
1601 template<typename NumericT, typename S1>
1603 matrix_expression< const matrix_base<NumericT>, const S1, op_div> >::type
1604 operator / (matrix_base<NumericT> const & m1, S1 const & s1)
1605 {
1606  return matrix_expression< const matrix_base<NumericT>, const S1, op_div>(m1, s1);
1607 }
1608 
1610 template<typename NumericT>
1611 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1612 operator / (matrix_base<NumericT> const & m1, char s1)
1613 {
1614  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1615 }
1616 
1618 template<typename NumericT>
1619 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1620 operator / (matrix_base<NumericT> const & m1, short s1)
1621 {
1622  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1623 }
1624 
1626 template<typename NumericT>
1627 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1628 operator / (matrix_base<NumericT> const & m1, int s1)
1629 {
1630  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1631 }
1632 
1634 template<typename NumericT>
1635 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1636 operator / (matrix_base<NumericT> const & m1, long s1)
1637 {
1638  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1639 }
1640 
1642 template<typename NumericT>
1643 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1644 operator / (matrix_base<NumericT> const & m1, float s1)
1645 {
1646  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1647 }
1648 
1650 template<typename NumericT>
1651 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
1652 operator / (matrix_base<NumericT> const & m1, double s1)
1653 {
1654  return matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>(m1, NumericT(s1));
1655 }
1656 
1657 
1658 
1659 // operator /=
1660 
1662 template<typename NumericT, typename S1>
1663 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
1664 operator /= (matrix_base<NumericT> & m1, S1 const & gpu_val)
1665 {
1667  m1, gpu_val, 1, true, false);
1668  return m1;
1669 }
1670 
1672 template<typename NumericT>
1673 matrix_base<NumericT> &
1674 operator /= (matrix_base<NumericT> & m1, char gpu_val)
1675 {
1677  m1, NumericT(gpu_val), 1, true, false);
1678  return m1;
1679 }
1680 
1682 template<typename NumericT>
1683 matrix_base<NumericT> &
1684 operator /= (matrix_base<NumericT> & m1, short gpu_val)
1685 {
1687  m1, gpu_val, 1, true, false);
1688  return m1;
1689 }
1690 
1692 template<typename NumericT>
1693 matrix_base<NumericT> &
1694 operator /= (matrix_base<NumericT> & m1, int gpu_val)
1695 {
1697  m1, gpu_val, 1, true, false);
1698  return m1;
1699 }
1700 
1702 template<typename NumericT>
1703 matrix_base<NumericT> &
1704 operator /= (matrix_base<NumericT> & m1, long gpu_val)
1705 {
1707  m1, gpu_val, 1, true, false);
1708  return m1;
1709 }
1710 
1712 template<typename NumericT>
1713 matrix_base<NumericT> &
1714 operator /= (matrix_base<NumericT> & m1, float gpu_val)
1715 {
1717  m1, gpu_val, 1, true, false);
1718  return m1;
1719 }
1720 
1722 template<typename NumericT>
1723 matrix_base<NumericT> &
1724 operator /= (matrix_base<NumericT> & m1, double gpu_val)
1725 {
1727  m1, gpu_val, 1, true, false);
1728  return m1;
1729 }
1730 
1731 
1732 
1733 
1734 
1735 // outer_prod(v1, v2) * val;
1736 template<typename NumericT, typename S1>
1739 const S1,
1740 op_mult>
1741 >::type
1742 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1743  const S1 & val)
1744 {
1745  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1746  const S1,
1747  op_mult>(proxy, val);
1748 }
1749 
1750 template<typename NumericT, typename S1>
1752 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1753 const NumericT,
1754 op_mult>
1755 >::type
1756 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1757  const S1 & val)
1758 {
1759  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1760  const NumericT,
1761  op_mult>(proxy, NumericT(val));
1762 }
1763 
1764 // val * outer_prod(v1, v2);
1765 template<typename NumericT, typename S1>
1767 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1768 const S1,
1769 op_mult>
1770 >::type
1771 operator*(const S1 & val,
1772  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1773 {
1774  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1775  const S1,
1776  op_mult>(proxy, val);
1777 }
1778 
1779 template<typename NumericT, typename S1>
1781 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1782 const NumericT,
1783 op_mult>
1784 >::type
1785 operator*(const S1 & val,
1786  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1787 {
1788  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1789  const NumericT,
1790  op_mult>(proxy, NumericT(val));
1791 }
1792 
1793 
1794 
1795 //
1796 // Specify available operations:
1797 //
1798 
1801 namespace linalg
1802 {
1803 namespace detail
1804 {
1805 
1806  // x = y
1807  template<typename T>
1808  struct op_executor<matrix_base<T>, op_assign, matrix_base<T> >
1809  {
1810  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1811  {
1812  viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
1813  }
1814  };
1815 
1816  // x = trans(y)
1817  template<typename T>
1818  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1819  {
1820  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1821  {
1822  matrix_base<T> temp(rhs);
1823  viennacl::linalg::am(lhs, temp, T(1), 1, false, false);
1824  }
1825  };
1826 
1827 
1828  // x += y
1829  template<typename T>
1830  struct op_executor<matrix_base<T>, op_inplace_add, matrix_base<T> >
1831  {
1832  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1833  {
1834  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
1835  }
1836  };
1837 
1838  // x += trans(y)
1839  template<typename T>
1840  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1841  {
1842  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1843  {
1844  matrix_base<T> temp(rhs);
1845  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, false);
1846  }
1847  };
1848 
1849  // x -= y
1850  template<typename T>
1851  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_base<T> >
1852  {
1853  static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
1854  {
1855  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
1856  }
1857  };
1858 
1859  // x -= trans(y)
1860  template<typename T>
1861  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
1862  {
1863  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
1864  {
1865  matrix_base<T> temp(rhs);
1866  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, temp, T(1), 1, false, true);
1867  }
1868  };
1869 
1871 
1872 
1873  // x = alpha * y
1874  template<typename T, typename ScalarType>
1875  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1876  {
1877  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1878  {
1879  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
1880  }
1881  };
1882 
1883  // x += alpha * y
1884  template<typename T, typename ScalarType>
1885  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1886  {
1887  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1888  {
1889  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
1890  }
1891  };
1892 
1893  // x -= alpha * y
1894  template<typename T, typename ScalarType>
1895  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
1896  {
1897  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
1898  {
1899  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
1900  }
1901  };
1902 
1903 
1905 
1906  // x = alpha * vec_expr
1907  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1908  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1909  {
1910  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1911  {
1912  if (lhs.row_major())
1913  {
1914  matrix<T> temp(proxy.lhs());
1915  lhs = temp * proxy.rhs();
1916  }
1917  else
1918  {
1919  matrix<T, column_major> temp(proxy.lhs());
1920  lhs = temp * proxy.rhs();
1921  }
1922  }
1923  };
1924 
1925  // x += alpha * vec_expr
1926  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1927  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1928  {
1929  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1930  {
1931  if (lhs.row_major())
1932  {
1933  matrix<T> temp(proxy.lhs());
1934  lhs += temp * proxy.rhs();
1935  }
1936  else
1937  {
1938  matrix<T, column_major> temp(proxy.lhs());
1939  lhs += temp * proxy.rhs();
1940  }
1941  }
1942  };
1943 
1944  // x -= alpha * vec_expr
1945  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
1946  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1947  {
1948  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1949  {
1950  if (lhs.row_major())
1951  {
1952  matrix<T> temp(proxy.lhs());
1953  lhs -= temp * proxy.rhs();
1954  }
1955  else
1956  {
1957  matrix<T, column_major> temp(proxy.lhs());
1958  lhs -= temp * proxy.rhs();
1959  }
1960  }
1961  };
1962 
1963 
1965 
1966  // x = y / alpha
1967  template<typename T, typename ScalarType>
1968  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1969  {
1970  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1971  {
1972  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
1973  }
1974  };
1975 
1976  // x += y / alpha
1977  template<typename T, typename ScalarType>
1978  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1979  {
1980  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1981  {
1982  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
1983  }
1984  };
1985 
1986  // x -= y / alpha
1987  template<typename T, typename ScalarType>
1988  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
1989  {
1990  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
1991  {
1992  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
1993  }
1994  };
1995 
1996 
1998 
1999  // x = vec_expr / alpha
2000  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2001  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2002  {
2003  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2004  {
2005  if (lhs.row_major())
2006  {
2007  matrix<T> temp(proxy.lhs());
2008  lhs = temp / proxy.rhs();
2009  }
2010  else
2011  {
2012  matrix<T, column_major> temp(proxy.lhs());
2013  lhs = temp / proxy.rhs();
2014  }
2015  }
2016  };
2017 
2018  // x += vec_expr / alpha
2019  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2020  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2021  {
2022  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2023  {
2024  if (lhs.row_major())
2025  {
2026  matrix<T> temp(proxy.lhs());
2027  lhs += temp / proxy.rhs();
2028  }
2029  else
2030  {
2031  matrix<T, column_major> temp(proxy.lhs());
2032  lhs += temp / proxy.rhs();
2033  }
2034  }
2035  };
2036 
2037  // x -= vec_expr / alpha
2038  template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
2039  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
2040  {
2041  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
2042  {
2043  if (lhs.row_major())
2044  {
2045  matrix<T, row_major> temp(proxy.lhs());
2046  lhs -= temp / proxy.rhs();
2047  }
2048  else
2049  {
2050  matrix<T, column_major> temp(proxy.lhs());
2051  lhs -= temp / proxy.rhs();
2052  }
2053  }
2054  };
2055 
2056 
2057 
2058  // generic x = vec_expr1 + vec_expr2:
2059  template<typename T, typename LHS, typename RHS>
2060  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
2061  {
2062  // generic x = vec_expr1 + vec_expr2:
2063  template<typename LHS1, typename RHS1>
2064  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2065  {
2066  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2067  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2068 
2069  if (op_aliasing_lhs || op_aliasing_rhs)
2070  {
2071  matrix_base<T> temp(proxy.lhs());
2072  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2073  lhs = temp;
2074  }
2075  else
2076  {
2077  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2078  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2079  }
2080  }
2081 
2082  // x = y + z
2083  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2084  {
2086  proxy.lhs(), T(1), 1, false, false,
2087  proxy.rhs(), T(1), 1, false, false);
2088  }
2089 
2090  // x = alpha * y + z
2091  template<typename ScalarType>
2092  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2093  const matrix_base<T>,
2094  op_add> const & proxy)
2095  {
2097  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2098  proxy.rhs(), T(1), 1, false, false);
2099  }
2100 
2101  // x = y / alpha + z
2102  template<typename ScalarType>
2103  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2104  const matrix_base<T>,
2105  op_add> const & proxy)
2106  {
2108  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2109  proxy.rhs(), T(1), 1, false, false);
2110  }
2111 
2112  // x = y + beta * z
2113  template<typename ScalarType>
2114  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2115  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2116  op_add> const & proxy)
2117  {
2119  proxy.lhs(), T(1), 1, false, false,
2120  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2121  }
2122 
2123  // x = y + z / beta
2124  template<typename ScalarType>
2125  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2126  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2127  op_add> const & proxy)
2128  {
2130  proxy.lhs(), T(1), 1, false, false,
2131  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2132  }
2133 
2134  // x = alpha * y + beta * z
2135  template<typename ScalarType1, typename ScalarType2>
2136  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2137  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2138  op_add> const & proxy)
2139  {
2141  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2142  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2143  }
2144 
2145  // x = alpha * y + z / beta
2146  template<typename ScalarType1, typename ScalarType2>
2147  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2148  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2149  op_add> const & proxy)
2150  {
2152  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2153  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2154  }
2155 
2156  // x = y / alpha + beta * z
2157  template<typename ScalarType1, typename ScalarType2>
2158  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2159  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2160  op_add> const & proxy)
2161  {
2163  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2164  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2165  }
2166 
2167  // x = y / alpha + z / beta
2168  template<typename ScalarType1, typename ScalarType2>
2169  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2170  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2171  op_add> const & proxy)
2172  {
2174  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2175  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2176  }
2177  };
2178 
2179  // dense = sparse * dense
2180  template<typename T, typename LHS, typename RHS>
2181  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
2182  {
2183  template< typename SparseMatrixType>
2184  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2186  viennacl::op_prod> const & proxy)
2187  {
2188  // check for x = A * x
2189  if (op_aliasing(lhs, proxy.rhs()))
2190  {
2191  matrix_base<T> temp(proxy);
2192  lhs = temp;
2193  }
2194  else
2195  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2196  }
2197 
2198  // dense = sparse * trans(dense)
2199  template< typename SparseMatrixType >
2200  static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
2204  viennacl::op_prod> const & proxy)
2205  {
2206  // check for x = A * x
2207  if (op_aliasing(lhs, proxy.rhs()))
2208  {
2209  matrix_base<T> temp(proxy);
2210  lhs = temp;
2211  }
2212  else
2213  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
2214  }
2215 
2216  };
2217 
2218  // generic x += vec_expr1 + vec_expr2:
2219  template<typename T, typename LHS, typename RHS>
2220  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
2221  {
2222  // generic x += vec_expr1 + vec_expr2:
2223  template<typename LHS1, typename RHS1>
2224  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2225  {
2226  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2227  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2228 
2229  if (op_aliasing_lhs || op_aliasing_rhs)
2230  {
2231  matrix_base<T> temp(proxy.lhs());
2232  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2233  lhs += temp;
2234  }
2235  else
2236  {
2237  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2238  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2239  }
2240  }
2241 
2242  // x += y + z
2243  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2244  {
2246  proxy.lhs(), T(1), 1, false, false,
2247  proxy.rhs(), T(1), 1, false, false);
2248  }
2249 
2250  // x += alpha * y + z
2251  template<typename ScalarType>
2252  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2253  const matrix_base<T>,
2254  op_add> const & proxy)
2255  {
2257  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2258  proxy.rhs(), T(1), 1, false, false);
2259  }
2260 
2261  // x += y / alpha + z
2262  template<typename ScalarType>
2263  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2264  const matrix_base<T>,
2265  op_add> const & proxy)
2266  {
2268  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2269  proxy.rhs(), T(1), 1, false, false);
2270  }
2271 
2272  // x += y + beta * z
2273  template<typename ScalarType>
2274  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2275  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2276  op_add> const & proxy)
2277  {
2279  proxy.lhs(), T(1), 1, false, false,
2280  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2281  }
2282 
2283  // x += y + z / beta
2284  template<typename ScalarType>
2285  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2286  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2287  op_add> const & proxy)
2288  {
2290  proxy.lhs(), T(1), 1, false, false,
2291  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2292  }
2293 
2294  // x += alpha * y + beta * z
2295  template<typename ScalarType1, typename ScalarType2>
2296  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2297  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2298  op_add> const & proxy)
2299  {
2301  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2302  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2303  }
2304 
2305  // x += alpha * y + z / beta
2306  template<typename ScalarType1, typename ScalarType2>
2307  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2308  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2309  op_add> const & proxy)
2310  {
2312  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2313  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2314  }
2315 
2316  // x += y / alpha + beta * z
2317  template<typename ScalarType1, typename ScalarType2>
2318  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2319  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2320  op_add> const & proxy)
2321  {
2323  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2324  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2325  }
2326 
2327  // x += y / alpha + z / beta
2328  template<typename ScalarType1, typename ScalarType2>
2329  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2330  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2331  op_add> const & proxy)
2332  {
2334  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2335  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2336  }
2337  };
2338 
2339 
2340 
2341  // generic x -= vec_expr1 + vec_expr2:
2342  template<typename T, typename LHS, typename RHS>
2343  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
2344  {
2345  // generic x -= vec_expr1 + vec_expr2:
2346  template<typename LHS1, typename RHS1>
2347  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
2348  {
2349  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2350  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2351 
2352  if (op_aliasing_lhs || op_aliasing_rhs)
2353  {
2354  matrix_base<T> temp(proxy.lhs());
2355  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
2356  lhs -= temp;
2357  }
2358  else
2359  {
2360  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2361  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2362  }
2363  }
2364 
2365  // x -= y + z
2366  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
2367  {
2369  proxy.lhs(), T(1), 1, false, true,
2370  proxy.rhs(), T(1), 1, false, true);
2371  }
2372 
2373  // x -= alpha * y + z
2374  template<typename ScalarType>
2375  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2376  const matrix_base<T>,
2377  op_add> const & proxy)
2378  {
2380  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2381  proxy.rhs(), T(1), 1, false, true);
2382  }
2383 
2384  // x -= y / alpha + z
2385  template<typename ScalarType>
2386  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2387  const matrix_base<T>,
2388  op_add> const & proxy)
2389  {
2391  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2392  proxy.rhs(), T(1), 1, false, true);
2393  }
2394 
2395  // x -= y + beta * z
2396  template<typename ScalarType>
2397  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2398  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2399  op_add> const & proxy)
2400  {
2402  proxy.lhs(), T(1), 1, false, true,
2403  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2404  }
2405 
2406  // x -= y + z / beta
2407  template<typename ScalarType>
2408  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2409  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2410  op_add> const & proxy)
2411  {
2413  proxy.lhs(), T(1), 1, false, true,
2414  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2415  }
2416 
2417  // x -= alpha * y + beta * z
2418  template<typename ScalarType1, typename ScalarType2>
2419  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2420  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2421  op_add> const & proxy)
2422  {
2424  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2425  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2426  }
2427 
2428  // x -= alpha * y + z / beta
2429  template<typename ScalarType1, typename ScalarType2>
2430  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2431  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2432  op_add> const & proxy)
2433  {
2435  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2436  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2437  }
2438 
2439  // x -= y / alpha + beta * z
2440  template<typename ScalarType1, typename ScalarType2>
2441  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2442  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2443  op_add> const & proxy)
2444  {
2446  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2447  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2448  }
2449 
2450  // x -= y / alpha + z / beta
2451  template<typename ScalarType1, typename ScalarType2>
2452  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2453  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2454  op_add> const & proxy)
2455  {
2457  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2458  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2459  }
2460  };
2461 
2462 
2463 
2465 
2466 
2467 
2468  // generic x = vec_expr1 - vec_expr2:
2469  template<typename T, typename LHS, typename RHS>
2470  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
2471  {
2472  // generic x = vec_expr1 - vec_expr2:
2473  template<typename LHS1, typename RHS1>
2474  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2475  {
2476  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2477  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2478 
2479  if (op_aliasing_lhs || op_aliasing_rhs)
2480  {
2481  matrix_base<T> temp(proxy.lhs());
2482  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2483  lhs = temp;
2484  }
2485  else
2486  {
2487  op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());
2488  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2489  }
2490  }
2491 
2492  // x = y - z
2493  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2494  {
2496  proxy.lhs(), T(1), 1, false, false,
2497  proxy.rhs(), T(1), 1, false, true);
2498  }
2499 
2500  // x = alpha * y - z
2501  template<typename ScalarType>
2502  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2503  const matrix_base<T>,
2504  op_sub> const & proxy)
2505  {
2507  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2508  proxy.rhs(), T(1), 1, false, true);
2509  }
2510 
2511  // x = y / alpha - z
2512  template<typename ScalarType>
2513  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2514  const matrix_base<T>,
2515  op_sub> const & proxy)
2516  {
2518  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2519  proxy.rhs(), T(1), 1, false, true);
2520  }
2521 
2522  // x = y - beta * z
2523  template<typename ScalarType>
2524  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2525  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2526  op_sub> const & proxy)
2527  {
2529  proxy.lhs(), T(1), 1, false, false,
2530  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2531  }
2532 
2533  // x = y - z / beta
2534  template<typename ScalarType>
2535  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2536  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2537  op_sub> const & proxy)
2538  {
2540  proxy.lhs(), T(1), 1, false, false,
2541  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2542  }
2543 
2544  // x = alpha * y - beta * z
2545  template<typename ScalarType1, typename ScalarType2>
2546  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2547  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2548  op_sub> const & proxy)
2549  {
2551  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2552  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2553  }
2554 
2555  // x = alpha * y - z / beta
2556  template<typename ScalarType1, typename ScalarType2>
2557  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2558  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2559  op_sub> const & proxy)
2560  {
2562  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2563  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2564  }
2565 
2566  // x = y / alpha - beta * z
2567  template<typename ScalarType1, typename ScalarType2>
2568  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2569  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2570  op_sub> const & proxy)
2571  {
2573  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2574  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2575  }
2576 
2577  // x = y / alpha - z / beta
2578  template<typename ScalarType1, typename ScalarType2>
2579  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2580  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2581  op_sub> const & proxy)
2582  {
2584  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2585  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2586  }
2587  };
2588 
2589 
2590  // generic x += vec_expr1 - vec_expr2:
2591  template<typename T, typename LHS, typename RHS>
2592  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
2593  {
2594  // generic x += vec_expr1 - vec_expr2:
2595  template<typename LHS1, typename RHS1>
2596  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2597  {
2598  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2599  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2600 
2601  if (op_aliasing_lhs || op_aliasing_rhs)
2602  {
2603  matrix_base<T> temp(proxy.lhs());
2604  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2605  lhs += temp;
2606  }
2607  else
2608  {
2609  op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2610  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2611  }
2612  }
2613 
2614  // x += y - z
2615  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2616  {
2618  proxy.lhs(), T(1), 1, false, false,
2619  proxy.rhs(), T(1), 1, false, true);
2620  }
2621 
2622  // x += alpha * y - z
2623  template<typename ScalarType>
2624  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2625  const matrix_base<T>,
2626  op_sub> const & proxy)
2627  {
2629  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2630  proxy.rhs(), T(1), 1, false, true);
2631  }
2632 
2633  // x += y / alpha - z
2634  template<typename ScalarType>
2635  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2636  const matrix_base<T>,
2637  op_sub> const & proxy)
2638  {
2640  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2641  proxy.rhs(), T(1), 1, false, true);
2642  }
2643 
2644  // x += y - beta * z
2645  template<typename ScalarType>
2646  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2647  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2648  op_sub> const & proxy)
2649  {
2651  proxy.lhs(), T(1), 1, false, false,
2652  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2653  }
2654 
2655  // x += y - z / beta
2656  template<typename ScalarType>
2657  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2658  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2659  op_sub> const & proxy)
2660  {
2662  proxy.lhs(), T(1), 1, false, false,
2663  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2664  }
2665 
2666  // x += alpha * y - beta * z
2667  template<typename ScalarType1, typename ScalarType2>
2668  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2669  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2670  op_sub> const & proxy)
2671  {
2673  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2674  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2675  }
2676 
2677  // x += alpha * y - z / beta
2678  template<typename ScalarType1, typename ScalarType2>
2679  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2680  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2681  op_sub> const & proxy)
2682  {
2684  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2685  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2686  }
2687 
2688  // x += y / alpha - beta * z
2689  template<typename ScalarType1, typename ScalarType2>
2690  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2691  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2692  op_sub> const & proxy)
2693  {
2695  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2696  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2697  }
2698 
2699  // x += y / alpha - z / beta
2700  template<typename ScalarType1, typename ScalarType2>
2701  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2702  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2703  op_sub> const & proxy)
2704  {
2706  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2707  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2708  }
2709  };
2710 
2711 
2712 
2713  // generic x -= vec_expr1 - vec_expr2:
2714  template<typename T, typename LHS, typename RHS>
2715  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
2716  {
2717  // generic x -= vec_expr1 - vec_expr2:
2718  template<typename LHS1, typename RHS1>
2719  static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2720  {
2721  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2722  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2723 
2724  if (op_aliasing_lhs || op_aliasing_rhs)
2725  {
2726  matrix_base<T> temp(proxy.lhs());
2727  op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2728  lhs -= temp;
2729  }
2730  else
2731  {
2732  op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2733  op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2734  }
2735  }
2736 
2737  // x -= y - z
2738  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
2739  {
2741  proxy.lhs(), T(1), 1, false, true,
2742  proxy.rhs(), T(1), 1, false, false);
2743  }
2744 
2745  // x -= alpha * y - z
2746  template<typename ScalarType>
2747  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2748  const matrix_base<T>,
2749  op_sub> const & proxy)
2750  {
2752  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2753  proxy.rhs(), T(1), 1, false, false);
2754  }
2755 
2756  // x -= y / alpha - z
2757  template<typename ScalarType>
2758  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2759  const matrix_base<T>,
2760  op_sub> const & proxy)
2761  {
2763  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2764  proxy.rhs(), T(1), 1, false, false);
2765  }
2766 
2767  // x -= y - beta * z
2768  template<typename ScalarType>
2769  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2770  const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
2771  op_sub> const & proxy)
2772  {
2774  proxy.lhs(), T(1), 1, false, true,
2775  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2776  }
2777 
2778  // x -= y - z / beta
2779  template<typename ScalarType>
2780  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
2781  const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
2782  op_sub> const & proxy)
2783  {
2785  proxy.lhs(), T(1), 1, false, true,
2786  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2787  }
2788 
2789  // x -= alpha * y - beta * z
2790  template<typename ScalarType1, typename ScalarType2>
2791  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2792  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2793  op_sub> const & proxy)
2794  {
2796  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2797  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2798  }
2799 
2800  // x -= alpha * y - z / beta
2801  template<typename ScalarType1, typename ScalarType2>
2802  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
2803  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2804  op_sub> const & proxy)
2805  {
2807  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2808  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2809  }
2810 
2811  // x -= y / alpha - beta * z
2812  template<typename ScalarType1, typename ScalarType2>
2813  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2814  const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
2815  op_sub> const & proxy)
2816  {
2818  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2819  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2820  }
2821 
2822  // x -= y / alpha - z / beta
2823  template<typename ScalarType1, typename ScalarType2>
2824  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
2825  const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
2826  op_sub> const & proxy)
2827  {
2829  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2830  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2831  }
2832  };
2833 
2834 
2836 
2837  template<typename T, typename LHS>
2838  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
2839  {
2840  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
2841  {
2842  viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
2843  }
2844  };
2845 
2846 
2847  template<typename T, typename LHS>
2848  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
2849  {
2850  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const int, op_matrix_diag> const & proxy)
2851  {
2852  viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
2853  }
2854  };
2855 
2856  template<typename T, typename LHS>
2857  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
2858  {
2859  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_row> const & proxy)
2860  {
2861  viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
2862  }
2863  };
2864 
2865 
2866  template<typename T, typename LHS>
2867  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
2868  {
2869  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const unsigned int, op_column> const & proxy)
2870  {
2871  viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
2872  }
2873  };
2874 
2876 
2877  template<typename T>
2878  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> >
2879  {
2880  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> const & proxy)
2881  {
2882  viennacl::linalg::row_sum_impl(proxy.lhs(), lhs);
2883  }
2884  };
2885 
2886  template<typename T, typename LHS, typename RHS, typename OP>
2887  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_row_sum> >
2888  {
2889  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_row_sum> const & proxy)
2890  {
2891  matrix_base<T> tmp(proxy.lhs());
2893  }
2894  };
2895 
2896  template<typename T>
2897  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> >
2898  {
2899  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> const & proxy)
2900  {
2901  viennacl::linalg::column_sum_impl(proxy.lhs(), lhs);
2902  }
2903  };
2904 
2905 
2906  template<typename T, typename LHS, typename RHS, typename OP>
2907  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_col_sum> >
2908  {
2909  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_col_sum> const & proxy)
2910  {
2911  matrix_base<T> tmp(proxy.lhs());
2913  }
2914  };
2915 
2917 
2918  // generic x = mat_expr1 .* mat_expr2:
2919  template<typename T, typename LHS, typename RHS, typename OP>
2920  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2921  {
2922  // x = y .* z
2923  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2924  {
2925  viennacl::linalg::element_op(lhs, proxy);
2926  }
2927 
2928  // x = y .* mat_expr
2929  template<typename LHS2, typename RHS2, typename OP2>
2930  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2931  {
2932  matrix_base<T> temp(proxy.rhs());
2933  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2934  }
2935 
2936  // x = mat_expr .* z
2937  template<typename LHS1, typename RHS1, typename OP1>
2938  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2939  {
2940  matrix_base<T> temp(proxy.lhs());
2941  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2942  }
2943 
2944  // x = mat_expr .* mat_expr
2945  template<typename LHS1, typename RHS1, typename OP1,
2946  typename LHS2, typename RHS2, typename OP2>
2947  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2948  const matrix_expression<const LHS2, const RHS2, OP2>,
2949  op_element_binary<OP> > const & proxy)
2950  {
2951  matrix_base<T> temp1(proxy.lhs());
2952  matrix_base<T> temp2(proxy.rhs());
2953  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2954  }
2955  };
2956 
2957  // generic x += mat_expr .* mat_expr:
2958  template<typename T, typename LHS, typename RHS, typename OP>
2959  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2960  {
2961  // x += y .* z
2962  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2963  {
2964  matrix_base<T> temp(proxy);
2965  lhs += temp;
2966  }
2967 
2968  // x += y .* mat_expr
2969  template<typename LHS2, typename RHS2, typename OP2>
2970  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2971  {
2972  matrix_base<T> temp(proxy.rhs());
2973  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2974  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
2975  lhs += temp2;
2976  }
2977 
2978  // x += mat_expr .* z
2979  template<typename LHS1, typename RHS1, typename OP1>
2980  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
2981  {
2982  matrix_base<T> temp(proxy.lhs());
2983  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2984  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
2985  lhs += temp2;
2986  }
2987 
2988  // x += mat_expr .* mat_expr
2989  template<typename LHS1, typename RHS1, typename OP1,
2990  typename LHS2, typename RHS2, typename OP2>
2991  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2992  const matrix_expression<const LHS2, const RHS2, OP2>,
2993  op_element_binary<OP> > const & proxy)
2994  {
2995  matrix_base<T> temp1(proxy.lhs());
2996  matrix_base<T> temp2(proxy.rhs());
2997  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
2998  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
2999  lhs += temp3;
3000  }
3001  };
3002 
3003  // generic x -= mat_expr1 .* mat_expr2:
3004  template<typename T, typename LHS, typename RHS, typename OP>
3005  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
3006  {
3007 
3008  // x -= y .* z
3009  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3010  {
3011  matrix_base<T> temp(proxy);
3012  lhs -= temp;
3013  }
3014 
3015  // x -= y .* mat_expr
3016  template<typename LHS2, typename RHS2, typename OP2>
3017  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
3018  {
3019  matrix_base<T> temp(proxy.rhs());
3020  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3021  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(proxy.lhs(), temp));
3022  lhs -= temp2;
3023  }
3024 
3025  // x -= mat_expr .* z
3026  template<typename LHS1, typename RHS1, typename OP1>
3027  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
3028  {
3029  matrix_base<T> temp(proxy.lhs());
3030  matrix_base<T> temp2(temp.size1(), temp.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3031  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp, proxy.rhs()));
3032  lhs -= temp2;
3033  }
3034 
3035  // x -= mat_expr .* mat_expr
3036  template<typename LHS1, typename RHS1, typename OP1,
3037  typename LHS2, typename RHS2, typename OP2>
3038  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
3039  const matrix_expression<const LHS2, const RHS2, OP2>,
3040  op_element_binary<OP> > const & proxy)
3041  {
3042  matrix_base<T> temp1(proxy.lhs());
3043  matrix_base<T> temp2(proxy.rhs());
3044  matrix_base<T> temp3(temp1.size1(), temp1.size2(), lhs.row_major(), viennacl::traits::context(lhs));
3045  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >(temp1, temp2));
3046  lhs -= temp3;
3047  }
3048  };
3049 
3051 
3052  template<typename T, typename LHS, typename RHS, typename OP>
3053  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3054  {
3055  // x = OP(y)
3056  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3057  {
3058  viennacl::linalg::element_op(lhs, proxy);
3059  }
3060 
3061  // x = OP(vec_expr)
3062  template<typename LHS2, typename RHS2, typename OP2>
3063  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3064  const matrix_expression<const LHS2, const RHS2, OP2>,
3065  op_element_unary<OP> > const & proxy)
3066  {
3067  matrix_base<T> temp(proxy.rhs());
3068  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp));
3069  }
3070  };
3071 
3072  template<typename T, typename LHS, typename RHS, typename OP>
3073  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3074  {
3075  // x += OP(y)
3076  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3077  {
3078  matrix_base<T> temp(proxy);
3079  lhs += temp;
3080  }
3081 
3082  // x += OP(vec_expr)
3083  template<typename LHS2, typename RHS2, typename OP2>
3084  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3085  const matrix_expression<const LHS2, const RHS2, OP2>,
3086  op_element_unary<OP> > const & proxy)
3087  {
3088  matrix_base<T> temp(proxy.rhs());
3089  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
3090  lhs += temp;
3091  }
3092  };
3093 
3094  template<typename T, typename LHS, typename RHS, typename OP>
3095  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
3096  {
3097  // x -= OP(y)
3098  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
3099  {
3100  matrix_base<T> temp(proxy);
3101  lhs -= temp;
3102  }
3103 
3104  // x -= OP(vec_expr)
3105  template<typename LHS2, typename RHS2, typename OP2>
3106  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
3107  const matrix_expression<const LHS2, const RHS2, OP2>,
3108  op_element_unary<OP> > const & proxy)
3109  {
3110  matrix_base<T> temp(proxy.rhs());
3111  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
3112  lhs -= temp;
3113  }
3114  };
3115 
3116 
3117 
3119 
3120  // C = A * B
3121  template<typename T>
3122  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3123  {
3124  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3125  {
3126  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3127  {
3128  matrix_base<T> temp(rhs);
3129  lhs = temp;
3130  }
3131  else
3132  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3133  }
3134  };
3135 
3136  // C = A * B^T
3137  template<typename T>
3138  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>,
3139  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3140  op_mat_mat_prod> >
3141  {
3142  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3143  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3144  op_mat_mat_prod> const & rhs)
3145  {
3146  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3147  {
3148  matrix_base<T> temp(rhs);
3149  lhs = temp;
3150  }
3151  else
3152  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3153  }
3154  };
3155 
3156  // C = A * EXPR for some matrix expression EXPR
3157  template<typename T, typename LhsT, typename RhsT, typename OpT>
3158  struct op_executor<matrix_base<T>,
3159  op_assign,
3160  matrix_expression<const matrix_base<T>,
3161  const matrix_expression<const LhsT, const RhsT, OpT>,
3162  op_mat_mat_prod>
3163  >
3164  {
3165  static void apply(matrix_base<T> & lhs,
3166  matrix_expression<const matrix_base<T>,
3167  const matrix_expression<const LhsT, const RhsT, OpT>,
3168  op_mat_mat_prod> const & rhs)
3169  {
3170  matrix_base<T> temp(rhs.rhs());
3171  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(0));
3172  }
3173  };
3174 
3175 
3176 
3177  // C = A^T * B
3178  template<typename T>
3179  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3180  const matrix_base<T>,
3181  op_mat_mat_prod> >
3182  {
3183  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3184  const matrix_base<T>,
3185  op_mat_mat_prod> const & rhs)
3186  {
3187  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3188  {
3189  matrix_base<T> temp(rhs);
3190  lhs = temp;
3191  }
3192  else
3193  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3194  }
3195  };
3196 
3197  // C = EXPR * B for some matrix expression EXPR
3198  template<typename T, typename LhsT, typename RhsT, typename OpT>
3199  struct op_executor<matrix_base<T>,
3200  op_assign,
3201  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3202  const matrix_base<T>,
3203  op_mat_mat_prod>
3204  >
3205  {
3206  static void apply(matrix_base<T> & lhs,
3207  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3208  const matrix_base<T>,
3209  op_mat_mat_prod> const & rhs)
3210  {
3211  matrix_base<T> temp(rhs.lhs());
3212  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(0));
3213  }
3214  };
3215 
3216 
3217  // C = A^T * B^T
3218  template<typename T>
3219  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3220  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3221  op_mat_mat_prod> >
3222  {
3223  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3224  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3225  op_mat_mat_prod> const & rhs)
3226  {
3227  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3228  {
3229  matrix_base<T> temp(rhs);
3230  lhs = temp;
3231  }
3232  else
3233  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
3234  }
3235  };
3236 
3237  // C = EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3238  template<typename T,
3239  typename LhsT1, typename RhsT1, typename OpT1,
3240  typename LhsT2, typename RhsT2, typename OpT2>
3241  struct op_executor<matrix_base<T>,
3242  op_assign,
3243  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3244  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3245  op_mat_mat_prod>
3246  >
3247  {
3248  static void apply(matrix_base<T> & lhs,
3249  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3250  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3251  op_mat_mat_prod> const & rhs)
3252  {
3253  matrix_base<T> temp1(rhs.lhs());
3254  matrix_base<T> temp2(rhs.rhs());
3255  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(1.0), T(0));
3256  }
3257  };
3258 
3259 
3260 
3261 
3262  // C += A * B
3263  template<typename T>
3264  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3265  {
3266  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3267  {
3268  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3269  {
3270  matrix_base<T> temp(rhs);
3271  lhs += temp;
3272  }
3273  else
3274  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3275  }
3276  };
3277 
3278  // C += A * B^T
3279  template<typename T>
3280  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>,
3281  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3282  op_mat_mat_prod> >
3283  {
3284  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3285  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3286  op_mat_mat_prod> const & rhs)
3287  {
3288  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3289  {
3290  matrix_base<T> temp(rhs);
3291  lhs += temp;
3292  }
3293  else
3294  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3295  }
3296  };
3297 
3298  // C += A * EXPR for some matrix expression EXPR
3299  template<typename T, typename LhsT, typename RhsT, typename OpT>
3300  struct op_executor<matrix_base<T>,
3301  op_inplace_add,
3302  matrix_expression<const matrix_base<T>,
3303  const matrix_expression<const LhsT, const RhsT, OpT>,
3304  op_mat_mat_prod>
3305  >
3306  {
3307  static void apply(matrix_base<T> & lhs,
3308  matrix_expression<const matrix_base<T>,
3309  const matrix_expression<const LhsT, const RhsT, OpT>,
3310  op_mat_mat_prod> const & rhs)
3311  {
3312  matrix_base<T> temp(rhs.rhs());
3313  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(1.0));
3314  }
3315  };
3316 
3317 
3318  // C += A^T * B
3319  template<typename T>
3320  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3321  const matrix_base<T>,
3322  op_mat_mat_prod> >
3323  {
3324  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3325  const matrix_base<T>,
3326  op_mat_mat_prod> const & rhs)
3327  {
3328  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3329  {
3330  matrix_base<T> temp(rhs);
3331  lhs += temp;
3332  }
3333  else
3334  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3335  }
3336  };
3337 
3338  // C += EXPR * B for some matrix expression EXPR
3339  template<typename T, typename LhsT, typename RhsT, typename OpT>
3340  struct op_executor<matrix_base<T>,
3341  op_inplace_add,
3342  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3343  const matrix_base<T>,
3344  op_mat_mat_prod>
3345  >
3346  {
3347  static void apply(matrix_base<T> & lhs,
3348  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3349  const matrix_base<T>,
3350  op_mat_mat_prod> const & rhs)
3351  {
3352  matrix_base<T> temp(rhs.lhs());
3353  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(1.0));
3354  }
3355  };
3356 
3357 
3358  // C += A^T * B^T
3359  template<typename T>
3360  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3361  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3362  op_mat_mat_prod> >
3363  {
3364  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3365  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3366  op_mat_mat_prod> const & rhs)
3367  {
3368  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3369  {
3370  matrix_base<T> temp(rhs);
3371  lhs += temp;
3372  }
3373  else
3374  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
3375  }
3376  };
3377 
3378 
3379  // C += EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3380  template<typename T,
3381  typename LhsT1, typename RhsT1, typename OpT1,
3382  typename LhsT2, typename RhsT2, typename OpT2>
3383  struct op_executor<matrix_base<T>,
3384  op_inplace_add,
3385  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3386  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3387  op_mat_mat_prod>
3388  >
3389  {
3390  static void apply(matrix_base<T> & lhs,
3391  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3392  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3393  op_mat_mat_prod> const & rhs)
3394  {
3395  matrix_base<T> temp1(rhs.lhs());
3396  matrix_base<T> temp2(rhs.rhs());
3397  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(1.0), T(1.0));
3398  }
3399  };
3400 
3401 
3402 
3403  // C -= A * B
3404  template<typename T>
3405  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
3406  {
3407  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
3408  {
3409  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()))
3410  {
3411  matrix_base<T> temp(rhs);
3412  lhs -= temp;
3413  }
3414  else
3415  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3416  }
3417  };
3418 
3419  // C -= A * B^T
3420  template<typename T>
3421  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>,
3422  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3423  op_mat_mat_prod> >
3424  {
3425  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
3426  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3427  op_mat_mat_prod> const & rhs)
3428  {
3429  if (op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3430  {
3431  matrix_base<T> temp(rhs);
3432  lhs -= temp;
3433  }
3434  else
3435  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3436  }
3437  };
3438 
3439  // C -= A * EXPR for some matrix expression EXPR
3440  template<typename T, typename LhsT, typename RhsT, typename OpT>
3441  struct op_executor<matrix_base<T>,
3442  op_inplace_sub,
3443  matrix_expression<const matrix_base<T>,
3444  const matrix_expression<const LhsT, const RhsT, OpT>,
3445  op_mat_mat_prod>
3446  >
3447  {
3448  static void apply(matrix_base<T> & lhs,
3449  matrix_expression<const matrix_base<T>,
3450  const matrix_expression<const LhsT, const RhsT, OpT>,
3451  op_mat_mat_prod> const & rhs)
3452  {
3453  matrix_base<T> temp(rhs.rhs());
3454  viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(-1.0), T(1.0));
3455  }
3456  };
3457 
3458 
3459  // C -= A^T * B
3460  template<typename T>
3461  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3462  const matrix_base<T>,
3463  op_mat_mat_prod> >
3464  {
3465  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3466  const matrix_base<T>,
3467  op_mat_mat_prod> const & rhs)
3468  {
3469  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs()))
3470  {
3471  matrix_base<T> temp(rhs);
3472  lhs -= temp;
3473  }
3474  else
3475  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3476  }
3477  };
3478 
3479  // C += EXPR * B for some matrix expression EXPR
3480  template<typename T, typename LhsT, typename RhsT, typename OpT>
3481  struct op_executor<matrix_base<T>,
3482  op_inplace_sub,
3483  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3484  const matrix_base<T>,
3485  op_mat_mat_prod>
3486  >
3487  {
3488  static void apply(matrix_base<T> & lhs,
3489  matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3490  const matrix_base<T>,
3491  op_mat_mat_prod> const & rhs)
3492  {
3493  matrix_base<T> temp(rhs.lhs());
3494  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(-1.0), T(1.0));
3495  }
3496  };
3497 
3498 
3499  // C -= A^T * B^T
3500  template<typename T>
3501  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3502  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3503  op_mat_mat_prod> >
3504  {
3505  static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3506  const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3507  op_mat_mat_prod> const & rhs)
3508  {
3509  if (op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs()))
3510  {
3511  matrix_base<T> temp(rhs);
3512  lhs -= temp;
3513  }
3514  else
3515  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
3516  }
3517  };
3518 
3519  // C -= EXPR1 * EXPR2 for some matrix expressions EXPR1 and EXPR2
3520  template<typename T,
3521  typename LhsT1, typename RhsT1, typename OpT1,
3522  typename LhsT2, typename RhsT2, typename OpT2>
3523  struct op_executor<matrix_base<T>,
3524  op_inplace_sub,
3525  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3526  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3527  op_mat_mat_prod>
3528  >
3529  {
3530  static void apply(matrix_base<T> & lhs,
3531  matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3532  const matrix_expression<const LhsT2, const RhsT2, OpT2>,
3533  op_mat_mat_prod> const & rhs)
3534  {
3535  matrix_base<T> temp1(rhs.lhs());
3536  matrix_base<T> temp2(rhs.rhs());
3537  viennacl::linalg::prod_impl(temp1, temp2, lhs, T(-1.0), T(1.0));
3538  }
3539  };
3540 
3542 
3543  // y = A * x
3544  template<typename T>
3545  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3546  {
3547  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3548  {
3549  // check for x = A * x
3550  if (op_aliasing(lhs, rhs.rhs()))
3551  {
3552  vector_base<T> temp(rhs);
3553  lhs = temp;
3554  }
3555  else
3556  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3557  }
3558  };
3559 
3560  // y = A^T * x
3561  template<typename T>
3562  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3563  const vector_base<T>,
3564  op_prod> >
3565  {
3566  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3567  const vector_base<T>,
3568  op_prod> const & rhs)
3569  {
3570  // check for x = A^T * x
3571  if (op_aliasing(lhs, rhs.rhs()))
3572  {
3573  vector_base<T> temp(rhs);
3574  lhs = temp;
3575  }
3576  else
3577  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
3578  }
3579  };
3580 
3581  // y = MAT_EXPR * x for a matrix expression MAT_EXPR
3582  template<typename T, typename LhsT, typename RhsT, typename OpT>
3583  struct op_executor<vector_base<T>,
3584  op_assign,
3585  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3586  const vector_base<T>,
3587  op_prod>
3588  >
3589  {
3590  static void apply(vector_base<T> & lhs,
3591  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3592  const vector_base<T>,
3593  op_prod> const & rhs)
3594  {
3595  matrix_base<T> temp(rhs.lhs());
3596  viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs);
3597  }
3598  };
3599 
3600  // y = A * VEC_EXPR for a vector expression VEC_EXPR
3601  template<typename T, typename LhsT, typename RhsT, typename OpT>
3602  struct op_executor<vector_base<T>,
3603  op_assign,
3604  vector_expression<const matrix_base<T>,
3605  const vector_expression<const LhsT, const RhsT, OpT>,
3606  op_prod>
3607  >
3608  {
3609  static void apply(vector_base<T> & lhs,
3610  vector_expression<const matrix_base<T>,
3611  const vector_expression<const LhsT, const RhsT, OpT>,
3612  op_prod> const & rhs)
3613  {
3614  vector_base<T> x(rhs.rhs());
3615  viennacl::linalg::prod_impl(rhs.lhs(), x, lhs);
3616  }
3617  };
3618 
3619  // y = MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3620  template<typename T,
3621  typename LhsT1, typename RhsT1, typename OpT1,
3622  typename LhsT2, typename RhsT2, typename OpT2>
3623  struct op_executor<vector_base<T>,
3624  op_assign,
3625  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3626  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3627  op_prod>
3628  >
3629  {
3630  static void apply(vector_base<T> & lhs,
3631  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3632  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3633  op_prod> const & rhs)
3634  {
3635  matrix_base<T> A(rhs.lhs());
3636  vector_base<T> x(rhs.rhs());
3637  viennacl::linalg::prod_impl(A, x, lhs);
3638  }
3639  };
3640 
3641 
3642 
3643  // y += A * x
3644  template<typename T>
3645  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3646  {
3647  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3648  {
3649  vector_base<T> temp(rhs);
3650  lhs += temp;
3651  }
3652  };
3653 
3654  // y += A^T * x
3655  template<typename T>
3656  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3657  const vector_base<T>,
3658  op_prod> >
3659  {
3660  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3661  const vector_base<T>,
3662  op_prod> const & rhs)
3663  {
3664  vector_base<T> temp(rhs);
3665  lhs += temp;
3666  }
3667  };
3668 
3669  // y += MAT_EXPR * x for a matrix expression MAT_EXPR
3670  template<typename T, typename LhsT, typename RhsT, typename OpT>
3671  struct op_executor<vector_base<T>,
3672  op_inplace_add,
3673  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3674  const vector_base<T>,
3675  op_prod>
3676  >
3677  {
3678  static void apply(vector_base<T> & lhs,
3679  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3680  const vector_base<T>,
3681  op_prod> const & rhs)
3682  {
3683  matrix_base<T> A(rhs.lhs());
3684  vector_base<T> y(lhs);
3685  viennacl::linalg::prod_impl(A, rhs.rhs(), y);
3686  lhs += y;
3687  }
3688  };
3689 
3690  // y += A * VEC_EXPR for a vector expression VEC_EXPR
3691  template<typename T, typename LhsT, typename RhsT, typename OpT>
3692  struct op_executor<vector_base<T>,
3693  op_inplace_add,
3694  vector_expression<const matrix_base<T>,
3695  const vector_expression<const LhsT, const RhsT, OpT>,
3696  op_prod>
3697  >
3698  {
3699  static void apply(vector_base<T> & lhs,
3700  vector_expression<const matrix_base<T>,
3701  const vector_expression<const LhsT, const RhsT, OpT>,
3702  op_prod> const & rhs)
3703  {
3704  vector_base<T> x(rhs.rhs());
3705  vector_base<T> y(lhs);
3706  viennacl::linalg::prod_impl(rhs.lhs(), x, y);
3707  lhs += y;
3708  }
3709  };
3710 
3711  // y += MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3712  template<typename T,
3713  typename LhsT1, typename RhsT1, typename OpT1,
3714  typename LhsT2, typename RhsT2, typename OpT2>
3715  struct op_executor<vector_base<T>,
3716  op_inplace_add,
3717  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3718  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3719  op_prod>
3720  >
3721  {
3722  static void apply(vector_base<T> & lhs,
3723  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3724  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3725  op_prod> const & rhs)
3726  {
3727  matrix_base<T> A(rhs.lhs());
3728  vector_base<T> x(rhs.rhs());
3729  vector_base<T> y(lhs);
3730  viennacl::linalg::prod_impl(A, x, y);
3731  lhs += y;
3732  }
3733  };
3734 
3735 
3736 
3737  // y -= A * x
3738  template<typename T>
3739  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
3740  {
3741  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
3742  {
3743  vector_base<T> temp(rhs);
3744  lhs -= temp;
3745  }
3746  };
3747 
3748  // y -= A^T * x
3749  template<typename T>
3750  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3751  const vector_base<T>,
3752  op_prod> >
3753  {
3754  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
3755  const vector_base<T>,
3756  op_prod> const & rhs)
3757  {
3758  vector_base<T> temp(rhs);
3759  lhs -= temp;
3760  }
3761  };
3762 
3763  // y -= MAT_EXPR * x for a matrix expression MAT_EXPR
3764  template<typename T, typename LhsT, typename RhsT, typename OpT>
3765  struct op_executor<vector_base<T>,
3766  op_inplace_sub,
3767  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3768  const vector_base<T>,
3769  op_prod>
3770  >
3771  {
3772  static void apply(vector_base<T> & lhs,
3773  vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
3774  const vector_base<T>,
3775  op_prod> const & rhs)
3776  {
3777  matrix_base<T> A(rhs.lhs());
3778  vector_base<T> y(lhs);
3779  viennacl::linalg::prod_impl(A, rhs.rhs(), y);
3780  lhs -= y;
3781  }
3782  };
3783 
3784  // y -= A * VEC_EXPR for a vector expression VEC_EXPR
3785  template<typename T, typename LhsT, typename RhsT, typename OpT>
3786  struct op_executor<vector_base<T>,
3787  op_inplace_sub,
3788  vector_expression<const matrix_base<T>,
3789  const vector_expression<const LhsT, const RhsT, OpT>,
3790  op_prod>
3791  >
3792  {
3793  static void apply(vector_base<T> & lhs,
3794  vector_expression<const matrix_base<T>,
3795  const vector_expression<const LhsT, const RhsT, OpT>,
3796  op_prod> const & rhs)
3797  {
3798  vector_base<T> x(rhs.rhs());
3799  vector_base<T> y(lhs);
3800  viennacl::linalg::prod_impl(rhs.lhs(), x, y);
3801  lhs -= y;
3802  }
3803  };
3804 
3805  // y -= MAT_EXPR * VEC_EXPR for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
3806  template<typename T,
3807  typename LhsT1, typename RhsT1, typename OpT1,
3808  typename LhsT2, typename RhsT2, typename OpT2>
3809  struct op_executor<vector_base<T>,
3810  op_inplace_sub,
3811  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3812  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3813  op_prod>
3814  >
3815  {
3816  static void apply(vector_base<T> & lhs,
3817  vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
3818  const vector_expression<const LhsT2, const RhsT2, OpT2>,
3819  op_prod> const & rhs)
3820  {
3821  matrix_base<T> A(rhs.lhs());
3822  vector_base<T> x(rhs.rhs());
3823  vector_base<T> y(lhs);
3824  viennacl::linalg::prod_impl(A, x, y);
3825  lhs -= y;
3826  }
3827  };
3828 
3829 
3830 
3832 
3833  // A = v1 * v2^T
3834  template<typename T>
3835  struct op_executor<matrix_base<T>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3836  {
3837  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3838  {
3839  lhs.clear();
3840  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3841  }
3842  };
3843 
3844  // A = alpha * v1 * v2^T
3845  template<typename T, typename ScalarType>
3846  struct op_executor<matrix_base<T>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3847  const ScalarType,
3848  op_mult> >
3849  {
3850  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3851  const ScalarType,
3852  op_mult> const & rhs)
3853  {
3854  lhs.clear();
3855  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3856  }
3857  };
3858 
3859  // A += v1 * v2^T
3860  template<typename T>
3861  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3862  {
3863  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3864  {
3865  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
3866  }
3867  };
3868 
3869  // A += alpha * v1 * v2^T
3870  template<typename T, typename ScalarType>
3871  struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3872  const ScalarType,
3873  op_mult> >
3874  {
3875  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3876  const ScalarType,
3877  op_mult> const & rhs)
3878  {
3879  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3880  }
3881  };
3882 
3883  // A -= v1 * v2^T
3884  template<typename T>
3885  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3886  {
3887  static void apply(matrix_base<T> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3888  {
3889  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
3890  }
3891  };
3892 
3893  // A -= alpha * v1 * v2^T
3894  template<typename T, typename ScalarType>
3895  struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3896  const ScalarType,
3897  op_mult> >
3898  {
3899  static void apply(matrix_base<T> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3900  const ScalarType,
3901  op_mult> const & rhs)
3902  {
3903  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
3904  }
3905  };
3906 
3907 
3908 } // namespace detail
3909 
3910 } // namespace linalg
3911 
3914 } //namespace viennacl
3915 
3916 #endif
void row_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30
A tag class representing multiplication by a scalar.
Definition: forwards.h:92
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:314
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:236
Implementations of dense matrix related operations including matrix-vector products.
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:242
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:443
self_type & operator=(const self_type &other)
Definition: matrix.hpp:262
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:484
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:462
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Various little tools used here and there in ViennaCL.
matrix_expression< const self_type, const NumericT, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:628
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:195
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:382
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:189
A tag class representing subtraction.
Definition: forwards.h:90
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
viennacl::context context() const
Definition: matrix_def.hpp:46
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:390
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:84
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:375
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:804
A tag class representing division.
Definition: forwards.h:98
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
entry_proxy< NumericT > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:477
matrix_iterator(MatrixT &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:98
static vcl_size_t size1(LHS &lhs, RHS &)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator/=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1664
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT >, const S1, op_mult >>::type operator*(S1 const &value, matrix_base< NumericT > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1364
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:371
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Forward declaration of dense matrix classes.
bool op_aliasing(vector_base< NumericT > const &, B const &)
Definition: op_executor.hpp:36
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:712
vcl_size_t size1() const
Returns the size of the result vector.
Definition: matrix.hpp:72
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
self_type & operator++(void)
Definition: matrix.hpp:103
self_type operator++(int)
Definition: matrix.hpp:104
float NumericT
Definition: bisect.cpp:40
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:186
base_type::size_type size_type
Definition: matrix.hpp:701
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
value_type operator*(void)
Definition: matrix.hpp:102
size_type size2() const
Definition: matrix_def.hpp:45
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator*=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1512
matrix(zero_matrix< NumericT > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:760
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:225
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type internal_row_count, size_type cols, size_type internal_col_count)
Wraps a CUDA or host buffer provided by the user including padding of rows and columns.
Definition: matrix.hpp:737
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix_def.hpp:117
Definition: blas3.hpp:36
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &v)
size_type size1() const
Definition: matrix_def.hpp:44
vcl_size_t size2() const
Definition: matrix.hpp:73
void resize(MatrixType &matrix, vcl_size_t rows, vcl_size_t cols)
Generic resize routine for resizing a matrix (ViennaCL, uBLAS, etc.) to a new size/dimension.
Definition: size.hpp:63
void clear()
Resets all entries to zero.
Definition: matrix.hpp:634
Implementations of operations using sparse matrices.
A tag class representing addition.
Definition: forwards.h:88
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: matrix_def.hpp:93
matrix(const self_type &other)
Definition: matrix.hpp:780
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:62
void scaled_rank_1_update(matrix_base< NumericT > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1593
Determines whether a given expression has a row-major matrix layout.
std::size_t vcl_size_t
Definition: forwards.h:75
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:244
matrix(const base_type &other)
Definition: matrix.hpp:773
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:885
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:218
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only.
Definition: matrix_def.hpp:81
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &v)
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:801
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:69
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:94
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:133
A dense matrix class.
Definition: forwards.h:369
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:750
bool row_major() const
Definition: matrix_def.hpp:248
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
bool row_major(T const &)
Definition: row_major.hpp:38
void set(vcl_size_t index, U value)
Definition: util.hpp:115
matrix(scalar_matrix< NumericT > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:767
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:813
float ScalarType
Definition: fft_1d.cpp:42
A tag class representing transposed matrices.
Definition: forwards.h:220
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440
self_type & operator*=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:517
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
viennacl::matrix< float > m1
MatrixT::value_type value_type
Definition: matrix.hpp:96
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
bool operator!=(self_type const &other)
Definition: matrix.hpp:107
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:331
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:908
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
Wraps a CUDA or host buffer provided by the user.
Definition: matrix.hpp:721
self_type & operator/=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:573
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void resize(size_type rows, size_type columns, bool preserve=true)
Definition: matrix.hpp:638
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
memory_types
Definition: forwards.h:345
static vcl_size_t size2(LHS &, RHS &rhs)
matrix(identity_matrix< NumericT > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:753
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT > &A)
Dispatcher interface for A = diag(v, k)
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:66
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:192
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:233
MatrixT & operator()(void) const
Definition: matrix.hpp:112
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Implementation of the ViennaCL scalar class.
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:621
A collection of compile time type deductions.
A tag for row-major storage of a dense matrix.
Definition: forwards.h:304
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:704
bool operator==(self_type const &other)
Definition: matrix.hpp:106
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:99
A tag indicating iteration along increasing columns index of a matrix.
Definition: matrix.hpp:87
Simple enable-if variant that uses the SFINAE pattern.
void column_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)
base_type & operator=(viennacl::matrix< OtherNumericT, F2 > const &B)
Definition: matrix.hpp:798
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)