doc/html/matrix_8hpp_source.html

 #ifndef VIENNACL_MATRIX_HPP_

 #define VIENNACL_MATRIX_HPP_


 /* =========================================================================

    Copyright (c) 2010-2015, Institute for Microelectronics,

                             Institute for Analysis and Scientific Computing,

                             TU Wien.

    Portions of this software are copyright by UChicago Argonne, LLC.


                             -----------------

                   ViennaCL - The Vienna Computing Library

                             -----------------


    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at


    (A list of authors and contributors can be found in the manual)


    License:         MIT (X11), see file LICENSE in the base directory

 ============================================================================= */


 #include "viennacl/forwards.h"

 #include "viennacl/detail/matrix_def.hpp"

 #include "viennacl/scalar.hpp"

 #include "viennacl/linalg/matrix_operations.hpp"

 #include "viennacl/linalg/sparse_matrix_operations.hpp"

 #include "viennacl/tools/tools.hpp"

 #include "viennacl/tools/matrix_size_deducer.hpp"

 #include "viennacl/meta/result_of.hpp"

 #include "viennacl/meta/enable_if.hpp"

 #include "viennacl/traits/handle.hpp"

 #include "viennacl/traits/row_major.hpp"


 namespace viennacl

 {


 //#ifdef VIENNACL_WITH_OPENCL

 //  template<class NumericT, class DISTRIBUTION>

 //  rand::random_matrix_t<NumericT, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){

 //      return rand::random_matrix_t<NumericT,DISTRIBUTION>(size1,size2,distribution);

 //  }

 //#endif


 template<typename LHS, typename RHS, typename OP>

 class matrix_expression

 {

   typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type     lhs_reference_type;

   typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type     rhs_reference_type;


 public:

   typedef vcl_size_t       size_type;


   matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}


   LHS & lhs() const { return lhs_; }

   RHS & rhs() const { return rhs_; }


   vcl_size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(lhs_, rhs_); }

   vcl_size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(lhs_, rhs_); }


 private:

   lhs_reference_type lhs_;

   rhs_reference_type rhs_;

 };


 /** @brief Empty tag type. Presumably selects row-wise traversal when used as the ROWCOL parameter of matrix_iterator -- confirm against viennacl::tools::MATRIX_ITERATOR_INCREMENTER. */
 struct row_iteration {};


 /** @brief Empty tag type. Presumably selects column-wise traversal when used as the ROWCOL parameter of matrix_iterator -- confirm against viennacl::tools::MATRIX_ITERATOR_INCREMENTER. */
 struct col_iteration {};


 //STL-like iterator. TODO: STL-compliance...
 /** @brief Iterator over the entries of a matrix, addressed by a (row, column) index pair.
  *
  * Advancing is delegated to viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MatrixT>.
  * All members that only read the iterator state are const-qualified, so they can be
  * called on const iterators and temporaries.
  *
  * @tparam ROWCOL    Tag type forwarded to MATRIX_ITERATOR_INCREMENTER (e.g. row_iteration, col_iteration)
  * @tparam MatrixT   Matrix type iterated over; must provide value_type and operator()(row, col)
  */
 template<typename ROWCOL, typename MatrixT>
 class matrix_iterator
 {
   typedef matrix_iterator<ROWCOL, MatrixT>    self_type;
 public:
   typedef typename MatrixT::value_type       value_type;

   /** @brief Creates an iterator on 'mat' positioned at (start_row, start_col). */
   matrix_iterator(MatrixT & mat,
                   vcl_size_t start_row,
                   vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}

   /** @brief Returns the entry at the current position (by value). */
   value_type operator*(void) const { return mat_(row_, col_); }

   /** @brief Pre-increment: advances the position via MATRIX_ITERATOR_INCREMENTER. */
   self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MatrixT>::apply(mat_, row_, col_); return *this; }

   /** @brief Post-increment: advances the position and returns a copy of the previous state. */
   self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }

   /** @brief Two iterators compare equal if their row and column indices agree (the matrix is not compared). */
   bool operator==(self_type const & other) const { return (row_ == other.row_) && (col_ == other.col_); }
   bool operator!=(self_type const & other) const { return !(*this == other); }

   /** @brief Current row index. */
   vcl_size_t index1() const { return row_; }
   /** @brief Current column index. */
   vcl_size_t index2() const { return col_; }

   /** @brief Returns the matrix this iterator refers to. */
   MatrixT & operator()(void) const { return mat_; }

 private:
   MatrixT & mat_;
   vcl_size_t row_;
   vcl_size_t col_;
 };


 /** @brief Creates a matrix with the given logical dimensions.
  *
  * The internal (padded) dimensions are the logical ones rounded up to a multiple of
  * dense_padding_size. Memory is only created, and then cleared, if both dimensions are positive.
  *
  * @param rows           Number of rows
  * @param columns        Number of columns
  * @param is_row_major   Storage layout flag stored in row_major_
  * @param ctx            Context passed to viennacl::backend::memory_create
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
   : size1_(rows),
     size2_(columns),
     start1_(0),
     start2_(0),
     stride1_(1),
     stride2_(1),
     internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, dense_padding_size)),
     internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, dense_padding_size)),
     row_major_fixed_(true),
     row_major_(is_row_major)
 {
   // An empty matrix does not own any buffer.
   if (!(rows > 0 && columns > 0))
     return;

   viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx);
   clear();
 }


 /** @brief Creates a matrix holding the result of a matrix expression.
  *
  * Dimensions, storage layout, active memory handle and context are taken from the proxy.
  * If the padded size is nonzero, the buffer is created, cleared, and then filled by
  * assigning the proxy.
  *
  * @param proxy   The expression to be evaluated into the new matrix
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 template<typename LHS, typename RHS, typename OP>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(matrix_expression<const LHS, const RHS, OP> const & proxy)
   : size1_(viennacl::traits::size1(proxy)),
     size2_(viennacl::traits::size2(proxy)),
     start1_(0),
     start2_(0),
     stride1_(1),
     stride2_(1),
     internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
     internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
     row_major_fixed_(true),
     row_major_(viennacl::traits::row_major(proxy))
 {
   elements_.switch_active_handle_id(viennacl::traits::active_handle_id(proxy));

   // Nothing to allocate or evaluate for an empty result.
   if (!(internal_size() > 0))
     return;

   viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
   clear();
   self_type::operator=(proxy);
 }


 // CUDA or host memory:

 /** @brief Wraps a user-provided CUDA or host buffer as a matrix; all size, start, stride and padding values are taken as given.
  *
  * The reference count of the wrapped handle is incremented so that the user-provided
  * memory is not deleted once the matrix object is destroyed.
  *
  * NOTE(review): for any mem_type other than CUDA_MEMORY or MAIN_MEMORY no buffer is attached
  * and only the raw size is recorded -- confirm whether this is intended.
  *
  * @param ptr_to_mem           Pointer to the user-provided buffer
  * @param mem_type             viennacl::CUDA_MEMORY or viennacl::MAIN_MEMORY
  * @param mat_size1            Number of rows
  * @param mat_start1           Row start index
  * @param mat_stride1          Row stride
  * @param mat_internal_size1   Internal (padded) number of rows
  * @param mat_size2            Number of columns
  * @param mat_start2           Column start index
  * @param mat_stride2          Column stride
  * @param mat_internal_size2   Internal (padded) number of columns
  * @param is_row_major         Storage layout flag stored in row_major_
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
                                                         size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
                                                         size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
                                                         bool is_row_major)
   : size1_(mat_size1),
     size2_(mat_size2),
     start1_(mat_start1),
     start2_(mat_start2),
     stride1_(mat_stride1),
     stride2_(mat_stride2),
     internal_size1_(mat_internal_size1),
     internal_size2_(mat_internal_size2),
     row_major_fixed_(true),
     row_major_(is_row_major)
 {
   if (mem_type == viennacl::CUDA_MEMORY)
   {
 #ifdef VIENNACL_WITH_CUDA
     elements_.switch_active_handle_id(viennacl::CUDA_MEMORY);
     elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
     // Extra reference: keeps the user-provided memory alive after this matrix is destroyed.
     elements_.cuda_handle().inc();
 #else
     throw cuda_not_available_exception();
 #endif
   }
   else if (mem_type == viennacl::MAIN_MEMORY)
   {
     elements_.switch_active_handle_id(viennacl::MAIN_MEMORY);
     elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
     // Extra reference: keeps the user-provided memory alive after this matrix is destroyed.
     elements_.ram_handle().inc();
   }

   elements_.raw_size(sizeof(NumericT) * internal_size());
 }


 #ifdef VIENNACL_WITH_OPENCL

 /** @brief Wraps a user-provided OpenCL buffer as an unpadded matrix (internal sizes equal the logical sizes).
  *
  * The reference count of the handle is incremented so that the user-provided memory
  * is not deleted once the matrix object is destroyed.
  *
  * @param mem            The OpenCL buffer to wrap
  * @param rows           Number of rows
  * @param columns        Number of columns
  * @param is_row_major   Storage layout flag stored in row_major_
  * @param ctx            Context whose OpenCL context is attached to the handle
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx)
   : size1_(rows),
     size2_(columns),
     start1_(0),
     start2_(0),
     stride1_(1),
     stride2_(1),
     internal_size1_(rows),
     internal_size2_(columns),
     row_major_fixed_(true),
     row_major_(is_row_major)
 {
   elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);

   elements_.opencl_handle() = mem;
   // Extra reference: keeps the user-provided memory alive after this matrix is destroyed.
   elements_.opencl_handle().inc();
   elements_.opencl_handle().context(ctx.opencl_context());

   elements_.raw_size(sizeof(NumericT)*internal_size());
 }


 /** @brief Wraps a user-provided OpenCL buffer as a matrix with explicitly given sizes, starts, strides and padded sizes.
  *
  * The reference count of the handle is incremented so that the user-provided memory
  * is not deleted once the matrix object is destroyed.
  *
  * @param mem                  The OpenCL buffer to wrap
  * @param ctx                  Context whose OpenCL context is attached to the handle
  * @param mat_size1            Number of rows
  * @param mat_start1           Row start index
  * @param mat_stride1          Row stride
  * @param mat_internal_size1   Internal (padded) number of rows
  * @param mat_size2            Number of columns
  * @param mat_start2           Column start index
  * @param mat_stride2          Column stride
  * @param mat_internal_size2   Internal (padded) number of columns
  * @param is_row_major         Storage layout flag stored in row_major_
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(cl_mem mem, viennacl::context ctx,
                                                         size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1,
                                                         size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2,
                                                         bool is_row_major)
   : size1_(mat_size1),
     size2_(mat_size2),
     start1_(mat_start1),
     start2_(mat_start2),
     stride1_(mat_stride1),
     stride2_(mat_stride2),
     internal_size1_(mat_internal_size1),
     internal_size2_(mat_internal_size2),
     row_major_fixed_(true),
     row_major_(is_row_major)
 {
   elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);

   elements_.opencl_handle() = mem;
   // Extra reference: keeps the user-provided memory alive after this matrix is destroyed.
   elements_.opencl_handle().inc();
   elements_.opencl_handle().context(ctx.opencl_context());

   elements_.raw_size(sizeof(NumericT)*internal_size());
 }

 #endif


 // Copy CTOR

 /** @brief Copy constructor: creates a matrix with the logical size and layout of 'other' and copies its entries.
  *
  * Start offsets are reset to 0 and strides to 1; the padded sizes are recomputed from
  * dense_padding_size. The buffer is created in the context of 'other'.
  *
  * @param other   The matrix to copy from
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(const matrix_base<NumericT, SizeT, DistanceT> & other)
   : size1_(other.size1()),
     size2_(other.size2()),
     start1_(0),
     start2_(0),
     stride1_(1),
     stride2_(1),
     internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
     internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
     row_major_fixed_(true),
     row_major_(other.row_major())
 {
   elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other));

   // Nothing to allocate or copy for an empty matrix.
   if (!(internal_size() > 0))
     return;

   viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other));
   clear();
   self_type::operator=(other);
 }


 // Conversion CTOR

 /** @brief Conversion constructor: creates a matrix from a matrix with a different numeric type.
  *
  * Start offsets are reset to 0 and strides to 1; the padded sizes are recomputed from
  * dense_padding_size. The buffer is created in the context of 'other', and the entries
  * are transferred through the converting assignment operator.
  *
  * @param other   The matrix (with entries of type OtherNumericT) to convert from
  */
 template<typename NumericT, typename SizeT, typename DistanceT>
 template<typename OtherNumericT>
 matrix_base<NumericT, SizeT, DistanceT>::matrix_base(const matrix_base<OtherNumericT, SizeT, DistanceT> & other)
   : size1_(other.size1()),
     size2_(other.size2()),
     start1_(0),
     start2_(0),
     stride1_(1),
     stride2_(1),
     internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size)),
     internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size)),
     row_major_fixed_(true),
     row_major_(other.row_major())
 {
   elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other));

   // Nothing to allocate or convert for an empty matrix.
   if (!(internal_size() > 0))
     return;

   viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other));
   clear();
   self_type::operator=(other);
 }


 /** @brief Copy assignment (also enables implicit conversions).
  *
  * Self-assignment is a no-op. If this matrix has no storage yet, it adopts the size
  * (and, unless the layout is fixed, the layout) of 'other' before the entries are copied.
  *
  * @param other   The matrix to copy from
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator=(const self_type & other)  //enables implicit conversions
 {
   if (this == &other)
     return *this;

   const bool needs_allocation = (internal_size() == 0);

   // Empty = empty: nothing to do.
   if (needs_allocation && other.internal_size() == 0)
     return *this;

   if (needs_allocation)
   {
     if (!row_major_fixed_)
       row_major_ = other.row_major();
     resize(other.size1(), other.size2(), false);
   }

   viennacl::linalg::am(*this, other, cpu_value_type(1.0), 1, false, false);
   return *this;
 }


 // Conversion assignment

 /** @brief Converting assignment from a matrix with a different numeric type.
  *
  * If this matrix has no storage yet, it adopts the size (and, unless the layout is fixed,
  * the layout) of 'other'. The entries are transferred with viennacl::linalg::convert.
  *
  * @param other   The matrix (with entries of type OtherNumericT) to convert from
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 template<typename OtherNumericT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator=(const matrix_base<OtherNumericT, SizeT, DistanceT> & other)
 {
   const bool needs_allocation = (internal_size() == 0);

   // Empty = empty: nothing to do.
   if (needs_allocation && other.internal_size() == 0)
     return *this;

   if (needs_allocation)
   {
     if (!row_major_fixed_)
       row_major_ = other.row_major();
     resize(other.size1(), other.size2(), false);
   }

   viennacl::linalg::convert(*this, other);
   return *this;
 }


 /** @brief Assigns the result of a matrix expression to this matrix.
  *
  * If this matrix has no storage yet and the expression is non-empty, size, padding,
  * (non-fixed) layout and buffer are set up from the proxy first. The expression itself
  * is evaluated through linalg::detail::op_executor.
  *
  * @param proxy   The expression to evaluate
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 template<typename LHS, typename RHS, typename OP>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator=(const matrix_expression<const LHS, const RHS, OP> & proxy)
 {
   assert(  (viennacl::traits::size1(proxy) == size1() || size1() == 0)
            && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
            && bool("Incompatible matrix sizes!"));

   if (internal_size() == 0)
   {
     if (viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
     {
       size1_ = viennacl::traits::size1(proxy);
       size2_ = viennacl::traits::size2(proxy);
       internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
       internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);

       if (!row_major_fixed_)
         row_major_ = viennacl::traits::row_major(proxy);

       viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));

       // Clearing is only required if there are padding entries.
       if (size1_ != internal_size1_ || size2_ != internal_size2_)
         clear();
     }
   }

   if (internal_size() > 0)
   {
     typedef linalg::detail::op_executor<self_type, op_assign, matrix_expression<const LHS, const RHS, OP> >   executor_type;
     executor_type::apply(*this, proxy);
   }

   return *this;
 }


 // A = trans(B)

 /** @brief Assigns the transpose of a matrix: A = trans(B).
  *
  * If this matrix has no storage yet and the expression is non-empty, size, padding and
  * (non-fixed) layout are taken from the proxy and the buffer is created and cleared,
  * consistent with the generic expression assignment. Without the allocation, the
  * transpose below would operate on a buffer that was never created.
  *
  * If the expression refers to this very matrix (A = trans(A)), the transpose is first
  * computed into a temporary whose buffer handle is then taken over.
  *
  * @param proxy   The transpose expression
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator=(const matrix_expression<const self_type, const self_type, op_trans> & proxy)
 {
   if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 )
   {
     size1_ = viennacl::traits::size1(proxy);
     size2_ = viennacl::traits::size2(proxy);
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
     if (!row_major_fixed_)
       row_major_ = viennacl::traits::row_major(proxy);

     // Allocate the (so far missing) buffer before anything is read from or written to it.
     viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy));
     clear();
   }

   if ( handle() == proxy.lhs().handle() )
   {
     // Aliasing: transpose into a temporary, then take over its buffer.
     viennacl::matrix_base<NumericT> temp(proxy.lhs().size2(), proxy.lhs().size1(),proxy.lhs().row_major());
     viennacl::linalg::trans(proxy, temp);
     if ( proxy.lhs().size1() != proxy.lhs().size2() )
       this->resize(proxy.lhs().size2(), proxy.lhs().size1());
     elements_ = temp.handle();
   }
   else
   {
     if ( proxy.lhs().size1() != proxy.lhs().size2() )
       this->resize(proxy.lhs().size2(), proxy.lhs().size1());
     viennacl::linalg::trans(proxy, *this);
   }
   return *this;
 }


 /** @brief In-place addition of a matrix expression: *this += proxy.
  *
  * In debug builds, matching dimensions and an initialized (non-empty) target are asserted.
  * The expression is evaluated through linalg::detail::op_executor.
  *
  * @param proxy   The expression to add
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 template<typename LHS, typename RHS, typename OP>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator+=(const matrix_expression<const LHS, const RHS, OP> & proxy)
 {
   assert(  (viennacl::traits::size1(proxy) == size1())
            && (viennacl::traits::size2(proxy) == size2())
            && bool("Incompatible matrix sizes!"));
   assert( (size1() > 0) && bool("Matrix not yet initialized!") );
   assert( (size2() > 0) && bool("Matrix not yet initialized!") );

   linalg::detail::op_executor<self_type, op_inplace_add, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy);

   return *this;
 }


 /** @brief In-place subtraction of a matrix expression: *this -= proxy.
  *
  * In debug builds, matching dimensions and an initialized (non-empty) target are asserted.
  * The expression is evaluated through linalg::detail::op_executor.
  *
  * @param proxy   The expression to subtract
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 template<typename LHS, typename RHS, typename OP>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator-=(const matrix_expression<const LHS, const RHS, OP> & proxy)
 {
   assert(  (viennacl::traits::size1(proxy) == size1())
            && (viennacl::traits::size2(proxy) == size2())
            && bool("Incompatible matrix sizes!"));
   assert( (size1() > 0) && bool("Matrix not yet initialized!") );
   assert( (size2() > 0) && bool("Matrix not yet initialized!") );

   linalg::detail::op_executor<self_type, op_inplace_sub, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy);

   return *this;
 }


 /** @brief Assigns an identity matrix.
  *
  * A matrix without storage adopts the size of 'm' and gets a freshly created, cleared buffer.
  * An existing matrix is first set to zero. Afterwards the diagonal is set to m(0,0).
  *
  * @param m   The identity matrix to assign
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator = (identity_matrix<NumericT> const & m)
 {
   assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
   assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );

   if (internal_size() != 0)
   {
     // Existing storage: wipe all entries before writing the diagonal.
     viennacl::linalg::matrix_assign(*this, NumericT(0));
   }
   else
   {
     // No storage yet: take over the dimensions of m and allocate.
     size1_ = m.size1();
     size2_ = m.size2();
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);

     if (internal_size() > 0)
     {
       viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
       clear();
     }
   }

   if (internal_size() > 0)
     viennacl::linalg::matrix_diagonal_assign(*this, m(0,0));

   return *this;
 }


 /** @brief Assigns a zero matrix.
  *
  * A matrix without storage adopts the size of 'm' and gets a freshly created, cleared buffer.
  * An existing matrix has all of its entries set to zero.
  *
  * @param m   The zero matrix to assign
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator = (zero_matrix<NumericT> const & m)
 {
   assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
   assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );

   if (internal_size() != 0)
   {
     // Existing storage: overwrite every entry with zero.
     viennacl::linalg::matrix_assign(*this, NumericT(0));
   }
   else
   {
     // No storage yet: take over the dimensions of m and allocate.
     size1_ = m.size1();
     size2_ = m.size2();
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);

     if (internal_size() > 0)
     {
       viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
       clear();
     }
   }

   return *this;
 }


 /** @brief Assigns a scalar matrix, i.e. sets the entries to the value m(0,0) via viennacl::linalg::matrix_assign.
  *
  * A matrix without storage first adopts the size of 'm' and gets a freshly created, cleared buffer.
  *
  * @param m   The scalar matrix to assign
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator = (scalar_matrix<NumericT> const & m)
 {
   assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
   assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );

   const bool needs_allocation = (internal_size() == 0);
   if (needs_allocation)
   {
     // No storage yet: take over the dimensions of m and allocate.
     size1_ = m.size1();
     size2_ = m.size2();
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);

     if (internal_size() > 0)
     {
       viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), m.context());
       clear();
     }
   }

   if (internal_size() > 0)
     viennacl::linalg::matrix_assign(*this, m(0,0));

   return *this;
 }


 //read-write access to an element of the matrix/matrix_range/matrix_slice

 /** @brief Read-write access to a single entry of the matrix/matrix_range/matrix_slice.
  *
  * The logical indices are mapped through start and stride, then converted to a linear
  * buffer index according to the storage layout.
  *
  * @param row_index   Logical row index
  * @param col_index   Logical column index
  * @return Proxy object referring to the entry in the underlying buffer
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 entry_proxy<NumericT> matrix_base<NumericT, SizeT, DistanceT>::operator()(size_type row_index, size_type col_index)
 {
   const size_type mem_row = start1_ + stride1_ * row_index;
   const size_type mem_col = start2_ + stride2_ * col_index;

   if (!row_major_)
     return entry_proxy<NumericT>(column_major::mem_index(mem_row, mem_col, internal_size1(), internal_size2()), elements_);

   return entry_proxy<NumericT>(row_major::mem_index(mem_row, mem_col, internal_size1(), internal_size2()), elements_);
 }


 /** @brief Read-only access to a single entry of the matrix/matrix_range/matrix_slice.
  *
  * The logical indices are mapped through start and stride, then converted to a linear
  * buffer index according to the storage layout.
  *
  * @param row_index   Logical row index
  * @param col_index   Logical column index
  * @return Read-only proxy object referring to the entry in the underlying buffer
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 const_entry_proxy<NumericT> matrix_base<NumericT, SizeT, DistanceT>::operator()(size_type row_index, size_type col_index) const
 {
   const size_type mem_row = start1_ + stride1_ * row_index;
   const size_type mem_col = start2_ + stride2_ * col_index;

   if (!row_major_)
     return const_entry_proxy<NumericT>(column_major::mem_index(mem_row, mem_col, internal_size1(), internal_size2()), elements_);

   return const_entry_proxy<NumericT>(row_major::mem_index(mem_row, mem_col, internal_size1(), internal_size2()), elements_);
 }


 //

 // Operator overloads for enabling implicit conversions:

 //

 /** @brief In-place addition of another matrix, forwarded to viennacl::linalg::ambm with both scaling factors equal to one.
  *
  * @param other   The matrix to add
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator += (const matrix_base<NumericT, SizeT, DistanceT> & other)
 {
   const NumericT one = NumericT(1.0);

   viennacl::linalg::ambm(*this,
                          *this, one, 1, false, false,
                          other, one, 1, false, false);
   return *this;
 }


 /** @brief In-place subtraction of another matrix, forwarded to viennacl::linalg::ambm.
  *
  * Both scaling factors are one; the last flag passed for 'other' is true
  * (presumably the sign flip -- confirm against viennacl::linalg::ambm).
  *
  * @param other   The matrix to subtract
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator -= (const matrix_base<NumericT, SizeT, DistanceT> & other)
 {
   const NumericT one = NumericT(1.0);

   viennacl::linalg::ambm(*this,
                          *this, one, 1, false, false,
                          other, one, 1, false, true);
   return *this;
 }


 /** @brief In-place scaling of the matrix by a char value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (char val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place scaling of the matrix by a short value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (short val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place scaling of the matrix by an int value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (int val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place scaling of the matrix by a long value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (long val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place scaling of the matrix by a float value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (float val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place scaling of the matrix by a double value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * @param val   The scaling factor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator *= (double val)
 {
   const NumericT factor = NumericT(val);
   viennacl::linalg::am(*this, *this, factor, 1, false, false);
   return *this;
 }


 /** @brief In-place division of the matrix by a char value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (char val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief In-place division of the matrix by a short value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (short val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief In-place division of the matrix by an int value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (int val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief In-place division of the matrix by a long value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (long val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief In-place division of the matrix by a float value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (float val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief In-place division of the matrix by a double value (converted to NumericT), forwarded to viennacl::linalg::am.
  *
  * The fifth argument passed to am() is true (presumably requesting the reciprocal of the
  * factor -- confirm against viennacl::linalg::am).
  *
  * @param val   The divisor
  * @return Reference to *this
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_base<NumericT, SizeT, DistanceT> & matrix_base<NumericT, SizeT, DistanceT>::operator /= (double val)
 {
   const NumericT divisor = NumericT(val);
   viennacl::linalg::am(*this, *this, divisor, 1, true, false);
   return *this;
 }


 /** @brief Unary minus: returns an expression representing this matrix multiplied by NumericT(-1).
  *
  * @return Expression proxy of type matrix_expression<const matrix_base, const NumericT, op_mult>
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>, const NumericT, op_mult> matrix_base<NumericT, SizeT, DistanceT>::operator-() const
 {
   typedef matrix_expression<const self_type, const NumericT, op_mult>   negation_type;

   return negation_type(*this, NumericT(-1));
 }


 /** @brief Sets the entries of the matrix to zero by calling viennacl::linalg::matrix_assign with value 0 and the additional flag set to true. */
 template<class NumericT, typename SizeT, typename DistanceT>
 void matrix_base<NumericT, SizeT, DistanceT>::clear()
 {
   viennacl::linalg::matrix_assign(*this, NumericT(0), true);
 }


 /** @brief Resizes the matrix to the given dimensions.
  *
  * With 'preserve' set (and existing storage), the current buffer is read back to the host,
  * the overlapping top-left block is copied into a zero-initialized host array with the new
  * padded layout, and a new buffer is created from that array. Otherwise a new buffer is
  * created and cleared.
  *
  * The padded dimensions of the new layout and the extent of the overlapping block are
  * loop invariants and are therefore computed once, outside the copy loops.
  *
  * @param rows       New number of rows (must be positive)
  * @param columns    New number of columns (must be positive)
  * @param preserve   If true, existing entries within the new dimensions are kept
  */
 template<class NumericT, typename SizeT, typename DistanceT>
 void matrix_base<NumericT, SizeT, DistanceT>::resize(size_type rows, size_type columns, bool preserve)
 {
   assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));

   if (preserve && internal_size() > 0)
   {
     //get old entries:
     std::vector< NumericT > old_entries(internal_size());
     viennacl::backend::memory_read(elements_, 0, sizeof(NumericT)*internal_size(), &(old_entries[0]));

     //set up entries of new matrix:
     const vcl_size_t new_internal_size1 = viennacl::tools::align_to_multiple<vcl_size_t>(rows,    dense_padding_size);
     const vcl_size_t new_internal_size2 = viennacl::tools::align_to_multiple<vcl_size_t>(columns, dense_padding_size);
     std::vector< NumericT > new_entries(new_internal_size1 * new_internal_size2);

     // Only the block present in both the old and the new matrix is copied.
     const size_type rows_to_copy = (rows    < size1_) ? rows    : size1_;
     const size_type cols_to_copy = (columns < size2_) ? columns : size2_;

     for (size_type i=0; i<rows_to_copy; ++i)
     {
       for (size_type j=0; j<cols_to_copy; ++j)
       {
         if (row_major_)
           new_entries[row_major::mem_index(i, j, new_internal_size1, new_internal_size2)]
               = old_entries[row_major::mem_index(i, j, internal_size1(), internal_size2())];
         else
           new_entries[column_major::mem_index(i, j, new_internal_size1, new_internal_size2)]
               = old_entries[column_major::mem_index(i, j, internal_size1(), internal_size2())];
       }
     }

     //copy new entries to GPU:
     size1_ = rows;
     size2_ = columns;
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);
     viennacl::backend::memory_create(elements_, sizeof(NumericT)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
   }
   else //discard old entries:
   {
     size1_ = rows;
     size2_ = columns;
     internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, dense_padding_size);
     internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, dense_padding_size);

     viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(elements_));
     clear();
   }
 }


 /** @brief Dense matrix class with a compile-time storage layout tag.
  *
  * Thin layer over matrix_base<NumericT>: every constructor forwards to the base class and
  * passes viennacl::is_row_major<F>::value as the layout flag.
  *
  * @tparam NumericT     Element type
  * @tparam F            Storage layout tag, evaluated through viennacl::is_row_major
  * @tparam AlignmentV   Alignment parameter (not used inside this class)
  */
 template<class NumericT, typename F, unsigned int AlignmentV>
 class matrix : public matrix_base<NumericT>
 {
   typedef matrix<NumericT, F, AlignmentV>          self_type;
   typedef matrix_base<NumericT>                   base_type;
 public:
   typedef typename base_type::size_type             size_type;

   /** @brief Creates a matrix without dimensions; only the layout flag is passed to the base class. */
   explicit matrix() : base_type(static_cast<bool>(viennacl::is_row_major<F>::value)) {}

   /** @brief Creates a matrix with the given dimensions in the given context.
    *
    * @param rows      Number of rows
    * @param columns   Number of columns
    * @param ctx       Context forwarded to the base class
    */
   explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, viennacl::is_row_major<F>::value, ctx) {}

   /** @brief Wraps a user-provided, unpadded buffer (internal sizes equal to rows and cols).
    *
    * @param ptr_to_mem   Pointer to the user-provided buffer
    * @param mem_type     Memory domain of the buffer
    * @param rows         Number of rows
    * @param cols         Number of columns
    */
   explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
     : base_type(ptr_to_mem, mem_type,
                 rows, 0, 1, rows,
                 cols, 0, 1, cols,
                 viennacl::is_row_major<F>::value) {}

   /** @brief Wraps a user-provided buffer with explicitly given internal (padded) dimensions.
    *
    * The base class constructor takes exactly one trailing layout flag; a stray extra
    * boolean argument previously made this constructor fail to compile once instantiated.
    *
    * @param ptr_to_mem           Pointer to the user-provided buffer
    * @param mem_type             Memory domain of the buffer
    * @param rows                 Number of rows
    * @param internal_row_count   Internal (padded) number of rows
    * @param cols                 Number of columns
    * @param internal_col_count   Internal (padded) number of columns
    */
   explicit matrix(NumericT * ptr_to_mem, viennacl::memory_types mem_type,
                   size_type rows, size_type internal_row_count,
                   size_type cols, size_type internal_col_count)
     : base_type(ptr_to_mem, mem_type,
                 rows, 0, 1, internal_row_count,
                 cols, 0, 1, internal_col_count,
                 viennacl::is_row_major<F>::value) {}

 #ifdef VIENNACL_WITH_OPENCL
   /** @brief Wraps a user-provided OpenCL buffer with the given dimensions. */
   explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns, viennacl::is_row_major<F>::value) {}
 #endif

   /** @brief Creates a matrix from a matrix expression by forwarding the proxy to the base class. */
   template<typename LHS, typename RHS, typename OP>
   matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {}

   /** @brief Creates the matrix from an identity matrix (size and context taken from m). */
   matrix(identity_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
   {
     if (base_type::internal_size() > 0)
       base_type::operator=(m);
   }

   /** @brief Creates the matrix from a zero matrix (size and context taken from m). */
   matrix(zero_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
   {
     if (base_type::internal_size() > 0)
       base_type::operator=(m);
   }

   /** @brief Creates the matrix from a scalar matrix (size and context taken from m). */
   matrix(scalar_matrix<NumericT> const & m) : base_type(m.size1(), m.size2(), viennacl::is_row_major<F>::value, m.context())
   {
     if (base_type::internal_size() > 0)
       base_type::operator=(m);
   }

   /** @brief Creates a copy of a matrix_base object; the layout is determined by F, not by 'other'. */
   matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
   {
     base_type::operator=(other);
   }

   //copy constructor:
   matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::is_row_major<F>::value, viennacl::traits::context(other))
   {
     base_type::operator=(other);
   }

   /*template<typename M1>
     self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
     {
       self_type temp(proxy.lhs());
       *this = trans(temp);
       return *this;
     }*/

   using base_type::operator=;

   // the following are needed for Visual Studio:
   template<typename OtherNumericT, typename F2>
   base_type & operator=(viennacl::matrix<OtherNumericT, F2> const & B)                          { return base_type::operator=(static_cast<viennacl::matrix_base<OtherNumericT> const &>(B)); }

   template<typename OtherNumericT, typename F2>
   base_type & operator=(viennacl::matrix_range<viennacl::matrix<OtherNumericT, F2> > const & B) { return base_type::operator=(static_cast<viennacl::matrix_base<OtherNumericT> const &>(B)); }

   template<typename OtherNumericT, typename F2>
   base_type & operator=(viennacl::matrix_slice<viennacl::matrix<OtherNumericT, F2> > const & B) { return base_type::operator=(static_cast<viennacl::matrix_base<OtherNumericT> const &>(B)); }

   /** @brief Resizes the matrix by forwarding to matrix_base::resize.
    *
    * @param rows       New number of rows
    * @param columns    New number of columns
    * @param preserve   If true (default), existing entries are preserved
    */
   void resize(size_type rows, size_type columns, bool preserve = true)
   {
     base_type::resize(rows, columns, preserve);
   }

 }; //matrix


 /** @brief Prints the matrix to a stream in the form "[rows,cols]((a,b,...),(c,d,...),...)".
  *
  * The whole internal buffer is read back to the host and the logical entries are then
  * addressed through start, stride and storage layout. For an empty matrix no read is issued
  * (this avoids taking the address of element 0 of an empty vector) and "[r,c]()" is printed.
  *
  * @param s            The output stream
  * @param gpu_matrix   The matrix to print
  * @return The stream s
  */
 template<class NumericT>
 std::ostream & operator<<(std::ostream & s, const matrix_base<NumericT> & gpu_matrix)
 {
   typedef typename matrix_base<NumericT>::size_type      size_type;

   std::vector<NumericT> tmp(gpu_matrix.internal_size());
   if (!tmp.empty())
     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0]));

   s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";

   s << "(";
   for (size_type i = 0; i < gpu_matrix.size1(); ++i)
   {
     s << "(";
     for (size_type j = 0; j < gpu_matrix.size2(); ++j)
     {
       if (gpu_matrix.row_major())
         s << tmp[row_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
       else
         s << tmp[column_major::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];

       // separator between entries of a row, but not after the last one
       if (j < gpu_matrix.size2() - 1)
         s << ",";
     }
     s << ")";
     // separator between rows, but not after the last one
     if (i < gpu_matrix.size1() - 1)
       s << ",";
   }
   s << ")";
   return s;
 }


 template<typename LHS, typename RHS, typename OP>

 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)

 {

   typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType     ScalarType;


   matrix<ScalarType> temp = expr;

   s << temp;

   return s;

 }


 /** @brief Returns an expression object tagged op_trans for the supplied dense matrix.
 *
 * The matrix is passed as both operands of the expression; nothing is computed in this function.
 *
 * @param mat  The matrix to be transposed
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>
 trans(const matrix_base<NumericT> & mat)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>   result_type;

   return result_type(mat, mat);
 }


 //diag():

 /** @brief Returns a vector expression (tag op_matrix_diag) referring to a diagonal of matrix A.
 *
 * @param A  The dense matrix
 * @param k  Diagonal selector stored in the expression (defaults to 0)
 */
 template<typename NumericT>
 vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>
 diag(const matrix_base<NumericT> & A, int k = 0)
 {
   typedef vector_expression< const matrix_base<NumericT>, const int, op_matrix_diag>   result_type;

   return result_type(A, k);
 }


 /** @brief Returns a matrix expression (tag op_vector_diag) built from vector v.
 *
 * @param v  The vector
 * @param k  Diagonal selector stored in the expression (defaults to 0)
 */
 template<typename NumericT>
 matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
 diag(const vector_base<NumericT> & v, int k = 0)
 {
   typedef matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>   result_type;

   return result_type(v, k);
 }


 // row():

 /** @brief Returns a vector expression (tag op_row) referring to row i of matrix A.
 *
 * @param A  The dense matrix
 * @param i  Row index stored in the expression
 */
 template<typename NumericT, typename F>
 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
 row(const matrix_base<NumericT, F> & A, unsigned int i)
 {
   typedef vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>   result_type;

   return result_type(A, i);
 }


 // column():

 /** @brief Returns a vector expression (tag op_column) referring to column j of matrix A.
 *
 * @param A  The dense matrix
 * @param j  Column index stored in the expression
 */
 template<typename NumericT, typename F>
 vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
 column(const matrix_base<NumericT, F> & A, unsigned int j)
 {
   typedef vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>   result_type;

   return result_type(A, j);
 }


 //

 //cpu to gpu, generic type:

 //

 /** @brief Copies a dense host matrix to a ViennaCL matrix.
 *
 * CPUMatrixT has to provide size1(), size2() and element access via operator()(i,j).
 * If gpu_matrix has zero rows or zero columns, it is resized to the dimensions of cpu_matrix.
 * Afterwards the dimensions of both matrices must agree (checked via assert).
 * The entries are staged in a host buffer of internal_size() elements, indexed via F::mem_index(),
 * and transferred with a single memory_write().
 *
 * @param cpu_matrix  The source matrix on the host
 * @param gpu_matrix  The destination ViennaCL matrix
 */
 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
 void copy(const CPUMatrixT & cpu_matrix,
           matrix<NumericT, F, AlignmentV> & gpu_matrix )
 {
   typedef typename matrix<NumericT, F, AlignmentV>::size_type      size_type;

   if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
   {
     gpu_matrix.resize(cpu_matrix.size1(),
                       cpu_matrix.size2(), false);
   }

   assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );

   std::vector<NumericT> data(gpu_matrix.internal_size());
   for (size_type i = 0; i < gpu_matrix.size1(); ++i)
   {
     for (size_type j = 0; j < gpu_matrix.size2(); ++j)
       data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
   }

   // data[0] must not be evaluated for an empty buffer (undefined behavior):
   // an empty matrix passes a null pointer together with a byte count of zero.
   NumericT const * host_data = data.empty() ? static_cast<NumericT const *>(0) : &(data[0]);
   viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), host_data);
 }


 //

 //cpu to gpu, STL type:

 //

 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>

 void copy(const std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix,

           matrix<NumericT, F, AlignmentV> & gpu_matrix )

 {

   typedef typename matrix<NumericT, F, AlignmentV>::size_type      size_type;


   if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)

   {

     gpu_matrix.resize(cpu_matrix.size(),

                       cpu_matrix[0].size(),

         false);

   }


   assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );


   std::vector<NumericT> data(gpu_matrix.internal_size());

   for (size_type i = 0; i < gpu_matrix.size1(); ++i)

   {

     assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );


     for (size_type j = 0; j < gpu_matrix.size2(); ++j)

       data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];

   }


   viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), &(data[0]));

   //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);

 }


 //

 //cpu to gpu, raw host buffer given as a pointer range (copied verbatim, no index remapping):

 //

 template<typename NumericT, typename F, unsigned int AlignmentV>

 void fast_copy(NumericT * cpu_matrix_begin,

                NumericT * cpu_matrix_end,

                matrix<NumericT, F, AlignmentV> & gpu_matrix)

 {

   if (gpu_matrix.internal_size() == 0)

     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);

   else

   {

     assert( (gpu_matrix.internal_size() >= static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin)) && bool("fast_copy(): Matrix not large enough to fit data!"));

     viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * static_cast<vcl_size_t>(cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);

   }

 }


 #ifdef VIENNACL_WITH_ARMADILLO


 /** @brief Copies a dense Armadillo matrix to a ViennaCL matrix.
 *
 * If vcl_matrix has zero rows or zero columns, it is resized to the dimensions of arma_matrix;
 * otherwise the dimensions must agree (checked via assert).
 * The entries are staged in a typesafe_host_array, indexed via F::mem_index(), and written in one transfer.
 *
 * @param arma_matrix  The source Armadillo matrix
 * @param vcl_matrix   The destination ViennaCL matrix
 */
 template<typename NumericT, typename F, unsigned int AlignmentV>
 void copy(arma::Mat<NumericT>                       const & arma_matrix,
           viennacl::matrix<NumericT, F, AlignmentV>       & vcl_matrix)
 {
   typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type      size_type;

   bool const target_is_empty = (vcl_matrix.size1() == 0 || vcl_matrix.size2() == 0);
   if (!target_is_empty)
   {
     assert(    (vcl_matrix.size1() == static_cast<vcl_size_t>(arma_matrix.n_rows))
             && (vcl_matrix.size2() == static_cast<vcl_size_t>(arma_matrix.n_cols))
             && bool("matrix size mismatch")
             );
   }
   else
     vcl_matrix.resize(arma_matrix.n_rows, arma_matrix.n_cols, false);

   // stage the entries on the host:
   viennacl::backend::typesafe_host_array<NumericT> staging(vcl_matrix.handle(), vcl_matrix.internal_size());
   for (size_type col = 0; col < vcl_matrix.size2(); ++col) // iterate along columns is certainly fast for arma_matrix
   {
     for (size_type row = 0; row < vcl_matrix.size1(); ++row)
       staging.set(F::mem_index(row, col, vcl_matrix.internal_size1(), vcl_matrix.internal_size2()), arma_matrix(row,col));
   }

   // transfer the staged data:
   viennacl::backend::memory_write(vcl_matrix.handle(), 0, staging.raw_size(), staging.get());
 }

 #endif


 #ifdef VIENNACL_WITH_EIGEN

 namespace detail
 {
   /** @brief Shared implementation for copying dense Eigen matrix types to a ViennaCL matrix.
   *
   * EigenMatrixTypeT has to provide rows(), cols() and element access via operator()(i,j).
   * If gpu_matrix has zero rows or zero columns, it is resized to the dimensions of cpu_matrix;
   * otherwise the dimensions must agree (checked via assert).
   * The entries are staged in a host buffer of internal_size() elements, indexed via F::mem_index(),
   * and transferred with a single memory_write().
   *
   * @param cpu_matrix  The source Eigen object
   * @param gpu_matrix  The destination ViennaCL matrix
   */
   template<typename EigenMatrixTypeT, typename NumericT, typename F, unsigned int AlignmentV>
   void copy_from_eigen_matrix(EigenMatrixTypeT const & cpu_matrix,
                               viennacl::matrix<NumericT, F, AlignmentV> & gpu_matrix)
   {
     typedef typename viennacl::matrix<NumericT, F, AlignmentV>::size_type      size_type;

     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
     {
       gpu_matrix.resize(cpu_matrix.rows(),
                         cpu_matrix.cols(),
                         false);
     }
     else
     {
       assert(    (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
               && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
               && bool("matrix size mismatch")
               );
     }

     std::vector<NumericT> data(gpu_matrix.internal_size());
     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
     {
       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
     }

     // data[0] must not be evaluated for an empty buffer (undefined behavior):
     // an empty matrix passes a null pointer together with a byte count of zero.
     NumericT const * host_data = data.empty() ? static_cast<NumericT const *>(0) : &(data[0]);
     viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), host_data);
   }

 }


 /** @brief Copies a dynamically sized dense Eigen matrix to a ViennaCL matrix.
 *
 * Thin wrapper which forwards to detail::copy_from_eigen_matrix().
 *
 * @param cpu_matrix  The source Eigen matrix
 * @param vcl_matrix  The destination ViennaCL matrix
 */
 template<typename NumericT, int EigenOptions, typename F, unsigned int AlignmentV>
 void copy(Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions> const & cpu_matrix,
           viennacl::matrix<NumericT, F, AlignmentV> & vcl_matrix)
 {
   viennacl::detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
 }


 /** @brief Copies an Eigen::Map over a dynamically sized dense Eigen matrix to a ViennaCL matrix.
 *
 * Thin wrapper which forwards to detail::copy_from_eigen_matrix().
 *
 * @param cpu_matrix  The source Eigen map
 * @param vcl_matrix  The destination ViennaCL matrix
 */
 template<typename NumericT, int EigenOptions, int EigenMatTypeV, typename EigenStrideT, typename F, unsigned int AlignmentV>
 void copy(Eigen::Map<Eigen::Matrix<NumericT, Eigen::Dynamic, Eigen::Dynamic, EigenOptions>, EigenMatTypeV, EigenStrideT> const & cpu_matrix,
           viennacl::matrix<NumericT, F, AlignmentV> & vcl_matrix)
 {
   viennacl::detail::copy_from_eigen_matrix(cpu_matrix, vcl_matrix);
 }

 #endif


 #ifdef VIENNACL_WITH_MTL4


 /** @brief Copies a dense MTL4 matrix (mtl::dense2D) to a ViennaCL matrix.
 *
 * If gpu_matrix has zero rows or zero columns, it is resized to the dimensions of cpu_matrix;
 * otherwise the dimensions must agree (checked via assert).
 * The entries are staged in a host buffer of internal_size() elements, indexed via F::mem_index(),
 * and transferred with a single memory_write().
 *
 * @param cpu_matrix  The source MTL4 matrix
 * @param gpu_matrix  The destination ViennaCL matrix
 */
 template<typename NumericT, typename T, typename F, unsigned int AlignmentV>
 void copy(const mtl::dense2D<NumericT, T>& cpu_matrix,
           matrix<NumericT, F, AlignmentV> & gpu_matrix)
 {
   typedef typename matrix<NumericT, F, AlignmentV>::size_type      size_type;

   if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
   {
     gpu_matrix.resize(cpu_matrix.num_rows(),
                       cpu_matrix.num_cols(),
                       false);
   }
   else
   {
     assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
             && (gpu_matrix.size2() == cpu_matrix.num_cols())
             && bool("matrix size mismatch")
             );
   }

   std::vector<NumericT> data(gpu_matrix.internal_size());
   for (size_type i = 0; i < gpu_matrix.size1(); ++i)
   {
     for (size_type j = 0; j < gpu_matrix.size2(); ++j)
       data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
   }

   // data[0] must not be evaluated for an empty buffer (undefined behavior):
   // an empty matrix passes a null pointer together with a byte count of zero.
   NumericT const * host_data = data.empty() ? static_cast<NumericT const *>(0) : &(data[0]);
   viennacl::backend::memory_write(gpu_matrix.handle(), 0, sizeof(NumericT) * data.size(), host_data);
 }

 #endif


 //

 //gpu to cpu, generic type

 //

 /** @brief Copies a ViennaCL matrix to a dense host matrix.
 *
 * Nothing is done if gpu_matrix has zero rows or zero columns.
 * Otherwise cpu_matrix must already have the dimensions of gpu_matrix (checked via assert,
 * using viennacl::traits::size1()/size2()); it is not resized here.
 * The complete internal buffer is read back once and the entries are written to cpu_matrix via operator()(i,j).
 *
 * @param gpu_matrix  The source ViennaCL matrix
 * @param cpu_matrix  The destination matrix on the host
 */
 template<typename CPUMatrixT, typename NumericT, typename F, unsigned int AlignmentV>
 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
           CPUMatrixT & cpu_matrix )
 {
   // size_type of the matrix type actually passed in (not of an unrelated matrix<float, ...>)
   typedef typename matrix<NumericT, F, AlignmentV>::size_type      size_type;

   if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
   {
     // both dimension checks are loop-invariant, hence carried out once up front:
     assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
     assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));

     std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));

     //now copy entries to cpu_matrix:
     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
     {
       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
         cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
     }
   }
 }


 //gpu to cpu, STL type

 /** @brief Copies a ViennaCL matrix to a dense matrix stored as std::vector< std::vector<> >.
 *
 * Nothing is done if gpu_matrix has zero rows or zero columns.
 * Otherwise cpu_matrix must already hold size1() rows of length size2() each (checked via assert);
 * it is not resized here.
 * The complete internal buffer is read back once and the entries are written to cpu_matrix[i][j].
 *
 * @param gpu_matrix  The source ViennaCL matrix
 * @param cpu_matrix  The destination matrix on the host (one inner vector per row)
 */
 template<typename NumericT, typename A1, typename A2, typename F, unsigned int AlignmentV>
 void copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,
           std::vector< std::vector<NumericT, A1>, A2> & cpu_matrix)
 {
   // size_type of the matrix type actually passed in (not of an unrelated matrix<float, ...>)
   typedef typename matrix<NumericT, F, AlignmentV>::size_type      size_type;

   if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
   {
     assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));

     std::vector<NumericT> temp_buffer(gpu_matrix.internal_size());
     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), &(temp_buffer[0]));

     //now copy entries to cpu_matrix:
     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
     {
       // every row is a separate vector, so its length has to be checked individually
       assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));

       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
         cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
     }
   }
 }


 //gpu to cpu, raw host buffer (the full internal buffer is copied verbatim, no index remapping)

 template<typename NumericT, typename F, unsigned int AlignmentV>

 void fast_copy(const matrix<NumericT, F, AlignmentV> & gpu_matrix,

                NumericT * cpu_matrix_begin)

 {

   viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin);

 }


 // operator +

 /** @brief Addition of two matrix expressions: proxy1 + proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_add
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename LHS1, typename RHS1, typename OP1,
          typename LHS2, typename RHS2, typename OP2>
 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
                    const matrix_expression<const LHS2, const RHS2, OP2>,
                    op_add>
 operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
             matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
 {
   typedef matrix_expression<const LHS1, const RHS1, OP1>                    lhs_type;
   typedef matrix_expression<const LHS2, const RHS2, OP2>                    rhs_type;
   typedef matrix_expression<const lhs_type, const rhs_type, op_add>         result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Addition of a matrix expression and a dense matrix: proxy1 + proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_add
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename LHS1, typename RHS1, typename OP1,
          typename NumericT>
 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
                    const matrix_base<NumericT>,
                    op_add>
 operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
             matrix_base<NumericT> const & proxy2)
 {
   typedef matrix_expression<const LHS1, const RHS1, OP1>                              lhs_type;
   typedef matrix_expression<const lhs_type, const matrix_base<NumericT>, op_add>      result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Addition of a dense matrix and a matrix expression: proxy1 + proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_add
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename NumericT,
          typename LHS2, typename RHS2, typename OP2>
 matrix_expression< const matrix_base<NumericT>,
                    const matrix_expression<const LHS2, const RHS2, OP2>,
                    op_add>
 operator + (matrix_base<NumericT> const & proxy1,
             matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
 {
   typedef matrix_expression<const LHS2, const RHS2, OP2>                              rhs_type;
   typedef matrix_expression<const matrix_base<NumericT>, const rhs_type, op_add>      result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Addition of two dense matrices: m1 + m2.
 *
 * Checks via assert that both operands have the same dimensions (as the other operator+ overloads
 * for matrix operands do) and returns an op_add expression wrapping the two matrices.
 * Nothing is computed in this function.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_add >
 operator + (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
 {
   assert(    (m1.size1() == m2.size1())
           && (m1.size2() == m2.size2())
           && bool("Incompatible matrix sizes!"));

   return matrix_expression< const matrix_base<NumericT>,
       const matrix_base<NumericT>,
       op_add > (m1, m2);
 }


 // operator -

 /** @brief Subtraction of two matrix expressions: proxy1 - proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_sub
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename LHS1, typename RHS1, typename OP1,
          typename LHS2, typename RHS2, typename OP2>
 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
                    const matrix_expression<const LHS2, const RHS2, OP2>,
                    op_sub>
 operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
             matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
 {
   typedef matrix_expression<const LHS1, const RHS1, OP1>                    lhs_type;
   typedef matrix_expression<const LHS2, const RHS2, OP2>                    rhs_type;
   typedef matrix_expression<const lhs_type, const rhs_type, op_sub>         result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Subtraction of a dense matrix from a matrix expression: proxy1 - proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_sub
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename LHS1, typename RHS1, typename OP1,
          typename NumericT>
 matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
                    const matrix_base<NumericT>,
                    op_sub>
 operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
             matrix_base<NumericT> const & proxy2)
 {
   typedef matrix_expression<const LHS1, const RHS1, OP1>                              lhs_type;
   typedef matrix_expression<const lhs_type, const matrix_base<NumericT>, op_sub>      result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Subtraction of a matrix expression from a dense matrix: proxy1 - proxy2.
 *
 * Checks via assert that both operands have the same dimensions and returns an op_sub
 * expression wrapping the two operands. Nothing is computed in this function.
 */
 template<typename NumericT,
          typename LHS2, typename RHS2, typename OP2>
 matrix_expression< const matrix_base<NumericT>,
                    const matrix_expression<const LHS2, const RHS2, OP2>,
                    op_sub>
 operator - (matrix_base<NumericT> const & proxy1,
             matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
 {
   typedef matrix_expression<const LHS2, const RHS2, OP2>                              rhs_type;
   typedef matrix_expression<const matrix_base<NumericT>, const rhs_type, op_sub>      result_type;

   assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
           && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
           && bool("Incompatible matrix sizes!"));

   return result_type(proxy1, proxy2);
 }


 /** @brief Subtraction of two dense matrices: m1 - m2.
 *
 * Checks via assert that both operands have the same dimensions (as the other operator- overloads
 * for matrix operands do) and returns an op_sub expression wrapping the two matrices.
 * Nothing is computed in this function.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_sub >
 operator - (const matrix_base<NumericT> & m1, const matrix_base<NumericT> & m2)
 {
   assert(    (m1.size1() == m2.size1())
           && (m1.size2() == m2.size2())
           && bool("Incompatible matrix sizes!"));

   return matrix_expression< const matrix_base<NumericT>,
       const matrix_base<NumericT>,
       op_sub > (m1, m2);
 }


 // operator *

 /** @brief Scaling of a dense matrix from the left: value * m1.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_mult expression wrapping the matrix and the scalar; nothing is computed here.
 */
 template<typename S1, typename NumericT>
 typename viennacl::enable_if<    viennacl::is_any_scalar<S1>::value,
                                  matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
                             >::type
 operator * (S1 const & value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const S1, op_mult>   result_type;

   return result_type(m1, value);
 }


 /** @brief Scaling of a dense matrix from the left by a char: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (char value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a dense matrix from the left by a short: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (short value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a dense matrix from the left by an int: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (int value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a dense matrix from the left by a long: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (long value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a dense matrix from the left by a float: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (float value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a dense matrix from the left by a double: value * m1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (double value, matrix_base<NumericT> const & m1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(value));
 }


 /** @brief Scaling of a matrix expression from the right: proxy * val.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_mult expression wrapping the expression and the scalar; nothing is computed here.
 */
 template<typename LHS, typename RHS, typename OP, typename S1>
 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
                               matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>
                             >::type
 operator * (matrix_expression< LHS, RHS, OP> const & proxy,
             S1 const & val)
 {
   typedef matrix_expression< LHS, RHS, OP>                               operand_type;
   typedef matrix_expression< const operand_type, const S1, op_mult>      result_type;

   return result_type(proxy, val);
 }


 /** @brief Scaling of a matrix expression from the left: val * proxy.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_mult expression with the matrix expression as first and the scalar as second operand;
 * nothing is computed here.
 */
 template<typename S1, typename LHS, typename RHS, typename OP>
 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
                               matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>
                             >::type
 operator * (S1 const & val,
             matrix_expression< LHS, RHS, OP> const & proxy)
 {
   typedef matrix_expression< LHS, RHS, OP>                               operand_type;
   typedef matrix_expression< const operand_type, const S1, op_mult>      result_type;

   return result_type(proxy, val);
 }


 /** @brief Scaling of a dense matrix from the right: m1 * s1.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_mult expression wrapping the matrix and the scalar; nothing is computed here.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
                               matrix_expression< const matrix_base<NumericT>, const S1, op_mult>
                             >::type
 operator * (matrix_base<NumericT> const & m1, S1 const & s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const S1, op_mult>   result_type;

   return result_type(m1, s1);
 }


 /** @brief Scaling of a dense matrix from the right by a char: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, char s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Scaling of a dense matrix from the right by a short: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, short s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Scaling of a dense matrix from the right by an int: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, int s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Scaling of a dense matrix from the right by a long: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, long s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Scaling of a dense matrix from the right by a float: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, float s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Scaling of a dense matrix from the right by a double: m1 * s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>
 operator * (matrix_base<NumericT> const & m1, double s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_mult>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 // operator *=


 /** @brief In-place scaling of a dense matrix: m1 *= gpu_val.
 *
 * Enabled for every S1 for which viennacl::is_scalar<S1>::value holds.
 * Calls viennacl::linalg::am() with m1 as both destination and source; the sign-flip argument
 * is taken from viennacl::is_flip_sign_scalar<S1>.
 *
 * @return Reference to m1
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
 operator *= (matrix_base<NumericT> & m1, S1 const & gpu_val)
 {
   bool const flip_sign = viennacl::is_flip_sign_scalar<S1>::value;

   viennacl::linalg::am(m1, m1, gpu_val, 1, false, flip_sign);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by a char: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, char gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by a short: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, short gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by an int: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, int gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by a long: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, long gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by a float: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, float gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 /** @brief In-place scaling of a dense matrix by a double: m1 *= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both destination and source.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator *= (matrix_base<NumericT> & m1, double gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, false, false);
   return m1;
 }


 // operator /


 /** @brief Division of a matrix expression by a scalar: proxy / val.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_div expression wrapping the expression and the scalar; nothing is computed here.
 */
 template<typename LHS, typename RHS, typename OP, typename S1>
 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
                               matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>
                             >::type
 operator / (matrix_expression<const LHS, const RHS, OP> const & proxy,
             S1 const & val)
 {
   typedef matrix_expression<const LHS, const RHS, OP>                    operand_type;
   typedef matrix_expression< const operand_type, const S1, op_div>       result_type;

   return result_type(proxy, val);
 }


 /** @brief Division of a dense matrix by a scalar: m1 / s1.
 *
 * Enabled for every S1 for which viennacl::is_any_scalar<S1>::value holds.
 * Returns an op_div expression wrapping the matrix and the scalar; nothing is computed here.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
                               matrix_expression< const matrix_base<NumericT>, const S1, op_div>
                             >::type
 operator / (matrix_base<NumericT> const & m1, S1 const & s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const S1, op_div>   result_type;

   return result_type(m1, s1);
 }


 /** @brief Division of a dense matrix by a char: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, char s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Division of a dense matrix by a short: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, short s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Division of a dense matrix by an int: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, int s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Division of a dense matrix by a long: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, long s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Division of a dense matrix by a float: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, float s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 /** @brief Division of a dense matrix by a double: m1 / s1.
 *
 * The scalar is converted to NumericT before it is stored in the returned op_div expression.
 */
 template<typename NumericT>
 matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>
 operator / (matrix_base<NumericT> const & m1, double s1)
 {
   typedef matrix_expression< const matrix_base<NumericT>, const NumericT, op_div>   result_type;

   return result_type(m1, static_cast<NumericT>(s1));
 }


 // operator /=


 /** @brief In-place division of a dense matrix by a scalar: m1 /= gpu_val.
 *
 * Enabled for every S1 for which viennacl::is_scalar<S1>::value holds.
 * Calls viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_scalar<S1>::value, matrix_base<NumericT> & >::type
 operator /= (matrix_base<NumericT> & m1, S1 const & gpu_val)
 {
   bool const use_reciprocal = true;    // division rather than multiplication
   bool const flip_sign      = false;

   viennacl::linalg::am(m1, m1, gpu_val, 1, use_reciprocal, flip_sign);
   return m1;
 }


 /** @brief In-place division of a dense matrix by a char: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT and passed to viennacl::linalg::am() with m1 as both
 * destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, char gpu_val)
 {
   NumericT const alpha = NumericT(gpu_val);

   viennacl::linalg::am(m1, m1, alpha, 1, true, false);
   return m1;
 }


 /** @brief In-place division of a dense matrix by a short: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT (as the char overload and the operator*= overloads do)
 * and passed to viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, short gpu_val)
 {
   viennacl::linalg::am(m1,
                        m1, NumericT(gpu_val), 1, true, false);
   return m1;
 }


 /** @brief In-place division of a dense matrix by an int: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT (as the char overload and the operator*= overloads do)
 * and passed to viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, int gpu_val)
 {
   viennacl::linalg::am(m1,
                        m1, NumericT(gpu_val), 1, true, false);
   return m1;
 }


 /** @brief In-place division of a dense matrix by a long: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT (as the char overload and the operator*= overloads do)
 * and passed to viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, long gpu_val)
 {
   viennacl::linalg::am(m1,
                        m1, NumericT(gpu_val), 1, true, false);
   return m1;
 }


 /** @brief In-place division of a dense matrix by a float: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT (as the char overload and the operator*= overloads do)
 * and passed to viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, float gpu_val)
 {
   viennacl::linalg::am(m1,
                        m1, NumericT(gpu_val), 1, true, false);
   return m1;
 }


 /** @brief In-place division of a dense matrix by a double: m1 /= gpu_val.
 *
 * The scalar is converted to NumericT (as the char overload and the operator*= overloads do)
 * and passed to viennacl::linalg::am() with m1 as both destination and source and the reciprocal flag set.
 *
 * @return Reference to m1
 */
 template<typename NumericT>
 matrix_base<NumericT> &
 operator /= (matrix_base<NumericT> & m1, double gpu_val)
 {
   viennacl::linalg::am(m1,
                        m1, NumericT(gpu_val), 1, true, false);
   return m1;
 }


 // outer_prod(v1, v2) * val;

 /** @brief Scaling of an outer-product expression from the right: outer_prod(v1, v2) * val.
 *
 * Enabled for every S1 for which viennacl::is_scalar<S1>::value holds.
 * Returns an op_mult expression wrapping the outer-product expression and the scalar; nothing is computed here.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
                               viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
                                                            const S1,
                                                            op_mult>
                             >::type
 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
           const S1 & val)
 {
   typedef viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>   outer_prod_type;
   typedef viennacl::matrix_expression< const outer_prod_type, const S1, op_mult>                            result_type;

   return result_type(proxy, val);
 }


 /** @brief Scaling of an outer-product expression from the right by a host scalar: outer_prod(v1, v2) * val.
 *
 * Enabled for every S1 for which viennacl::is_cpu_scalar<S1>::value holds.
 * The scalar is converted to NumericT before it is stored in the returned op_mult expression.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value,
                               viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
                                                            const NumericT,
                                                            op_mult>
                             >::type
 operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
           const S1 & val)
 {
   typedef viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>   outer_prod_type;
   typedef viennacl::matrix_expression< const outer_prod_type, const NumericT, op_mult>                      result_type;

   return result_type(proxy, static_cast<NumericT>(val));
 }


 // val * outer_prod(v1, v2);

 /** @brief Scaling of an outer-product expression from the left: val * outer_prod(v1, v2).
 *
 * Enabled for every S1 for which viennacl::is_scalar<S1>::value holds.
 * Returns an op_mult expression with the outer-product expression as first and the scalar as second operand;
 * nothing is computed here.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
                               viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
                                                            const S1,
                                                            op_mult>
                             >::type
 operator*(const S1 & val,
           const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
 {
   typedef viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>   outer_prod_type;
   typedef viennacl::matrix_expression< const outer_prod_type, const S1, op_mult>                            result_type;

   return result_type(proxy, val);
 }


 /** @brief Scaling of an outer-product expression from the left by a host scalar: val * outer_prod(v1, v2).
 *
 * Enabled for every S1 for which viennacl::is_cpu_scalar<S1>::value holds.
 * The scalar is converted to NumericT before it is stored (as second operand) in the returned op_mult expression.
 */
 template<typename NumericT, typename S1>
 typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value,
                               viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
                                                            const NumericT,
                                                            op_mult>
                             >::type
 operator*(const S1 & val,
           const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
 {
   typedef viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>   outer_prod_type;
   typedef viennacl::matrix_expression< const outer_prod_type, const NumericT, op_mult>                      result_type;

   return result_type(proxy, static_cast<NumericT>(val));
 }


 //

 // Specify available operations:

 //


 namespace linalg

 {

 namespace detail

 {


   // x = y

   /** @brief Executor for x = y with dense matrices x and y: calls am() with a scaling factor of T(1). */
   template<typename T>
   struct op_executor<matrix_base<T>, op_assign, matrix_base<T> >
   {
     static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
     {
       T const unit_factor = T(1);
       viennacl::linalg::am(lhs, rhs, unit_factor, 1, false, false);
     }
   };


   // x = trans(y)

   /** @brief Executor for x = trans(y): constructs a temporary matrix from the op_trans expression, then calls am() with a scaling factor of T(1). */
   template<typename T>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
   {
     typedef matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>   trans_expression_type;

     static void apply(matrix_base<T> & lhs, trans_expression_type const & rhs)
     {
       matrix_base<T> transposed(rhs);
       T const unit_factor = T(1);
       viennacl::linalg::am(lhs, transposed, unit_factor, 1, false, false);
     }
   };


   // x += y
   // In-place addition of a dense matrix.
   template<typename T>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_base<T> >
   {
     /** @brief Performs lhs = 1 * lhs + 1 * rhs via viennacl::linalg::ambm(). */
     static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
     {
       bool const divide = false;
       bool const negate = false;
       viennacl::linalg::ambm(lhs,
                              lhs, T(1), 1, divide, negate,
                              rhs, T(1), 1, divide, negate);
     }
   };


   // x += trans(y)
   // In-place addition of a transposed dense matrix.
   template<typename T>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
   {
     /** @brief Builds a temporary matrix from the transpose expression and adds it to lhs via viennacl::linalg::ambm(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
     {
       matrix_base<T> transposed(rhs);

       bool const divide = false;
       bool const negate = false;
       viennacl::linalg::ambm(lhs,
                              lhs,        T(1), 1, divide, negate,
                              transposed, T(1), 1, divide, negate);
     }
   };


   // x -= y
   // In-place subtraction of a dense matrix.
   template<typename T>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_base<T> >
   {
     /** @brief Performs lhs = 1 * lhs - 1 * rhs via viennacl::linalg::ambm(); the subtraction is requested through the sign flag of the second term. */
     static void apply(matrix_base<T> & lhs, matrix_base<T> const & rhs)
     {
       bool const divide    = false;
       bool const keep_sign = false;
       bool const flip_sign = true;
       viennacl::linalg::ambm(lhs,
                              lhs, T(1), 1, divide, keep_sign,
                              rhs, T(1), 1, divide, flip_sign);
     }
   };


   // x -= trans(y)
   // In-place subtraction of a transposed dense matrix.
   template<typename T>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> >
   {
     /** @brief Builds a temporary matrix from the transpose expression and subtracts it from lhs via viennacl::linalg::ambm(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans> const & rhs)
     {
       matrix_base<T> transposed(rhs);

       bool const divide    = false;
       bool const keep_sign = false;
       bool const flip_sign = true;
       viennacl::linalg::ambm(lhs,
                              lhs,        T(1), 1, divide, keep_sign,
                              transposed, T(1), 1, divide, flip_sign);
     }
   };


   // x = alpha * y
   // Assignment of a scaled dense matrix.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
   {
     /** @brief Hands the matrix (proxy.lhs()) and the factor (proxy.rhs()) to viennacl::linalg::am(); the factor is multiplied and the sign kept. */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
     {
       bool const divide = false;
       bool const negate = false;
       viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, divide, negate);
     }
   };


   // x += alpha * y
   // In-place addition of a scaled dense matrix.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
   {
     /** @brief Performs lhs = 1 * lhs + alpha * y via viennacl::linalg::ambm(), with y = proxy.lhs() and alpha = proxy.rhs(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
     {
       bool const divide = false;
       bool const negate = false;
       viennacl::linalg::ambm(lhs,
                              lhs,         T(1),        1, divide, negate,
                              proxy.lhs(), proxy.rhs(), 1, divide, negate);
     }
   };


   // x -= alpha * y
   // In-place subtraction of a scaled dense matrix.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> >
   {
     /** @brief Performs lhs = 1 * lhs - alpha * y via viennacl::linalg::ambm(), with y = proxy.lhs() and alpha = proxy.rhs(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_mult> const & proxy)
     {
       bool const divide    = false;
       bool const keep_sign = false;
       bool const flip_sign = true;
       viennacl::linalg::ambm(lhs,
                              lhs,         T(1),        1, divide, keep_sign,
                              proxy.lhs(), proxy.rhs(), 1, divide, flip_sign);
     }
   };


   // x = alpha * expr, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
   {
     /** @brief Evaluates the nested expression into a temporary and assigns the scaled temporary to lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T> (default layout argument) otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs = evaluated * proxy.rhs();
         return;
       }

       matrix<T> evaluated(proxy.lhs());
       lhs = evaluated * proxy.rhs();
     }
   };


   // x += alpha * expr, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
   {
     /** @brief Evaluates the nested expression into a temporary and adds the scaled temporary to lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T> (default layout argument) otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs += evaluated * proxy.rhs();
         return;
       }

       matrix<T> evaluated(proxy.lhs());
       lhs += evaluated * proxy.rhs();
     }
   };


   // x -= alpha * expr, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
   {
     /** @brief Evaluates the nested expression into a temporary and subtracts the scaled temporary from lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T> (default layout argument) otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs -= evaluated * proxy.rhs();
         return;
       }

       matrix<T> evaluated(proxy.lhs());
       lhs -= evaluated * proxy.rhs();
     }
   };


   // x = y / alpha
   // Assignment of a dense matrix divided by a scalar.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
   {
     /** @brief Hands the matrix (proxy.lhs()) and the divisor (proxy.rhs()) to viennacl::linalg::am() with the division flag set. */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
     {
       bool const divide = true;   // proxy.rhs() acts as divisor
       bool const negate = false;
       viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, divide, negate);
     }
   };


   // x += y / alpha
   // In-place addition of a dense matrix divided by a scalar.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
   {
     /** @brief Performs lhs = 1 * lhs + y / alpha via viennacl::linalg::ambm(), with y = proxy.lhs() and alpha = proxy.rhs(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
     {
       bool const multiply = false;  // division flag cleared
       bool const divide   = true;   // division flag set
       bool const negate   = false;
       viennacl::linalg::ambm(lhs,
                              lhs,         T(1),        1, multiply, negate,
                              proxy.lhs(), proxy.rhs(), 1, divide,   negate);
     }
   };


   // x -= y / alpha
   // In-place subtraction of a dense matrix divided by a scalar.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_base<T>, const ScalarType, op_div> >
   {
     /** @brief Performs lhs = 1 * lhs - y / alpha via viennacl::linalg::ambm(), with y = proxy.lhs() and alpha = proxy.rhs(). */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const ScalarType, op_div> const & proxy)
     {
       bool const multiply  = false;  // division flag cleared
       bool const divide    = true;   // division flag set
       bool const keep_sign = false;
       bool const flip_sign = true;
       viennacl::linalg::ambm(lhs,
                              lhs,         T(1),        1, multiply, keep_sign,
                              proxy.lhs(), proxy.rhs(), 1, divide,   flip_sign);
     }
   };


   // x = expr / alpha, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
   {
     /** @brief Evaluates the nested expression into a temporary and assigns the temporary divided by proxy.rhs() to lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T> (default layout argument) otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs = evaluated / proxy.rhs();
         return;
       }

       matrix<T> evaluated(proxy.lhs());
       lhs = evaluated / proxy.rhs();
     }
   };


   // x += expr / alpha, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
   {
     /** @brief Evaluates the nested expression into a temporary and adds the temporary divided by proxy.rhs() to lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T> (default layout argument) otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs += evaluated / proxy.rhs();
         return;
       }

       matrix<T> evaluated(proxy.lhs());
       lhs += evaluated / proxy.rhs();
     }
   };


   // x -= expr / alpha, where expr is itself a matrix expression
   template<typename T, typename LHS, typename RHS, typename OP, typename ScalarType>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
   {
     /** @brief Evaluates the nested expression into a temporary and subtracts the temporary divided by proxy.rhs() from lhs.
      *
      * The temporary is a matrix<T, column_major> when lhs.row_major() is false and a matrix<T, row_major> otherwise.
      */
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
     {
       if (!lhs.row_major())
       {
         matrix<T, column_major> evaluated(proxy.lhs());
         lhs -= evaluated / proxy.rhs();
         return;
       }

       matrix<T, row_major> evaluated(proxy.lhs());
       lhs -= evaluated / proxy.rhs();
     }
   };


   // x = expr1 + expr2
   //
   // The first apply() is the generic fallback. The remaining overloads cover sums whose terms are plain dense
   // matrices, optionally multiplied or divided by a scalar; each of them is a single call to viennacl::linalg::ambm().
   // Flag naming used in those overloads (y: first term, z: second term):
   //   inv_* == true  -> the term is divided by its scalar instead of multiplied
   //   neg_* == true  -> the sign of the term is flipped
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
   {
     // Generic x = expr1 + expr2.
     // If lhs is reported to alias either operand, the sum is accumulated in a temporary and assigned afterwards.
     // Otherwise lhs is assigned the first operand and the second operand is added in place.
     template<typename LHS1, typename RHS1>
     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
     {
       typedef op_executor<matrix_base<T>, op_assign,      LHS>   first_term_executor;
       typedef op_executor<matrix_base<T>, op_inplace_add, RHS>   second_term_executor;

       bool const aliases_first  = op_aliasing(lhs, proxy.lhs());
       bool const aliases_second = op_aliasing(lhs, proxy.rhs());

       if (!(aliases_first || aliases_second))
       {
         first_term_executor::apply(lhs, proxy.lhs());
         second_term_executor::apply(lhs, proxy.rhs());
         return;
       }

       matrix_base<T> sum(proxy.lhs());
       second_term_executor::apply(sum, proxy.rhs());
       lhs = sum;
     }

     // x = y + z
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs(), T(1), 1, inv_y, neg_y,
                              proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x = alpha * y + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x = y / alpha + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x = y + beta * z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs(), T(1), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x = y + z / beta
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = true,  neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs(), T(1), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x = alpha * y + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x = alpha * y + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = true,  neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x = y / alpha + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x = y / alpha + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = true, neg_y = false;
       bool const inv_z = true, neg_z = false;
       viennacl::linalg::ambm(lhs,
                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }
   };


   // dense = sparse * dense   and   dense = sparse * trans(dense)
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
   {
     // dense = sparse * dense
     // When lhs is reported to alias the right factor (x = A * x), the product is evaluated into a temporary first.
     template< typename SparseMatrixType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
                       const viennacl::matrix_base<T>,
                       viennacl::op_prod> const & proxy)
     {
       if (!op_aliasing(lhs, proxy.rhs()))
       {
         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
         return;
       }

       matrix_base<T> product(proxy);
       lhs = product;
     }

     // dense = sparse * trans(dense)
     // When lhs is reported to alias the right factor (x = A * trans(x)), the product is evaluated into a temporary first.
     template< typename SparseMatrixType >
     static void apply(matrix_base<T> & lhs, matrix_expression<const SparseMatrixType,
                       const viennacl::matrix_expression< const viennacl::matrix_base<T>,
                       const viennacl::matrix_base<T>,
                       viennacl::op_trans >,
                       viennacl::op_prod> const & proxy)
     {
       if (!op_aliasing(lhs, proxy.rhs()))
       {
         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
         return;
       }

       matrix_base<T> product(proxy);
       lhs = product;
     }

   };


   // x += expr1 + expr2
   //
   // The first apply() is the generic fallback. The remaining overloads cover sums whose terms are plain dense
   // matrices, optionally multiplied or divided by a scalar; each of them is a single call to viennacl::linalg::ambm_m().
   // Flag naming used in those overloads (y: first term, z: second term):
   //   inv_* == true  -> the term is divided by its scalar instead of multiplied
   //   neg_* == true  -> the sign of the term is flipped
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
   {
     // Generic x += expr1 + expr2.
     // If lhs is reported to alias either operand, the sum is accumulated in a temporary and added afterwards.
     // Otherwise both operands are added to lhs one after the other.
     template<typename LHS1, typename RHS1>
     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
     {
       typedef op_executor<matrix_base<T>, op_inplace_add, LHS>   first_term_executor;
       typedef op_executor<matrix_base<T>, op_inplace_add, RHS>   second_term_executor;

       bool const aliases_first  = op_aliasing(lhs, proxy.lhs());
       bool const aliases_second = op_aliasing(lhs, proxy.rhs());

       if (!(aliases_first || aliases_second))
       {
         first_term_executor::apply(lhs, proxy.lhs());
         second_term_executor::apply(lhs, proxy.rhs());
         return;
       }

       matrix_base<T> sum(proxy.lhs());
       second_term_executor::apply(sum, proxy.rhs());
       lhs += sum;
     }

     // x += y + z
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x += alpha * y + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x += y / alpha + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x += y + beta * z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x += y + z / beta
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = true,  neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x += alpha * y + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x += alpha * y + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = false;
       bool const inv_z = true,  neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x += y / alpha + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = false;
       bool const inv_z = false, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x += y / alpha + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = true, neg_y = false;
       bool const inv_z = true, neg_z = false;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }
   };


   // x -= expr1 + expr2
   //
   // The first apply() is the generic fallback. The remaining overloads cover sums whose terms are plain dense
   // matrices, optionally multiplied or divided by a scalar; each of them is a single call to viennacl::linalg::ambm_m()
   // in which both terms carry a flipped sign.
   // Flag naming used in those overloads (y: first term, z: second term):
   //   inv_* == true  -> the term is divided by its scalar instead of multiplied
   //   neg_* == true  -> the sign of the term is flipped
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
   {
     // Generic x -= expr1 + expr2.
     // If lhs is reported to alias either operand, the sum is accumulated in a temporary and subtracted afterwards.
     // Otherwise both operands are subtracted from lhs one after the other.
     template<typename LHS1, typename RHS1>
     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
     {
       bool const aliases_first  = op_aliasing(lhs, proxy.lhs());
       bool const aliases_second = op_aliasing(lhs, proxy.rhs());

       if (!(aliases_first || aliases_second))
       {
         op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
         op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
         return;
       }

       // Build expr1 + expr2 separately, then subtract it as a whole.
       matrix_base<T> sum(proxy.lhs());
       op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(sum, proxy.rhs());
       lhs -= sum;
     }

     // x -= y + z
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x -= alpha * y + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x -= y / alpha + z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       const matrix_base<T>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs(), T(1), 1, inv_z, neg_z);
     }

     // x -= y + beta * z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x -= y + z / beta
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,
                       const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = true,  neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs(), T(1), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x -= alpha * y + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x -= alpha * y + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = false, neg_y = true;
       bool const inv_z = true,  neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x -= y / alpha + beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                       op_add> const & proxy)
     {
       bool const inv_y = true,  neg_y = true;
       bool const inv_z = false, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }

     // x -= y / alpha + z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                       op_add> const & proxy)
     {
       bool const inv_y = true, neg_y = true;
       bool const inv_z = true, neg_z = true;
       viennacl::linalg::ambm_m(lhs,
                                proxy.lhs().lhs(), proxy.lhs().rhs(), 1, inv_y, neg_y,
                                proxy.rhs().lhs(), proxy.rhs().rhs(), 1, inv_z, neg_z);
     }
   };


   // generic x = vec_expr1 - vec_expr2:

   template<typename T, typename LHS, typename RHS>

   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >

   {

     // generic x = vec_expr1 - vec_expr2:

     template<typename LHS1, typename RHS1>

     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)

     {

       bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());

       bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());


       if (op_aliasing_lhs || op_aliasing_rhs)

       {

         matrix_base<T> temp(proxy.lhs());

         op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());

         lhs = temp;

       }

       else

       {

         op_executor<matrix_base<T>, op_assign, LHS>::apply(lhs, proxy.lhs());

         op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());

       }

     }


     // x = y - z

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs(), T(1), 1, false, false,

                              proxy.rhs(), T(1), 1, false, true);

     }


     // x = alpha * y - z

     template<typename ScalarType>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,

                       const matrix_base<T>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,

                              proxy.rhs(), T(1), 1, false, true);

     }


     // x = y / alpha - z

     template<typename ScalarType>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,

                       const matrix_base<T>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,

                              proxy.rhs(), T(1), 1, false, true);

     }


     // x = y - beta * z

     template<typename ScalarType>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,

                       const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs(), T(1), 1, false, false,

                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);

     }


     // x = y - z / beta

     template<typename ScalarType>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_base<T>,

                       const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs(), T(1), 1, false, false,

                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);

     }


     // x = alpha * y - beta * z

     template<typename ScalarType1, typename ScalarType2>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,

                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,

                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);

     }


     // x = alpha * y - z / beta

     template<typename ScalarType1, typename ScalarType2>

     static void apply(matrix_base<T> & lhs, matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,

                       const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,

                       op_sub> const & proxy)

     {

       viennacl::linalg::ambm(lhs,

                              proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,

                              proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);

     }


     // x = y / alpha - beta * z

     // Overload for a scalar-divided matrix minus a scalar-multiplied matrix.
     //  lhs:   matrix x that receives the result
     //  proxy: expression object; proxy.lhs() wraps (y, alpha) with op_div, proxy.rhs() wraps (z, beta) with op_mult
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                                         op_sub> const & proxy)
     {
       // Unpack the operands of "y / alpha - beta * z" into named references.
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       // First flag on y matches "/ alpha"; last flag on z matches the "-" in front of the second term.
       viennacl::linalg::ambm(lhs,
                              y, alpha, 1, true,  false,
                              z, beta,  1, false, true);
     }


     // x = y / alpha - z / beta

     // Overload for the difference of two scalar-divided matrices.
     //  lhs:   matrix x that receives the result
     //  proxy: expression object; proxy.lhs() wraps (y, alpha), proxy.rhs() wraps (z, beta), both with op_div
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                                         op_sub> const & proxy)
     {
       // Unpack the operands of "y / alpha - z / beta" into named references.
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       // First flag of each operand matches its division; last flag on z matches the "-".
       viennacl::linalg::ambm(lhs,
                              y, alpha, 1, true, false,
                              z, beta,  1, true, true);
     }

   };


   // generic x += mat_expr1 - mat_expr2:

   // Executor for "x += expr1 - expr2" where x is a dense matrix.
   // The generic overload handles arbitrary sub-expressions; the remaining overloads map
   // plain / scaled matrix operands onto a single viennacl::linalg::ambm_m call.
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
   {
     // generic x += mat_expr1 - mat_expr2:
     //  lhs:   matrix x that is updated in place
     //  proxy: expression object holding both sub-expressions
     template<typename LHS1, typename RHS1>
     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
     {
       const bool overlaps_left  = op_aliasing(lhs, proxy.lhs());
       const bool overlaps_right = op_aliasing(lhs, proxy.rhs());

       if (!overlaps_left && !overlaps_right)
       {
         // No aliasing reported: accumulate both parts directly into lhs.
         op_executor<matrix_base<T>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
         op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
         return;
       }

       // Aliasing reported: build the difference in a separate matrix first, then add it.
       matrix_base<T> difference(proxy.lhs());
       op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(difference, proxy.rhs());
       lhs += difference;
     }

     // x += y - z
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
     {
       matrix_base<T> const & y = proxy.lhs();
       matrix_base<T> const & z = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, false,
                                z, T(1), 1, false, true);
     }

     // x += alpha * y - z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                                         const matrix_base<T>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType const     & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, false,
                                z, T(1),  1, false, true);
     }

     // x += y / alpha - z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                                         const matrix_base<T>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType const     & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true,  false,
                                z, T(1),  1, false, true);
     }

     // x += y - beta * z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y    = proxy.lhs();
       matrix_base<T> const & z    = proxy.rhs().lhs();
       ScalarType const     & beta = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, false,
                                z, beta, 1, false, true);
     }

     // x += y - z / beta
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y    = proxy.lhs();
       matrix_base<T> const & z    = proxy.rhs().lhs();
       ScalarType const     & beta = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, false,
                                z, beta, 1, true,  true);
     }

     // x += alpha * y - beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, false,
                                z, beta,  1, false, true);
     }

     // x += alpha * y - z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, false,
                                z, beta,  1, true,  true);
     }

     // x += y / alpha - beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true,  false,
                                z, beta,  1, false, true);
     }

     // x += y / alpha - z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true, false,
                                z, beta,  1, true, true);
     }
   };


   // generic x -= mat_expr1 - mat_expr2:

   // Executor for "x -= expr1 - expr2" where x is a dense matrix.
   // The generic overload handles arbitrary sub-expressions; the remaining overloads map
   // plain / scaled matrix operands onto a single viennacl::linalg::ambm_m call.
   // Compared with the "+=" executor, the last flag of each operand is inverted.
   template<typename T, typename LHS, typename RHS>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
   {
     // generic x -= mat_expr1 - mat_expr2:
     //  lhs:   matrix x that is updated in place
     //  proxy: expression object holding both sub-expressions
     template<typename LHS1, typename RHS1>
     static void apply(matrix_base<T> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
     {
       const bool overlaps_left  = op_aliasing(lhs, proxy.lhs());
       const bool overlaps_right = op_aliasing(lhs, proxy.rhs());

       if (!overlaps_left && !overlaps_right)
       {
         // No aliasing reported: subtract the first part and add the second directly on lhs.
         op_executor<matrix_base<T>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
         op_executor<matrix_base<T>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
         return;
       }

       // Aliasing reported: build the difference in a separate matrix first, then subtract it.
       matrix_base<T> difference(proxy.lhs());
       op_executor<matrix_base<T>, op_inplace_sub, RHS>::apply(difference, proxy.rhs());
       lhs -= difference;
     }

     // x -= y - z
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_sub> const & proxy)
     {
       matrix_base<T> const & y = proxy.lhs();
       matrix_base<T> const & z = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, true,
                                z, T(1), 1, false, false);
     }

     // x -= alpha * y - z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                                         const matrix_base<T>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType const     & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, true,
                                z, T(1),  1, false, false);
     }

     // x -= y / alpha - z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                                         const matrix_base<T>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType const     & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true,  true,
                                z, T(1),  1, false, false);
     }

     // x -= y - beta * z
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y    = proxy.lhs();
       matrix_base<T> const & z    = proxy.rhs().lhs();
       ScalarType const     & beta = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, true,
                                z, beta, 1, false, false);
     }

     // x -= y - z / beta
     template<typename ScalarType>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y    = proxy.lhs();
       matrix_base<T> const & z    = proxy.rhs().lhs();
       ScalarType const     & beta = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, T(1), 1, false, true,
                                z, beta, 1, true,  false);
     }

     // x -= alpha * y - beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, true,
                                z, beta,  1, false, false);
     }

     // x -= alpha * y - z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_mult>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, false, true,
                                z, beta,  1, true,  false);
     }

     // x -= y / alpha - beta * z
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_mult>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true,  true,
                                z, beta,  1, false, false);
     }

     // x -= y / alpha - z / beta
     template<typename ScalarType1, typename ScalarType2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const ScalarType1, op_div>,
                                         const matrix_expression<const matrix_base<T>, const ScalarType2, op_div>,
                                         op_sub> const & proxy)
     {
       matrix_base<T> const & y     = proxy.lhs().lhs();
       ScalarType1 const    & alpha = proxy.lhs().rhs();
       matrix_base<T> const & z     = proxy.rhs().lhs();
       ScalarType2 const    & beta  = proxy.rhs().rhs();

       viennacl::linalg::ambm_m(lhs,
                                y, alpha, 1, true, true,
                                z, beta,  1, true, false);
     }
   };


   // Executor for assigning an op_vector_diag expression (vector plus int argument) to a matrix.
   template<typename T, typename LHS>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
   {
     //  lhs:   destination matrix
     //  proxy: expression object; proxy.lhs() is the source vector, proxy.rhs() the int argument
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
     {
       vector_base<T> const & source   = proxy.lhs();
       const int              diag_arg = proxy.rhs();

       viennacl::linalg::matrix_diag_from_vector(source, diag_arg, lhs);
     }
   };


   // Executor for assigning an op_matrix_diag expression (matrix plus int argument) to a vector.
   template<typename T, typename LHS>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; proxy.lhs() is the source matrix, proxy.rhs() the int argument
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const int, op_matrix_diag> const & proxy)
     {
       matrix_base<T> const & source   = proxy.lhs();
       const int              diag_arg = proxy.rhs();

       viennacl::linalg::matrix_diag_to_vector(source, diag_arg, lhs);
     }
   };


   // Executor for assigning an op_row expression (matrix plus unsigned index) to a vector.
   template<typename T, typename LHS>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; proxy.lhs() is the source matrix, proxy.rhs() the unsigned index
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const unsigned int, op_row> const & proxy)
     {
       matrix_base<T> const & source = proxy.lhs();
       const unsigned int     index  = proxy.rhs();

       viennacl::linalg::matrix_row(source, index, lhs);
     }
   };


   // Executor for assigning an op_column expression (matrix plus unsigned index) to a vector.
   template<typename T, typename LHS>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; proxy.lhs() is the source matrix, proxy.rhs() the unsigned index
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const unsigned int, op_column> const & proxy)
     {
       matrix_base<T> const & source = proxy.lhs();
       const unsigned int     index  = proxy.rhs();

       viennacl::linalg::matrix_column(source, index, lhs);
     }
   };


   // Executor for assigning an op_row_sum expression over a plain matrix to a vector.
   template<typename T>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; only proxy.lhs() (the matrix) is used
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const matrix_base<T>, op_row_sum> const & proxy)
     {
       matrix_base<T> const & source = proxy.lhs();
       viennacl::linalg::row_sum_impl(source, lhs);
     }
   };


   // Executor for assigning an op_row_sum expression over a matrix expression to a vector.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_row_sum> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; only proxy.lhs() (the matrix expression) is used
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<LHS, RHS, OP>,
                                         const matrix_expression<LHS, RHS, OP>,
                                         op_row_sum> const & proxy)
     {
       // Turn the matrix expression into a concrete matrix before handing it to row_sum_impl.
       matrix_base<T> evaluated(proxy.lhs());
       viennacl::linalg::row_sum_impl(evaluated, lhs);
     }
   };


   // Executor for assigning an op_col_sum expression over a plain matrix to a vector.
   template<typename T>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; only proxy.lhs() (the matrix) is used
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const matrix_base<T>, op_col_sum> const & proxy)
     {
       matrix_base<T> const & source = proxy.lhs();
       viennacl::linalg::column_sum_impl(source, lhs);
     }
   };


   // Executor for assigning an op_col_sum expression over a matrix expression to a vector.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<LHS, RHS, OP>, const matrix_expression<LHS, RHS, OP>, op_col_sum> >
   {
     //  lhs:   destination vector
     //  proxy: expression object; only proxy.lhs() (the matrix expression) is used
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<LHS, RHS, OP>,
                                         const matrix_expression<LHS, RHS, OP>,
                                         op_col_sum> const & proxy)
     {
       // Turn the matrix expression into a concrete matrix before handing it to column_sum_impl.
       matrix_base<T> evaluated(proxy.lhs());
       viennacl::linalg::column_sum_impl(evaluated, lhs);
     }
   };


   // generic x = mat_expr1 .* mat_expr2:

   // Executor for "x = a .OP. b" (element-wise binary operation) on dense matrices.
   // Operands that are themselves expressions are first turned into concrete matrices,
   // so that viennacl::linalg::element_op always receives two plain matrices.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
   {
     // x = y .* z
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
     {
       // Both operands are plain matrices already: forward the expression unchanged.
       viennacl::linalg::element_op(lhs, proxy);
     }

     // x = y .* mat_expr
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> right_operand(proxy.rhs());
       viennacl::linalg::element_op(lhs, plain_expression_type(proxy.lhs(), right_operand));
     }

     // x = mat_expr .* z
     template<typename LHS1, typename RHS1, typename OP1>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_base<T>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       viennacl::linalg::element_op(lhs, plain_expression_type(left_operand, proxy.rhs()));
     }

     // x = mat_expr .* mat_expr
     template<typename LHS1, typename RHS1, typename OP1,
              typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       matrix_base<T> right_operand(proxy.rhs());
       viennacl::linalg::element_op(lhs, plain_expression_type(left_operand, right_operand));
     }
   };


   // generic x += mat_expr .* mat_expr:

   // Executor for "x += a .OP. b" (element-wise binary operation) on dense matrices.
   // The element-wise result is always computed into a separate matrix, which is then added to x.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
   {
     // x += y .* z
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
     {
       matrix_base<T> elementwise_result(proxy);
       lhs += elementwise_result;
     }

     // x += y .* mat_expr
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> right_operand(proxy.rhs());
       // Result buffer takes its dimensions from the evaluated operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(right_operand.size1(), right_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(proxy.lhs(), right_operand));
       lhs += elementwise_result;
     }

     // x += mat_expr .* z
     template<typename LHS1, typename RHS1, typename OP1>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_base<T>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       // Result buffer takes its dimensions from the evaluated operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(left_operand.size1(), left_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(left_operand, proxy.rhs()));
       lhs += elementwise_result;
     }

     // x += mat_expr .* mat_expr
     template<typename LHS1, typename RHS1, typename OP1,
              typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       matrix_base<T> right_operand(proxy.rhs());
       // Result buffer takes its dimensions from the left operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(left_operand.size1(), left_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(left_operand, right_operand));
       lhs += elementwise_result;
     }
   };


   // generic x -= mat_expr1 .* mat_expr2:

   // Executor for "x -= a .OP. b" (element-wise binary operation) on dense matrices.
   // The element-wise result is always computed into a separate matrix, which is then subtracted from x.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
   {
     // x -= y .* z
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> > const & proxy)
     {
       matrix_base<T> elementwise_result(proxy);
       lhs -= elementwise_result;
     }

     // x -= y .* mat_expr
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> right_operand(proxy.rhs());
       // Result buffer takes its dimensions from the evaluated operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(right_operand.size1(), right_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(proxy.lhs(), right_operand));
       lhs -= elementwise_result;
     }

     // x -= mat_expr .* z
     template<typename LHS1, typename RHS1, typename OP1>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_base<T>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       // Result buffer takes its dimensions from the evaluated operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(left_operand.size1(), left_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(left_operand, proxy.rhs()));
       lhs -= elementwise_result;
     }

     // x -= mat_expr .* mat_expr
     template<typename LHS1, typename RHS1, typename OP1,
              typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_binary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<OP> >  plain_expression_type;

       matrix_base<T> left_operand(proxy.lhs());
       matrix_base<T> right_operand(proxy.rhs());
       // Result buffer takes its dimensions from the left operand, its layout and context from lhs.
       matrix_base<T> elementwise_result(left_operand.size1(), left_operand.size2(), lhs.row_major(), viennacl::traits::context(lhs));

       viennacl::linalg::element_op(elementwise_result, plain_expression_type(left_operand, right_operand));
       lhs -= elementwise_result;
     }
   };


   // Executor for "x = OP(a)" (element-wise unary operation) on dense matrices.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
   {
     // x = OP(y)
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
     {
       // The argument is a plain matrix already: forward the expression unchanged.
       viennacl::linalg::element_op(lhs, proxy);
     }

     // x = OP(mat_expr)
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_unary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >  plain_expression_type;

       // Turn the inner expression into a concrete matrix; it is passed as both operands of the unary expression.
       matrix_base<T> argument(proxy.rhs());
       viennacl::linalg::element_op(lhs, plain_expression_type(argument, argument));
     }
   };


   // Executor for "x += OP(a)" (element-wise unary operation) on dense matrices.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
   {
     // x += OP(y)
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
     {
       matrix_base<T> unary_result(proxy);
       lhs += unary_result;
     }

     // x += OP(mat_expr)
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_unary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >  plain_expression_type;

       // Evaluate the inner expression, then overwrite that same buffer with OP applied to it.
       matrix_base<T> buffer(proxy.rhs());
       viennacl::linalg::element_op(buffer, plain_expression_type(buffer, buffer)); // inplace operation is safe here
       lhs += buffer;
     }
   };


   // Executor for "x -= OP(a)" (element-wise unary operation) on dense matrices.
   template<typename T, typename LHS, typename RHS, typename OP>
   struct op_executor<matrix_base<T>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
   {
     // x -= OP(y)
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> > const & proxy)
     {
       matrix_base<T> unary_result(proxy);
       lhs -= unary_result;
     }

     // x -= OP(mat_expr)
     template<typename LHS2, typename RHS2, typename OP2>
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
                                         const matrix_expression<const LHS2, const RHS2, OP2>,
                                         op_element_unary<OP> > const & proxy)
     {
       typedef viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<OP> >  plain_expression_type;

       // Evaluate the inner expression, then overwrite that same buffer with OP applied to it.
       matrix_base<T> buffer(proxy.rhs());
       viennacl::linalg::element_op(buffer, plain_expression_type(buffer, buffer)); // inplace operation is safe here
       lhs -= buffer;
     }
   };


   // C = A * B

   // Executor for the matrix-matrix product C = A * B with plain matrices A and B.
   template<typename T>
   struct op_executor<matrix_base<T>, op_assign, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding A (rhs.lhs()) and B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
     {
       const bool result_is_operand = op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());

       if (!result_is_operand)
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
         return;
       }

       // C is also a factor: compute the product into a separate matrix, then assign.
       matrix_base<T> product(rhs);
       lhs = product;
     }
   };


   // C = A * B^T

   // Executor for the matrix-matrix product C = A * B^T with plain matrices A and B.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding A (rhs.lhs()) and the op_trans expression of B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // For the transposed factor, the aliasing check looks at the matrix inside the op_trans expression.
       const bool result_is_operand = op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs().lhs());

       if (!result_is_operand)
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
         return;
       }

       // C is also a factor: compute the product into a separate matrix, then assign.
       matrix_base<T> product(rhs);
       lhs = product;
     }
   };


   // C = A * EXPR   for some matrix expression EXPR

   // Executor for the matrix-matrix product C = A * EXPR, where A is a plain matrix and
   // EXPR is an arbitrary matrix expression.
   //
   // EXPR is always evaluated into a temporary matrix first. If C additionally aliases A
   // (e.g. C = prod(C, EXPR)), the product is computed into a separate buffer and assigned
   // afterwards, mirroring the aliasing protection of the plain C = A * B executor.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const LhsT, const RhsT, OpT>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding A (rhs.lhs()) and EXPR (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LhsT, const RhsT, OpT>,
                                         op_mat_mat_prod> const & rhs)
     {
       // Evaluate EXPR before lhs is written, so an lhs appearing inside EXPR is read while still intact.
       matrix_base<T> temp(rhs.rhs());

       if (op_aliasing(lhs, rhs.lhs()))
       {
         // lhs is also the left factor: writing the product straight into lhs would overwrite
         // entries of A that are still needed. Use a separate result buffer instead.
         matrix_base<T> result(lhs.size1(), lhs.size2(), lhs.row_major(), viennacl::traits::context(lhs));
         viennacl::linalg::prod_impl(rhs.lhs(), temp, result, T(1.0), T(0));
         lhs = result;
       }
       else
         viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(0));
     }
   };


   // C = A^T * B

   // Executor for the matrix-matrix product C = A^T * B with plain matrices A and B.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding the op_trans expression of A (rhs.lhs()) and B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       // For the transposed factor, the aliasing check looks at the matrix inside the op_trans expression.
       const bool result_is_operand = op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs());

       if (!result_is_operand)
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
         return;
       }

       // C is also a factor: compute the product into a separate matrix, then assign.
       matrix_base<T> product(rhs);
       lhs = product;
     }
   };


   // C = EXPR * B   for some matrix expression EXPR

   // Executor for the matrix-matrix product C = EXPR * B, where EXPR is an arbitrary matrix
   // expression and B is a plain matrix.
   //
   // EXPR is always evaluated into a temporary matrix first. If C additionally aliases B
   // (e.g. C = prod(EXPR, C)), the product is computed into a separate buffer and assigned
   // afterwards, mirroring the aliasing protection of the plain C = A * B executor.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding EXPR (rhs.lhs()) and B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       // Evaluate EXPR before lhs is written, so an lhs appearing inside EXPR is read while still intact.
       matrix_base<T> temp(rhs.lhs());

       if (op_aliasing(lhs, rhs.rhs()))
       {
         // lhs is also the right factor: writing the product straight into lhs would overwrite
         // entries of B that are still needed. Use a separate result buffer instead.
         matrix_base<T> result(lhs.size1(), lhs.size2(), lhs.row_major(), viennacl::traits::context(lhs));
         viennacl::linalg::prod_impl(temp, rhs.rhs(), result, T(1.0), T(0));
         lhs = result;
       }
       else
         viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(0));
     }
   };


   // C = A^T * B^T

   // Executor for the matrix-matrix product C = A^T * B^T with plain matrices A and B.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding the op_trans expressions of A (rhs.lhs()) and B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // The aliasing checks look at the matrices inside the two op_trans expressions.
       const bool result_is_operand = op_aliasing(lhs, rhs.lhs().lhs()) || op_aliasing(lhs, rhs.rhs().lhs());

       if (!result_is_operand)
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
         return;
       }

       // C is also a factor: compute the product into a separate matrix, then assign.
       matrix_base<T> product(rhs);
       lhs = product;
     }
   };


   // C = EXPR1 * EXPR2   for some matrix expressions EXPR1 and EXPR2

   // Executor for the matrix-matrix product C = EXPR1 * EXPR2, where both factors are
   // arbitrary matrix expressions. Each factor is evaluated into its own temporary matrix
   // before the product is computed.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_mat_mat_prod>
                     >
   {
     //  lhs: result matrix C
     //  rhs: product expression holding EXPR1 (rhs.lhs()) and EXPR2 (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_mat_mat_prod> const & rhs)
     {
       matrix_base<T> left_factor(rhs.lhs());
       matrix_base<T> right_factor(rhs.rhs());

       viennacl::linalg::prod_impl(left_factor, right_factor, lhs, T(1.0), T(0));
     }
   };


   // C += A * B

   // Executor for the accumulating matrix-matrix product C += A * B with plain matrices A and B.
   template<typename T>
   struct op_executor<matrix_base<T>, op_inplace_add, matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
   {
     //  lhs: matrix C that is updated in place
     //  rhs: product expression holding A (rhs.lhs()) and B (rhs.rhs())
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
     {
       const bool result_is_operand = op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());

       if (!result_is_operand)
       {
         // Last argument is T(1.0) here, whereas the plain assignment executors pass T(0).
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
         return;
       }

       // C is also a factor: compute the product into a separate matrix, then add it.
       matrix_base<T> product(rhs);
       lhs += product;
     }
   };


   // C += A * B^T
   //
   // Accumulates the product of a plain matrix and a transposed matrix into lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // rhs.rhs() is the transpose wrapper; the matrix it wraps is rhs.rhs().lhs().
       // Fast path: lhs aliases neither A nor B, so accumulate directly with scalars (1, 1).
       if (!op_aliasing(lhs, rhs.lhs()) && !op_aliasing(lhs, rhs.rhs().lhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs += product;
     }
   };


   // C += A * EXPR   for some matrix expression EXPR
   //
   // EXPR is evaluated into a temporary matrix, then A * temp is accumulated into lhs.
   // If lhs aliases A (e.g. C += C * EXPR), the whole product is evaluated into a
   // temporary first, mirroring the aliasing guard of the C += A * B specializations;
   // otherwise lhs would be handed to prod_impl as both input and output.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const LhsT, const RhsT, OpT>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LhsT, const RhsT, OpT>,
                                         op_mat_mat_prod> const & rhs)
     {
       if (op_aliasing(lhs, rhs.lhs()))
       {
         // lhs is the left factor: compute the full product out of place, then add.
         matrix_base<T> product(rhs);
         lhs += product;
       }
       else
       {
         matrix_base<T> temp(rhs.rhs());   // evaluated EXPR
         // Trailing scalars (1, 1): add the unscaled product onto the existing lhs.
         viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(1.0), T(1.0));
       }
     }
   };


   // C += A^T * B
   //
   // Accumulates the product of a transposed matrix and a plain matrix into lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       // rhs.lhs() is the transpose wrapper; the matrix it wraps is rhs.lhs().lhs().
       // Fast path: lhs aliases neither A nor B, so accumulate directly with scalars (1, 1).
       if (!op_aliasing(lhs, rhs.lhs().lhs()) && !op_aliasing(lhs, rhs.rhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs += product;
     }
   };


   // C += EXPR * B   for some matrix expression EXPR
   //
   // EXPR is evaluated into a temporary matrix, then temp * B is accumulated into lhs.
   // If lhs aliases B (e.g. C += EXPR * C), the whole product is evaluated into a
   // temporary first, mirroring the aliasing guard of the C += A * B specializations;
   // otherwise lhs would be handed to prod_impl as both input and output.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       if (op_aliasing(lhs, rhs.rhs()))
       {
         // lhs is the right factor: compute the full product out of place, then add.
         matrix_base<T> product(rhs);
         lhs += product;
       }
       else
       {
         matrix_base<T> temp(rhs.lhs());   // evaluated EXPR
         // Trailing scalars (1, 1): add the unscaled product onto the existing lhs.
         viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(1.0), T(1.0));
       }
     }
   };


   // C += A^T * B^T
   //
   // Accumulates the product of two transposed matrices into lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // The matrices wrapped by the two transpose expressions are rhs.lhs().lhs() and rhs.rhs().lhs().
       // Fast path: lhs aliases neither of them, so accumulate directly with scalars (1, 1).
       if (!op_aliasing(lhs, rhs.lhs().lhs()) && !op_aliasing(lhs, rhs.rhs().lhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs += product;
     }
   };


   // C += EXPR1 * EXPR2   for some matrix expressions EXPR1 and EXPR2
   //
   // Each factor is evaluated into its own temporary matrix; the product of the
   // temporaries is then accumulated into lhs.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_mat_mat_prod> const & rhs)
     {
       matrix_base<T> left_factor(rhs.lhs());    // evaluated EXPR1
       matrix_base<T> right_factor(rhs.rhs());   // evaluated EXPR2

       // Trailing scalars (1, 1): add the unscaled product onto the existing lhs.
       viennacl::linalg::prod_impl(left_factor, right_factor, lhs, T(1.0), T(1.0));
     }
   };


   // C -= A * B
   //
   // Subtracts the product of two plain matrices from lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>, const matrix_base<T>, op_mat_mat_prod> const & rhs)
     {
       // Fast path: lhs aliases neither factor, so the product is subtracted directly.
       // Trailing scalars (-1, 1): add the negated product onto the existing lhs.
       if (!op_aliasing(lhs, rhs.lhs()) && !op_aliasing(lhs, rhs.rhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs -= product;
     }
   };


   // C -= A * B^T
   //
   // Subtracts the product of a plain matrix and a transposed matrix from lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // rhs.rhs() is the transpose wrapper; the matrix it wraps is rhs.rhs().lhs().
       // Fast path: lhs aliases neither A nor B, so subtract directly with scalars (-1, 1).
       if (!op_aliasing(lhs, rhs.lhs()) && !op_aliasing(lhs, rhs.rhs().lhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs -= product;
     }
   };


   // C -= A * EXPR   for some matrix expression EXPR
   //
   // EXPR is evaluated into a temporary matrix, then A * temp is subtracted from lhs.
   // If lhs aliases A (e.g. C -= C * EXPR), the whole product is evaluated into a
   // temporary first, mirroring the aliasing guard of the C -= A * B specializations;
   // otherwise lhs would be handed to prod_impl as both input and output.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_base<T>,
                                        const matrix_expression<const LhsT, const RhsT, OpT>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_base<T>,
                                         const matrix_expression<const LhsT, const RhsT, OpT>,
                                         op_mat_mat_prod> const & rhs)
     {
       if (op_aliasing(lhs, rhs.lhs()))
       {
         // lhs is the left factor: compute the full product out of place, then subtract.
         matrix_base<T> product(rhs);
         lhs -= product;
       }
       else
       {
         matrix_base<T> temp(rhs.rhs());   // evaluated EXPR
         // Trailing scalars (-1, 1): add the negated product onto the existing lhs.
         viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs, T(-1.0), T(1.0));
       }
     }
   };


   // C -= A^T * B
   //
   // Subtracts the product of a transposed matrix and a plain matrix from lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       // rhs.lhs() is the transpose wrapper; the matrix it wraps is rhs.lhs().lhs().
       // Fast path: lhs aliases neither A nor B, so subtract directly with scalars (-1, 1).
       if (!op_aliasing(lhs, rhs.lhs().lhs()) && !op_aliasing(lhs, rhs.rhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs -= product;
     }
   };


   // C -= EXPR * B   for some matrix expression EXPR
   //
   // EXPR is evaluated into a temporary matrix, then temp * B is subtracted from lhs.
   // If lhs aliases B (e.g. C -= EXPR * C), the whole product is evaluated into a
   // temporary first, mirroring the aliasing guard of the C -= A * B specializations;
   // otherwise lhs would be handed to prod_impl as both input and output.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const matrix_base<T>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const matrix_base<T>,
                                         op_mat_mat_prod> const & rhs)
     {
       if (op_aliasing(lhs, rhs.rhs()))
       {
         // lhs is the right factor: compute the full product out of place, then subtract.
         matrix_base<T> product(rhs);
         lhs -= product;
       }
       else
       {
         matrix_base<T> temp(rhs.lhs());   // evaluated EXPR
         // Trailing scalars (-1, 1): add the negated product onto the existing lhs.
         viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs, T(-1.0), T(1.0));
       }
     }
   };


   // C -= A^T * B^T
   //
   // Subtracts the product of two transposed matrices from lhs.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         op_mat_mat_prod> const & rhs)
     {
       // The matrices wrapped by the two transpose expressions are rhs.lhs().lhs() and rhs.rhs().lhs().
       // Fast path: lhs aliases neither of them, so subtract directly with scalars (-1, 1).
       if (!op_aliasing(lhs, rhs.lhs().lhs()) && !op_aliasing(lhs, rhs.rhs().lhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
         return;
       }

       // lhs is also one of the factors: evaluate the product into a temporary first.
       matrix_base<T> product(rhs);
       lhs -= product;
     }
   };


   // C -= EXPR1 * EXPR2   for some matrix expressions EXPR1 and EXPR2
   //
   // Each factor is evaluated into its own temporary matrix; the product of the
   // temporaries is then subtracted from lhs.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_mat_mat_prod>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const matrix_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_mat_mat_prod> const & rhs)
     {
       matrix_base<T> left_factor(rhs.lhs());    // evaluated EXPR1
       matrix_base<T> right_factor(rhs.rhs());   // evaluated EXPR2

       // Trailing scalars (-1, 1): add the negated product onto the existing lhs.
       viennacl::linalg::prod_impl(left_factor, right_factor, lhs, T(-1.0), T(1.0));
     }
   };


   // y = A * x
   //
   // Assigns a matrix-vector product to lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_assign,
                      vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       // Common case: the result vector is distinct from x, so write the product straight into it.
       if (!op_aliasing(lhs, rhs.rhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
         return;
       }

       // x = A * x: compute into a temporary so x is not overwritten while still being read.
       vector_base<T> product(rhs);
       lhs = product;
     }
   };


   // y = A^T * x
   //
   // Assigns the product of a transposed matrix and a vector to lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_assign,
                      vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       // Common case: the result vector is distinct from x, so write the product straight into it.
       if (!op_aliasing(lhs, rhs.rhs()))
       {
         viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
         return;
       }

       // x = A^T * x: compute into a temporary so x is not overwritten while still being read.
       vector_base<T> product(rhs);
       lhs = product;
     }
   };


   // y = MAT_EXPR * x   for a matrix expression MAT_EXPR
   //
   // MAT_EXPR is evaluated into a temporary matrix, which is then multiplied with x.
   // If lhs aliases x (x = MAT_EXPR * x), the product is computed into a separate
   // vector and assigned afterwards, matching the aliasing check of the y = A * x
   // specialization; otherwise prod_impl would write into the vector it is reading.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_assign,
                      vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> temp(rhs.lhs());   // evaluated MAT_EXPR

       // check for x = MAT_EXPR * x
       if (op_aliasing(lhs, rhs.rhs()))
       {
         // Same scratch-vector pattern as the y += MAT_EXPR * x specialization:
         // copy-construct from lhs, let prod_impl fill it, then assign.
         vector_base<T> result(lhs);
         viennacl::linalg::prod_impl(temp, rhs.rhs(), result);
         lhs = result;
       }
       else
         viennacl::linalg::prod_impl(temp, rhs.rhs(), lhs);
     }
   };


   // y = A * VEC_EXPR   for a vector expression VEC_EXPR
   //
   // VEC_EXPR is evaluated into a temporary vector, which is then multiplied by A.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_assign,
                      vector_expression<const matrix_base<T>,
                                        const vector_expression<const LhsT, const RhsT, OpT>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>,
                                         const vector_expression<const LhsT, const RhsT, OpT>,
                                         op_prod> const & rhs)
     {
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       viennacl::linalg::prod_impl(rhs.lhs(), operand, lhs);
     }
   };


   // y = MAT_EXPR * VEC_EXPR   for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
   //
   // Both operands are evaluated into temporaries before the product is written to lhs.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<vector_base<T>,
                      op_assign,
                      vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> mat(rhs.lhs());       // evaluated MAT_EXPR
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       viennacl::linalg::prod_impl(mat, operand, lhs);
     }
   };


   // y += A * x
   //
   // The matrix-vector product is evaluated into a temporary vector, which is
   // then added to lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_inplace_add,
                      vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       vector_base<T> product(rhs);   // A * x, evaluated out of place

       lhs += product;
     }
   };


   // y += A^T * x
   //
   // The transposed matrix-vector product is evaluated into a temporary vector,
   // which is then added to lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_inplace_add,
                      vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       vector_base<T> product(rhs);   // A^T * x, evaluated out of place

       lhs += product;
     }
   };


   // y += MAT_EXPR * x   for a matrix expression MAT_EXPR
   //
   // MAT_EXPR is evaluated into a temporary matrix, the product with x goes into
   // a scratch vector, and the scratch vector is added to lhs.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_inplace_add,
                      vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> mat(rhs.lhs());   // evaluated MAT_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       // NOTE(review): the copied values look unused (presumably only the layout matters) - confirm.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(mat, rhs.rhs(), product);

       lhs += product;
     }
   };


   // y += A * VEC_EXPR   for a vector expression VEC_EXPR
   //
   // VEC_EXPR is evaluated into a temporary vector, the product with A goes into
   // a scratch vector, and the scratch vector is added to lhs.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_inplace_add,
                      vector_expression<const matrix_base<T>,
                                        const vector_expression<const LhsT, const RhsT, OpT>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>,
                                         const vector_expression<const LhsT, const RhsT, OpT>,
                                         op_prod> const & rhs)
     {
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), operand, product);

       lhs += product;
     }
   };


   // y += MAT_EXPR * VEC_EXPR   for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
   //
   // Both operands are evaluated into temporaries, their product goes into a
   // scratch vector, and the scratch vector is added to lhs.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<vector_base<T>,
                      op_inplace_add,
                      vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> mat(rhs.lhs());       // evaluated MAT_EXPR
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(mat, operand, product);

       lhs += product;
     }
   };


   // y -= A * x
   //
   // The matrix-vector product is evaluated into a temporary vector, which is
   // then subtracted from lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_inplace_sub,
                      vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       vector_base<T> product(rhs);   // A * x, evaluated out of place

       lhs -= product;
     }
   };


   // y -= A^T * x
   //
   // The transposed matrix-vector product is evaluated into a temporary vector,
   // which is then subtracted from lhs.
   template<typename T>
   struct op_executor<vector_base<T>,
                      op_inplace_sub,
                      vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const matrix_base<T>, const matrix_base<T>, op_trans>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       vector_base<T> product(rhs);   // A^T * x, evaluated out of place

       lhs -= product;
     }
   };


   // y -= MAT_EXPR * x   for a matrix expression MAT_EXPR
   //
   // MAT_EXPR is evaluated into a temporary matrix, the product with x goes into
   // a scratch vector, and the scratch vector is subtracted from lhs.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_inplace_sub,
                      vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                        const vector_base<T>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT, const RhsT, OpT>,
                                         const vector_base<T>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> mat(rhs.lhs());   // evaluated MAT_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(mat, rhs.rhs(), product);

       lhs -= product;
     }
   };


   // y -= A * VEC_EXPR   for a vector expression VEC_EXPR
   //
   // VEC_EXPR is evaluated into a temporary vector, the product with A goes into
   // a scratch vector, and the scratch vector is subtracted from lhs.
   template<typename T, typename LhsT, typename RhsT, typename OpT>
   struct op_executor<vector_base<T>,
                      op_inplace_sub,
                      vector_expression<const matrix_base<T>,
                                        const vector_expression<const LhsT, const RhsT, OpT>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_base<T>,
                                         const vector_expression<const LhsT, const RhsT, OpT>,
                                         op_prod> const & rhs)
     {
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(rhs.lhs(), operand, product);

       lhs -= product;
     }
   };


   // y -= MAT_EXPR * VEC_EXPR   for a matrix expression MAT_EXPR and a vector expression VEC_EXPR
   //
   // Both operands are evaluated into temporaries, their product goes into a
   // scratch vector, and the scratch vector is subtracted from lhs.
   template<typename T,
            typename LhsT1, typename RhsT1, typename OpT1,
            typename LhsT2, typename RhsT2, typename OpT2>
   struct op_executor<vector_base<T>,
                      op_inplace_sub,
                      vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                        const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                        op_prod>
                     >
   {
     static void apply(vector_base<T> & lhs,
                       vector_expression<const matrix_expression<const LhsT1, const RhsT1, OpT1>,
                                         const vector_expression<const LhsT2, const RhsT2, OpT2>,
                                         op_prod> const & rhs)
     {
       matrix_base<T> mat(rhs.lhs());       // evaluated MAT_EXPR
       vector_base<T> operand(rhs.rhs());   // evaluated VEC_EXPR

       // Scratch vector copy-constructed from lhs; prod_impl then stores the product in it.
       vector_base<T> product(lhs);
       viennacl::linalg::prod_impl(mat, operand, product);

       lhs -= product;
     }
   };


   // A = v1 * v2^T
   //
   // Assignment of an outer product: lhs is cleared and then receives a rank-1
   // update with scaling factor 1.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       const T    alpha     = T(1.0);   // unscaled outer product
       const bool flip_sign = false;    // the -= specializations pass true here

       // Clear first so that the update below leaves only the outer product in lhs.
       lhs.clear();
       viennacl::linalg::scaled_rank_1_update(lhs, alpha, 1, false, flip_sign, rhs.lhs(), rhs.rhs());
     }
   };


   // A = alpha * v1 * v2^T
   //
   // Assignment of a scaled outer product: lhs is cleared and then receives a
   // rank-1 update whose scaling factor is the scalar operand of the expression.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>,
                      op_assign,
                      matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                        const ScalarType,
                                        op_mult>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                         const ScalarType,
                                         op_mult> const & rhs)
     {
       const bool flip_sign = false;   // the -= specializations pass true here

       // rhs.rhs() is alpha; rhs.lhs() is the inner v1 * v2^T expression.
       // Clear first so that the update below leaves only the scaled outer product in lhs.
       lhs.clear();
       viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, flip_sign, rhs.lhs().lhs(), rhs.lhs().rhs());
     }
   };


   // A += v1 * v2^T
   //
   // Adds an outer product to lhs via a rank-1 update with scaling factor 1.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       const T    alpha     = T(1.0);   // unscaled outer product
       const bool flip_sign = false;    // addition; the -= specializations pass true here

       viennacl::linalg::scaled_rank_1_update(lhs, alpha, 1, false, flip_sign, rhs.lhs(), rhs.rhs());
     }
   };


   // A += alpha * v1 * v2^T
   //
   // Adds a scaled outer product to lhs via a rank-1 update whose scaling factor
   // is the scalar operand of the expression.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>,
                      op_inplace_add,
                      matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                        const ScalarType,
                                        op_mult>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                         const ScalarType,
                                         op_mult> const & rhs)
     {
       const bool flip_sign = false;   // addition; the -= specializations pass true here

       // rhs.rhs() is alpha; rhs.lhs() is the inner v1 * v2^T expression.
       viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, flip_sign, rhs.lhs().lhs(), rhs.lhs().rhs());
     }
   };


   // A -= v1 * v2^T
   //
   // Subtracts an outer product from lhs via a rank-1 update with scaling
   // factor 1 and the sign-flip flag set.
   template<typename T>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
     {
       const T    alpha     = T(1.0);   // unscaled outer product
       const bool flip_sign = true;     // subtraction; the += specializations pass false here

       viennacl::linalg::scaled_rank_1_update(lhs, alpha, 1, false, flip_sign, rhs.lhs(), rhs.rhs());
     }
   };


   // A -= alpha * v1 * v2^T
   //
   // Subtracts a scaled outer product from lhs via a rank-1 update whose scaling
   // factor is the scalar operand of the expression, with the sign-flip flag set.
   template<typename T, typename ScalarType>
   struct op_executor<matrix_base<T>,
                      op_inplace_sub,
                      matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                        const ScalarType,
                                        op_mult>
                     >
   {
     static void apply(matrix_base<T> & lhs,
                       matrix_expression<const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
                                         const ScalarType,
                                         op_mult> const & rhs)
     {
       const bool flip_sign = true;   // subtraction; the += specializations pass false here

       // rhs.rhs() is alpha; rhs.lhs() is the inner v1 * v2^T expression.
       viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, flip_sign, rhs.lhs().lhs(), rhs.lhs().rhs());
     }
   };


 } // namespace detail


 } // namespace linalg


 } //namespace viennacl


 #endif

viennacl::linalg::row_sum_impl
void row_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
Definition: matrix_operations.hpp:679

viennacl::enable_if
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:30

viennacl::op_mult
A tag class representing multiplication by a scalar.
Definition: forwards.h:92

viennacl::backend::typesafe_host_array
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:92

viennacl::matrix_expression::size_type
vcl_size_t size_type
Definition: matrix.hpp:60

viennacl::linalg::matrix_diag_to_vector
void matrix_diag_to_vector(const matrix_base< NumericT > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
Definition: matrix_operations.hpp:287

viennacl::row_major::mem_index
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:314

viennacl::cuda_not_available_exception
Definition: forwards.h:585

viennacl::backend::memory_write
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220

viennacl::trans
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Definition: sparse_matrix_operations.hpp:374

viennacl::matrix_base::size_type
SizeT size_type
Definition: matrix_def.hpp:112

viennacl::linalg::detail::op_executor
Worker class for decomposing expression templates.
Definition: op_executor.hpp:80

viennacl::const_entry_proxy
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:236

matrix_operations.hpp
Implementations of dense matrix related operations including matrix-vector products.

viennacl::matrix_base::internal_size
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:242

viennacl::matrix_slice
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:443

viennacl::matrix_base::operator=
self_type & operator=(const self_type &other)
Definition: matrix.hpp:262

viennacl::is_row_major
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:484

viennacl::is_flip_sign_scalar
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:462

viennacl::linalg::matrix_assign
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Definition: matrix_operations.hpp:208

tools.hpp
Various little tools used here and there in ViennaCL.

viennacl::matrix_base::operator-
matrix_expression< const self_type, const NumericT, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:628

viennacl::op_column
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:195

viennacl::traits::internal_size1
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:382

viennacl::traits::size1
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163

viennacl::op_vector_diag
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:189

viennacl::tools::shared_ptr::inc
void inc()
Definition: shared_ptr.hpp:154

viennacl::matrix_iterator::index1
vcl_size_t index1()
Definition: matrix.hpp:109

viennacl::op_sub
A tag class representing subtraction.
Definition: forwards.h:90

viennacl::OPENCL_MEMORY
Definition: forwards.h:349

viennacl::linalg::am
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: matrix_operations.hpp:111

viennacl::implicit_matrix_base::context
viennacl::context context() const
Definition: matrix_def.hpp:46

viennacl::traits::internal_size2
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:390

viennacl::matrix_base
Definition: matrix_def.hpp:103

viennacl::row_iteration
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:84

viennacl::matrix_expression
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341

viennacl::linalg::ambm
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:139

viennacl::traits::clear
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43

forwards.h
This file provides the forward declarations for the main types used within ViennaCL.

viennacl::matrix
A dense matrix class.
Definition: forwards.h:375

viennacl::matrix::operator=
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:804

viennacl::op_div
A tag class representing division.
Definition: forwards.h:98

viennacl::backend::memory_read
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261

viennacl::matrix_base::operator()
entry_proxy< NumericT > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:477

viennacl::matrix_iterator::matrix_iterator
matrix_iterator(MatrixT &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:98

viennacl::tools::MATRIX_SIZE_DEDUCER::size1
static vcl_size_t size1(LHS &lhs, RHS &)
Definition: matrix_size_deducer.hpp:50

viennacl::identity_matrix
Represents an identity matrix, i.e. ones on the diagonal and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69

viennacl::operator/=
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator/=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1664

viennacl::operator*
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT >, const S1, op_mult >>::type operator*(S1 const &value, matrix_base< NumericT > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1364

s1
viennacl::scalar< float > s1
Definition: global_variables.cpp:57

viennacl::traits::internal_size
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:371

viennacl::vector_expression
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:239

viennacl::linalg::element_op
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Definition: matrix_operations.hpp:702

matrix_def.hpp
Forward declaration of dense matrix classes.

viennacl::matrix_iterator::index2
vcl_size_t index2()
Definition: matrix.hpp:110

viennacl::linalg::detail::op_aliasing
bool op_aliasing(vector_base< NumericT > const &, B const &)
Definition: op_executor.hpp:36

viennacl::traits::size2
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201

viennacl::matrix::matrix
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:712

viennacl::matrix_expression::size1
vcl_size_t size1() const
Returns the number of rows of the resulting matrix.
Definition: matrix.hpp:72

viennacl::operator-
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
Definition: matrix_operations.hpp:1200

viennacl::matrix_iterator::operator++
self_type & operator++(void)
Definition: matrix.hpp:103

viennacl::matrix_iterator::operator++
self_type operator++(int)
Definition: matrix.hpp:104

NumericT
float NumericT
Definition: bisect.cpp:40

viennacl::op_matrix_diag
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:186

viennacl::matrix::size_type
base_type::size_type size_type
Definition: matrix.hpp:701

viennacl::context
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39

viennacl
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34

viennacl::matrix_iterator::operator*
value_type operator*(void)
Definition: matrix.hpp:102

viennacl::implicit_matrix_base::size2
size_type size2() const
Definition: matrix_def.hpp:45

viennacl::operator*=
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT > & >::type operator*=(matrix_base< NumericT > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1512

viennacl::matrix::matrix
matrix(zero_matrix< NumericT > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:760

viennacl::tools::CPU_SCALAR_TYPE_DEDUCER
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:225

viennacl::traits::size
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235

viennacl::matrix::matrix
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type internal_row_count, size_type cols, size_type internal_col_count)
Wraps a CUDA or host buffer provided by the user including padding of rows and columns.
Definition: matrix.hpp:737

viennacl::matrix_base::matrix_base
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix_def.hpp:117

detail
Definition: blas3.hpp:36

viennacl::linalg::convert
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
Definition: matrix_operations.hpp:54

viennacl::CUDA_MEMORY
Definition: forwards.h:350

viennacl::linalg::matrix_row
void matrix_row(const matrix_base< NumericT > &A, unsigned int i, vector_base< NumericT > &v)
Definition: matrix_operations.hpp:312

viennacl::implicit_matrix_base::size1
size_type size1() const
Definition: matrix_def.hpp:44

viennacl::matrix_expression::size2
vcl_size_t size2() const
Definition: matrix.hpp:73

viennacl::traits::resize
void resize(MatrixType &matrix, vcl_size_t rows, vcl_size_t cols)
Generic resize routine for resizing a matrix (ViennaCL, uBLAS, etc.) to a new size/dimension.
Definition: size.hpp:63

viennacl::matrix_base::clear
void clear()
Resets all entries to zero.
Definition: matrix.hpp:634

sparse_matrix_operations.hpp
Implementations of operations using sparse matrices.

viennacl::op_add
A tag class representing addition.
Definition: forwards.h:88

viennacl::scalar_matrix
Represents a matrix consisting of scalars 's' only, i.e. A(i,j) = s for all i, j. To be used as an initial...
Definition: matrix_def.hpp:93

viennacl::matrix::matrix
matrix(const self_type &other)
Definition: matrix.hpp:780

viennacl::matrix_expression::matrix_expression
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:62

viennacl::vector_base< NumericT >

viennacl::linalg::scaled_rank_1_update
void scaled_rank_1_update(matrix_base< NumericT > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
Definition: matrix_operations.hpp:859

viennacl::operator/
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1593

row_major.hpp
Determines whether a given expression has a row-major matrix layout.

viennacl::vcl_size_t
std::size_t vcl_size_t
Definition: forwards.h:75

viennacl::matrix_base::size2
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:226

viennacl::matrix_base< NumericT >::handle
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix_def.hpp:244

viennacl::matrix::matrix
matrix(const base_type &other)
Definition: matrix.hpp:773

viennacl::linalg::trans
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
Definition: matrix_operations.hpp:83

viennacl::diag
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:885

viennacl::traits::active_handle_id
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:218

viennacl::zero_matrix
Represents a matrix consisting of zeros only. To be used as an initializer for viennacl::matrix, matrix_range, or matrix_slice only.
Definition: matrix_def.hpp:81

viennacl::linalg::matrix_column
void matrix_column(const matrix_base< NumericT > &A, unsigned int j, vector_base< NumericT > &v)
Definition: matrix_operations.hpp:337

viennacl::matrix::operator=
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F2 > > const &B)
Definition: matrix.hpp:801

viennacl::matrix_base::size1
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:224

viennacl::row
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900

viennacl::matrix_expression::rhs
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:69

viennacl::MAIN_MEMORY
Definition: forwards.h:348

viennacl::operator+
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
Definition: matrix_operations.hpp:1182

viennacl::backend::mem_handle::switch_active_handle_id
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:121

viennacl::op_prod
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:94

viennacl::traits::context
viennacl::context context(T const &t)
Returns the context associated with an object.
Definition: context.hpp:40

viennacl::tools::align_to_multiple
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:133

viennacl::matrix_iterator
An iterator over the entries of a dense matrix.
Definition: forwards.h:369

viennacl::matrix::matrix
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:750

viennacl::matrix_base::row_major
bool row_major() const
Definition: matrix_def.hpp:248

viennacl::copy
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150

viennacl::traits::row_major
bool row_major(T const &)
Definition: row_major.hpp:38

viennacl::backend::typesafe_host_array::set
void set(vcl_size_t index, U value)
Definition: util.hpp:115

viennacl::matrix::matrix
matrix(scalar_matrix< NumericT > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:767

viennacl::matrix::resize
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:813

ScalarType
float ScalarType
Definition: fft_1d.cpp:42

viennacl::tools::shared_ptr::reset
void reset()
Definition: shared_ptr.hpp:123

viennacl::op_trans
A tag class representing transposed matrices.
Definition: forwards.h:220

matrix_size_deducer.hpp
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.

viennacl::backend::mem_handle::raw_size
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230

viennacl::matrix_base< NumericT >::internal_size2
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240

viennacl::matrix_range
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:440

viennacl::matrix_base::operator*=
self_type & operator*=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:517

viennacl::backend::memory_create
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the host pointer argument is provided, the buffer is initialized...
Definition: memory.hpp:87

m1
viennacl::matrix< float > m1
Definition: global_variables.cpp:63

viennacl::matrix_iterator::value_type
MatrixT::value_type value_type
Definition: matrix.hpp:96

viennacl::linalg::matrix_diagonal_assign
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
Definition: matrix_operations.hpp:234

viennacl::matrix_base< NumericT >::internal_size1
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238

viennacl::matrix_iterator::operator!=
bool operator!=(self_type const &other)
Definition: matrix.hpp:107

viennacl::column_major::mem_index
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:331

viennacl::column
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:908

handle.hpp
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...

viennacl::matrix_base::operator+=
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)

viennacl::matrix_base::cpu_value_type
NumericT cpu_value_type
Definition: matrix_def.hpp:111

viennacl::matrix::matrix
matrix(NumericT *ptr_to_mem, viennacl::memory_types mem_type, size_type rows, size_type cols)
Wraps a CUDA or host buffer provided by the user.
Definition: matrix.hpp:721

viennacl::matrix_base::operator/=
self_type & operator/=(char val)
Scales the matrix by a char (8-bit integer)
Definition: matrix.hpp:573

viennacl::linalg::prod_impl
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:438

viennacl::matrix_base::resize
void resize(size_type rows, size_type columns, bool preserve=true)
Definition: matrix.hpp:638

viennacl::traits::handle
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41

viennacl::memory_types
memory_types
Definition: forwards.h:345

viennacl::tools::MATRIX_SIZE_DEDUCER::size2
static vcl_size_t size2(LHS &, RHS &rhs)
Definition: matrix_size_deducer.hpp:51

viennacl::matrix::matrix
matrix(identity_matrix< NumericT > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:753

viennacl::linalg::matrix_diag_from_vector
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT > &A)
Dispatcher interface for A = diag(v, k)
Definition: matrix_operations.hpp:261

viennacl::matrix_expression::lhs
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:66

viennacl::op_row
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:192

viennacl::entry_proxy
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:233

viennacl::matrix_iterator::operator()
MatrixT & operator()(void) const
Definition: matrix.hpp:112

viennacl::linalg::ambm_m
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:174

scalar.hpp
Implementation of the ViennaCL scalar class.

viennacl::tools::MATRIX_ITERATOR_INCREMENTER::apply
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:621

result_of.hpp
A collection of compile time type deductions.

viennacl::row_major
A tag for row-major storage of a dense matrix.
Definition: forwards.h:304

viennacl::matrix::matrix
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:704

op_assign
Definition: self_assign.cpp:132

viennacl::matrix_iterator::operator==
bool operator==(self_type const &other)
Definition: matrix.hpp:106

viennacl::backend::mem_handle::ram_handle
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:99

viennacl::col_iteration
A tag indicating iteration along increasing column index of a matrix.
Definition: matrix.hpp:87

enable_if.hpp
Simple enable-if variant that uses the SFINAE pattern.

viennacl::linalg::column_sum_impl
void column_sum_impl(const matrix_base< NumericT > &A, vector_base< NumericT > &result)
Definition: matrix_operations.hpp:686

viennacl::matrix_base::operator-=
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)

viennacl::matrix::operator=
base_type & operator=(viennacl::matrix< OtherNumericT, F2 > const &B)
Definition: matrix.hpp:798

viennacl::fast_copy
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)