ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp File Reference

Implementations of vector operations using CUDA. More...

Go to the source code of this file.

Namespaces

 viennacl
 Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
 
 viennacl::linalg
 Provides all linear algebra operations which are not covered by operator overloads.
 
 viennacl::linalg::cuda
 Holds all CUDA compute kernels used by ViennaCL.
 
 viennacl::linalg::cuda::detail
 Helper functions for the CUDA linear algebra backend.
 

Macros

#define VIENNACL_MDOT_WORKGROUP_SIZE   128
 
#define VIENNACL_MDOT_WORKGROUP_NUM   128
 

Functions

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_kernel (DestNumericT *dest, unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, SrcNumericT const *src, unsigned int start_src, unsigned int inc_src)
 
template<typename DestNumericT , typename SrcNumericT >
void viennacl::linalg::cuda::convert (vector_base< DestNumericT > &dest, vector_base< SrcNumericT > const &src)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT , typename ScalarType1 >
void viennacl::linalg::cuda::av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha)
 
template<typename NumericT , typename ScalarT1 >
void viennacl::linalg::cuda::vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
 Assigns a constant value to a vector (or to a vector range or slice). More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
 Swaps the contents of two vectors, data is copied. More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT , typename OpT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy)
 Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax). More...
 
template<typename OpT >
void viennacl::linalg::cuda::element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy)
 
template<typename OpT >
void viennacl::linalg::cuda::element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void viennacl::linalg::cuda::element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
void viennacl::linalg::cuda::max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the maximum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::max_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the minimum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::min_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::sum_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the sum of all entries of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::sum_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the sum of all entries of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::cuda_abs (NumericT val)
 
__device__ unsigned long viennacl::linalg::cuda::cuda_abs (unsigned long val)
 
__device__ unsigned int viennacl::linalg::cuda::cuda_abs (unsigned int val)
 
__device__ unsigned short viennacl::linalg::cuda::cuda_abs (unsigned short val)
 
__device__ unsigned char viennacl::linalg::cuda::cuda_abs (unsigned char val)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
 
template<typename NumericT >
vcl_size_t viennacl::linalg::cuda::index_norm_inf (vector_base< NumericT > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus. More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta)
 
template<typename NumericT >
void viennacl::linalg::cuda::plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
 Computes a plane rotation of two vectors. More...
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_1 (NumericT const *X, unsigned int startX, unsigned int incX, unsigned int sizeX, NumericT *Y, unsigned int startY, unsigned int incY, unsigned int scan_offset, NumericT *carries)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_2 (NumericT *carries)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_3 (NumericT *Y, unsigned int startY, unsigned int incY, unsigned int sizeY, NumericT const *carries)
 
template<typename NumericT >
void viennacl::linalg::cuda::detail::scan_impl (vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
 Worker routine for scan routines. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::inclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an inclusive scan using CUDA. More...
 
template<typename NumericT >
void viennacl::linalg::cuda::exclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an exclusive scan using CUDA. More...
 

Detailed Description

Implementations of vector operations using CUDA.

This file provides the CUDA kernels (the __global__ functions) and the host-side functions for vector operations in the viennacl::linalg::cuda namespace.

Definition in file vector_operations.hpp.

Macro Definition Documentation

#define VIENNACL_MDOT_WORKGROUP_NUM   128

Definition at line 1804 of file vector_operations.hpp.

#define VIENNACL_MDOT_WORKGROUP_SIZE   128

Definition at line 1803 of file vector_operations.hpp.