ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
viennacl::linalg::cuda Namespace Reference

Holds all CUDA compute kernels used by ViennaCL. More...

Namespaces

 amg
 
 detail
 Helper functions for the CUDA linear algebra backend.
 

Classes

struct  mat_mult_matrix_index
 Helper struct for accessing an element of a row- or column-major matrix. More...
 

Functions

template<typename NumericT >
void bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
void bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision)
 
template<typename NumericT >
__device__ void writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda)
 Write data to global memory. More...
 
template<typename NumericT >
__device__ void compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2)
 Perform final stream compaction before writing data to global memory. More...
 
__device__ void scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Compute addresses to obtain compact list of block start addresses. More...
 
__device__ void scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 Perform scan to obtain number of eigenvalues before a specific block. More...
 
__device__ void scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int mat_size, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper)
 
template<typename NumericT >
__device__ void storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second)
 
template<typename NumericT >
__global__ void bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d: diagonal elements in global memory; g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n: size of matrix; lg: lower bound of input interval (e.g. Gerschgorin interval); ug: upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count: number of eigenvalues that are smaller than lg; ug_eig_count: number of eigenvalues that are smaller than ug; epsilon: desired accuracy of eigenvalues to compute. More...
 
template<typename NumericT >
__global__ void bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision)
 
template<typename NumericT >
__global__ void bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon)
 Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More...
 
__device__ int floorPow2 (int n)
 
__device__ int ceilPow2 (int n)
 
template<typename NumericT >
__device__ NumericT computeMidpoint (const NumericT left, const NumericT right)
 
template<class S , class T , class NumericT >
__device__ void storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<typename NumericT >
__device__ unsigned int computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
 
template<class S , class T , class NumericT >
__device__ void storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
 Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More...
 
template<class T >
__device__ void createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction)
 
template<class T , class NumericT >
__device__ void compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
 Perform stream compaction for second child intervals. More...
 
template<class T , class S , class NumericT >
__device__ void storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)
 
template<class T , class NumericT >
__device__ void subdivideActiveIntervalMulti (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
 Subdivide interval if active and not already converged. More...
 
template<class T , class NumericT >
__device__ void subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
 Subdivide interval if active and not already converged. More...
 
template<typename NumericT >
__global__ void matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT >
__global__ void matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
 Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). More...
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT >
__global__ void triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options)
 
template<typename NumericT , typename SolverTagT >
void inplace_solve (matrix_base< NumericT > const &mat, vector_base< NumericT > &vec, SolverTagT)
 Direct inplace solver for dense triangular systems (non-transposed version) More...
 
__host__ __device__ float2 operator+ (float2 a, float2 b)
 
__host__ __device__ float2 operator- (float2 a, float2 b)
 
template<typename SCALARTYPE >
__device__ float2 operator/ (float2 a, SCALARTYPE b)
 
__device__ float2 operator* (float2 in1, float2 in2)
 
__host__ __device__ double2 operator+ (double2 a, double2 b)
 
__host__ __device__ double2 operator- (double2 a, double2 b)
 
template<typename SCALARTYPE >
__host__ __device__ double2 operator/ (double2 a, SCALARTYPE b)
 
__host__ __device__ double2 operator* (double2 in1, double2 in2)
 
__device__ unsigned int get_reorder_num (unsigned int v, unsigned int bit_size)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Direct 2D algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major)
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 1D algorithm for computing Fourier transformation. More...
 
template<typename NumericT , unsigned int AlignmentV>
void radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
 Radix-2 2D algorithm for computing Fourier transformation. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign)
 
template<typename Numeric2T , typename NumericT >
__global__ void bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign)
 
template<typename NumericT >
__global__ void zero2 (NumericT *input1, NumericT *input2, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
 Bluestein's algorithm for computing Fourier transformation. More...
 
template<typename NumericT >
__global__ void fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size)
 
template<typename NumericT , unsigned int AlignmentV>
void multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
 Multiply two complex vectors and store the result in output. More...
 
template<typename Numeric2T , typename NumericT >
__global__ void fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor)
 
template<typename NumericT , unsigned int AlignmentV>
void normalize (viennacl::vector< NumericT, AlignmentV > &input)
 Normalize a vector by its own size. More...
 
template<typename NumericT >
__global__ void transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output)
 Transpose matrix. More...
 
template<typename NumericT >
__global__ void transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num)
 
template<typename NumericT , unsigned int AlignmentV>
void transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
 In-place transpose of a matrix. More...
 
template<typename RealT , typename ComplexT >
__global__ void real_to_complex (const RealT *in, ComplexT *out, unsigned int size)
 
template<typename NumericT >
void real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More...
 
template<typename ComplexT , typename RealT >
__global__ void complex_to_real (const ComplexT *in, RealT *out, unsigned int size)
 
template<typename NumericT >
void complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
 Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More...
 
template<typename NumericT >
__global__ void reverse_inplace (NumericT *vec, unsigned int size)
 
template<typename NumericT >
void reverse (viennacl::vector_base< NumericT > &in)
 Reverse the vector to the opposite order and save it in the input vector. More...
 
template<typename IndexT >
__global__ void extract_L_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices)
 
template<typename NumericT >
__global__ void extract_L_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements)
 
template<typename NumericT >
void extract_L (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
 
template<typename NumericT >
__global__ void ilu_scale_kernel_1 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, NumericT *D_elements)
 
template<typename NumericT >
__global__ void ilu_scale_kernel_2 (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements)
 Scales values in a matrix such that output = D * input * D, where D is a diagonal matrix (only the diagonal is provided) More...
 
template<typename NumericT >
void icc_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
 Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly. More...
 
template<typename NumericT >
__global__ void icc_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L)
 CUDA kernel for one Chow-Patel-ICC sweep. More...
 
template<typename NumericT >
void icc_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L)
 Performs one nonlinear relaxation step in the Chow-Patel-ICC using CUDA (cf. Algorithm 2 in paper) More...
 
template<typename IndexT >
__global__ void extract_LU_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices, unsigned int *U_row_indices)
 
template<typename NumericT >
__global__ void extract_LU_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements, unsigned int const *U_row_indices, unsigned int *U_col_indices, NumericT *U_elements)
 
template<typename NumericT >
void extract_LU (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
 
template<typename NumericT >
void ilu_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
 Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. More...
 
template<typename NumericT >
__global__ void ilu_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L, unsigned int const *U_trans_row_indices, unsigned int const *U_trans_col_indices, NumericT *U_trans_elements, NumericT const *U_trans_backup, NumericT const *aij_U_trans)
 CUDA kernel for one Chow-Patel-ILU sweep. More...
 
template<typename NumericT >
void ilu_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
 Performs one nonlinear relaxation step in the Chow-Patel-ILU using CUDA (cf. Algorithm 2 in paper) More...
 
template<typename NumericT >
__global__ void ilu_form_neumann_matrix_kernel (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements)
 
template<typename NumericT >
void ilu_form_neumann_matrix (compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
 
template<typename NumericT >
__global__ void pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void pipelined_cg_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
__global__ void pipelined_cg_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset)
 
template<typename NumericT >
void pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void pipelined_bicgstab_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename T >
__global__ void pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size)
 
template<typename T >
void pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More...
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k)
 
template<typename T >
void pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
 
template<typename NumericT >
void pipelined_gmres_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename DestNumericT , typename SrcNumericT >
void convert (matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
 
template<typename NumericT , typename SizeT , typename DistanceT >
void trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
 
template<typename NumericT , typename ScalarT >
void am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
void matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false)
 
template<typename NumericT >
void matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s)
 
template<typename NumericT >
void matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
 
template<typename NumericT >
void matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)
 
template<typename NumericT >
void matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
 
template<typename NumericT , typename SizeT , typename OpT >
void element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename SizeT , typename OpT >
void element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
void element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
void prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
 Carries out matrix-vector multiplication. More...
 
template<typename NumericT , typename ScalarT >
void prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta)
 Carries out matrix-matrix multiplication. More...
 
template<typename NumericT , typename ScalarT >
void scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
 The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update. More...
 
template<typename NumericT , typename VectorType >
void bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
 This function stores the diagonal and the superdiagonal of a matrix in two vectors. More...
 
template<typename NumericT >
void copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
 This function copies a row or a column from a matrix to a vector. More...
 
template<typename NumericT >
void house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
 This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D)
 This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More...
 
template<typename NumericT >
void house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
 This function updates the matrix Q, which is needed for the computation of the eigenvectors. More...
 
template<typename NumericT >
void givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
 This function updates the matrix Q. It is part of the tql2 algorithm. More...
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_col_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename T >
__global__ void bidiag_pack_row_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void bidiag_pack_column_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void copy_col_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_col_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_row_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void copy_row_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_left_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_left_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_right_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__global__ void house_update_A_right_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride)
 
template<typename T >
__device__ void col_reduce_lcl_array (T *sums, unsigned int th_Idx, unsigned int bl_Dim)
 
template<typename T >
__global__ void house_update_QL_row_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ)
 
template<typename T >
__global__ void house_update_QL_column_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ)
 
template<typename T >
__global__ void givens_next_row_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i)
 
template<typename T >
__global__ void givens_next_column_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename NumericT >
__global__ void matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols)
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_row_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha)
 
template<typename NumericT >
__global__ void element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type)
 
template<typename NumericT >
__global__ void matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2)
 
template<typename NumericT >
__global__ void vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2)
 
template<typename NumericT >
__global__ void el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size)
 Main CUDA kernel for nonnegative matrix factorization of dense matrices. More...
 
template<typename NumericT >
void nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf)
 The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More...
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2)
 
template<typename NumericT >
__global__ void as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename NumericT >
__global__ void asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3)
 
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void scalar_swap_kernel (NumericT *s1, NumericT *s2)
 
template<typename ScalarT1 , typename ScalarT2 >
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap (ScalarT1 &s1, ScalarT2 &s2)
 Swaps the contents of two scalars, data is copied. More...
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<typename NumericT >
__global__ void compressed_matrix_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<class NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a compressed_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out sparse_matrix-dense_matrix multiplication, the first matrix being compressed. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out matrix-trans(matrix) multiplication, the first matrix being compressed and the second transposed. More...
 
template<typename NumericT >
__global__ void compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size)
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag)
 Carries out triangular inplace solves. More...
 
template<typename NumericT >
__global__ void compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result)
 
template<typename NumericT >
void prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a compressed_compressed_matrix. More...
 
template<typename NumericT >
__global__ void coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a coordinate_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(COO)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with an ell_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More...
 
template<typename NumericT >
__global__ void sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, unsigned int block_size)
 
template<typename NumericT , typename IndexT >
void prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a sliced_ell_matrix. More...
 
template<typename NumericT >
__global__ void hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result)
 Carries out matrix-vector multiplication with a hyb_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out sparse-matrix dense-matrix multiplication with a hyb_matrix. More...
 
template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols)
 
template<typename NumericT , unsigned int AlignmentV>
void prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result)
 Carries out multiplication of a hyb_matrix with a transposed dense matrix. More...
 
template<typename NumericT >
__global__ void csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename NumericT >
__global__ void csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size)
 
template<typename IndexT >
__device__ IndexT round_to_next_power_of_2 (IndexT val)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group)
 
__device__ unsigned int merge_subwarp_symbolic (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int subwarpsize)
 
__device__ unsigned int merge_subwarp_symbolic_double (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int *output_array, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices)
 
template<typename NumericT >
__device__ unsigned int merge_subwarp_numeric (NumericT scaling_factor, unsigned int input_start, unsigned int input_end, const unsigned int *input_indices, const NumericT *input_values, unsigned int invalid_token, unsigned int *output_indices, NumericT *output_values, unsigned int id_in_warp, unsigned int subwarpsize)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices, NumericT *scratchpad_values)
 
template<typename IndexT >
__global__ void compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer)
 
template<typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer)
 
template<class NumericT , unsigned int AlignmentV>
void prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C)
 Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices. More...
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT subwarp_minimum_shuffle (IndexT min_index)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT subwarp_minimum_shared (IndexT min_index, IndexT id_in_warp, IndexT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__global__ void compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT subwarp_accumulate_shuffle (NumericT output_value)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT subwarp_accumulate_shared (NumericT output_value, unsigned int id_in_warp, NumericT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT >
__global__ void compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements)
 
template<typename DestNumericT , typename SrcNumericT >
__global__ void convert_kernel (DestNumericT *dest, unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, SrcNumericT const *src, unsigned int start_src, unsigned int inc_src)
 
template<typename DestNumericT , typename SrcNumericT >
void convert (vector_base< DestNumericT > &dest, vector_base< SrcNumericT > const &src)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
__global__ void av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT , typename ScalarType1 >
void av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT >
__global__ void avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3)
 
template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
 
template<typename NumericT >
__global__ void vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha)
 
template<typename NumericT , typename ScalarT1 >
void vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
 Assign a constant value to a vector (-range/-slice) More...
 
template<typename NumericT >
__global__ void vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
 Swaps the contents of two vectors, data is copied. More...
 
template<typename NumericT >
__global__ void element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT >
__global__ void element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type)
 
template<typename NumericT , typename OpT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy)
 Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More...
 
template<typename OpT >
void element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy)
 
template<typename OpT >
void element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy)
 
template<typename NumericT >
__global__ void vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2)
 
template<typename NumericT >
void element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy)
 
template<typename NumericT >
__global__ void inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
__global__ void vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT , typename ScalarT >
void inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
void inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
 Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More...
 
template<typename NumericT >
__global__ void inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results)
 
template<typename NumericT >
__global__ void inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results)
 
template<typename NumericT >
__global__ void vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result)
 
template<typename NumericT >
void inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result)
 
template<typename NumericT >
__global__ void norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
__global__ void norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer)
 
template<typename NumericT >
void norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^1-norm of a vector. More...
 
template<typename NumericT >
void norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the l^2-norm of a vector - implementation. More...
 
template<typename NumericT >
void norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
void norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the supremum-norm of a vector. More...
 
template<typename NumericT >
__global__ void vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result)
 
template<typename NumericT >
void max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the maximum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void max_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the minimum of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void min_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
void sum_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result)
 Computes the sum of all entries of a vector, both reduction stages run on the GPU. More...
 
template<typename NumericT >
void sum_cpu (vector_base< NumericT > const &vec1, NumericT &result)
 Computes the sum of all entries of a vector, first reduction stage on the GPU, second stage on the CPU. More...
 
template<typename NumericT >
__device__ NumericT cuda_abs (NumericT val)
 
__device__ unsigned long cuda_abs (unsigned long val)
 
__device__ unsigned int cuda_abs (unsigned int val)
 
__device__ unsigned short cuda_abs (unsigned short val)
 
__device__ unsigned char cuda_abs (unsigned char val)
 
template<typename NumericT >
__global__ void index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result)
 
template<typename NumericT >
vcl_size_t index_norm_inf (vector_base< NumericT > const &vec1)
 Computes the index of the first entry that is equal to the supremum-norm in modulus. More...
 
template<typename NumericT >
__global__ void plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta)
 
template<typename NumericT >
void plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
 Computes a plane rotation of two vectors. More...
 
template<typename NumericT >
__global__ void scan_kernel_1 (NumericT const *X, unsigned int startX, unsigned int incX, unsigned int sizeX, NumericT *Y, unsigned int startY, unsigned int incY, unsigned int scan_offset, NumericT *carries)
 
template<typename NumericT >
__global__ void scan_kernel_2 (NumericT *carries)
 
template<typename NumericT >
__global__ void scan_kernel_3 (NumericT *Y, unsigned int startY, unsigned int incY, unsigned int sizeY, NumericT const *carries)
 
template<typename NumericT >
void inclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an inclusive scan using CUDA. More...
 
template<typename NumericT >
void exclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output)
 This function implements an exclusive scan using CUDA. More...
 

Detailed Description

Holds all CUDA compute kernels used by ViennaCL.

Function Documentation

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::am ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 113 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 59 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_col_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 95 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 87 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::am_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 124 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 164 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 136 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 210 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 283 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 357 of file matrix_operations_col.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::ambm_m ( matrix_base< NumericT > &  mat1,
matrix_base< NumericT > const &  mat2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
matrix_base< NumericT > const &  mat3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 239 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 436 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 511 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 585 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 660 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 469 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 544 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 618 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 693 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 166 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 241 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
NumericT  fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 315 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ambm_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT fac2,
unsigned int  options2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT fac3,
unsigned int  options3,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2 
)

Definition at line 390 of file matrix_operations_row.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 77 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2 
)

Definition at line 48 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::as_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2 
)

Definition at line 60 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 191 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2,
const NumericT fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 99 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 120 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 141 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 162 of file scalar_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s ( ScalarT1 &  s1,
ScalarT2 const &  s2,
NumericT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
ScalarT3 const &  s3,
NumericT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 314 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
const NumericT fac2,
unsigned int  options2,
const NumericT s2,
const NumericT fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 222 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT const *  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 243 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT const *  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 264 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::asbs_s_kernel ( NumericT s1,
NumericT  fac2,
unsigned int  options2,
const NumericT s2,
NumericT  fac3,
unsigned int  options3,
const NumericT s3 
)

Definition at line 285 of file scalar_operations.hpp.

template<typename NumericT , typename ScalarType1 >
void viennacl::linalg::cuda::av ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarType1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha 
)

Definition at line 144 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 77 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::av_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 110 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 433 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 179 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 242 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 305 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 368 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT1 , typename ScalarT2 >
void viennacl::linalg::cuda::avbv_v ( vector_base< NumericT > &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT1 const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
vector_base< NumericT > const &  vec3,
ScalarT2 const &  beta,
vcl_size_t  len_beta,
bool  reciprocal_beta,
bool  flip_sign_beta 
)

Definition at line 735 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 483 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
const NumericT fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 546 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 609 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::avbv_v_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT  fac2,
unsigned int  options2,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT  fac3,
unsigned int  options3,
const NumericT vec3,
unsigned int  start3,
unsigned int  inc3 
)

Definition at line 672 of file vector_operations.hpp.

template<typename NumericT , typename VectorType >
void viennacl::linalg::cuda::bidiag_pack ( matrix_base< NumericT > &  A,
VectorType &  dh,
VectorType &  sh 
)

This function stores the diagonal and the superdiagonal of a matrix in two vectors.

Parameters
A: The matrix from which the vectors will be extracted.
dh: The vector in which the diagonal of the matrix will be stored.
sh: The vector in which the superdiagonal of the matrix will be stored.

Definition at line 2489 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel ( T *  A,
T *  D,
T *  S,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1456 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel ( T *  A,
T *  D,
T *  S,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1434 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon,
unsigned int *  g_num_one,
unsigned int *  g_num_blocks_mult,
NumericT g_left_one,
NumericT g_right_one,
unsigned int *  g_pos_one,
NumericT g_left_mult,
NumericT g_right_mult,
unsigned int *  g_left_count_mult,
unsigned int *  g_right_count_mult,
unsigned int *  g_blocks_mult,
unsigned int *  g_blocks_mult_sum 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d: diagonal elements in global memory
g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n: size of matrix
lg: lower bound of input interval (e.g. Gerschgorin interval)
ug: upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count: number of eigenvalues that are smaller than lg
ug_eig_count: number of eigenvalues that are smaller than ug
epsilon: desired accuracy of eigenvalues to compute

Definition at line 537 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
unsigned int *  blocks_mult,
unsigned int *  blocks_mult_sum,
NumericT g_left,
NumericT g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
NumericT g_lambda,
unsigned int *  g_pos,
NumericT  precision 
)

Perform second step of bisection algorithm for large matrices for intervals that after the first step contained more than one eigenvalue

Parameters
g_d: diagonal elements of symmetric, tridiagonal matrix
g_s: superdiagonal elements of symmetric, tridiagonal matrix
n: matrix size
blocks_mult: start addresses of blocks of intervals that are processed by one block of threads; each of the intervals contains more than one eigenvalue
blocks_mult_sum: total number of eigenvalues / singleton intervals in one block of intervals
g_left: left limits of intervals
g_right: right limits of intervals
g_left_count: number of eigenvalues less than left limits
g_right_count: number of eigenvalues less than right limits
g_lambda: final eigenvalue
g_pos: index of eigenvalue (in ascending order)
precision: desired precision of eigenvalues

Definition at line 68 of file bisect_kernel_large_multi.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
unsigned int  num_intervals,
NumericT g_left,
NumericT g_right,
unsigned int *  g_pos,
NumericT  precision 
)

Determine eigenvalues for large matrices for intervals that after the first step contained one eigenvalue

Parameters
g_d: diagonal elements of symmetric, tridiagonal matrix
g_s: superdiagonal elements of symmetric, tridiagonal matrix
n: matrix size
num_intervals: total number of intervals containing one eigenvalue after the first step
g_left: left interval limits
g_right: right interval limits
g_pos: index of interval / number of intervals that are smaller than right interval limit
precision: desired precision of eigenvalues

Definition at line 59 of file bisect_kernel_large_onei.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::bisectKernelSmall ( const NumericT g_d,
const NumericT g_s,
const unsigned int  n,
NumericT g_left,
NumericT g_right,
unsigned int *  g_left_count,
unsigned int *  g_right_count,
const NumericT  lg,
const NumericT  ug,
const unsigned int  lg_eig_count,
const unsigned int  ug_eig_count,
NumericT  epsilon 
)

Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.

Parameters
g_d: diagonal elements in global memory
g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0)
n: size of matrix
g_left: helper array
g_right: helper array
g_left_count: helper array
g_right_count: helper array
lg: lower bound of input interval (e.g. Gerschgorin interval)
ug: upper bound of input interval (e.g. Gerschgorin interval)
lg_eig_count: number of eigenvalues that are smaller than lg
ug_eig_count: number of eigenvalues that are smaller than ug
epsilon: desired accuracy of eigenvalues to compute

Definition at line 61 of file bisect_kernel_small.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 71 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_MultIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 133 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectLarge_OneIntervals ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataLarge< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  precision 
)

Definition at line 103 of file bisect_kernel_calls.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::bisectSmall ( const viennacl::linalg::detail::InputData< NumericT > &  input,
viennacl::linalg::detail::ResultDataSmall< NumericT > &  result,
const unsigned int  mat_size,
const NumericT  lg,
const NumericT  ug,
const NumericT  precision 
)

Definition at line 45 of file bisect_kernel_calls.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::bluestein ( viennacl::vector< NumericT, AlignmentV > &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t   
)

Bluestein's algorithm for computing Fourier transformation.

Currently works only for input data sizes less than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.

Definition at line 622 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_post ( Numeric2T *  Z,
Numeric2T *  out,
unsigned int  size,
NumericT  sign 
)

Definition at line 538 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::bluestein_pre ( Numeric2T *  input,
Numeric2T *  A,
Numeric2T *  B,
unsigned int  size,
unsigned int  ext_size,
NumericT  sign 
)

Definition at line 564 of file fft_operations.hpp.

__device__ int viennacl::linalg::cuda::ceilPow2 ( int  n)
inline

Compute the next higher power of two of n

Parameters
n: number for which the next higher power of two is sought

Definition at line 66 of file bisect_util.hpp.

template<typename T >
__device__ void viennacl::linalg::cuda::col_reduce_lcl_array ( T *  sums,
unsigned int  th_Idx,
unsigned int  bl_Dim 
)

Definition at line 1672 of file matrix_operations_col.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::compactIntervals ( NumericT *  s_left,
NumericT *  s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  mid,
NumericT  right,
unsigned int  mid_count,
unsigned int  right_count,
T *  s_compaction_list,
unsigned int  num_threads_active,
unsigned int  is_active_second 
)

Perform stream compaction for second child intervals.

Parameters
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
mid: midpoint of current interval (left of new interval)
right: upper limit of interval
mid_count: number of eigenvalues less than mid
right_count: number of eigenvalues less than right
s_compaction_list: list containing the indices where the data has to be stored
num_threads_active: number of active threads / intervals
is_active_second: marks whether the thread has a second non-empty child interval

Definition at line 440 of file bisect_util.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::compactStreamsFinal ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
unsigned int &  offset_mult_lambda,
NumericT s_left,
NumericT s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
unsigned short *  s_cl_one,
unsigned short *  s_cl_mult,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper,
unsigned int  is_one_lambda,
unsigned int  is_one_lambda_2,
NumericT left,
NumericT right,
NumericT left_2,
NumericT right_2,
unsigned int &  left_count,
unsigned int &  right_count,
unsigned int &  left_count_2,
unsigned int &  right_count_2,
unsigned int  c_block_iend,
unsigned int  c_sum_block,
unsigned int  c_block_iend_2,
unsigned int  c_sum_block_2 
)

Perform final stream compaction before writing data to global memory.

Definition at line 134 of file bisect_kernel_large.hpp.

template<typename ComplexT , typename RealT >
__global__ void viennacl::linalg::cuda::complex_to_real ( const ComplexT *  in,
RealT *  out,
unsigned int  size 
)

Definition at line 809 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::complex_to_real ( viennacl::vector_base< NumericT > const &  in,
viennacl::vector_base< NumericT > &  out,
vcl_size_t  size 
)

Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)

Definition at line 819 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel ( const unsigned int *  row_jumper,
const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
unsigned int  nonzero_rows,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 909 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT sp_mat_elements,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 323 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_row_indices,
const unsigned int *  sp_mat_col_indices,
const NumericT sp_mat_elements,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 477 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT result,
unsigned int  size 
)

Definition at line 639 of file sparse_matrix_operations.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_A2 ( IndexT *  A2_row_indices,
IndexT *  A2_col_indices,
NumericT A2_elements,
IndexT  A2_size1,
IndexT *  new_row_buffer 
)

Definition at line 484 of file spgemm.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 ( const IndexT *  A_row_indices,
IndexT  A_size1,
IndexT  max_per_row,
IndexT *  chunks_per_row 
)

Definition at line 469 of file spgemm.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_G1 ( IndexT *  G1_row_indices,
IndexT *  G1_col_indices,
NumericT G1_elements,
IndexT  G1_size1,
IndexT const *  A_row_indices,
IndexT const *  A_col_indices,
NumericT const *  A_elements,
IndexT  A_size1,
IndexT  A_nnz,
IndexT  max_per_row,
IndexT *  new_row_buffer 
)

Definition at line 511 of file spgemm.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
IndexT *  subwarpsize_per_group,
IndexT *  max_nnz_row_A_per_group,
IndexT *  max_nnz_row_B_per_group 
)

Definition at line 82 of file spgemm.hpp.

template<unsigned int SubWarpSizeV, typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
IndexT  B_size2,
IndexT *  C_row_indices 
)

Definition at line 162 of file spgemm_rmerge.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
IndexT  B_size2,
IndexT *  C_row_indices,
unsigned int *  subwarpsize_array,
unsigned int *  max_row_size_A,
unsigned int *  max_row_size_B,
unsigned int *  scratchpad_offsets,
unsigned int *  scratchpad_indices 
)

Definition at line 217 of file spgemm.hpp.

template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
const NumericT A_elements,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
const NumericT B_elements,
IndexT  B_size2,
IndexT const *  C_row_indices,
IndexT *  C_col_indices,
NumericT C_elements 
)

Definition at line 251 of file spgemm_rmerge.hpp.

template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
const NumericT A_elements,
IndexT  A_size1,
const IndexT *  B_row_indices,
const IndexT *  B_col_indices,
const NumericT B_elements,
IndexT  B_size2,
IndexT const *  C_row_indices,
IndexT *  C_col_indices,
NumericT C_elements,
unsigned int *  subwarpsize_array,
unsigned int *  max_row_size_A,
unsigned int *  max_row_size_B,
unsigned int *  scratchpad_offsets,
unsigned int *  scratchpad_indices,
NumericT scratchpad_values 
)

Definition at line 365 of file spgemm.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_adaptive_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT elements,
unsigned int  num_blocks,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 167 of file sparse_matrix_operations.hpp.

template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result 
)

Definition at line 125 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::computeMidpoint ( const NumericT  left,
const NumericT  right 
)
inline

Compute midpoint of interval [left, right] avoiding overflow if possible

Parameters
left: left / lower limit of interval
right: right / upper limit of interval

Definition at line 89 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvals ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT *  s_d,
NumericT *  s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d: diagonal elements stored in global memory
g_s: superdiagonal elements stored in global memory
n: size of matrix
x: value for which the number of smaller eigenvalues is sought
tid: thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active: number of active intervals / threads that currently process an interval
s_d: scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s: scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged: flag indicating whether the current thread has already converged (that is, the count does not have to be computed)

Definition at line 177 of file bisect_util.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::computeNumSmallerEigenvalsLarge ( const NumericT *  g_d,
const NumericT *  g_s,
const unsigned int  n,
const NumericT  x,
const unsigned int  tid,
const unsigned int  num_intervals_active,
NumericT *  s_d,
NumericT *  s_s,
unsigned int  converged 
)
inline

Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix

Parameters
g_d: diagonal elements stored in global memory
g_s: superdiagonal elements stored in global memory
n: size of matrix
x: value for which the number of smaller eigenvalues is sought
tid: thread identifier (e.g. threadIdx.x or gtid)
num_intervals_active: number of active intervals / threads that currently process an interval
s_d: scratch space to store diagonal entries of the tridiagonal matrix in shared memory
s_s: scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory
converged: flag indicating whether the current thread has already converged (that is, the count does not have to be computed)

Definition at line 237 of file bisect_util.hpp.

template<typename DestNumericT , typename SrcNumericT >
void viennacl::linalg::cuda::convert ( matrix_base< DestNumericT > &  mat1,
matrix_base< SrcNumericT > const &  mat2 
)

Definition at line 57 of file matrix_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
void viennacl::linalg::cuda::convert ( vector_base< DestNumericT > &  dest,
vector_base< SrcNumericT > const &  src 
)

Definition at line 59 of file vector_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_col_kernel ( DestNumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const SrcNumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 34 of file matrix_operations_col.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_kernel ( DestNumericT *  dest,
unsigned int  start_dest,
unsigned int  inc_dest,
unsigned int  size_dest,
SrcNumericT const *  src,
unsigned int  start_src,
unsigned int  inc_src 
)

Definition at line 48 of file vector_operations.hpp.

template<typename DestNumericT , typename SrcNumericT >
__global__ void viennacl::linalg::cuda::convert_row_kernel ( DestNumericT *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const SrcNumericT *  B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 34 of file matrix_operations_row.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1225 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1420 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result 
)

Definition at line 1125 of file sparse_matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1498 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_col_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1480 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1535 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::copy_row_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size,
unsigned int  stride 
)

Definition at line 1516 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::copy_vec ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  V,
vcl_size_t  row_start,
vcl_size_t  col_start,
bool  copy_col 
)

This function copies a row or a column from a matrix to a vector.

Parameters
A: The matrix to copy from.
V: The vector to fill with data.
row_start: The number of the first row to copy.
col_start: The number of the first column to copy.
copy_col: Set to TRUE to copy a column, FALSE to copy a row.

Definition at line 2526 of file matrix_operations.hpp.

template<class T >
__device__ void viennacl::linalg::cuda::createIndicesCompaction ( T *  s_compaction_list_exc,
unsigned int  num_threads_compaction 
)

Create indices for compaction: process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and create for each of the non-zero elements the index where the new interval belongs in a compact representation of all generated second children.

Parameters
s_compaction_list_exc: list containing the flags indicating which threads generated two children
num_threads_compaction: number of threads to employ for compaction

Definition at line 373 of file bisect_util.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward ( const unsigned int *  row_jumper_U,
const unsigned int *  column_indices_U,
const NumericT elements_U,
const NumericT diagonal_U,
const unsigned int *  block_offsets,
NumericT result,
unsigned int  size 
)

Definition at line 700 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward ( const unsigned int *  row_jumper_L,
const unsigned int *  column_indices_L,
const NumericT elements_L,
const unsigned int *  block_offsets,
NumericT result,
unsigned int  size 
)

Definition at line 668 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 257 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 110 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 597 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 563 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT diagonal_entries,
NumericT vector,
unsigned int  size 
)

Definition at line 429 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 342 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 497 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 367 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 180 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
NumericT vector,
unsigned int  size 
)

Definition at line 42 of file sparse_matrix_operations_solve.hpp.

template<typename NumericT >
__device__ NumericT viennacl::linalg::cuda::cuda_abs ( NumericT  val)

Definition at line 2910 of file vector_operations.hpp.

__device__ unsigned long viennacl::linalg::cuda::cuda_abs ( unsigned long  val)
inline

Definition at line 2911 of file vector_operations.hpp.

__device__ unsigned int viennacl::linalg::cuda::cuda_abs ( unsigned int  val)
inline

Definition at line 2912 of file vector_operations.hpp.

__device__ unsigned short viennacl::linalg::cuda::cuda_abs ( unsigned short  val)
inline

Definition at line 2913 of file vector_operations.hpp.

__device__ unsigned char viennacl::linalg::cuda::cuda_abs ( unsigned char  val)
inline

Definition at line 2914 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::vector< NumericT, AlignmentV > const &  in,
viennacl::vector< NumericT, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 1D algorithm for computing Fourier transformation.

Works for any data size. The serial implementation has O(n^2) complexity.

Definition at line 197 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::direct ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &  in,
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  out,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Direct 2D algorithm for computing Fourier transformation.

Works for any data size. The serial implementation has O(n^2) complexity.

Definition at line 222 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::el_wise_mul_div ( NumericT matrix1,
NumericT const *  matrix2,
NumericT const *  matrix3,
unsigned int  size 
)

Main CUDA kernel for nonnegative matrix factorization of dense matrices.

Definition at line 38 of file nmf_operations.hpp.

template<typename NumericT , typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT, SizeT > &  A,
matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 548 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< float, SizeT > &  A,
matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 608 of file matrix_operations.hpp.

template<typename SizeT , typename OpT >
void viennacl::linalg::cuda::element_op ( matrix_base< double, SizeT > &  A,
matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 668 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 736 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 778 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 820 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 862 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 904 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 946 of file matrix_operations.hpp.

template<typename NumericT , typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &  proxy 
)

Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)

Parameters
vec1: The result vector (or -range, or -slice)
proxy: The proxy object holding v2, v3 and the operation

Definition at line 957 of file vector_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< float > &  vec1,
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 985 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 988 of file matrix_operations.hpp.

template<typename OpT >
void viennacl::linalg::cuda::element_op ( vector_base< double > &  vec1,
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &  proxy 
)

Definition at line 1013 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 1030 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &  proxy 
)

Definition at line 1056 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1072 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &  proxy 
)

Definition at line 1083 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &  proxy 
)

Definition at line 1109 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1114 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &  proxy 
)

Definition at line 1135 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1156 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &  proxy 
)

Definition at line 1161 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &  proxy 
)

Definition at line 1187 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1198 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &  proxy 
)

Definition at line 1213 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &  proxy 
)

Definition at line 1239 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1240 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &  proxy 
)

Definition at line 1264 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1282 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &  proxy 
)

Definition at line 1291 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &  proxy 
)

Definition at line 1317 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1324 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &  proxy 
)

Definition at line 1343 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1366 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &  proxy 
)

Definition at line 1369 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &  proxy 
)

Definition at line 1395 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( matrix_base< NumericT > &  A,
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1408 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &  proxy 
)

Definition at line 1421 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &  proxy 
)

Definition at line 1447 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::element_op ( vector_base< NumericT > &  vec1,
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &  proxy 
)

Definition at line 1473 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 776 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 825 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 915 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 856 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2,
NumericT const *  vec3,
unsigned int  start3,
unsigned int  inc3,
unsigned int  op_type 
)

Definition at line 869 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::element_op_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
const NumericT C,
unsigned int  C_start1,
unsigned int  C_start2,
unsigned int  C_inc1,
unsigned int  C_inc2,
unsigned int  C_internal_size1,
unsigned int  C_internal_size2,
unsigned int  op_type 
)

Definition at line 807 of file matrix_operations_row.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const NumericT sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1690 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel ( const unsigned int *  sp_mat_coords,
const NumericT sp_mat_elements,
unsigned int  sp_mat_row_num,
unsigned int  sp_mat_col_num,
unsigned int  sp_mat_internal_row_num,
unsigned int  sp_mat_items_per_row,
unsigned int  sp_mat_aligned_items_per_row,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 1863 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  col_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)

Definition at line 1620 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::exclusive_scan ( vector_base< NumericT > const &  input,
vector_base< NumericT > &  output 
)

This function implements an exclusive scan using CUDA.

Parameters
input: Input vector
output: The output vector. Either identical to input or non-overlapping.

Definition at line 3239 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::extract_L ( compressed_matrix< NumericT > const &  A,
compressed_matrix< NumericT > &  L 
)

Definition at line 107 of file ilu_operations.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::extract_L_kernel_1 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
unsigned int  A_size1,
unsigned int *  L_row_indices 
)

Definition at line 47 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::extract_L_kernel_2 ( unsigned int const *  A_row_indices,
unsigned int const *  A_col_indices,
NumericT const *  A_elements,
unsigned int  A_size1,
unsigned int const *  L_row_indices,
unsigned int *  L_col_indices,
NumericT L_elements 
)

Definition at line 73 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::extract_LU ( compressed_matrix< NumericT > const &  A,
compressed_matrix< NumericT > &  L,
compressed_matrix< NumericT > &  U 
)

Definition at line 387 of file ilu_operations.hpp.

template<typename IndexT >
__global__ void viennacl::linalg::cuda::extract_LU_kernel_1 ( const IndexT *  A_row_indices,
const IndexT *  A_col_indices,
unsigned int  A_size1,
unsigned int *  L_row_indices,
unsigned int *  U_row_indices 
)

Definition at line 308 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::extract_LU_kernel_2 ( unsigned int const *  A_row_indices,
unsigned int const *  A_col_indices,
NumericT const *  A_elements,
unsigned int  A_size1,
unsigned int const *  L_row_indices,
unsigned int *  L_col_indices,
NumericT L_elements,
unsigned int const *  U_row_indices,
unsigned int *  U_col_indices,
NumericT U_elements 
)

Definition at line 341 of file ilu_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_direct ( const Numeric2T *  input,
Numeric2T *  output,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 140 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_div_vec_scalar ( Numeric2T *  input1,
unsigned int  size,
NumericT  factor 
)

Definition at line 690 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::fft_mult_vec ( const NumericT input1,
const NumericT input2,
NumericT output,
unsigned int  size 
)

Definition at line 657 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_radix2 ( Numeric2T *  input,
unsigned int  s,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 371 of file fft_operations.hpp.

template<typename Numeric2T , typename NumericT >
__global__ void viennacl::linalg::cuda::fft_radix2_local ( Numeric2T *  input,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
NumericT  sign,
bool  is_row_major 
)

Definition at line 298 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::fft_reorder ( NumericT input,
unsigned int  bit_size,
unsigned int  size,
unsigned int  stride,
unsigned int  batch_num,
bool  is_row_major 
)

Definition at line 241 of file fft_operations.hpp.

__device__ int viennacl::linalg::cuda::floorPow2 ( int  n)
inline

Compute the next lower power of two of n

Parameters
n: number for which the next lower power of two is sought

Definition at line 46 of file bisect_util.hpp.

__device__ unsigned int viennacl::linalg::cuda::get_reorder_num ( unsigned int  v,
unsigned int  bit_size 
)
inline

Definition at line 128 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::givens_next ( matrix_base< NumericT > &  Q,
vector_base< NumericT > &  tmp1,
vector_base< NumericT > &  tmp2,
int  l,
int  m 
)

This function updates the matrix Q. It is part of the tql2 algorithm.

Parameters
Q: The matrix to be updated.
tmp1: Vector with data from the tql2 algorithm.
tmp2: Vector with data from the tql2 algorithm.
l: Data from the tql2 algorithm.
m: Data from the tql2 algorithm.

Definition at line 2694 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::givens_next_column_major_kernel ( T *  matr,
T *  cs,
T *  ss,
unsigned int  size,
unsigned int  stride,
unsigned int  start_i,
unsigned int  end_i 
)

Definition at line 1792 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::givens_next_row_major_kernel ( T *  matr,
T *  cs,
T *  ss,
unsigned int  size,
unsigned int  stride,
unsigned int  start_i,
unsigned int  end_i 
)

Definition at line 1745 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_A_left ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  D,
vcl_size_t  start 
)

This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P.

Parameters
A: The matrix to be updated.
D: The normalized householder vector.
start: The repetition counter.

Definition at line 2587 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_left_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1581 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_left_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1556 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_A_right ( matrix_base< NumericT > &  A,
vector_base< NumericT > &  D 
)

This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P.

Parameters
A: The matrix to be updated.
D: The normalized householder vector.

Definition at line 2626 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_right_column_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1639 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_A_right_row_major_kernel ( T *  A,
T *  V,
unsigned int  row_start,
unsigned int  col_start,
unsigned int  size1,
unsigned int  size2,
unsigned int  stride 
)

Definition at line 1608 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::house_update_QL ( matrix_base< NumericT > &  Q,
vector_base< NumericT > &  D,
vcl_size_t  A_size1 
)

This function updates the matrix Q, which is needed for the computation of the eigenvectors.

Parameters
Q: The matrix to be updated.
D: The householder vector.
A_size1: size1 of matrix A

Definition at line 2663 of file matrix_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_QL_column_major_kernel ( T *  QL,
T *  V,
unsigned int  size1,
unsigned int  strideQ 
)

Definition at line 1717 of file matrix_operations_col.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::house_update_QL_row_major_kernel ( T *  QL,
T *  V,
unsigned int  size1,
unsigned int  strideQ 
)

Definition at line 1690 of file matrix_operations_col.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 2207 of file sparse_matrix_operations.hpp.

template<typename DMatIndexT , typename ResultIndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel ( const unsigned int *  ell_coords,
const NumericT ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT csr_elements,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row,
const NumericT d_mat,
unsigned int  d_mat_row_start,
unsigned int  d_mat_col_start,
unsigned int  d_mat_row_inc,
unsigned int  d_mat_col_inc,
unsigned int  d_mat_row_size,
unsigned int  d_mat_col_size,
unsigned int  d_mat_internal_rows,
unsigned int  d_mat_internal_cols,
NumericT result,
unsigned int  result_row_start,
unsigned int  result_col_start,
unsigned int  result_row_inc,
unsigned int  result_col_inc,
unsigned int  result_row_size,
unsigned int  result_col_size,
unsigned int  result_internal_rows,
unsigned int  result_internal_cols 
)

Definition at line 2405 of file sparse_matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT csr_elements,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  row_num,
unsigned int  internal_row_num,
unsigned int  items_per_row,
unsigned int  aligned_items_per_row 
)

Definition at line 2123 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::icc_chow_patel_sweep ( compressed_matrix< NumericT > &  L,
vector< NumericT > const &  aij_L 
)

Performs one nonlinear relaxation step in the Chow-Patel-ICC using CUDA (cf. Algorithm 2 in paper)

Definition at line 285 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::icc_chow_patel_sweep_kernel ( unsigned int const *  L_row_indices,
unsigned int const *  L_col_indices,
NumericT L_elements,
NumericT const *  L_backup,
unsigned int  L_size1,
NumericT const *  aij_L 
)

CUDA kernel for one Chow-Patel-ICC sweep.

Definition at line 231 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::icc_scale ( compressed_matrix< NumericT > const &  A,
compressed_matrix< NumericT > &  L 
)

Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly.

Definition at line 203 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::ilu_chow_patel_sweep ( compressed_matrix< NumericT > &  L,
vector< NumericT > const &  aij_L,
compressed_matrix< NumericT > &  U_trans,
vector< NumericT > const &  aij_U_trans 
)

Performs one nonlinear relaxation step in the Chow-Patel-ILU using CUDA (cf. Algorithm 2 in paper)

Definition at line 576 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ilu_chow_patel_sweep_kernel ( unsigned int const *  L_row_indices,
unsigned int const *  L_col_indices,
NumericT L_elements,
NumericT const *  L_backup,
unsigned int  L_size1,
NumericT const *  aij_L,
unsigned int const *  U_trans_row_indices,
unsigned int const *  U_trans_col_indices,
NumericT U_trans_elements,
NumericT const *  U_trans_backup,
NumericT const *  aij_U_trans 
)

CUDA kernel for one Chow-Patel-ILU sweep.

Definition at line 476 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::ilu_form_neumann_matrix ( compressed_matrix< NumericT > &  R,
vector< NumericT > &  diag_R 
)

Definition at line 649 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ilu_form_neumann_matrix_kernel ( unsigned int const *  R_row_indices,
unsigned int const *  R_col_indices,
NumericT R_elements,
unsigned int  R_size1,
NumericT D_elements 
)

Definition at line 611 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::ilu_scale ( compressed_matrix< NumericT > const &  A,
compressed_matrix< NumericT > &  L,
compressed_matrix< NumericT > &  U 
)

Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly.

Definition at line 438 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ilu_scale_kernel_1 ( unsigned int const *  A_row_indices,
unsigned int const *  A_col_indices,
NumericT const *  A_elements,
unsigned int  A_size1,
NumericT D_elements 
)

Definition at line 148 of file ilu_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::ilu_scale_kernel_2 ( unsigned int const *  R_row_indices,
unsigned int const *  R_col_indices,
NumericT R_elements,
unsigned int  R_size1,
NumericT D_elements 
)

Scales values in a matrix such that output = D * input * D, where D is a diagonal matrix (only the diagonal is provided)

Definition at line 177 of file ilu_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inclusive_scan ( vector_base< NumericT > const &  input,
vector_base< NumericT > &  output 
)

This function implements an inclusive scan using CUDA.

Parameters
input: Input vector.
output: The output vector. Either identical to input or non-overlapping.

Definition at line 3226 of file vector_operations.hpp.

template<typename NumericT >
vcl_size_t viennacl::linalg::cuda::index_norm_inf ( vector_base< NumericT > const &  vec1)

Computes the index of the first entry that is equal to the supremum-norm in modulus.

Parameters
vec1: The vector
Returns
The result. Note that the result must be a CPU scalar (unsigned int), since gpu scalars are floating point types.

Definition at line 2972 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel ( const NumericT vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int *  result 
)

Definition at line 2917 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel ( const NumericT x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT y1,
unsigned int  start1,
unsigned int  stride1,
NumericT group_results 
)

Definition at line 1807 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel ( const NumericT x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT y2,
unsigned int  start2,
unsigned int  stride2,
NumericT group_results 
)

Definition at line 1846 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel ( const NumericT x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT y3,
unsigned int  start3,
unsigned int  stride3,
NumericT group_results 
)

Definition at line 1891 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel ( const NumericT x,
unsigned int  startx,
unsigned int  stridex,
unsigned int  sizex,
const NumericT y0,
unsigned int  start0,
unsigned int  stride0,
const NumericT y1,
unsigned int  start1,
unsigned int  stride1,
const NumericT y2,
unsigned int  start2,
unsigned int  stride2,
const NumericT y3,
unsigned int  start3,
unsigned int  stride3,
const NumericT y4,
unsigned int  start4,
unsigned int  stride4,
const NumericT y5,
unsigned int  start5,
unsigned int  stride5,
const NumericT y6,
unsigned int  start6,
unsigned int  stride6,
const NumericT y7,
unsigned int  start7,
unsigned int  stride7,
NumericT group_results 
)

Definition at line 1942 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_cpu ( vector_base< NumericT > const &  vec1,
vector_base< NumericT > const &  vec2,
NumericT result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the host)

Definition at line 1771 of file vector_operations.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< NumericT > const &  vec1,
vector_base< NumericT > const &  vec2,
ScalarT &  result 
)

Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).

Parameters
vec1: The first vector
vec2: The second vector
result: The result scalar (on the gpu)

Definition at line 1739 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::inner_prod_impl ( vector_base< NumericT > const &  x,
vector_tuple< NumericT > const &  vec_tuple,
vector_base< NumericT > &  result 
)

Definition at line 2039 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::inner_prod_kernel ( const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
NumericT group_buffer 
)

Definition at line 1493 of file vector_operations.hpp.

template<typename NumericT , typename SolverTagT >
void viennacl::linalg::cuda::inplace_solve ( matrix_base< NumericT > const &  A,
matrix_base< NumericT > &  B,
SolverTagT  tag 
)

Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation).

Parameters
A: The system matrix
B: The matrix of row vectors, where the solution is directly written to
tag: Solver tag for identifying the respective triangular solver
Examples:
blas2.cpp and least-squares.cpp.

Definition at line 253 of file direct_solve.hpp.

template<typename NumericT , typename SolverTagT >
void viennacl::linalg::cuda::inplace_solve ( matrix_base< NumericT > const &  mat,
vector_base< NumericT > &  vec,
SolverTagT   
)

Direct inplace solver for dense triangular systems (non-transposed version)

Parameters
mat: The system matrix proxy
vec: The load vector, where the solution is directly written to

Definition at line 398 of file direct_solve.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right hand side. Is overwritten by the solution.

Definition at line 673 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 694 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 716 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const SparseMatrixT &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 737 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 761 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::lower_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 782 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::unit_upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 813 of file sparse_matrix_operations.hpp.

template<typename SparseMatrixT , typename NumericT >
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve ( const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &  mat,
viennacl::vector_base< NumericT > &  vec,
viennacl::linalg::upper_tag   
)

Carries out triangular inplace solves.

Parameters
mat: The matrix
vec: The vector holding the right-hand side. It is overwritten by the solution.

Definition at line 834 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_assign ( matrix_base< NumericT > &  mat,
NumericT  s,
bool  clear = false 
)

Definition at line 316 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 739 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 757 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 872 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 895 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 918 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 941 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 964 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 987 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1010 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1033 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1056 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1079 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1125 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1102 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1148 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1171 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1194 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1217 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel ( NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1240 of file matrix_operations_col.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_column ( const matrix_base< NumericT > &  mat,
unsigned int  j,
vector_base< NumericT > &  vec 
)

Definition at line 509 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diag_from_vector ( const vector_base< NumericT > &  vec,
int  k,
matrix_base< NumericT > &  mat 
)

Definition at line 377 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diag_to_vector ( matrix_base< NumericT > const &  mat,
int  k,
vector_base< NumericT > &  vec 
)

Definition at line 429 of file matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_diagonal_assign ( matrix_base< NumericT > &  mat,
NumericT  s 
)

Definition at line 348 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 39 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 126 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 213 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 300 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 750 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 837 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 924 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1011 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1464 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1551 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1638 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1725 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2179 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2266 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2353 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2440 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel ( const NumericT * A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
NumericT * B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  unit_diagonal 
)

Definition at line 107 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 394 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel ( NumericT  alpha,
const NumericT * A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT * B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT * C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 481 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 568 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 655 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1105 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1192 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1279 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1366 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1820 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1907 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 1994 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2081 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2536 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2623 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2710 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel ( NumericT  alpha,
const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT B,
unsigned int  B_row_start,
unsigned int  B_col_start,
unsigned int  B_row_inc,
unsigned int  B_col_inc,
unsigned int  B_row_size,
unsigned int  B_col_size,
unsigned int  B_internal_rows,
unsigned int  B_internal_cols,
NumericT  beta,
NumericT C,
unsigned int  C_row_start,
unsigned int  C_col_start,
unsigned int  C_row_inc,
unsigned int  C_col_inc,
unsigned int  C_row_size,
unsigned int  C_col_size,
unsigned int  C_internal_rows,
unsigned int  C_internal_cols 
)

Definition at line 2797 of file matrix_operations_prod.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel ( const NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
bool  row_major_A,
NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_size1,
unsigned int  B_size2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
bool  row_major_B,
bool  unit_diagonal 
)

Definition at line 41 of file direct_solve.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::matrix_row ( matrix_base< NumericT > const &  mat,
unsigned int  i,
vector_base< NumericT > &  vec 
)

Definition at line 476 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 770 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  alpha 
)

Definition at line 788 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 902 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 925 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 948 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 971 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 994 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1017 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1040 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1063 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1086 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1109 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1155 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1132 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1178 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1201 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1224 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1247 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_inc1,
unsigned int  B_inc2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2 
)

Definition at line 1270 of file matrix_operations_row.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::max_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU.

Parameters
vec1 - The vector
result - The result host scalar

Definition at line 2793 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::max_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the maximum of a vector, both reduction stages run on the GPU.

Parameters
vec1 - The vector
result - The result GPU scalar

Definition at line 2765 of file vector_operations.hpp.

template<typename NumericT >
__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_numeric ( NumericT  scaling_factor,
unsigned int  input_start,
unsigned int  input_end,
const unsigned int *  input_indices,
const NumericT input_values,
unsigned int  invalid_token,
unsigned int *  output_indices,
NumericT output_values,
unsigned int  id_in_warp,
unsigned int  subwarpsize 
)

Definition at line 303 of file spgemm.hpp.

__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_symbolic ( unsigned int  row_B_start,
unsigned int  row_B_end,
unsigned int const *  B_col_indices,
unsigned int  B_size2,
unsigned int  subwarpsize 
)
inline

Definition at line 149 of file spgemm.hpp.

__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_symbolic_double ( unsigned int  row_B_start,
unsigned int  row_B_end,
unsigned int const *  B_col_indices,
unsigned int  B_size2,
unsigned int *  output_array,
unsigned int  id_in_warp,
unsigned int  subwarpsize 
)
inline

Definition at line 173 of file spgemm.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::min_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU.

Parameters
vec1 - The vector
result - The result host scalar

Definition at line 2847 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::min_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the minimum of a vector, both reduction stages run on the GPU.

Parameters
vec1 - The vector
result - The result GPU scalar

Definition at line 2819 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::multiply_complex ( viennacl::vector< NumericT, AlignmentV > const &  input1,
viennacl::vector< NumericT, AlignmentV > const &  input2,
viennacl::vector< NumericT, AlignmentV > &  output 
)

Multiply two complex vectors and store the result in output.

Definition at line 674 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::nmf ( viennacl::matrix_base< NumericT > const &  V,
viennacl::matrix_base< NumericT > &  W,
viennacl::matrix_base< NumericT > &  H,
viennacl::linalg::nmf_config const &  conf 
)

The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.

Parameters
V - Input matrix
W - First factor
H - Second factor
conf - A configuration object holding tolerances and the like

Definition at line 59 of file nmf_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_1_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the l^1-norm of a vector.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2605 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_1_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the l^1-norm of a vector.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2587 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_2_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2651 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_2_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the l^2-norm of a vector - implementation.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2632 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the supremum-norm of a vector.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2699 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::norm_inf_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the supremum-norm of a vector.

Parameters
vec1 - The vector
result - The result scalar

Definition at line 2679 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_floats ( const NumericT vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT group_buffer 
)

Definition at line 2238 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_integers ( const NumericT vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT group_buffer 
)

Definition at line 2331 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers ( const NumericT vec,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  norm_selector,
NumericT group_buffer 
)

Definition at line 2415 of file vector_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::normalize ( viennacl::vector< NumericT, AlignmentV > &  input)

Normalize the vector by its own size.

Definition at line 700 of file fft_operations.hpp.

__device__ float2 viennacl::linalg::cuda::operator* ( float2  in1,
float2  in2 
)
inline

Definition at line 98 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator* ( double2  in1,
double2  in2 
)
inline

Definition at line 123 of file fft_operations.hpp.

__host__ __device__ float2 viennacl::linalg::cuda::operator+ ( float2  a,
float2  b 
)
inline

Definition at line 80 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator+ ( double2  a,
double2  b 
)
inline

Definition at line 104 of file fft_operations.hpp.

__host__ __device__ float2 viennacl::linalg::cuda::operator- ( float2  a,
float2  b 
)
inline

Definition at line 86 of file fft_operations.hpp.

__host__ __device__ double2 viennacl::linalg::cuda::operator- ( double2  a,
double2  b 
)
inline

Definition at line 110 of file fft_operations.hpp.

template<typename SCALARTYPE >
__device__ float2 viennacl::linalg::cuda::operator/ ( float2  a,
SCALARTYPE  b 
)
inline

Definition at line 92 of file fft_operations.hpp.

template<typename SCALARTYPE >
__host__ __device__ double2 viennacl::linalg::cuda::operator/ ( double2  a,
SCALARTYPE  b 
)
inline

Definition at line 117 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1139 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_adaptive_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT elements,
unsigned int  num_blocks,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 972 of file iterative_operations.hpp.

template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_blocked_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 896 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1287 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT csr_elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1490 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( compressed_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1080 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( coordinate_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1253 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1350 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( sliced_ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1456 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod ( hyb_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 1566 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT elements,
const NumericT p,
NumericT Ap,
const NumericT r0star,
unsigned int  size,
unsigned int  block_size,
NumericT inner_prod_buffer,
unsigned int  buffer_size,
unsigned int  buffer_offset 
)

Definition at line 1382 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_update_s ( vector_base< NumericT > &  s,
vector_base< NumericT > &  r,
vector_base< NumericT > const &  Ap,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Definition at line 791 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel ( NumericT s,
NumericT const *  residual,
NumericT const *  Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  chunk_size,
unsigned int  chunk_offset 
)

Definition at line 734 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel ( NumericT result,
NumericT  alpha,
NumericT p,
NumericT  omega,
NumericT const *  s,
NumericT residual,
NumericT const *  As,
NumericT  beta,
NumericT const *  Ap,
NumericT const *  r0star,
NumericT inner_prod_buffer,
unsigned int  size 
)

Definition at line 813 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_vector_update ( vector_base< NumericT > &  result,
NumericT  alpha,
vector_base< NumericT > &  p,
NumericT  omega,
vector_base< NumericT > const &  s,
vector_base< NumericT > &  residual,
vector_base< NumericT > const &  As,
NumericT  beta,
vector_base< NumericT > const &  Ap,
vector_base< NumericT > const &  r0star,
vector_base< NumericT > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 864 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
const unsigned int *  group_boundaries,
const NumericT p,
NumericT Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 331 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_adaptive_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const unsigned int *  row_blocks,
const NumericT elements,
unsigned int  num_blocks,
const NumericT p,
NumericT Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 181 of file iterative_operations.hpp.

template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_blocked_kernel ( const unsigned int *  row_indices,
const unsigned int *  column_indices,
const NumericT elements,
const NumericT p,
NumericT Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 114 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel ( const unsigned int *  coords,
const NumericT elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT p,
NumericT Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 463 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel ( const unsigned int *  ell_coords,
const NumericT ell_elements,
const unsigned int *  csr_rows,
const unsigned int *  csr_cols,
const NumericT csr_elements,
unsigned int  internal_row_num,
unsigned int  items_per_row,
const NumericT p,
NumericT Ap,
unsigned int  size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 638 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( compressed_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 280 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( coordinate_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 435 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 518 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( sliced_ell_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 610 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod ( hyb_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 706 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT elements,
const NumericT p,
NumericT Ap,
unsigned int  size,
unsigned int  block_size,
NumericT inner_prod_buffer,
unsigned int  buffer_size 
)

Definition at line 544 of file iterative_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel ( NumericT result,
NumericT  alpha,
NumericT p,
NumericT r,
NumericT const *  Ap,
NumericT  beta,
NumericT inner_prod_buffer,
unsigned int  size 
)

Definition at line 44 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_vector_update ( vector_base< NumericT > &  result,
NumericT  alpha,
vector_base< NumericT > &  p,
vector_base< NumericT > &  r,
vector_base< NumericT > const &  Ap,
NumericT  beta,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 85 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 ( vector_base< T > const &  device_krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vcl_size_t  param_k,
vector_base< T > &  vi_in_vk_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 1738 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel ( T const *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
unsigned int  k,
T *  vi_in_vk_buffer,
unsigned int  chunk_size 
)

Definition at line 1691 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 ( vector_base< T > &  device_krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vcl_size_t  param_k,
vector_base< T > const &  vi_in_vk_buffer,
vector_base< T > &  R_buffer,
vcl_size_t  krylov_dim,
vector_base< T > &  inner_prod_buffer,
vcl_size_t  buffer_chunk_size 
)

Definition at line 1830 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel ( T *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
unsigned int  k,
T const *  vi_in_vk_buffer,
unsigned int  chunk_size,
T *  R_buffer,
unsigned int  krylov_dim,
T *  inner_prod_buffer 
)

Definition at line 1763 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_normalize_vk ( vector_base< T > &  v_k,
vector_base< T > const &  residual,
vector_base< T > &  R_buffer,
vcl_size_t  offset_in_R,
vector_base< T > const &  inner_prod_buffer,
vector_base< T > &  r_dot_vk_buffer,
vcl_size_t  buffer_chunk_size,
vcl_size_t  buffer_chunk_offset 
)

Performs a vector normalization needed for an efficient pipelined GMRES algorithm.

This routine computes, for vectors 'r' and 'v_k': (1) the second reduction step for ||v_k||; (2) the normalization v_k /= ||v_k||; (3) the first reduction step for <r, v_k>.

Definition at line 1660 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel ( T *  vk,
unsigned int  vk_offset,
T const *  residual,
T *  R_buffer,
unsigned int  R_offset,
T const *  inner_prod_buffer,
unsigned int  chunk_size,
T *  r_dot_vk_buffer,
unsigned int  chunk_offset,
unsigned int  size 
)

Definition at line 1598 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::pipelined_gmres_prod ( compressed_matrix< NumericT > const &  A,
vector_base< NumericT > const &  p,
vector_base< NumericT > &  Ap,
vector_base< NumericT > &  inner_prod_buffer 
)

Definition at line 1907 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( coordinate_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1953 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( ell_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1975 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( sliced_ell_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 1996 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod ( hyb_matrix< T > const &  A,
vector_base< T > const &  p,
vector_base< T > &  Ap,
vector_base< T > &  inner_prod_buffer 
)

Definition at line 2019 of file iterative_operations.hpp.

template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_update_result ( vector_base< T > &  result,
vector_base< T > const &  residual,
vector_base< T > const &  krylov_basis,
vcl_size_t  v_k_size,
vcl_size_t  v_k_internal_size,
vector_base< T > const &  coefficients,
vcl_size_t  param_k 
)

Definition at line 1882 of file iterative_operations.hpp.

template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel ( T *  result,
T const *  residual,
T const *  krylov_basis,
unsigned int  size,
unsigned int  internal_size,
T const *  coefficients,
unsigned int  k 
)

Definition at line 1862 of file iterative_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::plane_rotation ( vector_base< NumericT > &  vec1,
vector_base< NumericT > &  vec2,
NumericT  alpha,
NumericT  beta 
)

Computes a plane rotation of two vectors.

Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)

Parameters
vec1 - The first vector
vec2 - The second vector
alpha - The first transformation coefficient
beta - The second transformation coefficient

Definition at line 3032 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::plane_rotation_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2,
NumericT  alpha,
NumericT  beta 
)

Definition at line 2996 of file vector_operations.hpp.

template<class NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 246 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-dense matrix multiplication, the first matrix being a compressed_matrix.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_mat - The sparse matrix
d_mat - The dense matrix
result - The result matrix

Definition at line 385 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-trans(dense matrix) multiplication, the first matrix being a compressed_matrix and the second being transposed.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_mat - The sparse matrix
d_mat - The transposed dense matrix proxy
result - The result matrix

Definition at line 540 of file sparse_matrix_operations.hpp.

template<class NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( viennacl::compressed_matrix< NumericT, AlignmentV > const &  A,
viennacl::compressed_matrix< NumericT, AlignmentV > const &  B,
viennacl::compressed_matrix< NumericT, AlignmentV > &  C 
)

Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices.

Implementation of the convenience expression C = prod(A, B). Based on computing C(i, :) = A(i, :) * B by merging the respective rows of B.

Parameters
A - Left factor
B - Right factor
C - Result matrix

Definition at line 559 of file spgemm.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::prod_impl ( const viennacl::compressed_compressed_matrix< NumericT > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a compressed_compressed_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 952 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a coordinate_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 1202 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix (COO)-Dense Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat);

Parameters
sp_mat - The Sparse Matrix (Coordinate format)
d_mat - The Dense Matrix
result - The Result Matrix

Definition at line 1328 of file sparse_matrix_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT > &  mat,
bool  mat_transpose,
const vector_base< NumericT > &  vec,
vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication.

Implementation of the convenience expressions result = prod(mat, vec); and result = prod(trans(mat), vec);

Parameters
mat - The matrix
mat_transpose - Whether the matrix is to be transposed.
vec - The vector
result - The result vector

Definition at line 1464 of file matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::coordinate_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix (COO)-Dense Transposed Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));

Parameters
sp_mat - The Sparse Matrix (Coordinate format)
d_mat - The Dense Transposed Matrix
result - The Result Matrix

Definition at line 1522 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with an ell_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 1668 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format

Parameters
sp_mat - The sparse matrix (ELL)
d_mat - The dense matrix
result - The result matrix

Definition at line 1760 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::ell_matrix< NumericT, AlignmentV > &  sp_mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.

Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format

Parameters
sp_mat - The sparse matrix (ELL)
d_mat - The transposed dense matrix proxy
result - The result matrix

Definition at line 1933 of file sparse_matrix_operations.hpp.

template<typename NumericT , typename IndexT >
void viennacl::linalg::cuda::prod_impl ( const viennacl::sliced_ell_matrix< NumericT, IndexT > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a sliced_ell_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 2095 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::vector_base< NumericT > &  vec,
viennacl::vector_base< NumericT > &  result 
)

Carries out matrix-vector multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, vec);

Parameters
mat - The matrix
vec - The vector
result - The result vector

Definition at line 2181 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::matrix_base< NumericT > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, d_mat);

Parameters
mat - The sparse matrix
d_mat - The dense matrix (row- or column-major)
result - The dense result matrix (row- or column-major)

Definition at line 2288 of file sparse_matrix_operations.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::prod_impl ( const matrix_base< NumericT > &  A,
bool  trans_A,
const matrix_base< NumericT > &  B,
bool  trans_B,
matrix_base< NumericT > &  C,
ScalarT  alpha,
ScalarT  beta 
)

Carries out matrix-matrix multiplication.

Implementation of C = prod(A, B);

Definition at line 2384 of file matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl ( const viennacl::hyb_matrix< NumericT, AlignmentV > &  mat,
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &  d_mat,
viennacl::matrix_base< NumericT > &  result 
)

Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.

Implementation of the convenience expression result = prod(mat, trans(d_mat));

Parameters
mat - The sparse matrix
d_mat - Transposed matrix proxy object for the rhs dense matrix (row- or column-major)
result - The dense result matrix (row- or column-major)

Definition at line 2486 of file sparse_matrix_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::radix2 ( viennacl::vector< NumericT, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Radix-2 1D algorithm for computing Fourier transformation.

Works only on power-of-two sizes of data. The serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.

Definition at line 442 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::radix2 ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  batch_num,
NumericT  sign = NumericT(-1),
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Radix-2 2D algorithm for computing Fourier transformation.

Works only on power-of-two sizes of data. The serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.

Definition at line 494 of file fft_operations.hpp.

template<typename RealT , typename ComplexT >
__global__ void viennacl::linalg::cuda::real_to_complex ( const RealT *  in,
ComplexT *  out,
unsigned int  size 
)

Definition at line 782 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::real_to_complex ( viennacl::vector_base< NumericT > const &  in,
viennacl::vector_base< NumericT > &  out,
vcl_size_t  size 
)

Create a complex vector from a real vector (even elements (2*k) = real part, odd elements (2*k+1) = imaginary part).

Definition at line 797 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::reorder ( viennacl::vector< NumericT, AlignmentV > &  in,
vcl_size_t  size,
vcl_size_t  stride,
vcl_size_t  bits_datasize,
vcl_size_t  batch_num,
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER  data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR 
)

Definition at line 282 of file fft_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::reverse ( viennacl::vector_base< NumericT > &  in)

Reverse the vector to the opposite order and save it in the input vector.

Definition at line 847 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::reverse_inplace ( NumericT vec,
unsigned int  size 
)

Definition at line 832 of file fft_operations.hpp.

template<typename IndexT >
__device__ IndexT viennacl::linalg::cuda::round_to_next_power_of_2 ( IndexT  val)

Definition at line 63 of file spgemm.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scalar_swap_kernel ( NumericT s1,
NumericT s2 
)

Definition at line 345 of file scalar_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  val,
unsigned int  options2,
const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1355 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT val,
unsigned int  options2,
const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1395 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT  val,
unsigned int  options2,
const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1384 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel ( NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
const NumericT val,
unsigned int  options2,
const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
const NumericT vec2,
unsigned int  start2,
unsigned int  inc2,
unsigned int  size2 
)

Definition at line 1424 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT >
void viennacl::linalg::cuda::scaled_rank_1_update ( matrix_base< NumericT > &  mat1,
ScalarT const &  alpha,
vcl_size_t  len_alpha,
bool  reciprocal_alpha,
bool  flip_sign_alpha,
const vector_base< NumericT > &  vec1,
const vector_base< NumericT > &  vec2 
)

The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.

Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);

Parameters
mat1 - The matrix to be updated
alpha - The scaling factor (either a viennacl::scalar<>, float, or double)
len_alpha - Length of the buffer for an eventual final reduction step (currently always '1')
reciprocal_alpha - Use 1/alpha instead of alpha
flip_sign_alpha - Use -alpha instead of alpha
vec1 - The first vector
vec2 - The second vector

Definition at line 2416 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_1 ( NumericT const *  X,
unsigned int  startX,
unsigned int  incX,
unsigned int  sizeX,
NumericT Y,
unsigned int  startY,
unsigned int  incY,
unsigned int  scan_offset,
NumericT carries 
)

Definition at line 3063 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_2 ( NumericT carries)

Definition at line 3121 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::scan_kernel_3 ( NumericT Y,
unsigned int  startY,
unsigned int  incY,
unsigned int  sizeY,
NumericT const *  carries 
)

Definition at line 3147 of file vector_operations.hpp.

__device__ void viennacl::linalg::cuda::scanCompactBlocksStartAddress ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Compute addresses to obtain compact list of block start addresses.

Definition at line 239 of file bisect_kernel_large.hpp.

__device__ void viennacl::linalg::cuda::scanInitial ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  mat_size,
const unsigned int  num_threads_active,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_one,
unsigned short *  s_cl_mult,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Perform initial scan for compaction of intervals containing one and multiple eigenvalues; also do initial scan to build blocks.

Definition at line 370 of file bisect_kernel_large.hpp.

__device__ void viennacl::linalg::cuda::scanSumBlocks ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
const unsigned int  num_threads_compaction,
unsigned short *  s_cl_blocking,
unsigned short *  s_cl_helper 
)
inline

Perform scan to obtain number of eigenvalues before a specific block.

Definition at line 304 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::sliced_ell_matrix_vec_mul_kernel ( const unsigned int *  columns_per_block,
const unsigned int *  column_indices,
const unsigned int *  block_start,
const NumericT elements,
const NumericT x,
unsigned int  start_x,
unsigned int  inc_x,
unsigned int  size_x,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result,
unsigned int  size_result,
unsigned int  block_size 
)

Definition at line 2046 of file sparse_matrix_operations.hpp.

template<class S , class T , class NumericT >
__device__ void viennacl::linalg::cuda::storeInterval ( unsigned int  addr,
NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  left,
NumericT  right,
S  left_count,
S  right_count,
NumericT  precision 
)

Check if interval converged and store appropriately.

Parameters
addr: address where to store the information of the interval
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
left: lower limit of interval
right: upper limit of interval
left_count: eigenvalues less than left
right_count: eigenvalues less than right
precision: desired precision for eigenvalues

Definition at line 124 of file bisect_util.hpp.

template<class T , class S , class NumericT >
__device__ void viennacl::linalg::cuda::storeIntervalConverged ( NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
NumericT left,
NumericT mid,
NumericT right,
S &  left_count,
S &  mid_count,
S &  right_count,
T *  s_compaction_list_exc,
unsigned int &  compact_second_chunk,
const unsigned int  num_threads_active,
unsigned int &  is_active_second 
)

Definition at line 465 of file bisect_util.hpp.

template<class S , class T , class NumericT >
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervals ( unsigned int  addr,
const unsigned int  num_threads_active,
NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
NumericT  left,
NumericT  mid,
NumericT  right,
const S  left_count,
const S  mid_count,
const S  right_count,
NumericT  precision,
unsigned int &  compact_second_chunk,
T *  s_compaction_list_exc,
unsigned int &  is_active_second 
)

Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.

Parameters
addr: base address for storing intervals
num_threads_active: number of threads / intervals in current sweep
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
left: lower limit of interval
mid: midpoint of interval
right: upper limit of interval
left_count: eigenvalues less than left
mid_count: eigenvalues less than mid
right_count: eigenvalues less than right
precision: desired precision for eigenvalues
compact_second_chunk: shared mem flag if second chunk is used and ergo requires compaction
s_compaction_list_exc: helper array for stream compaction, s_compaction_list_exc[tid] = 1 when the thread generated two child intervals
is_active_second: mark if the thread has a second non-empty child interval

Definition at line 309 of file bisect_util.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervalsLarge ( unsigned int  addr,
const unsigned int  num_threads_active,
NumericT s_left,
NumericT s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
NumericT  left,
NumericT  mid,
NumericT  right,
const unsigned short  left_count,
const unsigned short  mid_count,
const unsigned short  right_count,
NumericT  epsilon,
unsigned int &  compact_second_chunk,
unsigned short *  s_compaction_list,
unsigned int &  is_active_second 
)

Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.

Definition at line 476 of file bisect_kernel_large.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::subdivideActiveInterval ( const unsigned int  tid,
NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
const unsigned int  num_threads_active,
NumericT left,
NumericT right,
unsigned int &  left_count,
unsigned int &  right_count,
NumericT mid,
unsigned int &  all_threads_converged 
)

Subdivide interval if active and not already converged.

Parameters
tid: id of thread
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
num_threads_active: number of active threads in warp
left: lower limit of interval
right: upper limit of interval
left_count: eigenvalues less than left
right_count: eigenvalues less than right
mid: midpoint of interval
all_threads_converged: shared memory flag if all threads are converged

Definition at line 582 of file bisect_util.hpp.

template<class T , class NumericT >
__device__ void viennacl::linalg::cuda::subdivideActiveIntervalMulti ( const unsigned int  tid,
NumericT s_left,
NumericT s_right,
T *  s_left_count,
T *  s_right_count,
const unsigned int  num_threads_active,
NumericT left,
NumericT right,
unsigned int &  left_count,
unsigned int &  right_count,
NumericT mid,
unsigned int &  all_threads_converged 
)

Subdivide interval if active and not already converged.

Parameters
tid: id of thread
s_left: shared memory storage for left interval limits
s_right: shared memory storage for right interval limits
s_left_count: shared memory storage for number of eigenvalues less than left interval limits
s_right_count: shared memory storage for number of eigenvalues less than right interval limits
num_threads_active: number of active threads in warp
left: lower limit of interval
right: upper limit of interval
left_count: eigenvalues less than left
right_count: eigenvalues less than right
mid: midpoint of interval
all_threads_converged: shared memory flag if all threads are converged

Definition at line 529 of file bisect_util.hpp.

template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shared ( NumericT  output_value,
unsigned int  id_in_warp,
NumericT shared_buffer 
)

Definition at line 241 of file spgemm_rmerge.hpp.

template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shuffle ( NumericT  output_value)

Definition at line 232 of file spgemm_rmerge.hpp.

template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shared ( IndexT  min_index,
IndexT  id_in_warp,
IndexT *  shared_buffer 
)

Definition at line 152 of file spgemm_rmerge.hpp.

template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shuffle ( IndexT  min_index)

Definition at line 143 of file spgemm_rmerge.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::sum_cpu ( vector_base< NumericT > const &  vec1,
NumericT result 
)

Computes the sum of all elements of a vector, first reduction stage on the GPU, second stage on the CPU.

Parameters
vec1: The vector
result: The result host scalar

Definition at line 2891 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::sum_impl ( vector_base< NumericT > const &  vec1,
scalar< NumericT > &  result 
)

Computes the sum of all elements of a vector, both reduction stages run on the GPU.

Parameters
vec1: The vector
result: The result GPU scalar

Definition at line 2874 of file vector_operations.hpp.

template<typename ScalarT1 , typename ScalarT2 >
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value >::type viennacl::linalg::cuda::swap ( ScalarT1 &  s1,
ScalarT2 &  s2 
)

Swaps the contents of two scalars, data is copied.

Parameters
s1: The first scalar
s2: The second scalar

Definition at line 361 of file scalar_operations.hpp.

template<typename NumericT , typename SizeT , typename DistanceT >
void viennacl::linalg::cuda::trans ( matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &  proxy,
matrix_base< NumericT > &  temp_trans 
)
Examples:
blas2.cpp.

Definition at line 94 of file matrix_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_kernel ( const NumericT A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_stride1,
unsigned int  A_stride2,
NumericT B,
unsigned int  B_start1,
unsigned int  B_start2,
unsigned int  B_internal_size1,
unsigned int  B_internal_size2,
unsigned int  B_stride1,
unsigned int  B_stride2,
bool  data_major 
)

Definition at line 56 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel ( const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1298 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel ( const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1342 of file matrix_operations_row.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::transpose ( const NumericT input,
NumericT output,
unsigned int  row_num,
unsigned int  col_num 
)

Definition at line 713 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::transpose ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &  input,
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  output 
)

Transpose matrix.

Definition at line 732 of file fft_operations.hpp.

template<typename NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::transpose ( viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &  input)

Transposes the matrix in place.

Definition at line 770 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::transpose_inplace ( NumericT input,
unsigned int  row_num,
unsigned int  col_num 
)

Definition at line 746 of file fft_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel ( NumericT const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)

Definition at line 307 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel ( NumericT const *  A,
unsigned int  A_start1,
unsigned int  A_start2,
unsigned int  A_inc1,
unsigned int  A_inc2,
unsigned int  A_size1,
unsigned int  A_size2,
unsigned int  A_internal_size1,
unsigned int  A_internal_size2,
NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
unsigned int  options 
)

Definition at line 266 of file direct_solve.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1255 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1047 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1074 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1100 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1126 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1152 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1178 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1204 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1230 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1282 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1334 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_log_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1308 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1360 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1386 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1412 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1438 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT const *  vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 1464 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel ( const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1267 of file matrix_operations_col.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel ( const NumericT A,
unsigned int  A_row_start,
unsigned int  A_col_start,
unsigned int  A_row_inc,
unsigned int  A_col_inc,
unsigned int  A_row_size,
unsigned int  A_col_size,
unsigned int  A_internal_rows,
unsigned int  A_internal_cols,
const NumericT v,
unsigned int  v_start,
unsigned int  v_inc,
unsigned int  v_size,
NumericT result,
unsigned int  result_start,
unsigned int  result_inc,
unsigned int  result_size 
)

Definition at line 1297 of file matrix_operations_row.hpp.

template<typename NumericT , typename ScalarT1 >
void viennacl::linalg::cuda::vector_assign ( vector_base< NumericT > &  vec1,
ScalarT1 const &  alpha,
bool  up_to_internal_size = false 
)

Assign a constant value to a vector (-range/-slice)

Parameters
vec1: The vector to which the value should be assigned
alpha: The value to be assigned
up_to_internal_size: Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer).

Definition at line 803 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_assign_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  internal_size1,
NumericT  alpha 
)

Definition at line 782 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_maxmin_kernel ( const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT result 
)

Definition at line 2722 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel ( NumericT const *  vec1,
NumericT result,
unsigned int  start_result,
unsigned int  inc_result 
)

Definition at line 2017 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats ( const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT result 
)

Definition at line 1533 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers ( const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT result 
)

Definition at line 1575 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers ( const NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
unsigned int  option,
NumericT result 
)

Definition at line 1612 of file vector_operations.hpp.

template<typename NumericT >
void viennacl::linalg::cuda::vector_swap ( vector_base< NumericT > &  vec1,
vector_base< NumericT > &  vec2 
)

Swaps the contents of two vectors, data is copied.

Parameters
vec1: The first vector (or -range, or -slice)
vec2: The second vector (or -range, or -slice)

Definition at line 853 of file vector_operations.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::vector_swap_kernel ( NumericT vec1,
unsigned int  start1,
unsigned int  inc1,
unsigned int  size1,
NumericT vec2,
unsigned int  start2,
unsigned int  inc2 
)

Definition at line 826 of file vector_operations.hpp.

template<typename NumericT >
__device__ void viennacl::linalg::cuda::writeToGmem ( const unsigned int  tid,
const unsigned int  tid_2,
const unsigned int  num_threads_active,
const unsigned int  num_blocks_mult,
NumericT g_left_one,
NumericT g_right_one,
unsigned int *  g_pos_one,
NumericT g_left_mult,
NumericT g_right_mult,
unsigned int *  g_left_count_mult,
unsigned int *  g_right_count_mult,
NumericT s_left,
NumericT s_right,
unsigned short *  s_left_count,
unsigned short *  s_right_count,
unsigned int *  g_blocks_mult,
unsigned int *  g_blocks_mult_sum,
unsigned short *  s_compaction_list,
unsigned short *  s_cl_helper,
unsigned int  offset_mult_lambda 
)

Write data to global memory.

Definition at line 53 of file bisect_kernel_large.hpp.

template<typename NumericT >
__global__ void viennacl::linalg::cuda::zero2 ( NumericT input1,
NumericT input2,
unsigned int  size 
)

Definition at line 602 of file fft_operations.hpp.