Holds all CUDA compute kernels used by ViennaCL. More...
Namespaces | |
amg | |
detail | |
Helper functions for the CUDA linear algebra backend. | |
Classes | |
struct | mat_mult_matrix_index |
Helper struct for accessing an element of a row- or column-major matrix. More... | |
Functions | |
template<typename NumericT > | |
void | bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
template<typename NumericT > | |
void | bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
template<typename NumericT > | |
__device__ void | writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda) |
Write data to global memory. More... | |
template<typename NumericT > | |
__device__ void | compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2) |
Perform final stream compaction before writing data to global memory. More... | |
__device__ void | scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
Compute addresses to obtain compact list of block start addresses. More... | |
__device__ void | scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
Perform scan to obtain number of eigenvalues before a specific block. More... | |
__device__ void | scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int mat_size, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
template<typename NumericT > | |
__device__ void | storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second) |
template<typename NumericT > | |
__global__ void | bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d: diagonal elements in global memory; g_s: superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n: size of matrix; lg: lower bound of input interval (e.g. Gerschgorin interval); ug: upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count: number of eigenvalues that are smaller than lg; ug_eig_count: number of eigenvalues that are smaller than ug; epsilon: desired accuracy of eigenvalues to compute. More... | |
template<typename NumericT > | |
__global__ void | bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision) |
template<typename NumericT > | |
__global__ void | bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision) |
template<typename NumericT > | |
__global__ void | bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More... | |
__device__ int | floorPow2 (int n) |
__device__ int | ceilPow2 (int n) |
template<typename NumericT > | |
__device__ NumericT | computeMidpoint (const NumericT left, const NumericT right) |
template<class S , class T , class NumericT > | |
__device__ void | storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision) |
template<typename NumericT > | |
__device__ unsigned int | computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
template<typename NumericT > | |
__device__ unsigned int | computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
template<class S , class T , class NumericT > | |
__device__ void | storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More... | |
template<class T > | |
__device__ void | createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction) |
template<class T , class NumericT > | |
__device__ void | compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second) |
Perform stream compaction for second child intervals. More... | |
template<class T , class S , class NumericT > | |
__device__ void | storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second) |
template<class T , class NumericT > | |
__device__ void | subdivideActiveIntervalMulti (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged) |
Subdivide interval if active and not already converged. More... | |
template<class T , class NumericT > | |
__device__ void | subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged) |
Subdivide interval if active and not already converged. More... | |
template<typename NumericT > | |
__global__ void | matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal) |
template<typename NumericT > | |
__global__ void | matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal) |
template<typename NumericT , typename SolverTagT > | |
void | inplace_solve (matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). More... | |
template<typename NumericT > | |
__global__ void | triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename NumericT > | |
__global__ void | triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename NumericT , typename SolverTagT > | |
void | inplace_solve (matrix_base< NumericT > const &mat, vector_base< NumericT > &vec, SolverTagT) |
Direct inplace solver for dense triangular systems (non-transposed version) More... | |
__host__ __device__ float2 | operator+ (float2 a, float2 b) |
__host__ __device__ float2 | operator- (float2 a, float2 b) |
template<typename SCALARTYPE > | |
__device__ float2 | operator/ (float2 a, SCALARTYPE b) |
__device__ float2 | operator* (float2 in1, float2 in2) |
__host__ __device__ double2 | operator+ (double2 a, double2 b) |
__host__ __device__ double2 | operator- (double2 a, double2 b) |
template<typename SCALARTYPE > | |
__host__ __device__ double2 | operator/ (double2 a, SCALARTYPE b) |
__host__ __device__ double2 | operator* (double2 in1, double2 in2) |
__device__ unsigned int | get_reorder_num (unsigned int v, unsigned int bit_size) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Direct 1D algorithm for computing Fourier transformation. More... | |
template<typename NumericT , unsigned int AlignmentV> | |
void | direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Direct 2D algorithm for computing Fourier transformation. More... | |
template<typename NumericT > | |
__global__ void | fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
template<typename NumericT , unsigned int AlignmentV> | |
void | radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Radix-2 1D algorithm for computing Fourier transformation. More... | |
template<typename NumericT , unsigned int AlignmentV> | |
void | radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
Radix-2 2D algorithm for computing Fourier transformation. More... | |
template<typename Numeric2T , typename NumericT > | |
__global__ void | bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign) |
template<typename Numeric2T , typename NumericT > | |
__global__ void | bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign) |
template<typename NumericT > | |
__global__ void | zero2 (NumericT *input1, NumericT *input2, unsigned int size) |
template<typename NumericT , unsigned int AlignmentV> | |
void | bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t) |
Bluestein's algorithm for computing Fourier transformation. More... | |
template<typename NumericT > | |
__global__ void | fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size) |
template<typename NumericT , unsigned int AlignmentV> | |
void | multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output) |
Multiply two complex vectors and store result in output. More... | |
template<typename Numeric2T , typename NumericT > | |
__global__ void | fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor) |
template<typename NumericT , unsigned int AlignmentV> | |
void | normalize (viennacl::vector< NumericT, AlignmentV > &input) |
Normalize vector by its own size. More... | |
template<typename NumericT > | |
__global__ void | transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num) |
template<typename NumericT , unsigned int AlignmentV> | |
void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output) |
Transpose matrix. More... | |
template<typename NumericT > | |
__global__ void | transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num) |
template<typename NumericT , unsigned int AlignmentV> | |
void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input) |
In-place transpose of a matrix. More... | |
template<typename RealT , typename ComplexT > | |
__global__ void | real_to_complex (const RealT *in, ComplexT *out, unsigned int size) |
template<typename NumericT > | |
void | real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More... | |
template<typename ComplexT , typename RealT > | |
__global__ void | complex_to_real (const ComplexT *in, RealT *out, unsigned int size) |
template<typename NumericT > | |
void | complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More... | |
template<typename NumericT > | |
__global__ void | reverse_inplace (NumericT *vec, unsigned int size) |
template<typename NumericT > | |
void | reverse (viennacl::vector_base< NumericT > &in) |
Reverse vector to opposite order and save it in input vector. More... | |
template<typename IndexT > | |
__global__ void | extract_L_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices) |
template<typename NumericT > | |
__global__ void | extract_L_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements) |
template<typename NumericT > | |
void | extract_L (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L) |
template<typename NumericT > | |
__global__ void | ilu_scale_kernel_1 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, NumericT *D_elements) |
template<typename NumericT > | |
__global__ void | ilu_scale_kernel_2 (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements) |
Scales values in a matrix such that output = D * input * D, where D is a diagonal matrix (only the diagonal is provided) More... | |
template<typename NumericT > | |
void | icc_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L) |
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly. More... | |
template<typename NumericT > | |
__global__ void | icc_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L) |
CUDA kernel for one Chow-Patel-ICC sweep. More... | |
template<typename NumericT > | |
void | icc_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L) |
Performs one nonlinear relaxation step in the Chow-Patel-ICC using CUDA (cf. Algorithm 2 in paper) More... | |
template<typename IndexT > | |
__global__ void | extract_LU_kernel_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, unsigned int A_size1, unsigned int *L_row_indices, unsigned int *U_row_indices) |
template<typename NumericT > | |
__global__ void | extract_LU_kernel_2 (unsigned int const *A_row_indices, unsigned int const *A_col_indices, NumericT const *A_elements, unsigned int A_size1, unsigned int const *L_row_indices, unsigned int *L_col_indices, NumericT *L_elements, unsigned int const *U_row_indices, unsigned int *U_col_indices, NumericT *U_elements) |
template<typename NumericT > | |
void | extract_LU (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U) |
template<typename NumericT > | |
void | ilu_scale (compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U) |
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. More... | |
template<typename NumericT > | |
__global__ void | ilu_chow_patel_sweep_kernel (unsigned int const *L_row_indices, unsigned int const *L_col_indices, NumericT *L_elements, NumericT const *L_backup, unsigned int L_size1, NumericT const *aij_L, unsigned int const *U_trans_row_indices, unsigned int const *U_trans_col_indices, NumericT *U_trans_elements, NumericT const *U_trans_backup, NumericT const *aij_U_trans) |
CUDA kernel for one Chow-Patel-ILU sweep. More... | |
template<typename NumericT > | |
void | ilu_chow_patel_sweep (compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans) |
Performs one nonlinear relaxation step in the Chow-Patel-ILU using CUDA (cf. Algorithm 2 in paper) More... | |
template<typename NumericT > | |
__global__ void | ilu_form_neumann_matrix_kernel (unsigned int const *R_row_indices, unsigned int const *R_col_indices, NumericT *R_elements, unsigned int R_size1, NumericT *D_elements) |
template<typename NumericT > | |
void | ilu_form_neumann_matrix (compressed_matrix< NumericT > &R, vector< NumericT > &diag_R) |
template<typename NumericT > | |
__global__ void | pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size) |
template<typename NumericT > | |
void | pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer) |
template<unsigned int SubWarpSizeV, typename NumericT > | |
__global__ void | pipelined_cg_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
__global__ void | pipelined_cg_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
template<typename NumericT > | |
void | pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size) |
template<typename NumericT > | |
void | pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
template<unsigned int SubWarpSizeV, typename NumericT > | |
__global__ void | pipelined_bicgstab_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename NumericT > | |
__global__ void | pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
template<typename NumericT > | |
void | pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
template<typename T > | |
__global__ void | pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size) |
template<typename T > | |
void | pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More... | |
template<typename T > | |
__global__ void | pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size) |
template<typename T > | |
void | pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size) |
template<typename T > | |
__global__ void | pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
template<typename T > | |
__global__ void | pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k) |
template<typename T > | |
void | pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k) |
template<typename NumericT > | |
void | pipelined_gmres_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename T > | |
void | pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
template<typename DestNumericT , typename SrcNumericT > | |
void | convert (matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2) |
template<typename NumericT , typename SizeT , typename DistanceT > | |
void | trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans) |
template<typename NumericT , typename ScalarT > | |
void | am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
void | matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false) |
template<typename NumericT > | |
void | matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s) |
template<typename NumericT > | |
void | matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat) |
template<typename NumericT > | |
void | matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec) |
template<typename NumericT > | |
void | matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec) |
template<typename NumericT > | |
void | matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec) |
template<typename NumericT , typename SizeT , typename OpT > | |
void | element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename SizeT , typename OpT > | |
void | element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename SizeT , typename OpT > | |
void | element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
template<typename NumericT > | |
void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT > | |
void | prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result) |
Carries out matrix-vector multiplication. More... | |
template<typename NumericT , typename ScalarT > | |
void | prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta) |
Carries out matrix-matrix multiplication. More... | |
template<typename NumericT , typename ScalarT > | |
void | scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More... | |
template<typename NumericT , typename VectorType > | |
void | bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh) |
This function stores the diagonal and the superdiagonal of a matrix in two vectors. More... | |
template<typename NumericT > | |
void | copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col) |
This function copies a row or a column from a matrix to a vector. More... | |
template<typename NumericT > | |
void | house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start) |
This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More... | |
template<typename NumericT > | |
void | house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D) |
This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More... | |
template<typename NumericT > | |
void | house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1) |
This function updates the matrix Q, which is needed for the computation of the eigenvectors. More... | |
template<typename NumericT > | |
void | givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m) |
This function updates the matrix Q. It is part of the tql2 algorithm. More... | |
template<typename DestNumericT , typename SrcNumericT > | |
__global__ void | convert_col_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | bidiag_pack_row_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | bidiag_pack_column_major_kernel (T *A, T *D, T *S, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | copy_col_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | copy_col_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | copy_row_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | copy_row_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size, unsigned int stride) |
template<typename T > | |
__global__ void | house_update_A_left_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | house_update_A_left_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | house_update_A_right_row_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__global__ void | house_update_A_right_column_major_kernel (T *A, T *V, unsigned int row_start, unsigned int col_start, unsigned int size1, unsigned int size2, unsigned int stride) |
template<typename T > | |
__device__ void | col_reduce_lcl_array (T *sums, unsigned int th_Idx, unsigned int bl_Dim) |
template<typename T > | |
__global__ void | house_update_QL_row_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ) |
template<typename T > | |
__global__ void | house_update_QL_column_major_kernel (T *QL, T *V, unsigned int size1, unsigned int strideQ) |
template<typename T > | |
__global__ void | givens_next_row_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i) |
template<typename T > | |
__global__ void | givens_next_column_major_kernel (T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename NumericT > | |
__global__ void | matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename DestNumericT , typename SrcNumericT > | |
__global__ void | convert_row_kernel (DestNumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const SrcNumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major) |
template<typename NumericT > | |
__global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
template<typename NumericT > | |
__global__ void | element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename NumericT > | |
__global__ void | vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename NumericT > | |
__global__ void | el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size) |
Main CUDA kernel for nonnegative matrix factorization of dense matrices. More... | |
template<typename NumericT > | |
void | nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf) |
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More... | |
template<typename NumericT > | |
__global__ void | as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2) |
template<typename NumericT > | |
__global__ void | as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT > | |
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type | as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type | asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename NumericT > | |
__global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type | asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | scalar_swap_kernel (NumericT *s1, NumericT *s2) |
template<typename ScalarT1 , typename ScalarT2 > | |
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type | swap (ScalarT1 &s1, ScalarT2 &s2) |
Swaps the contents of two scalars; the data is copied. More... | |
template<unsigned int SubWarpSizeV, typename NumericT > | |
__global__ void | compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<typename NumericT > | |
__global__ void | compressed_matrix_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<class NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a compressed_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out sparse_matrix-dense_matrix multiplication, with the first matrix being compressed. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out matrix-trans(matrix) multiplication, with the first matrix being compressed and the second transposed. More... | |
template<typename NumericT > | |
__global__ void | compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size) |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename SparseMatrixT , typename NumericT > | |
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. More... | |
template<typename NumericT > | |
__global__ void | compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<typename NumericT > | |
void | prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix. More... | |
template<typename NumericT > | |
__global__ void | coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a coordinate_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Coordinate Matrix(COO)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Coordinate Matrix(COO)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with an ell_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result, unsigned int block_size) |
template<typename NumericT , typename IndexT > | |
void | prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a sliced_ell_matrix. More... | |
template<typename NumericT > | |
__global__ void | hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a hyb_matrix. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. More... | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int AlignmentV> | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. More... | |
template<typename NumericT > | |
__global__ void | csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
template<typename NumericT > | |
__global__ void | csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
template<typename IndexT > | |
__device__ IndexT | round_to_next_power_of_2 (IndexT val) |
template<typename IndexT > | |
__global__ void | compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group) |
__device__ unsigned int | merge_subwarp_symbolic (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int subwarpsize) |
__device__ unsigned int | merge_subwarp_symbolic_double (unsigned int row_B_start, unsigned int row_B_end, unsigned int const *B_col_indices, unsigned int B_size2, unsigned int *output_array, unsigned int id_in_warp, unsigned int subwarpsize) |
template<typename IndexT > | |
__global__ void | compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices) |
template<typename NumericT > | |
__device__ unsigned int | merge_subwarp_numeric (NumericT scaling_factor, unsigned int input_start, unsigned int input_end, const unsigned int *input_indices, const NumericT *input_values, unsigned int invalid_token, unsigned int *output_indices, NumericT *output_values, unsigned int id_in_warp, unsigned int subwarpsize) |
template<typename IndexT , typename NumericT > | |
__global__ void | compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements, unsigned int *subwarpsize_array, unsigned int *max_row_size_A, unsigned int *max_row_size_B, unsigned int *scratchpad_offsets, unsigned int *scratchpad_indices, NumericT *scratchpad_values) |
template<typename IndexT > | |
__global__ void | compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row) |
template<typename IndexT , typename NumericT > | |
__global__ void | compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer) |
template<typename IndexT , typename NumericT > | |
__global__ void | compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer) |
template<class NumericT , unsigned int AlignmentV> | |
void | prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C) |
Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices. More... | |
template<unsigned int SubWarpSizeV, typename IndexT > | |
__device__ IndexT | subwarp_minimum_shuffle (IndexT min_index) |
template<unsigned int SubWarpSizeV, typename IndexT > | |
__device__ IndexT | subwarp_minimum_shared (IndexT min_index, IndexT id_in_warp, IndexT *shared_buffer) |
template<unsigned int SubWarpSizeV, typename IndexT > | |
__global__ void | compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices) |
template<unsigned int SubWarpSizeV, typename NumericT > | |
__device__ NumericT | subwarp_accumulate_shuffle (NumericT output_value) |
template<unsigned int SubWarpSizeV, typename NumericT > | |
__device__ NumericT | subwarp_accumulate_shared (NumericT output_value, unsigned int id_in_warp, NumericT *shared_buffer) |
template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT > | |
__global__ void | compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements) |
template<typename DestNumericT , typename SrcNumericT > | |
__global__ void | convert_kernel (DestNumericT *dest, unsigned int start_dest, unsigned int inc_dest, unsigned int size_dest, SrcNumericT const *src, unsigned int start_src, unsigned int inc_src) |
template<typename DestNumericT , typename SrcNumericT > | |
void | convert (vector_base< DestNumericT > &dest, vector_base< SrcNumericT > const &src) |
template<typename NumericT > | |
__global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
__global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT , typename ScalarType1 > | |
void | av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT > | |
__global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
void | avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT > | |
__global__ void | vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha) |
template<typename NumericT , typename ScalarT1 > | |
void | vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false) |
Assign a constant value to a vector (-range/-slice) More... | |
template<typename NumericT > | |
__global__ void | vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
Swaps the contents of two vectors, data is copied. More... | |
template<typename NumericT > | |
__global__ void | element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT > | |
__global__ void | element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename NumericT , typename OpT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy) |
Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More... | |
template<typename OpT > | |
void | element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy) |
template<typename OpT > | |
void | element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
template<typename NumericT > | |
__global__ void | vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
template<typename NumericT > | |
void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT > | |
__global__ void | inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
__global__ void | vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT , typename ScalarT > | |
void | inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
void | inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
template<typename NumericT > | |
__global__ void | inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result) |
template<typename NumericT > | |
void | inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result) |
template<typename NumericT > | |
__global__ void | norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
__global__ void | norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
template<typename NumericT > | |
void | norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^1-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the l^2-norm of a vector - implementation. More... | |
template<typename NumericT > | |
void | norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
void | norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the supremum-norm of a vector. More... | |
template<typename NumericT > | |
__global__ void | vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
template<typename NumericT > | |
void | max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the maximum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | max_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
void | min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the minimum of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | min_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
void | sum_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
Computes the sum of all entries of a vector, both reduction stages run on the GPU. More... | |
template<typename NumericT > | |
void | sum_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
Computes the sum of all entries of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
template<typename NumericT > | |
__device__ NumericT | cuda_abs (NumericT val) |
__device__ unsigned long | cuda_abs (unsigned long val) |
__device__ unsigned int | cuda_abs (unsigned int val) |
__device__ unsigned short | cuda_abs (unsigned short val) |
__device__ unsigned char | cuda_abs (unsigned char val) |
template<typename NumericT > | |
__global__ void | index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
template<typename NumericT > | |
vcl_size_t | index_norm_inf (vector_base< NumericT > const &vec1) |
Computes the index of the first entry that is equal to the supremum-norm in modulus. More... | |
template<typename NumericT > | |
__global__ void | plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta) |
template<typename NumericT > | |
void | plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta) |
Computes a plane rotation of two vectors. More... | |
template<typename NumericT > | |
__global__ void | scan_kernel_1 (NumericT const *X, unsigned int startX, unsigned int incX, unsigned int sizeX, NumericT *Y, unsigned int startY, unsigned int incY, unsigned int scan_offset, NumericT *carries) |
template<typename NumericT > | |
__global__ void | scan_kernel_2 (NumericT *carries) |
template<typename NumericT > | |
__global__ void | scan_kernel_3 (NumericT *Y, unsigned int startY, unsigned int incY, unsigned int sizeY, NumericT const *carries) |
template<typename NumericT > | |
void | inclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output) |
This function implements an inclusive scan using CUDA. More... | |
template<typename NumericT > | |
void | exclusive_scan (vector_base< NumericT > const &input, vector_base< NumericT > &output) |
This function implements an exclusive scan using CUDA. More... | |
Holds all CUDA compute kernels used by ViennaCL.
void viennacl::linalg::cuda::am | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 113 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 59 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 95 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 87 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 124 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::ambm | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT > const & | mat3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 164 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 136 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 210 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 283 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 357 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::ambm_m | ( | matrix_base< NumericT > & | mat1, |
matrix_base< NumericT > const & | mat2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT > const & | mat3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 239 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 436 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 511 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 585 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 660 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 469 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 544 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 618 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 693 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 166 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 241 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 315 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
Definition at line 390 of file matrix_operations_row.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 77 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2 | ||
) |
Definition at line 48 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2 | ||
) |
Definition at line 60 of file scalar_operations.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
ScalarT3 const & | s3, | ||
NumericT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 191 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 99 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT const * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 120 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT const * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 141 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 162 of file scalar_operations.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s | ( | ScalarT1 & | s1, |
ScalarT2 const & | s2, | ||
NumericT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
ScalarT3 const & | s3, | ||
NumericT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 314 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 222 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT const * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 243 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT const * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 264 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | s2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | s3 | ||
) |
Definition at line 285 of file scalar_operations.hpp.
void viennacl::linalg::cuda::av | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
Definition at line 144 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 77 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 110 of file vector_operations.hpp.
void viennacl::linalg::cuda::avbv | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< NumericT > const & | vec3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 433 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 179 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 242 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 305 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 368 of file vector_operations.hpp.
void viennacl::linalg::cuda::avbv_v | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< NumericT > const & | vec3, | ||
ScalarT2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
Definition at line 735 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 483 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const NumericT * | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 546 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 609 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT | fac2, | ||
unsigned int | options2, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT | fac3, | ||
unsigned int | options3, | ||
const NumericT * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
Definition at line 672 of file vector_operations.hpp.
void viennacl::linalg::cuda::bidiag_pack | ( | matrix_base< NumericT > & | A, |
VectorType & | dh, | ||
VectorType & | sh | ||
) |
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
A | The matrix from which the vectors will be extracted. |
dh | The vector in which the diagonal of the matrix will be stored. |
sh | The vector in which the superdiagonal of the matrix will be stored. |
Definition at line 2489 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel | ( | T * | A, |
T * | D, | ||
T * | S, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1456 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel | ( | T * | A, |
T * | D, | ||
T * | S, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1434 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const unsigned int | lg_eig_count, | ||
const unsigned int | ug_eig_count, | ||
NumericT | epsilon, | ||
unsigned int * | g_num_one, | ||
unsigned int * | g_num_blocks_mult, | ||
NumericT * | g_left_one, | ||
NumericT * | g_right_one, | ||
unsigned int * | g_pos_one, | ||
NumericT * | g_left_mult, | ||
NumericT * | g_right_mult, | ||
unsigned int * | g_left_count_mult, | ||
unsigned int * | g_right_count_mult, | ||
unsigned int * | g_blocks_mult, | ||
unsigned int * | g_blocks_mult_sum | ||
) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
g_d | diagonal elements in global memory |
g_s | superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0) |
n | size of matrix |
lg | lower bound of input interval (e.g. Gerschgorin interval) |
ug | upper bound of input interval (e.g. Gerschgorin interval) |
lg_eig_count | number of eigenvalues that are smaller than lg |
ug_eig_count | number of eigenvalues that are smaller than ug |
epsilon | desired accuracy of eigenvalues to compute |
Definition at line 537 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
unsigned int * | blocks_mult, | ||
unsigned int * | blocks_mult_sum, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_left_count, | ||
unsigned int * | g_right_count, | ||
NumericT * | g_lambda, | ||
unsigned int * | g_pos, | ||
NumericT | precision | ||
) |
Perform the second step of the bisection algorithm for large matrices, for intervals that contained more than one eigenvalue after the first step.
g_d | diagonal elements of symmetric, tridiagonal matrix |
g_s | superdiagonal elements of symmetric, tridiagonal matrix |
n | matrix size |
blocks_mult | start addresses of blocks of intervals that are processed by one block of threads, each of the intervals contains more than one eigenvalue |
blocks_mult_sum | total number of eigenvalues / singleton intervals in one block of intervals |
g_left | left limits of intervals |
g_right | right limits of intervals |
g_left_count | number of eigenvalues less than left limits |
g_right_count | number of eigenvalues less than right limits |
g_lambda | final eigenvalue |
g_pos | index of eigenvalue (in ascending order) |
precision | desired precision of eigenvalues |
Definition at line 68 of file bisect_kernel_large_multi.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
unsigned int | num_intervals, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_pos, | ||
NumericT | precision | ||
) |
Determine eigenvalues for large matrices, for intervals that contained exactly one eigenvalue after the first step.
g_d | diagonal elements of symmetric, tridiagonal matrix |
g_s | superdiagonal elements of symmetric, tridiagonal matrix |
n | matrix size |
num_intervals | total number of intervals containing one eigenvalue after the first step |
g_left | left interval limits |
g_right | right interval limits |
g_pos | index of interval / number of intervals that are smaller than right interval limit |
precision | desired precision of eigenvalues |
Definition at line 59 of file bisect_kernel_large_onei.hpp.
__global__ void viennacl::linalg::cuda::bisectKernelSmall | ( | const NumericT * | g_d, |
const NumericT * | g_s, | ||
const unsigned int | n, | ||
NumericT * | g_left, | ||
NumericT * | g_right, | ||
unsigned int * | g_left_count, | ||
unsigned int * | g_right_count, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const unsigned int | lg_eig_count, | ||
const unsigned int | ug_eig_count, | ||
NumericT | epsilon | ||
) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
g_d | diagonal elements in global memory |
g_s | superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0) |
n | size of matrix |
g_left | helper array |
g_right | helper array |
g_left_count | helper array |
g_right_count | helper array |
lg | lower bound of input interval (e.g. Gerschgorin interval) |
ug | upper bound of input interval (e.g. Gerschgorin interval) |
lg_eig_count | number of eigenvalues that are smaller than lg |
ug_eig_count | number of eigenvalues that are smaller than ug |
epsilon | desired accuracy of eigenvalues to compute |
Definition at line 61 of file bisect_kernel_small.hpp.
void viennacl::linalg::cuda::bisectLarge | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const NumericT | precision | ||
) |
Definition at line 71 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectLarge_MultIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | precision | ||
) |
Definition at line 133 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectLarge_OneIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | precision | ||
) |
Definition at line 103 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bisectSmall | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
viennacl::linalg::detail::ResultDataSmall< NumericT > & | result, | ||
const unsigned int | mat_size, | ||
const NumericT | lg, | ||
const NumericT | ug, | ||
const NumericT | precision | ||
) |
Definition at line 45 of file bisect_kernel_calls.hpp.
void viennacl::linalg::cuda::bluestein | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
viennacl::vector< NumericT, AlignmentV > & | out, | ||
vcl_size_t | |||
) |
Bluestein's algorithm for computing Fourier transformation.
Currently works only for input data sizes less than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.
Definition at line 622 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::bluestein_post | ( | Numeric2T * | Z, |
Numeric2T * | out, | ||
unsigned int | size, | ||
NumericT | sign | ||
) |
Definition at line 538 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::bluestein_pre | ( | Numeric2T * | input, |
Numeric2T * | A, | ||
Numeric2T * | B, | ||
unsigned int | size, | ||
unsigned int | ext_size, | ||
NumericT | sign | ||
) |
Definition at line 564 of file fft_operations.hpp.
|
inline |
Compute the next higher power of two of n
n | number for which the next higher power of two is sought |
Definition at line 66 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::col_reduce_lcl_array | ( | T * | sums, |
unsigned int | th_Idx, | ||
unsigned int | bl_Dim | ||
) |
Definition at line 1672 of file matrix_operations_col.hpp.
__device__ void viennacl::linalg::cuda::compactIntervals | ( | NumericT * | s_left, |
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | mid, | ||
NumericT | right, | ||
unsigned int | mid_count, | ||
unsigned int | right_count, | ||
T * | s_compaction_list, | ||
unsigned int | num_threads_active, | ||
unsigned int | is_active_second | ||
) |
Perform stream compaction for second child intervals.
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
mid | midpoint of current interval (left of new interval) |
right | upper limit of interval |
mid_count | eigenvalues less than mid |
right_count | eigenvalues less than right |
s_compaction_list | list containing the indices where the data has to be stored |
num_threads_active | number of active threads / intervals |
is_active_second | marks whether the thread has a second non-empty child interval |
Definition at line 440 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::compactStreamsFinal | ( | const unsigned int | tid, |
const unsigned int | tid_2, | ||
const unsigned int | num_threads_active, | ||
unsigned int & | offset_mult_lambda, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
unsigned short * | s_cl_one, | ||
unsigned short * | s_cl_mult, | ||
unsigned short * | s_cl_blocking, | ||
unsigned short * | s_cl_helper, | ||
unsigned int | is_one_lambda, | ||
unsigned int | is_one_lambda_2, | ||
NumericT & | left, | ||
NumericT & | right, | ||
NumericT & | left_2, | ||
NumericT & | right_2, | ||
unsigned int & | left_count, | ||
unsigned int & | right_count, | ||
unsigned int & | left_count_2, | ||
unsigned int & | right_count_2, | ||
unsigned int | c_block_iend, | ||
unsigned int | c_sum_block, | ||
unsigned int | c_block_iend_2, | ||
unsigned int | c_sum_block_2 | ||
) |
Perform final stream compaction before writing data to global memory.
Definition at line 134 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::complex_to_real | ( | const ComplexT * | in, |
RealT * | out, | ||
unsigned int | size | ||
) |
Definition at line 809 of file fft_operations.hpp.
void viennacl::linalg::cuda::complex_to_real | ( | viennacl::vector_base< NumericT > const & | in, |
viennacl::vector_base< NumericT > & | out, | ||
vcl_size_t | size | ||
) |
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 819 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_jumper, |
const unsigned int * | row_indices, | ||
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
unsigned int | nonzero_rows, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 909 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const NumericT * | sp_mat_elements, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 323 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const NumericT * | sp_mat_elements, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 477 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 639 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_A2 | ( | IndexT * | A2_row_indices, |
IndexT * | A2_col_indices, | ||
NumericT * | A2_elements, | ||
IndexT | A2_size1, | ||
IndexT * | new_row_buffer | ||
) |
Definition at line 484 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 | ( | const IndexT * | A_row_indices, |
IndexT | A_size1, | ||
IndexT | max_per_row, | ||
IndexT * | chunks_per_row | ||
) |
Definition at line 469 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_G1 | ( | IndexT * | G1_row_indices, |
IndexT * | G1_col_indices, | ||
NumericT * | G1_elements, | ||
IndexT | G1_size1, | ||
IndexT const * | A_row_indices, | ||
IndexT const * | A_col_indices, | ||
NumericT const * | A_elements, | ||
IndexT | A_size1, | ||
IndexT | A_nnz, | ||
IndexT | max_per_row, | ||
IndexT * | new_row_buffer | ||
) |
Definition at line 511 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
IndexT | A_size1, | ||
const IndexT * | B_row_indices, | ||
IndexT * | subwarpsize_per_group, | ||
IndexT * | max_nnz_row_A_per_group, | ||
IndexT * | max_nnz_row_B_per_group | ||
) |
Definition at line 82 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
IndexT | A_size1, | ||
const IndexT * | B_row_indices, | ||
const IndexT * | B_col_indices, | ||
IndexT | B_size2, | ||
IndexT * | C_row_indices | ||
) |
Definition at line 162 of file spgemm_rmerge.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
IndexT | A_size1, | ||
const IndexT * | B_row_indices, | ||
const IndexT * | B_col_indices, | ||
IndexT | B_size2, | ||
IndexT * | C_row_indices, | ||
unsigned int * | subwarpsize_array, | ||
unsigned int * | max_row_size_A, | ||
unsigned int * | max_row_size_B, | ||
unsigned int * | scratchpad_offsets, | ||
unsigned int * | scratchpad_indices | ||
) |
Definition at line 217 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
const NumericT * | A_elements, | ||
IndexT | A_size1, | ||
const IndexT * | B_row_indices, | ||
const IndexT * | B_col_indices, | ||
const NumericT * | B_elements, | ||
IndexT | B_size2, | ||
IndexT const * | C_row_indices, | ||
IndexT * | C_col_indices, | ||
NumericT * | C_elements | ||
) |
Definition at line 251 of file spgemm_rmerge.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
const NumericT * | A_elements, | ||
IndexT | A_size1, | ||
const IndexT * | B_row_indices, | ||
const IndexT * | B_col_indices, | ||
const NumericT * | B_elements, | ||
IndexT | B_size2, | ||
IndexT const * | C_row_indices, | ||
IndexT * | C_col_indices, | ||
NumericT * | C_elements, | ||
unsigned int * | subwarpsize_array, | ||
unsigned int * | max_row_size_A, | ||
unsigned int * | max_row_size_B, | ||
unsigned int * | scratchpad_offsets, | ||
unsigned int * | scratchpad_indices, | ||
NumericT * | scratchpad_values | ||
) |
Definition at line 365 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_adaptive_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const unsigned int * | row_blocks, | ||
const NumericT * | elements, | ||
unsigned int | num_blocks, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 167 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
Definition at line 125 of file sparse_matrix_operations.hpp.
|
inline |
Compute midpoint of interval [left, right] avoiding overflow if possible
left | left / lower limit of interval |
right | right / upper limit of interval |
Definition at line 89 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
g_d | diagonal elements stored in global memory |
g_s | superdiagonal elements stored in global memory |
n | size of matrix |
x | value for which the number of smaller eigenvalues is sought |
tid | thread identifier (e.g. threadIdx.x or gtid) |
num_intervals_active | number of active intervals / threads that currently process an interval |
s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
converged | flag indicating whether the current thread has already converged (that is, the count does not have to be computed) |
Definition at line 177 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
g_d | diagonal elements stored in global memory |
g_s | superdiagonal elements stored in global memory |
n | size of matrix |
x | value for which the number of smaller eigenvalues is sought |
tid | thread identifier (e.g. threadIdx.x or gtid) |
num_intervals_active | number of active intervals / threads that currently process an interval |
s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
converged | flag indicating whether the current thread has already converged (that is, the count does not have to be computed) |
Definition at line 237 of file bisect_util.hpp.
void viennacl::linalg::cuda::convert | ( | matrix_base< DestNumericT > & | mat1, |
matrix_base< SrcNumericT > const & | mat2 | ||
) |
Definition at line 57 of file matrix_operations.hpp.
void viennacl::linalg::cuda::convert | ( | vector_base< DestNumericT > & | dest, |
vector_base< SrcNumericT > const & | src | ||
) |
Definition at line 59 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::convert_col_kernel | ( | DestNumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const SrcNumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 34 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::convert_kernel | ( | DestNumericT * | dest, |
unsigned int | start_dest, | ||
unsigned int | inc_dest, | ||
unsigned int | size_dest, | ||
SrcNumericT const * | src, | ||
unsigned int | start_src, | ||
unsigned int | inc_src | ||
) |
Definition at line 48 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::convert_row_kernel | ( | DestNumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const SrcNumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 34 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1225 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1420 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
Definition at line 1125 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::copy_col_column_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size, | ||
unsigned int | stride | ||
) |
Definition at line 1498 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_col_row_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size, | ||
unsigned int | stride | ||
) |
Definition at line 1480 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_row_column_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size, | ||
unsigned int | stride | ||
) |
Definition at line 1535 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::copy_row_row_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size, | ||
unsigned int | stride | ||
) |
Definition at line 1516 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::copy_vec | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | V, | ||
vcl_size_t | row_start, | ||
vcl_size_t | col_start, | ||
bool | copy_col | ||
) |
This function copies a row or a column from a matrix to a vector.
A | The matrix where to copy from. |
V | The vector to fill with data. |
row_start | The number of the first row to copy. |
col_start | The number of the first column to copy. |
copy_col | Set to TRUE to copy a column, FALSE to copy a row. |
Definition at line 2526 of file matrix_operations.hpp.
__device__ void viennacl::linalg::cuda::createIndicesCompaction | ( | T * | s_compaction_list_exc, |
unsigned int | num_threads_compaction | ||
) |
Create indices for compaction: process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and create for each non-zero element the index where the new interval belongs in a compact representation of all generated second children.
s_compaction_list_exc | list containing the flags indicating which threads generated two children |
num_threads_compaction | number of threads to employ for compaction |
Definition at line 373 of file bisect_util.hpp.
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward | ( | const unsigned int * | row_jumper_U, |
const unsigned int * | column_indices_U, | ||
const NumericT * | elements_U, | ||
const NumericT * | diagonal_U, | ||
const unsigned int * | block_offsets, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 700 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward | ( | const unsigned int * | row_jumper_L, |
const unsigned int * | column_indices_L, | ||
const NumericT * | elements_L, | ||
const unsigned int * | block_offsets, | ||
NumericT * | result, | ||
unsigned int | size | ||
) |
Definition at line 668 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 257 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 110 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 597 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 563 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | diagonal_entries, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 429 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 342 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 497 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 367 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 180 of file sparse_matrix_operations_solve.hpp.
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
NumericT * | vector, | ||
unsigned int | size | ||
) |
Definition at line 42 of file sparse_matrix_operations_solve.hpp.
Definition at line 2910 of file vector_operations.hpp.
|
inline |
Definition at line 2911 of file vector_operations.hpp.
|
inline |
Definition at line 2912 of file vector_operations.hpp.
|
inline |
Definition at line 2913 of file vector_operations.hpp.
|
inline |
Definition at line 2914 of file vector_operations.hpp.
void viennacl::linalg::cuda::direct | ( | viennacl::vector< NumericT, AlignmentV > const & | in, |
viennacl::vector< NumericT, AlignmentV > & | out, | ||
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Direct 1D algorithm for computing Fourier transformation.
Works for any data size. The serial implementation has O(n^2) complexity.
Definition at line 197 of file fft_operations.hpp.
void viennacl::linalg::cuda::direct | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | in, |
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | out, | ||
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Direct 2D algorithm for computing Fourier transformation.
Works for any data size. The serial implementation has O(n^2) complexity.
Definition at line 222 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::el_wise_mul_div | ( | NumericT * | matrix1, |
NumericT const * | matrix2, | ||
NumericT const * | matrix3, | ||
unsigned int | size | ||
) |
Main CUDA kernel for nonnegative matrix factorization of dense matrices.
Definition at line 38 of file nmf_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT, SizeT > & | A, |
matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 548 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< float, SizeT > & | A, |
matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 608 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< double, SizeT > & | A, |
matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 668 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
) |
Definition at line 736 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
) |
Definition at line 778 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
) |
Definition at line 820 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
) |
Definition at line 862 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
) |
Definition at line 904 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
) |
Definition at line 946 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const & | proxy | ||
) |
Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)
vec1 | The result vector (or -range, or -slice) |
proxy | The proxy object holding v2, v3 and the operation |
Definition at line 957 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< float > & | vec1, |
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 985 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
) |
Definition at line 988 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< double > & | vec1, |
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const & | proxy | ||
) |
Definition at line 1013 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
) |
Definition at line 1030 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
) |
Definition at line 1056 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
) |
Definition at line 1072 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
) |
Definition at line 1083 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
) |
Definition at line 1109 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
) |
Definition at line 1114 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
) |
Definition at line 1135 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
) |
Definition at line 1156 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
) |
Definition at line 1161 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
) |
Definition at line 1187 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
) |
Definition at line 1198 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
) |
Definition at line 1213 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
) |
Definition at line 1239 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
) |
Definition at line 1240 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
) |
Definition at line 1264 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
) |
Definition at line 1282 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
) |
Definition at line 1291 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
) |
Definition at line 1317 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
Definition at line 1324 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
) |
Definition at line 1343 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
) |
Definition at line 1366 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
) |
Definition at line 1369 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
) |
Definition at line 1395 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
) |
Definition at line 1408 of file matrix_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
Definition at line 1421 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
) |
Definition at line 1447 of file vector_operations.hpp.
void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
) |
Definition at line 1473 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 776 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 825 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
Definition at line 915 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 856 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::element_op_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
NumericT const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
Definition at line 869 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::element_op_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const NumericT * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
Definition at line 807 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const NumericT * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1690 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const NumericT * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 1863 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | col_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
Definition at line 1620 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::exclusive_scan | ( | vector_base< NumericT > const & | input, |
vector_base< NumericT > & | output | ||
) |
This function implements an exclusive scan using CUDA.
input | Input vector |
output | The output vector. Either identical to input or non-overlapping. |
Definition at line 3239 of file vector_operations.hpp.
void viennacl::linalg::cuda::extract_L | ( | compressed_matrix< NumericT > const & | A, |
compressed_matrix< NumericT > & | L | ||
) |
Definition at line 107 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::extract_L_kernel_1 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
unsigned int | A_size1, | ||
unsigned int * | L_row_indices | ||
) |
Definition at line 47 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::extract_L_kernel_2 | ( | unsigned int const * | A_row_indices, |
unsigned int const * | A_col_indices, | ||
NumericT const * | A_elements, | ||
unsigned int | A_size1, | ||
unsigned int const * | L_row_indices, | ||
unsigned int * | L_col_indices, | ||
NumericT * | L_elements | ||
) |
Definition at line 73 of file ilu_operations.hpp.
void viennacl::linalg::cuda::extract_LU | ( | compressed_matrix< NumericT > const & | A, |
compressed_matrix< NumericT > & | L, | ||
compressed_matrix< NumericT > & | U | ||
) |
Definition at line 387 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::extract_LU_kernel_1 | ( | const IndexT * | A_row_indices, |
const IndexT * | A_col_indices, | ||
unsigned int | A_size1, | ||
unsigned int * | L_row_indices, | ||
unsigned int * | U_row_indices | ||
) |
Definition at line 308 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::extract_LU_kernel_2 | ( | unsigned int const * | A_row_indices, |
unsigned int const * | A_col_indices, | ||
NumericT const * | A_elements, | ||
unsigned int | A_size1, | ||
unsigned int const * | L_row_indices, | ||
unsigned int * | L_col_indices, | ||
NumericT * | L_elements, | ||
unsigned int const * | U_row_indices, | ||
unsigned int * | U_col_indices, | ||
NumericT * | U_elements | ||
) |
Definition at line 341 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_direct | ( | const Numeric2T * | input, |
Numeric2T * | output, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 140 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_div_vec_scalar | ( | Numeric2T * | input1, |
unsigned int | size, | ||
NumericT | factor | ||
) |
Definition at line 690 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_mult_vec | ( | const NumericT * | input1, |
const NumericT * | input2, | ||
NumericT * | output, | ||
unsigned int | size | ||
) |
Definition at line 657 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_radix2 | ( | Numeric2T * | input, |
unsigned int | s, | ||
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 371 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_radix2_local | ( | Numeric2T * | input, |
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
NumericT | sign, | ||
bool | is_row_major | ||
) |
Definition at line 298 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::fft_reorder | ( | NumericT * | input, |
unsigned int | bit_size, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | batch_num, | ||
bool | is_row_major | ||
) |
Definition at line 241 of file fft_operations.hpp.
|
inline |
Compute the next lower power of two of n
n | number for which the next lower power of two is sought |
Definition at line 46 of file bisect_util.hpp.
|
inline |
Definition at line 128 of file fft_operations.hpp.
void viennacl::linalg::cuda::givens_next | ( | matrix_base< NumericT > & | Q, |
vector_base< NumericT > & | tmp1, | ||
vector_base< NumericT > & | tmp2, | ||
int | l, | ||
int | m | ||
) |
This function updates the matrix Q. It is part of the tql2 algorithm.
Q | The matrix to be updated. |
tmp1 | Vector with data from the tql2 algorithm. |
tmp2 | Vector with data from the tql2 algorithm. |
l | Data from the tql2 algorithm. |
m | Data from the tql2 algorithm. |
Definition at line 2694 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::givens_next_column_major_kernel | ( | T * | matr, |
T * | cs, | ||
T * | ss, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | start_i, | ||
unsigned int | end_i | ||
) |
Definition at line 1792 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::givens_next_row_major_kernel | ( | T * | matr, |
T * | cs, | ||
T * | ss, | ||
unsigned int | size, | ||
unsigned int | stride, | ||
unsigned int | start_i, | ||
unsigned int | end_i | ||
) |
Definition at line 1745 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_A_left | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | D, | ||
vcl_size_t | start | ||
) |
This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P.
A | The matrix to be updated. |
D | The normalized householder vector. |
start | The repetition counter. |
Definition at line 2587 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_left_column_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1581 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_left_row_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1556 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_A_right | ( | matrix_base< NumericT > & | A, |
vector_base< NumericT > & | D | ||
) |
This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P.
A | The matrix to be updated. |
D | The normalized householder vector. |
Definition at line 2626 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_right_column_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1639 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_A_right_row_major_kernel | ( | T * | A, |
T * | V, | ||
unsigned int | row_start, | ||
unsigned int | col_start, | ||
unsigned int | size1, | ||
unsigned int | size2, | ||
unsigned int | stride | ||
) |
Definition at line 1608 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::house_update_QL | ( | matrix_base< NumericT > & | Q, |
vector_base< NumericT > & | D, | ||
vcl_size_t | A_size1 | ||
) |
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
Q | The matrix to be updated. |
D | The householder vector. |
A_size1 | size1 of matrix A |
Definition at line 2663 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::house_update_QL_column_major_kernel | ( | T * | QL, |
T * | V, | ||
unsigned int | size1, | ||
unsigned int | strideQ | ||
) |
Definition at line 1717 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::house_update_QL_row_major_kernel | ( | T * | QL, |
T * | V, | ||
unsigned int | size1, | ||
unsigned int | strideQ | ||
) |
Definition at line 1690 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 2207 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
Definition at line 2405 of file sparse_matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
Definition at line 2123 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::icc_chow_patel_sweep | ( | compressed_matrix< NumericT > & | L, |
vector< NumericT > const & | aij_L | ||
) |
Performs one nonlinear relaxation step in the Chow-Patel-ICC using CUDA (cf. Algorithm 2 in paper)
Definition at line 285 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::icc_chow_patel_sweep_kernel | ( | unsigned int const * | L_row_indices, |
unsigned int const * | L_col_indices, | ||
NumericT * | L_elements, | ||
NumericT const * | L_backup, | ||
unsigned int | L_size1, | ||
NumericT const * | aij_L | ||
) |
CUDA kernel for one Chow-Patel-ICC sweep.
Definition at line 231 of file ilu_operations.hpp.
void viennacl::linalg::cuda::icc_scale | ( | compressed_matrix< NumericT > const & | A, |
compressed_matrix< NumericT > & | L | ||
) |
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L accordingly.
Definition at line 203 of file ilu_operations.hpp.
void viennacl::linalg::cuda::ilu_chow_patel_sweep | ( | compressed_matrix< NumericT > & | L, |
vector< NumericT > const & | aij_L, | ||
compressed_matrix< NumericT > & | U_trans, | ||
vector< NumericT > const & | aij_U_trans | ||
) |
Performs one nonlinear relaxation step in the Chow-Patel-ILU using CUDA (cf. Algorithm 2 in paper)
Definition at line 576 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::ilu_chow_patel_sweep_kernel | ( | unsigned int const * | L_row_indices, |
unsigned int const * | L_col_indices, | ||
NumericT * | L_elements, | ||
NumericT const * | L_backup, | ||
unsigned int | L_size1, | ||
NumericT const * | aij_L, | ||
unsigned int const * | U_trans_row_indices, | ||
unsigned int const * | U_trans_col_indices, | ||
NumericT * | U_trans_elements, | ||
NumericT const * | U_trans_backup, | ||
NumericT const * | aij_U_trans | ||
) |
CUDA kernel for one Chow-Patel-ILU sweep.
Definition at line 476 of file ilu_operations.hpp.
void viennacl::linalg::cuda::ilu_form_neumann_matrix | ( | compressed_matrix< NumericT > & | R, |
vector< NumericT > & | diag_R | ||
) |
Definition at line 649 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::ilu_form_neumann_matrix_kernel | ( | unsigned int const * | R_row_indices, |
unsigned int const * | R_col_indices, | ||
NumericT * | R_elements, | ||
unsigned int | R_size1, | ||
NumericT * | D_elements | ||
) |
Definition at line 611 of file ilu_operations.hpp.
void viennacl::linalg::cuda::ilu_scale | ( | compressed_matrix< NumericT > const & | A, |
compressed_matrix< NumericT > & | L, | ||
compressed_matrix< NumericT > & | U | ||
) |
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly.
Definition at line 438 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::ilu_scale_kernel_1 | ( | unsigned int const * | A_row_indices, |
unsigned int const * | A_col_indices, | ||
NumericT const * | A_elements, | ||
unsigned int | A_size1, | ||
NumericT * | D_elements | ||
) |
Definition at line 148 of file ilu_operations.hpp.
__global__ void viennacl::linalg::cuda::ilu_scale_kernel_2 | ( | unsigned int const * | R_row_indices, |
unsigned int const * | R_col_indices, | ||
NumericT * | R_elements, | ||
unsigned int | R_size1, | ||
NumericT * | D_elements | ||
) |
Scales values in a matrix such that output = D * input * D, where D is a diagonal matrix (only the diagonal is provided)
Definition at line 177 of file ilu_operations.hpp.
void viennacl::linalg::cuda::inclusive_scan | ( | vector_base< NumericT > const & | input, |
vector_base< NumericT > & | output | ||
) |
This function implements an inclusive scan using CUDA.
input | Input vector. |
output | The output vector. Either identical to input or non-overlapping. |
Definition at line 3226 of file vector_operations.hpp.
vcl_size_t viennacl::linalg::cuda::index_norm_inf | ( | vector_base< NumericT > const & | vec1 | ) |
Computes the index of the first entry that is equal to the supremum-norm in modulus.
vec1 | The vector |
Definition at line 2972 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int * | result | ||
) |
Definition at line 2917 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
NumericT * | group_results | ||
) |
Definition at line 1807 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
NumericT * | group_results | ||
) |
Definition at line 1846 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
NumericT * | group_results | ||
) |
Definition at line 1891 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
const NumericT * | y4, | ||
unsigned int | start4, | ||
unsigned int | stride4, | ||
const NumericT * | y5, | ||
unsigned int | start5, | ||
unsigned int | stride5, | ||
const NumericT * | y6, | ||
unsigned int | start6, | ||
unsigned int | stride6, | ||
const NumericT * | y7, | ||
unsigned int | start7, | ||
unsigned int | stride7, | ||
NumericT * | group_results | ||
) |
Definition at line 1942 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_cpu | ( | vector_base< NumericT > const & | vec1, |
vector_base< NumericT > const & | vec2, | ||
NumericT & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the host) |
Definition at line 1771 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | vec1, |
vector_base< NumericT > const & | vec2, | ||
ScalarT & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the gpu) |
Definition at line 1739 of file vector_operations.hpp.
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | x, |
vector_tuple< NumericT > const & | vec_tuple, | ||
vector_base< NumericT > & | result | ||
) |
Definition at line 2039 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::inner_prod_kernel | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
NumericT * | group_buffer | ||
) |
Definition at line 1493 of file vector_operations.hpp.
void viennacl::linalg::cuda::inplace_solve | ( | matrix_base< NumericT > const & | A, |
matrix_base< NumericT > & | B, | ||
SolverTagT | tag | ||
) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation).
A | The system matrix |
B | The matrix of row vectors, where the solution is directly written to |
tag | Solver tag for identifying the respective triangular solver |
Definition at line 253 of file direct_solve.hpp.
void viennacl::linalg::cuda::inplace_solve | ( | matrix_base< NumericT > const & | mat, |
vector_base< NumericT > & | vec, | ||
SolverTagT | |||
) |
Direct inplace solver for dense triangular systems (non-transposed version)
mat | The system matrix proxy |
vec | The load vector, where the solution is directly written to |
Definition at line 398 of file direct_solve.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 673 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 694 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 716 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 737 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 761 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 782 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 813 of file sparse_matrix_operations.hpp.
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
viennacl::vector_base< NumericT > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
Carries out triangular inplace solves.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 834 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_assign | ( | matrix_base< NumericT > & | mat, |
NumericT | s, | ||
bool | clear = false | ||
) |
Definition at line 316 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 739 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 757 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 872 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 895 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 918 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 941 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 964 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 987 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1010 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1033 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1056 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1079 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1125 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1102 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1148 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1171 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1194 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1217 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1240 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::matrix_column | ( | const matrix_base< NumericT > & | mat, |
unsigned int | j, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 509 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_from_vector | ( | const vector_base< NumericT > & | vec, |
int | k, | ||
matrix_base< NumericT > & | mat | ||
) |
Definition at line 377 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_to_vector | ( | matrix_base< NumericT > const & | mat, |
int | k, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 429 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diagonal_assign | ( | matrix_base< NumericT > & | mat, |
NumericT | s | ||
) |
Definition at line 348 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 39 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 126 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 213 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 300 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 750 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 837 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 924 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1011 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1464 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1551 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1638 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1725 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2179 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2266 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2353 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2440 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel | ( | const NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
bool | row_major_A, | ||
NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_size1, | ||
unsigned int | B_size2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
bool | row_major_B, | ||
bool | unit_diagonal | ||
) |
Definition at line 107 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 394 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 481 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 568 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 655 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1105 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1192 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1279 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1366 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1820 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1907 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 1994 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2081 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2536 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2623 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2710 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel | ( | NumericT | alpha, |
const NumericT * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
NumericT | beta, | ||
NumericT * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
Definition at line 2797 of file matrix_operations_prod.hpp.
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel | ( | const NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
bool | row_major_A, | ||
NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_size1, | ||
unsigned int | B_size2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
bool | row_major_B, | ||
bool | unit_diagonal | ||
) |
Definition at line 41 of file direct_solve.hpp.
void viennacl::linalg::cuda::matrix_row | ( | matrix_base< NumericT > const & | mat, |
unsigned int | i, | ||
vector_base< NumericT > & | vec | ||
) |
Definition at line 476 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 770 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | alpha | ||
) |
Definition at line 788 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 902 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 925 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 948 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 971 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 994 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1017 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1040 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1063 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1086 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1109 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1155 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1132 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1178 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1201 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1224 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1247 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
Definition at line 1270 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::max_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU.
vec1 | The vector |
result | The result host scalar |
Definition at line 2793 of file vector_operations.hpp.
void viennacl::linalg::cuda::max_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the maximum of a vector, both reduction stages run on the GPU.
vec1 | The vector |
result | The result GPU scalar |
Definition at line 2765 of file vector_operations.hpp.
__device__ unsigned int viennacl::linalg::cuda::merge_subwarp_numeric | ( | NumericT | scaling_factor, |
unsigned int | input_start, | ||
unsigned int | input_end, | ||
const unsigned int * | input_indices, | ||
const NumericT * | input_values, | ||
unsigned int | invalid_token, | ||
unsigned int * | output_indices, | ||
NumericT * | output_values, | ||
unsigned int | id_in_warp, | ||
unsigned int | subwarpsize | ||
) |
Definition at line 303 of file spgemm.hpp.
|
inline |
Definition at line 149 of file spgemm.hpp.
|
inline |
Definition at line 173 of file spgemm.hpp.
void viennacl::linalg::cuda::min_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU.
vec1 | The vector |
result | The result host scalar |
Definition at line 2847 of file vector_operations.hpp.
void viennacl::linalg::cuda::min_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the minimum of a vector, both reduction stages run on the GPU.
vec1 | The vector |
result | The result GPU scalar |
Definition at line 2819 of file vector_operations.hpp.
void viennacl::linalg::cuda::multiply_complex | ( | viennacl::vector< NumericT, AlignmentV > const & | input1, |
viennacl::vector< NumericT, AlignmentV > const & | input2, | ||
viennacl::vector< NumericT, AlignmentV > & | output | ||
) |
Multiply two complex vectors and store the result in output.
Definition at line 674 of file fft_operations.hpp.
void viennacl::linalg::cuda::nmf | ( | viennacl::matrix_base< NumericT > const & | V, |
viennacl::matrix_base< NumericT > & | W, | ||
viennacl::matrix_base< NumericT > & | H, | ||
viennacl::linalg::nmf_config const & | conf | ||
) |
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
V | Input matrix |
W | First factor |
H | Second factor |
conf | A configuration object holding tolerances and the like |
Definition at line 59 of file nmf_operations.hpp.
void viennacl::linalg::cuda::norm_1_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2605 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_1_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2587 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_2_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
Definition at line 2651 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_2_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
Definition at line 2632 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_inf_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2699 of file vector_operations.hpp.
void viennacl::linalg::cuda::norm_inf_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
Definition at line 2679 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_floats | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2238 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_integers | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2331 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers | ( | const NumericT * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
NumericT * | group_buffer | ||
) |
Definition at line 2415 of file vector_operations.hpp.
void viennacl::linalg::cuda::normalize | ( | viennacl::vector< NumericT, AlignmentV > & | input | ) |
Normalize the vector by its own size.
Definition at line 700 of file fft_operations.hpp.
|
inline |
Definition at line 98 of file fft_operations.hpp.
|
inline |
Definition at line 123 of file fft_operations.hpp.
|
inline |
Definition at line 80 of file fft_operations.hpp.
|
inline |
Definition at line 104 of file fft_operations.hpp.
|
inline |
Definition at line 86 of file fft_operations.hpp.
|
inline |
Definition at line 110 of file fft_operations.hpp.
|
inline |
Definition at line 92 of file fft_operations.hpp.
|
inline |
Definition at line 117 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1139 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_adaptive_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const unsigned int * | row_blocks, | ||
const NumericT * | elements, | ||
unsigned int | num_blocks, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 972 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_blocked_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 896 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1287 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1490 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | compressed_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1080 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | coordinate_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1253 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1350 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1456 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | hyb_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 1566 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
const NumericT * | r0star, | ||
unsigned int | size, | ||
unsigned int | block_size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size, | ||
unsigned int | buffer_offset | ||
) |
Definition at line 1382 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_update_s | ( | vector_base< NumericT > & | s, |
vector_base< NumericT > & | r, | ||
vector_base< NumericT > const & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Definition at line 791 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel | ( | NumericT * | s, |
NumericT const * | residual, | ||
NumericT const * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | chunk_size, | ||
unsigned int | chunk_offset | ||
) |
Definition at line 734 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel | ( | NumericT * | result, |
NumericT | alpha, | ||
NumericT * | p, | ||
NumericT | omega, | ||
NumericT const * | s, | ||
NumericT * | residual, | ||
NumericT const * | As, | ||
NumericT | beta, | ||
NumericT const * | Ap, | ||
NumericT const * | r0star, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | size | ||
) |
Definition at line 813 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_bicgstab_vector_update | ( | vector_base< NumericT > & | result, |
NumericT | alpha, | ||
vector_base< NumericT > & | p, | ||
NumericT | omega, | ||
vector_base< NumericT > const & | s, | ||
vector_base< NumericT > & | residual, | ||
vector_base< NumericT > const & | As, | ||
NumericT | beta, | ||
vector_base< NumericT > const & | Ap, | ||
vector_base< NumericT > const & | r0star, | ||
vector_base< NumericT > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 864 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 331 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_adaptive_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const unsigned int * | row_blocks, | ||
const NumericT * | elements, | ||
unsigned int | num_blocks, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 181 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_blocked_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 114 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
const NumericT * | elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 463 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 638 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | compressed_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 280 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | coordinate_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 435 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 518 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 610 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_prod | ( | hyb_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 706 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | p, | ||
NumericT * | Ap, | ||
unsigned int | size, | ||
unsigned int | block_size, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | buffer_size | ||
) |
Definition at line 544 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel | ( | NumericT * | result, |
NumericT | alpha, | ||
NumericT * | p, | ||
NumericT * | r, | ||
NumericT const * | Ap, | ||
NumericT | beta, | ||
NumericT * | inner_prod_buffer, | ||
unsigned int | size | ||
) |
Definition at line 44 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_cg_vector_update | ( | vector_base< NumericT > & | result, |
NumericT | alpha, | ||
vector_base< NumericT > & | p, | ||
vector_base< NumericT > & | r, | ||
vector_base< NumericT > const & | Ap, | ||
NumericT | beta, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 85 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 | ( | vector_base< T > const & | device_krylov_basis, |
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vcl_size_t | param_k, | ||
vector_base< T > & | vi_in_vk_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 1738 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel | ( | T const * | krylov_basis, |
unsigned int | size, | ||
unsigned int | internal_size, | ||
unsigned int | k, | ||
T * | vi_in_vk_buffer, | ||
unsigned int | chunk_size | ||
) |
Definition at line 1691 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 | ( | vector_base< T > & | device_krylov_basis, |
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vcl_size_t | param_k, | ||
vector_base< T > const & | vi_in_vk_buffer, | ||
vector_base< T > & | R_buffer, | ||
vcl_size_t | krylov_dim, | ||
vector_base< T > & | inner_prod_buffer, | ||
vcl_size_t | buffer_chunk_size | ||
) |
Definition at line 1830 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel | ( | T * | krylov_basis, |
unsigned int | size, | ||
unsigned int | internal_size, | ||
unsigned int | k, | ||
T const * | vi_in_vk_buffer, | ||
unsigned int | chunk_size, | ||
T * | R_buffer, | ||
unsigned int | krylov_dim, | ||
T * | inner_prod_buffer | ||
) |
Definition at line 1763 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_normalize_vk | ( | vector_base< T > & | v_k, |
vector_base< T > const & | residual, | ||
vector_base< T > & | R_buffer, | ||
vcl_size_t | offset_in_R, | ||
vector_base< T > const & | inner_prod_buffer, | ||
vector_base< T > & | r_dot_vk_buffer, | ||
vcl_size_t | buffer_chunk_size, | ||
vcl_size_t | buffer_chunk_offset | ||
) |
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
This routine computes for vectors 'r', 'v_k': (1) the second reduction step for ||v_k||; (2) v_k /= ||v_k||; (3) the first reduction step for <r, v_k>.
Definition at line 1660 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel | ( | T * | vk, |
unsigned int | vk_offset, | ||
T const * | residual, | ||
T * | R_buffer, | ||
unsigned int | R_offset, | ||
T const * | inner_prod_buffer, | ||
unsigned int | chunk_size, | ||
T * | r_dot_vk_buffer, | ||
unsigned int | chunk_offset, | ||
unsigned int | size | ||
) |
Definition at line 1598 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | compressed_matrix< NumericT > const & | A, |
vector_base< NumericT > const & | p, | ||
vector_base< NumericT > & | Ap, | ||
vector_base< NumericT > & | inner_prod_buffer | ||
) |
Definition at line 1907 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | coordinate_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1953 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | ell_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1975 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | sliced_ell_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 1996 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_prod | ( | hyb_matrix< T > const & | A, |
vector_base< T > const & | p, | ||
vector_base< T > & | Ap, | ||
vector_base< T > & | inner_prod_buffer | ||
) |
Definition at line 2019 of file iterative_operations.hpp.
void viennacl::linalg::cuda::pipelined_gmres_update_result | ( | vector_base< T > & | result, |
vector_base< T > const & | residual, | ||
vector_base< T > const & | krylov_basis, | ||
vcl_size_t | v_k_size, | ||
vcl_size_t | v_k_internal_size, | ||
vector_base< T > const & | coefficients, | ||
vcl_size_t | param_k | ||
) |
Definition at line 1882 of file iterative_operations.hpp.
__global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel | ( | T * | result, |
T const * | residual, | ||
T const * | krylov_basis, | ||
unsigned int | size, | ||
unsigned int | internal_size, | ||
T const * | coefficients, | ||
unsigned int | k | ||
) |
Definition at line 1862 of file iterative_operations.hpp.
void viennacl::linalg::cuda::plane_rotation | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > & | vec2, | ||
NumericT | alpha, | ||
NumericT | beta | ||
) |
Computes a plane rotation of two vectors.
Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
vec1 | The first vector |
vec2 | The second vector |
alpha | The first transformation coefficient |
beta | The second transformation coefficient |
Definition at line 3032 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::plane_rotation_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
NumericT | alpha, | ||
NumericT | beta | ||
) |
Definition at line 2996 of file vector_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 246 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse_matrix-dense_matrix multiplication, the first matrix being compressed.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The sparse matrix |
d_mat | The dense matrix |
result | The result matrix |
Definition at line 385 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out matrix-trans(matrix) multiplication, the first matrix being compressed and the second transposed.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The sparse matrix |
d_mat | The transposed dense matrix proxy |
result | The result matrix |
Definition at line 540 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | viennacl::compressed_matrix< NumericT, AlignmentV > const & | A, |
viennacl::compressed_matrix< NumericT, AlignmentV > const & | B, | ||
viennacl::compressed_matrix< NumericT, AlignmentV > & | C | ||
) |
Carries out sparse_matrix-sparse_matrix multiplication for CSR matrices.
Implementation of the convenience expression C = prod(A, B); Based on computing C(i, :) = A(i, :) * B via merging the respective rows of B
A | Left factor |
B | Right factor |
C | Result matrix |
Definition at line 559 of file spgemm.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_compressed_matrix< NumericT > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 952 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a coordinate_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 1202 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Compressed Matrix(COO)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Matrix |
result | The Result Matrix |
Definition at line 1328 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | mat, |
bool | mat_transpose, | ||
const vector_base< NumericT > & | vec, | ||
vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication.
Implementation of the convenience expressions result = prod(mat, vec); and result = prod(trans(mat), vec);
mat | The matrix |
mat_transpose | Whether the matrix is to be transposed. |
vec | The vector |
result | The result vector |
Definition at line 1464 of file matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Compressed Matrix(COO)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Transposed Matrix |
result | The Result Matrix |
Definition at line 1522 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with an ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 1668 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The dense matrix |
result | The result matrix |
Definition at line 1760 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The transposed dense matrix proxy |
result | The result matrix |
Definition at line 1933 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::sliced_ell_matrix< NumericT, IndexT > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a sliced_ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 2095 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::vector_base< NumericT > & | vec, | ||
viennacl::vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
Definition at line 2181 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::matrix_base< NumericT > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, d_mat);
mat | The sparse matrix |
d_mat | The dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
Definition at line 2288 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | A, |
bool | trans_A, | ||
const matrix_base< NumericT > & | B, | ||
bool | trans_B, | ||
matrix_base< NumericT > & | C, | ||
ScalarT | alpha, | ||
ScalarT | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(A, B);
Definition at line 2384 of file matrix_operations.hpp.
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT > & | result | ||
) |
Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, trans(d_mat));
mat | The sparse matrix |
d_mat | Transposed matrix proxy object for the rhs dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
Definition at line 2486 of file sparse_matrix_operations.hpp.
void viennacl::linalg::cuda::radix2 | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Radix-2 1D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 442 of file fft_operations.hpp.
void viennacl::linalg::cuda::radix2 | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | batch_num, | ||
NumericT | sign = NumericT(-1) , |
||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Radix-2 2D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 494 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::real_to_complex | ( | const RealT * | in, |
ComplexT * | out, | ||
unsigned int | size | ||
) |
Definition at line 782 of file fft_operations.hpp.
void viennacl::linalg::cuda::real_to_complex | ( | viennacl::vector_base< NumericT > const & | in, |
viennacl::vector_base< NumericT > & | out, | ||
vcl_size_t | size | ||
) |
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 797 of file fft_operations.hpp.
void viennacl::linalg::cuda::reorder | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
vcl_size_t | size, | ||
vcl_size_t | stride, | ||
vcl_size_t | bits_datasize, | ||
vcl_size_t | batch_num, | ||
viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
) |
Definition at line 282 of file fft_operations.hpp.
void viennacl::linalg::cuda::reverse | ( | viennacl::vector_base< NumericT > & | in | ) |
Reverse vector to opposite order and save it in input vector.
Definition at line 847 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::reverse_inplace | ( | NumericT * | vec, |
unsigned int | size | ||
) |
Definition at line 832 of file fft_operations.hpp.
__device__ IndexT viennacl::linalg::cuda::round_to_next_power_of_2 | ( | IndexT | val | ) |
Definition at line 63 of file spgemm.hpp.
__global__ void viennacl::linalg::cuda::scalar_swap_kernel | ( | NumericT * | s1, |
NumericT * | s2 | ||
) |
Definition at line 345 of file scalar_operations.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1355 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1395 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1384 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const NumericT * | val, | ||
unsigned int | options2, | ||
const NumericT * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
Definition at line 1424 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::scaled_rank_1_update | ( | matrix_base< NumericT > & | mat1, |
ScalarT const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
const vector_base< NumericT > & | vec1, | ||
const vector_base< NumericT > & | vec2 | ||
) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.
Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
mat1 | The matrix to be updated |
alpha | The scaling factor (either a viennacl::scalar<>, float, or double) |
len_alpha | Length of the buffer for an eventual final reduction step (currently always '1') |
reciprocal_alpha | Use 1/alpha instead of alpha |
flip_sign_alpha | Use -alpha instead of alpha |
vec1 | The first vector |
vec2 | The second vector |
Definition at line 2416 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_1 | ( | NumericT const * | X, |
unsigned int | startX, | ||
unsigned int | incX, | ||
unsigned int | sizeX, | ||
NumericT * | Y, | ||
unsigned int | startY, | ||
unsigned int | incY, | ||
unsigned int | scan_offset, | ||
NumericT * | carries | ||
) |
Definition at line 3063 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_2 | ( | NumericT * | carries | ) |
Definition at line 3121 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::scan_kernel_3 | ( | NumericT * | Y, |
unsigned int | startY, | ||
unsigned int | incY, | ||
unsigned int | sizeY, | ||
NumericT const * | carries | ||
) |
Definition at line 3147 of file vector_operations.hpp.
|
inline |
Compute addresses to obtain compact list of block start addresses.
Definition at line 239 of file bisect_kernel_large.hpp.
|
inline |
Perform initial scan for compaction of intervals containing one and multiple eigenvalues; also do initial scan to build blocks
Definition at line 370 of file bisect_kernel_large.hpp.
|
inline |
Perform scan to obtain number of eigenvalues before a specific block.
Definition at line 304 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::sliced_ell_matrix_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
const unsigned int * | column_indices, | ||
const unsigned int * | block_start, | ||
const NumericT * | elements, | ||
const NumericT * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
unsigned int | size_x, | ||
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result, | ||
unsigned int | block_size | ||
) |
Definition at line 2046 of file sparse_matrix_operations.hpp.
__device__ void viennacl::linalg::cuda::storeInterval | ( | unsigned int | addr, |
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | left, | ||
NumericT | right, | ||
S | left_count, | ||
S | right_count, | ||
NumericT | precision | ||
) |
Check if interval converged and store appropriately
addr | address where to store the information of the interval |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
left | lower limit of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
right_count | eigenvalues less than right |
precision | desired precision for eigenvalues |
Definition at line 124 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeIntervalConverged | ( | NumericT * | s_left, |
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT & | left, | ||
NumericT & | mid, | ||
NumericT & | right, | ||
S & | left_count, | ||
S & | mid_count, | ||
S & | right_count, | ||
T * | s_compaction_list_exc, | ||
unsigned int & | compact_second_chunk, | ||
const unsigned int | num_threads_active, | ||
unsigned int & | is_active_second | ||
) |
Definition at line 465 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervals | ( | unsigned int | addr, |
const unsigned int | num_threads_active, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
NumericT | left, | ||
NumericT | mid, | ||
NumericT | right, | ||
const S | left_count, | ||
const S | mid_count, | ||
const S | right_count, | ||
NumericT | precision, | ||
unsigned int & | compact_second_chunk, | ||
T * | s_compaction_list_exc, | ||
unsigned int & | is_active_second | ||
) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.
addr | base address for storing intervals |
num_threads_active | number of threads / intervals in current sweep |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
left | lower limit of interval |
mid | midpoint of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
mid_count | eigenvalues less than mid |
right_count | eigenvalues less than right |
precision | desired precision for eigenvalues |
compact_second_chunk | shared mem flag if second chunk is used and ergo requires compaction |
s_compaction_list_exc | helper array for stream compaction, s_compaction_list_exc[tid] = 1 when the thread generated two child intervals |
is_active_second | mark if thread has a second non-empty child interval |
Definition at line 309 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::storeNonEmptyIntervalsLarge | ( | unsigned int | addr, |
const unsigned int | num_threads_active, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
NumericT | left, | ||
NumericT | mid, | ||
NumericT | right, | ||
const unsigned short | left_count, | ||
const unsigned short | mid_count, | ||
const unsigned short | right_count, | ||
NumericT | epsilon, | ||
unsigned int & | compact_second_chunk, | ||
unsigned short * | s_compaction_list, | ||
unsigned int & | is_active_second | ||
) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread
Definition at line 476 of file bisect_kernel_large.hpp.
__device__ void viennacl::linalg::cuda::subdivideActiveInterval | ( | const unsigned int | tid, |
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
const unsigned int | num_threads_active, | ||
NumericT & | left, | ||
NumericT & | right, | ||
unsigned int & | left_count, | ||
unsigned int & | right_count, | ||
NumericT & | mid, | ||
unsigned int & | all_threads_converged | ||
) |
Subdivide interval if active and not already converged.
tid | id of thread |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
num_threads_active | number of active threads in warp |
left | lower limit of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
right_count | eigenvalues less than right |
mid | median of interval |
all_threads_converged | shared memory flag if all threads are converged |
Definition at line 582 of file bisect_util.hpp.
__device__ void viennacl::linalg::cuda::subdivideActiveIntervalMulti | ( | const unsigned int | tid, |
NumericT * | s_left, | ||
NumericT * | s_right, | ||
T * | s_left_count, | ||
T * | s_right_count, | ||
const unsigned int | num_threads_active, | ||
NumericT & | left, | ||
NumericT & | right, | ||
unsigned int & | left_count, | ||
unsigned int & | right_count, | ||
NumericT & | mid, | ||
unsigned int & | all_threads_converged | ||
) |
Subdivide interval if active and not already converged.
tid | id of thread |
s_left | shared memory storage for left interval limits |
s_right | shared memory storage for right interval limits |
s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
num_threads_active | number of active threads in warp |
left | lower limit of interval |
right | upper limit of interval |
left_count | eigenvalues less than left |
right_count | eigenvalues less than right |
mid | median of interval |
all_threads_converged | shared memory flag if all threads are converged |
Definition at line 529 of file bisect_util.hpp.
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shared | ( | NumericT | output_value, |
unsigned int | id_in_warp, | ||
NumericT * | shared_buffer | ||
) |
Definition at line 241 of file spgemm_rmerge.hpp.
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shuffle | ( | NumericT | output_value | ) |
Definition at line 232 of file spgemm_rmerge.hpp.
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shared | ( | IndexT | min_index, |
IndexT | id_in_warp, | ||
IndexT * | shared_buffer | ||
) |
Definition at line 152 of file spgemm_rmerge.hpp.
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shuffle | ( | IndexT | min_index | ) |
Definition at line 143 of file spgemm_rmerge.hpp.
void viennacl::linalg::cuda::sum_cpu | ( | vector_base< NumericT > const & | vec1, |
NumericT & | result | ||
) |
Computes the sum of all elements of a vector, first reduction stage on the GPU, second stage on the CPU.
vec1 | The vector |
result | The result host scalar |
Definition at line 2891 of file vector_operations.hpp.
void viennacl::linalg::cuda::sum_impl | ( | vector_base< NumericT > const & | vec1, |
scalar< NumericT > & | result | ||
) |
Computes the sum of all elements of a vector, both reduction stages run on the GPU.
vec1 | The vector |
result | The result GPU scalar |
Definition at line 2874 of file vector_operations.hpp.
viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value >::type viennacl::linalg::cuda::swap | ( | ScalarT1 & | s1, |
ScalarT2 & | s2 | ||
) |
Swaps the contents of two scalars, data is copied.
s1 | The first scalar |
s2 | The second scalar |
Definition at line 361 of file scalar_operations.hpp.
void viennacl::linalg::cuda::trans | ( | matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const & | proxy, |
matrix_base< NumericT > & | temp_trans | ||
) |
Definition at line 94 of file matrix_operations.hpp.
__global__ void viennacl::linalg::cuda::trans_kernel | ( | const NumericT * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_stride1, | ||
unsigned int | A_stride2, | ||
NumericT * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
unsigned int | B_stride1, | ||
unsigned int | B_stride2, | ||
bool | data_major | ||
) |
Definition at line 56 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1298 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1342 of file matrix_operations_row.hpp.
__global__ void viennacl::linalg::cuda::transpose | ( | const NumericT * | input, |
NumericT * | output, | ||
unsigned int | row_num, | ||
unsigned int | col_num | ||
) |
Definition at line 713 of file fft_operations.hpp.
void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | input, |
viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | output | ||
) |
Transpose matrix.
Definition at line 732 of file fft_operations.hpp.
void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | input | ) |
In-place matrix transpose.
Definition at line 770 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::transpose_inplace | ( | NumericT * | input, |
unsigned int | row_num, | ||
unsigned int | col_num | ||
) |
Definition at line 746 of file fft_operations.hpp.
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel | ( | NumericT const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
Definition at line 307 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel | ( | NumericT const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
Definition at line 266 of file direct_solve.hpp.
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1255 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1047 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1074 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1100 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1126 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1152 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1178 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1204 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1230 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1282 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1334 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_log_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1308 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1360 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1386 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1412 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1438 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 1464 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1267 of file matrix_operations_col.hpp.
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel | ( | const NumericT * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const NumericT * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
NumericT * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
Definition at line 1297 of file matrix_operations_row.hpp.
void viennacl::linalg::cuda::vector_assign | ( | vector_base< NumericT > & | vec1, |
ScalarT1 const & | alpha, | ||
bool | up_to_internal_size = false | ||
) |
Assigns a constant value to a vector (or to a vector range or slice).
vec1 | The vector to which the value should be assigned |
alpha | The value to be assigned |
up_to_internal_size | Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). |
Definition at line 803 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_assign_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | internal_size1, | ||
NumericT | alpha | ||
) |
Definition at line 782 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_maxmin_kernel | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 2722 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel | ( | NumericT const * | vec1, |
NumericT * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
Definition at line 2017 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1533 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1575 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers | ( | const NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
NumericT * | result | ||
) |
Definition at line 1612 of file vector_operations.hpp.
void viennacl::linalg::cuda::vector_swap | ( | vector_base< NumericT > & | vec1, |
vector_base< NumericT > & | vec2 | ||
) |
Swaps the contents of two vectors; the data is copied.
vec1 | The first vector (or vector range or slice) |
vec2 | The second vector (or vector range or slice) |
Definition at line 853 of file vector_operations.hpp.
__global__ void viennacl::linalg::cuda::vector_swap_kernel | ( | NumericT * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
NumericT * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
Definition at line 826 of file vector_operations.hpp.
__device__ void viennacl::linalg::cuda::writeToGmem | ( | const unsigned int | tid, |
const unsigned int | tid_2, | ||
const unsigned int | num_threads_active, | ||
const unsigned int | num_blocks_mult, | ||
NumericT * | g_left_one, | ||
NumericT * | g_right_one, | ||
unsigned int * | g_pos_one, | ||
NumericT * | g_left_mult, | ||
NumericT * | g_right_mult, | ||
unsigned int * | g_left_count_mult, | ||
unsigned int * | g_right_count_mult, | ||
NumericT * | s_left, | ||
NumericT * | s_right, | ||
unsigned short * | s_left_count, | ||
unsigned short * | s_right_count, | ||
unsigned int * | g_blocks_mult, | ||
unsigned int * | g_blocks_mult_sum, | ||
unsigned short * | s_compaction_list, | ||
unsigned short * | s_cl_helper, | ||
unsigned int | offset_mult_lambda | ||
) |
Write data to global memory.
Definition at line 53 of file bisect_kernel_large.hpp.
__global__ void viennacl::linalg::cuda::zero2 | ( | NumericT * | input1, |
NumericT * | input2, | ||
unsigned int | size | ||
) |
Definition at line 602 of file fft_operations.hpp.