ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
iterative_operations.hpp File Reference

Implementations of operations using sparse matrices using CUDA. More...

Go to the source code of this file.

Namespaces

 viennacl
 Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
 
 viennacl::linalg
 Provides all linear algebra operations which are not covered by operator overloads.
 
 viennacl::linalg::cuda
 Holds all CUDA compute kernels used by ViennaCL.
 

Functions

template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_blocked_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, unsigned int block_size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename NumericT >
__global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 
template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
 Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More...
 
template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
 
template<typename T >
__global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
 
template<typename NumericT >
void viennacl::linalg::cuda::pipelined_gmres_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 
template<typename T >
void viennacl::linalg::cuda::pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
 

Detailed Description

Implementations of operations using sparse matrices using CUDA.

Definition in file iterative_operations.hpp.