ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
spgemm_rmerge.hpp File Reference

Go to the source code of this file.

Namespaces

 viennacl
 Main namespace in ViennaCL. Holds all the basic types, such as vector and matrix, and defines operations upon them.
 
 viennacl::linalg
 Provides all linear algebra operations which are not covered by operator overloads.
 
 viennacl::linalg::cuda
 Holds all CUDA compute kernels used by ViennaCL.
 

Functions

template<typename IndexT >
__device__ IndexT viennacl::linalg::cuda::round_to_next_power_of_2 (IndexT val)
 
template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_1 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, IndexT *subwarpsize_per_group, IndexT *max_nnz_row_A_per_group, IndexT *max_nnz_row_B_per_group)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shuffle (IndexT min_index)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__device__ IndexT viennacl::linalg::cuda::subwarp_minimum_shared (IndexT min_index, IndexT id_in_warp, IndexT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_2 (const IndexT *A_row_indices, const IndexT *A_col_indices, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, IndexT B_size2, IndexT *C_row_indices)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shuffle (NumericT output_value)
 
template<unsigned int SubWarpSizeV, typename NumericT >
__device__ NumericT viennacl::linalg::cuda::subwarp_accumulate_shared (NumericT output_value, unsigned int id_in_warp, NumericT *shared_buffer)
 
template<unsigned int SubWarpSizeV, typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_stage_3 (const IndexT *A_row_indices, const IndexT *A_col_indices, const NumericT *A_elements, IndexT A_size1, const IndexT *B_row_indices, const IndexT *B_col_indices, const NumericT *B_elements, IndexT B_size2, IndexT const *C_row_indices, IndexT *C_col_indices, NumericT *C_elements)
 
template<typename IndexT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_decompose_1 (const IndexT *A_row_indices, IndexT A_size1, IndexT max_per_row, IndexT *chunks_per_row)
 
template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_A2 (IndexT *A2_row_indices, IndexT *A2_col_indices, NumericT *A2_elements, IndexT A2_size1, IndexT *new_row_buffer)
 
template<typename IndexT , typename NumericT >
__global__ void viennacl::linalg::cuda::compressed_matrix_gemm_G1 (IndexT *G1_row_indices, IndexT *G1_col_indices, NumericT *G1_elements, IndexT G1_size1, IndexT const *A_row_indices, IndexT const *A_col_indices, NumericT const *A_elements, IndexT A_size1, IndexT A_nnz, IndexT max_per_row, IndexT *new_row_buffer)
 
template<class NumericT , unsigned int AlignmentV>
void viennacl::linalg::cuda::prod_impl (viennacl::compressed_matrix< NumericT, AlignmentV > const &A, viennacl::compressed_matrix< NumericT, AlignmentV > const &B, viennacl::compressed_matrix< NumericT, AlignmentV > &C)
 Carries out sparse matrix-sparse matrix multiplication for CSR (compressed sparse row) matrices.