1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
54 template<
typename NumericT,
unsigned int AlignmentV>
64 viennacl::traits::opencl_handle(x),
66 cl_uint(info_selector)
80 template<
typename NumericT,
unsigned int AlignmentV>
91 unsigned int alignment = AlignmentV;
92 if (use_nvidia_specific)
116 if (alignment == 4 || alignment == 8)
128 if (use_nvidia_specific)
158 template<
typename NumericT,
unsigned int AlignmentV>
169 viennacl::traits::opencl_handle(d_A),
174 viennacl::traits::opencl_handle(y),
190 template<
typename NumericT,
unsigned int AlignmentV>
203 viennacl::traits::opencl_handle(d_A.lhs()),
208 viennacl::traits::opencl_handle(y),
224 template<
typename NumericT,
unsigned int AlignmentV>
242 viennacl::traits::opencl_handle(upper_bound_nonzeros_per_row_A)
246 unsigned int * upper_bound_nonzeros_per_row_A_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(upper_bound_nonzeros_per_row_A.
handle());
248 unsigned int max_nnz_per_row_A = 0;
249 for (std::size_t i=0; i<upper_bound_nonzeros_per_row_A.
size(); ++i)
250 max_nnz_per_row_A =
std::max(max_nnz_per_row_A, upper_bound_nonzeros_per_row_A_ptr[i]);
252 if (max_nnz_per_row_A > 32)
255 unsigned int max_entries_in_G = 32;
256 if (max_nnz_per_row_A <= 256)
257 max_entries_in_G = 16;
258 if (max_nnz_per_row_A <= 64)
259 max_entries_in_G = 8;
264 cl_uint(max_entries_in_G),
265 viennacl::traits::opencl_handle(exclusive_scan_helper)
270 unsigned int augmented_size = exclusive_scan_helper[A.
size1()];
278 viennacl::ocl::enqueue(k_fill_A2(A2.handle1().opencl_handle(), A2.handle2().opencl_handle(), A2.handle().opencl_handle(), cl_uint(A2.size1()),
279 viennacl::traits::opencl_handle(exclusive_scan_helper)
284 viennacl::ocl::enqueue(k_fill_G1(G1.handle1().opencl_handle(), G1.handle2().opencl_handle(), G1.handle().opencl_handle(), cl_uint(G1.size1()),
286 cl_uint(max_entries_in_G),
287 viennacl::traits::opencl_handle(exclusive_scan_helper)
315 unsigned int current_offset = 0;
316 for (std::size_t i=0; i<C.
size1(); ++i)
318 unsigned int tmp = row_buffer[i];
319 row_buffer.set(i, current_offset);
320 current_offset += tmp;
322 row_buffer.
set(C.
size1(), current_offset);
330 C.
reserve(current_offset,
false);
349 template<
typename NumericT,
unsigned int MAT_AlignmentV>
361 viennacl::traits::opencl_handle(x),
372 template<
typename NumericT,
unsigned int AlignmentV>
385 viennacl::traits::opencl_handle(x),
397 template<
typename NumericT,
unsigned int AlignmentV>
409 viennacl::traits::opencl_handle(x),
420 template<
typename NumericT,
unsigned int AlignmentV>
433 viennacl::traits::opencl_handle(x),
450 template<
typename NumericT,
unsigned int AlignmentV>
465 L.lhs().handle2().opencl_handle(),
466 L.lhs().handle().opencl_handle(),
467 block_indices.opencl_handle(),
469 static_cast<cl_uint
>(x.
size())));
473 template<
typename NumericT,
unsigned int AlignmentV>
488 U.lhs().handle2().opencl_handle(),
489 U.lhs().handle().opencl_handle(),
491 block_indices.opencl_handle(),
493 static_cast<cl_uint
>(x.
size())));
505 template<
typename NumericT,
unsigned int AlignmentV>
518 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
519 viennacl::traits::opencl_handle(x),
520 cl_uint(proxy_L.lhs().size1())
531 template<
typename NumericT,
unsigned int AlignmentV>
546 k.local_work_size(0, 128);
547 k.global_work_size(0, k.local_work_size());
548 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
549 viennacl::traits::opencl_handle(diagonal),
550 viennacl::traits::opencl_handle(x),
551 cl_uint(proxy_L.lhs().size1())
561 template<
typename NumericT,
unsigned int AlignmentV>
574 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
575 viennacl::traits::opencl_handle(x),
576 cl_uint(proxy_U.lhs().size1())
587 template<
typename NumericT,
unsigned int AlignmentV>
602 k.local_work_size(0, 128);
603 k.global_work_size(0, k.local_work_size());
604 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
605 viennacl::traits::opencl_handle(diagonal),
606 viennacl::traits::opencl_handle(x),
607 cl_uint(proxy_U.lhs().size1())
625 template<
typename NumericT>
661 template<
typename NumericT,
unsigned int AlignmentV>
669 unsigned int thread_num = 128;
675 viennacl::traits::opencl_handle(x),
676 cl_uint(info_selector),
690 template<
typename NumericT,
unsigned int AlignmentV>
715 unsigned int thread_num = 128;
722 viennacl::traits::opencl_handle(x),
724 viennacl::traits::opencl_handle(y),
740 template<
typename NumericT,
unsigned int AlignmentV>
753 unsigned int thread_num = 128;
758 viennacl::traits::opencl_handle(d_A),
763 viennacl::traits::opencl_handle(y),
781 template<
typename NumericT,
unsigned int AlignmentV>
796 unsigned int thread_num = 128;
801 viennacl::traits::opencl_handle(d_A),
806 viennacl::traits::opencl_handle(y),
821 template<
typename NumericT,
unsigned int AlignmentV>
845 std::stringstream ss;
846 ss <<
"vec_mul_" << 1;
849 unsigned int thread_num = 128;
850 unsigned int group_num = 256;
856 A.
handle().opencl_handle(),
857 viennacl::traits::opencl_handle(x),
859 viennacl::traits::opencl_handle(y),
881 template<
typename NumericT,
unsigned int AlignmentV>
898 cl_uint(sp_A.
size1()),
899 cl_uint(sp_A.
size2()),
903 viennacl::traits::opencl_handle(d_A),
908 viennacl::traits::opencl_handle(y),
926 template<
typename NumericT,
unsigned int AlignmentV>
945 cl_uint(sp_A.
size1()),
946 cl_uint(sp_A.
size2()),
950 viennacl::traits::opencl_handle(d_A.lhs()),
955 viennacl::traits::opencl_handle(y),
968 template<
typename ScalarT,
typename IndexT>
992 std::stringstream ss;
993 ss <<
"vec_mul_" << 1;
997 unsigned int group_num = 256;
1008 A.
handle().opencl_handle(),
1009 viennacl::traits::opencl_handle(x),
1011 viennacl::traits::opencl_handle(y),
1022 template<
typename NumericT,
unsigned int AlignmentV>
1048 A.
handle().opencl_handle(),
1052 viennacl::traits::opencl_handle(x),
1054 viennacl::traits::opencl_handle(y),
1064 template<
typename NumericT,
unsigned int AlignmentV>
1075 A.
handle().opencl_handle(),
1083 viennacl::traits::opencl_handle(d_A),
1088 viennacl::traits::opencl_handle(y),
1097 template<
typename NumericT,
unsigned int AlignmentV>
1110 A.
handle().opencl_handle(),
1118 viennacl::traits::opencl_handle(d_A.lhs()),
1123 viennacl::traits::opencl_handle(y),
const vcl_size_t & size2() const
Returns the number of columns.
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
cl_uint stride
Increment between integers.
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::device const & current_device() const
Returns the current device.
Helper class implementing an array on the host. Default case: No conversion necessary.
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
const handle_type & handle4() const
const vcl_size_t & size1() const
Returns the number of rows.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents an OpenCL kernel within ViennaCL.
cl_uint start
Starting value of the integer stride.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
A tag class representing a lower triangular matrix.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for coordinate_matrix.
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
vcl_size_t internal_ellnnz() const
Expression template class for representing a tree of expressions which ultimately result in a matrix...
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
T max(const T &lhs, const T &rhs)
Maximum.
vcl_size_t rows_per_block() const
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
Main kernel class for generating OpenCL kernels for ell_matrix.
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
vcl_size_t ell_nnz() const
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Main kernel class for generating OpenCL kernels for compressed_matrix.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
static void init(viennacl::ocl::context &ctx)
const handle_type & handle2() const
OpenCL kernel file for compressed_matrix operations.
A tag class representing an upper triangular matrix.
vcl_size_t internal_size1() const
OpenCL kernel file for ell_matrix operations.
Sparse matrix class using the sliced ELLPACK format with parameters C and $\sigma$.
void clear()
Resets all entries to zero.
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows c...
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Main kernel class for generating OpenCL kernels for ell_matrix.
Common routines for single-threaded or OpenMP-enabled execution on CPU.
OpenCL kernel file for sliced_ell_matrix operations.
vcl_size_t maxnnz() const
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
OpenCL kernel file for hyb_matrix operations.
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for vector operations.
void set(vcl_size_t index, U value)
size_type size() const
Returns the length of the vector (cf. std::vector)
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
const handle_type & handle() const
void switch_memory_context(viennacl::context new_ctx)
A tag class representing a lower triangular matrix with unit diagonal.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
OpenCL kernel file for coordinate_matrix operations.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
A tag class representing transposed matrices.
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
A sparse square matrix in compressed sparse rows format.
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix.
const handle_type & handle() const
Returns the memory handle.
static void init(viennacl::ocl::context &ctx)
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
const handle_type & handle3() const
A tag class representing an upper triangular matrix with unit diagonal.
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride.
Main kernel class for generating OpenCL kernels for hyb_matrix.
const handle_type & handle5() const
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)