1 #ifndef VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
51 template<
typename NumericT>
78 template<
typename NumericT>
92 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
103 if (use_nvidia_blocked)
110 buffer_size_per_vector,
122 buffer_size_per_vector,
131 template<
typename NumericT>
141 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
146 unsigned int thread_num = 256;
159 buffer_size_per_vector,
165 template<
typename NumericT>
175 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
179 unsigned int thread_num = 128;
180 unsigned int group_num = 256;
192 A.
handle().opencl_handle(),
196 viennacl::traits::opencl_handle(p),
197 viennacl::traits::opencl_handle(Ap),
200 buffer_size_per_vector,
207 template<
typename NumericT>
217 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
222 unsigned int group_num = 256;
233 A.
handle().opencl_handle(),
234 viennacl::traits::opencl_handle(p),
235 viennacl::traits::opencl_handle(Ap),
239 buffer_size_per_vector,
247 template<
typename NumericT>
257 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
261 unsigned int thread_num = 128;
262 unsigned int group_num = 128;
274 A.
handle().opencl_handle(),
281 viennacl::traits::opencl_handle(p),
282 viennacl::traits::opencl_handle(Ap),
285 buffer_size_per_vector,
295 template<
typename NumericT>
318 cl_uint chunk_size = cl_uint(buffer_chunk_size);
319 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
321 inner_prod_buffer, chunk_size, chunk_offset, vec_size,
326 template<
typename NumericT>
333 (void)buffer_chunk_size;
360 template<
typename NumericT>
377 cl_uint chunk_size = cl_uint(buffer_chunk_size);
378 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
389 if (use_nvidia_blocked)
396 inner_prod_buffer, chunk_size, chunk_offset,
409 inner_prod_buffer, chunk_size, chunk_offset,
419 template<
typename NumericT>
432 cl_uint chunk_size = cl_uint(buffer_chunk_size);
433 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
438 unsigned int thread_num = 256;
451 inner_prod_buffer, chunk_size, chunk_offset,
458 template<
typename NumericT>
471 cl_uint chunk_size = cl_uint(buffer_chunk_size);
472 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
476 unsigned int thread_num = 128;
477 unsigned int group_num = 128;
489 A.
handle().opencl_handle(),
493 viennacl::traits::opencl_handle(p),
494 viennacl::traits::opencl_handle(Ap),
497 inner_prod_buffer, chunk_size, chunk_offset,
505 template<
typename NumericT>
518 cl_uint chunk_size = cl_uint(buffer_chunk_size);
519 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
524 unsigned int group_num = 256;
535 A.
handle().opencl_handle(),
536 viennacl::traits::opencl_handle(p),
537 viennacl::traits::opencl_handle(Ap),
541 inner_prod_buffer, chunk_size, chunk_offset,
550 template<
typename NumericT>
563 cl_uint chunk_size = cl_uint(buffer_chunk_size);
564 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
568 unsigned int thread_num = 256;
569 unsigned int group_num = 128;
581 A.
handle().opencl_handle(),
588 viennacl::traits::opencl_handle(p),
589 viennacl::traits::opencl_handle(Ap),
592 inner_prod_buffer, chunk_size, chunk_offset,
609 template <
typename T>
627 cl_uint size_vk = cl_uint(v_k.
size());
629 cl_uint R_offset = cl_uint(offset_in_R);
630 cl_uint chunk_size = cl_uint(buffer_chunk_size);
631 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
635 inner_prod_buffer, chunk_size,
636 r_dot_vk_buffer, chunk_offset,
642 template <
typename T>
658 cl_uint size_vk = cl_uint(v_k_size);
659 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
660 cl_uint ocl_k = cl_uint(param_k);
661 cl_uint chunk_size = cl_uint(buffer_chunk_size);
663 vi_in_vk_buffer, chunk_size
667 template <
typename T>
686 cl_uint size_vk = cl_uint(v_k_size);
687 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
688 cl_uint ocl_k = cl_uint(param_k);
689 cl_uint chunk_size = cl_uint(buffer_chunk_size);
690 cl_uint ocl_krylov_dim = cl_uint(krylov_dim);
692 vi_in_vk_buffer, chunk_size,
693 R_buffer, ocl_krylov_dim,
699 template <
typename T>
716 cl_uint size_vk = cl_uint(v_k_size);
717 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
718 cl_uint ocl_k = cl_uint(param_k);
721 krylov_basis, size_vk, internal_size_vk,
727 template <
typename T>
741 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
754 if (use_nvidia_blocked)
761 buffer_size_per_vector,
773 buffer_size_per_vector,
781 template <
typename T>
791 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
796 inner_prod_buffer.
clear();
799 unsigned int thread_num = 128;
812 buffer_size_per_vector,
818 template <
typename T>
828 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
835 unsigned int group_num = 128;
841 A.
handle().opencl_handle(),
845 viennacl::traits::opencl_handle(p), start_p,
846 viennacl::traits::opencl_handle(Ap), start_Ap,
849 buffer_size_per_vector,
856 template <
typename T>
866 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
873 unsigned int group_num = 128;
884 A.
handle().opencl_handle(),
885 viennacl::traits::opencl_handle(p), start_p,
886 viennacl::traits::opencl_handle(Ap), start_Ap,
890 buffer_size_per_vector,
898 template <
typename T>
908 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
915 unsigned int group_num = 128;
922 A.
handle().opencl_handle(),
929 viennacl::traits::opencl_handle(p), start_p,
930 viennacl::traits::opencl_handle(Ap), start_Ap,
933 buffer_size_per_vector,
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
viennacl::ocl::device const & current_device() const
Returns the current device.
Main kernel class for generating specialized OpenCL kernels for fast iterative solvers.
Represents an OpenCL device within ViennaCL.
const handle_type & handle4() const
void pipelined_bicgstab_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Generic size and resize functionality for different vector and matrix types.
const vcl_size_t & size1() const
Returns the number of rows.
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
static void init(viennacl::ocl::context &ctx)
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
vcl_size_t internal_ellnnz() const
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
T max(const T &lhs, const T &rhs)
Maximum.
vcl_size_t rows_per_block() const
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const vcl_size_t & nnz() const
Returns the number of nonzero entries.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
vcl_size_t ell_nnz() const
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
OpenCL kernel file for specialized iterative solver kernels.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
const handle_type & handle2() const
vcl_size_t internal_size1() const
Sparse matrix class using the sliced ELLPACK with parameters C, $\sigma$.
Implementation of a smart-pointer-like class for handling OpenCL handles.
void pipelined_cg_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
result_of::size_type< T >::type start(T const &obj)
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t maxnnz() const
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
size_type size() const
Returns the length of the vector (cf. std::vector)
const handle_type & handle() const
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void pipelined_cg_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void pipelined_gmres_prod(compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
const handle_type & handle3() const
Simple enable-if variant that uses the SFINAE pattern.
const handle_type & handle5() const
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...