ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
ilu_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <cmath>
26 #include <algorithm> //for std::max and std::min
27 
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
35 #include "viennacl/traits/size.hpp"
39 
40 
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 namespace opencl
46 {
47 
49 
50 template<typename NumericT>
53 {
54  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
56 
57  //
58  // Step 1: Count elements in L:
59  //
61 
62  viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
63  L.handle1().opencl_handle())
64  );
65 
66  //
67  // Step 2: Exclusive scan on row_buffers:
68  //
69  viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
70  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
71  L.reserve(wrapped_L_row_buffer[L.size1()], false);
72 
73 
74  //
75  // Step 3: Write entries
76  //
78 
79  viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
80  L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle())
81  );
82 
84 
85 } // extract_LU
86 
88 
89 
90 
92 template<typename NumericT>
95 {
97 
98  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
100 
101  // fill D:
102  viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
103  viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
104 
105  // scale L:
106  viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
107  viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
108 
109 }
110 
112 
113 
115 template<typename NumericT>
117  vector<NumericT> const & aij_L)
118 {
119  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
121 
124  viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
125 
127  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
128  aij_L)
129  );
130 
131 }
132 
133 
135 
136 template<typename NumericT>
140 {
141  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
143 
144  //
145  // Step 1: Count elements in L and U:
146  //
148 
149  viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
150  L.handle1().opencl_handle(),
151  U.handle1().opencl_handle())
152  );
153 
154  //
155  // Step 2: Exclusive scan on row_buffers:
156  //
157  viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
158  viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
159  L.reserve(wrapped_L_row_buffer[L.size1()], false);
160 
161  viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), A.size1() + 1, 0, 1);
162  viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer);
163  U.reserve(wrapped_U_row_buffer[U.size1()], false);
164 
165  //
166  // Step 3: Write entries
167  //
169 
170  viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
171  L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
172  U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle())
173  );
174 
176  // Note: block information for U will be generated after transposition
177 
178 } // extract_LU
179 
181 
182 
183 
185 template<typename NumericT>
189 {
191 
192  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
194 
195  // fill D:
196  viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
197  viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
198 
199  // scale L:
200  viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
201  viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
202 
203  // scale U:
204  viennacl::ocl::enqueue(k2(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), cl_uint(A.size1()), D) );
205 
206 }
207 
209 
210 
212 template<typename NumericT>
214  vector<NumericT> const & aij_L,
215  compressed_matrix<NumericT> & U_trans,
216  vector<NumericT> const & aij_U_trans)
217 {
218  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
220 
223  viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
224 
227  viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size());
228 
230  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
231  aij_L,
232  U_trans.handle1().opencl_handle(), U_trans.handle2().opencl_handle(), U_trans.handle().opencl_handle(), U_backup.opencl_handle(),
233  aij_U_trans)
234  );
235 
236 }
237 
239 
240 
241 
242 template<typename NumericT>
244  vector<NumericT> & diag_R)
245 {
246  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(R).context());
248 
250  viennacl::ocl::enqueue(k(R.handle1().opencl_handle(), R.handle2().opencl_handle(), R.handle().opencl_handle(), cl_uint(R.size1()),
251  diag_R)
252  );
253 }
254 
255 } //namespace opencl
256 } //namespace linalg
257 } //namespace viennacl
258 
259 
260 #endif
Generic size and resize functionality for different vector and matrix types.
Implementations of vector operations.
const vcl_size_t & size1() const
Returns the number of rows.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
void ilu_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
void ilu_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP (cf. Algorithm 2 in paper) ...
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Common implementations shared by OpenCL-based operations.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications.
Main kernel class for generating OpenCL kernels for incomplete LU factorization preconditioners.
Definition: ilu.hpp:453
void extract_L(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
OpenCL kernel file for nonnegative matrix factorization.
static void init(viennacl::ocl::context &ctx)
Definition: ilu.hpp:460
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Definition: memory.hpp:140
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
void ilu_form_neumann_matrix(compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:230
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
void icc_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L)
Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP (cf. Algorithm 2 in paper) ...
void extract_LU(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Implementation of the ViennaCL scalar class.
void icc_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
Simple enable-if variant that uses the SFINAE pattern.