ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
fft_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19  ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/ocl/device.hpp"
27 #include "viennacl/ocl/kernel.hpp"
33 
34 #include <viennacl/vector.hpp>
35 #include <viennacl/matrix.hpp>
36 
37 #include <cmath>
38 #include <stdexcept>
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 namespace detail
45 {
46 namespace fft
47 {
48 
50 
55  {
56  vcl_size_t bits_datasize = 0;
57  vcl_size_t ds = 1;
58 
59  while (ds < size)
60  {
61  ds = ds << 1;
62  bits_datasize++;
63  }
64 
65  return bits_datasize;
66  }
67 
72  {
73  n = n - 1;
74 
75  vcl_size_t power = 1;
76 
77  while (power < sizeof(vcl_size_t) * 8)
78  {
79  n = n | (n >> power);
80  power *= 2;
81  }
82 
83  return n + 1;
84  }
85 
86 } //namespace fft
87 } //namespace detail
88 
89 namespace opencl
90 {
91 
98 template<typename NumericT>
100  viennacl::ocl::handle<cl_mem> const & out,
103 {
104  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
106 
109  {
111  program_string =
113  } else
115 
116  viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_direct");
117  viennacl::ocl::enqueue(k(in, out,
118  static_cast<cl_uint>(size),
119  static_cast<cl_uint>(stride),
120  static_cast<cl_uint>(batch_num),
121  sign)
122  );
123 }
124 
125 /*
126  * This function reorders the input data so that the element indices are in bit-reversal order.
127  * This reordering should be done before the in-place FFT.
128  */
129 template<typename NumericT>
131  vcl_size_t size, vcl_size_t stride,
132  vcl_size_t bits_datasize, vcl_size_t batch_num,
134 {
135  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
137 
140  {
143  } else
145 
146  viennacl::ocl::kernel& k = ctx.get_kernel(program_string, "fft_reorder");
148  static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size),
149  static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num))
150  );
151 }
152 
160 template<typename NumericT>
162  vcl_size_t size, vcl_size_t stride,
163  vcl_size_t batch_num, NumericT sign = NumericT(-1),
165 {
166  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
168 
169  assert(batch_num != 0 && bool("batch_num must be larger than 0"));
170 
173  {
176  } else
178 
181  {
182  viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2_local");
184  viennacl::ocl::local_mem((size * 4) * sizeof(NumericT)),
185  static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size),
186  static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign));
187 
188  }
189  else
190  {
191  viennacl::linalg::opencl::reorder<NumericT>(in, size, stride, bits_datasize, batch_num);
192 
193  for (vcl_size_t step = 0; step < bits_datasize; step++)
194  {
195  viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2");
197  static_cast<cl_uint>(step), static_cast<cl_uint>(bits_datasize),
198  static_cast<cl_uint>(size), static_cast<cl_uint>(stride),
199  static_cast<cl_uint>(batch_num), sign));
200  }
201  }
202 }
203 
211 template<typename NumericT, unsigned int AlignmentV>
214 {
215  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
217 
218  vcl_size_t size = in.size() >> 1;
220 
224 
225  {
227  viennacl::ocl::enqueue(k(A, B, static_cast<cl_uint>(ext_size)));
228  }
229  {
231  viennacl::ocl::enqueue(k(in, A, B, static_cast<cl_uint>(size), static_cast<cl_uint>(ext_size)));
232  }
233 
235 
236  {
238  viennacl::ocl::enqueue(k(Z, out, static_cast<cl_uint>(size)));
239  }
240 }
241 
245 template<typename NumericT, unsigned int AlignmentV>
249 {
250  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input1).context());
252  vcl_size_t size = input1.size() >> 1;
254  viennacl::ocl::enqueue(k(input1, input2, output, static_cast<cl_uint>(size)));
255 }
256 
260 template<typename NumericT, unsigned int AlignmentV>
262 {
263  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
265 
267 
268  vcl_size_t size = input.size() >> 1;
269  NumericT norm_factor = static_cast<NumericT>(size);
270  viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(size), norm_factor));
271 }
272 
276 template<typename NumericT, unsigned int AlignmentV>
278 {
279  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
281 
283  viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(input.internal_size1() >> 1),
284  static_cast<cl_uint>(input.internal_size2()) >> 1));
285 }
286 
290 template<typename NumericT, unsigned int AlignmentV>
293 {
294  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
296 
298  viennacl::ocl::enqueue(k(input, output, static_cast<cl_uint>(input.internal_size1() >> 1),
299  static_cast<cl_uint>(input.internal_size2() >> 1)));
300 }
301 
305 template<typename NumericT>
308 {
309  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
311 
313  viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size)));
314 }
315 
319 template<typename NumericT>
322 {
323  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
325 
327  viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size)));
328 }
329 
333 template<typename NumericT>
335 {
336  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
338 
339  vcl_size_t size = in.size();
340 
342  viennacl::ocl::enqueue(k(in, static_cast<cl_uint>(size)));
343 }
344 
345 } //namespace opencl
346 } //namespace linalg
347 } //namespace viennacl
348 
349 #endif /* VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_ */
350 
void reorder(viennacl::ocl::handle< cl_mem > const &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
OpenCL kernel file for FFT operations.
vcl_size_t next_power_2(vcl_size_t n)
Find next power of two.
Represents an OpenCL device within ViennaCL.
void complex_to_real(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Implementation of the dense matrix class.
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:260
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:55
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
endcode *Final step
A dense matrix class.
Definition: forwards.h:375
Determines row and column increments for matrices and matrix proxies.
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Transposes the matrix in place.
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Definition: blas3.hpp:36
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:605
static void init(viennacl::ocl::context &ctx)
Definition: matrix.hpp:779
void radix2(viennacl::ocl::handle< cl_mem > const &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation.
void reverse(viennacl::vector_base< NumericT > &in)
Reverse the order of the vector's elements and store the result in the input vector.
void real_to_complex(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
std::size_t vcl_size_t
Definition: forwards.h:75
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
const vcl_size_t MAX_LOCAL_POINTS_NUM
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void multiply_complex(viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
Multiply two complex vectors and store the result in output.
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
void direct(viennacl::ocl::handle< cl_mem > const &in, viennacl::ocl::handle< cl_mem > const &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct algorithm for computing Fourier transformation.
Implementations of the Fast Fourier Transform using a plain single-threaded or OpenMP-enabled executi...
vcl_size_t num_bits(vcl_size_t size)
Get number of bits.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:267
void normalize(viennacl::vector< NumericT, AlignmentV > &input)
Normalize the vector by its own size.
void bluestein(viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
Bluestein's algorithm for computing Fourier transformation.
Runtime generation of OpenCL kernels for matrix operations.
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)
Definition: fft_1d.cpp:719
SCALARTYPE sign(SCALARTYPE val)