00001 #ifndef VIENNACL_FFT_HPP
00002 #define VIENNACL_FFT_HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #include <viennacl/vector.hpp>
00025 #include <viennacl/matrix.hpp>
00026
00027 #include "viennacl/linalg/kernels/fft_kernels.h"
00028
00029 #include <cmath>
00030
00031 #include <stdexcept>
00032
00033 namespace viennacl
00034 {
00035 namespace detail
00036 {
00037 namespace fft
00038 {
00039 const std::size_t MAX_LOCAL_POINTS_NUM = 512;
00040
00041 namespace FFT_DATA_ORDER {
00042 enum DATA_ORDER {
00043 ROW_MAJOR,
00044 COL_MAJOR
00045 };
00046 }
00047 }
00048 }
00049 }
00050
00052 namespace viennacl {
00053 namespace detail {
00054 namespace fft {
00055
00056 inline bool is_radix2(std::size_t data_size) {
00057 return !((data_size > 2) && (data_size & (data_size - 1)));
00058
00059 }
00060
00061 inline std::size_t next_power_2(std::size_t n) {
00062 n = n - 1;
00063
00064 std::size_t power = 1;
00065
00066 while(power < sizeof(std::size_t) * 8) {
00067 n = n | (n >> power);
00068 power *= 2;
00069 }
00070
00071 return n + 1;
00072 }
00073
00074 inline std::size_t num_bits(std::size_t size)
00075 {
00076 std::size_t bits_datasize = 0;
00077 std::size_t ds = 1;
00078
00079 while(ds < size)
00080 {
00081 ds = ds << 1;
00082 bits_datasize++;
00083 }
00084
00085 return bits_datasize;
00086 }
00087
00088
00095 template<class SCALARTYPE>
00096 void direct(const viennacl::ocl::handle<cl_mem>& in,
00097 const viennacl::ocl::handle<cl_mem>& out,
00098 std::size_t size,
00099 std::size_t stride,
00100 std::size_t batch_num,
00101 SCALARTYPE sign = -1.0f,
00102 FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR
00103 )
00104 {
00105 viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
00106 std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
00107 if (data_order == FFT_DATA_ORDER::COL_MAJOR)
00108 {
00109 viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
00110 program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
00111 }
00112 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context().get_program(program_string).get_kernel("fft_direct");
00113 viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size), static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign));
00114 }
00115
00116
00117
00118
00119
00120 template <typename SCALARTYPE>
00121 void reorder(const viennacl::ocl::handle<cl_mem>& in,
00122 std::size_t size,
00123 std::size_t stride,
00124 std::size_t bits_datasize,
00125 std::size_t batch_num,
00126 FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR
00127 )
00128 {
00129 viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
00130 std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
00131 if (data_order == FFT_DATA_ORDER::COL_MAJOR)
00132 {
00133 viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
00134 program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
00135 }
00136
00137 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00138 .get_program(program_string)
00139 .get_kernel("fft_reorder");
00140 viennacl::ocl::enqueue(kernel(in,
00141 static_cast<cl_uint>(bits_datasize),
00142 static_cast<cl_uint>(size),
00143 static_cast<cl_uint>(stride),
00144 static_cast<cl_uint>(batch_num)
00145 )
00146 );
00147 }
00148
00156 template<class SCALARTYPE>
00157 void radix2(const viennacl::ocl::handle<cl_mem>& in,
00158 std::size_t size,
00159 std::size_t stride,
00160 std::size_t batch_num,
00161 SCALARTYPE sign = -1.0f,
00162 FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR
00163 )
00164 {
00165 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00166
00167 assert(batch_num != 0);
00168 assert(is_radix2(size));
00169
00170 viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
00171 std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
00172 if (data_order == FFT_DATA_ORDER::COL_MAJOR)
00173 {
00174 viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
00175 program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
00176 }
00177
00178 std::size_t bits_datasize = num_bits(size);
00179
00180 if(size <= MAX_LOCAL_POINTS_NUM)
00181 {
00182 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00183 .get_program(program_string)
00184 .get_kernel("fft_radix2_local");
00185 viennacl::ocl::enqueue(kernel(in,
00186 viennacl::ocl::local_mem((size * 4) * sizeof(SCALARTYPE)),
00187 static_cast<cl_uint>(bits_datasize),
00188 static_cast<cl_uint>(size),
00189 static_cast<cl_uint>(stride),
00190 static_cast<cl_uint>(batch_num),
00191 sign));
00192 }
00193 else
00194 {
00195 reorder<SCALARTYPE>(in, size, stride, bits_datasize, batch_num);
00196
00197 for(std::size_t step = 0; step < bits_datasize; step++)
00198 {
00199 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00200 .get_program(program_string)
00201 .get_kernel("fft_radix2");
00202 viennacl::ocl::enqueue(kernel(in,
00203 static_cast<cl_uint>(step),
00204 static_cast<cl_uint>(bits_datasize),
00205 static_cast<cl_uint>(size),
00206 static_cast<cl_uint>(stride),
00207 static_cast<cl_uint>(batch_num),
00208 sign));
00209 }
00210
00211 }
00212 }
00213
00221 template<class SCALARTYPE, unsigned int ALIGNMENT>
00222 void bluestein(viennacl::vector<SCALARTYPE, ALIGNMENT>& in,
00223 viennacl::vector<SCALARTYPE, ALIGNMENT>& out,
00224 std::size_t batch_num,
00225 SCALARTYPE sign = -1.0
00226 )
00227 {
00228 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00229
00230 std::size_t size = in.size() >> 1;
00231 std::size_t ext_size = next_power_2(2 * size - 1);
00232
00233 viennacl::vector<SCALARTYPE, ALIGNMENT> A(ext_size << 1);
00234 viennacl::vector<SCALARTYPE, ALIGNMENT> B(ext_size << 1);
00235
00236 viennacl::vector<SCALARTYPE, ALIGNMENT> Z(ext_size << 1);
00237
00238 {
00239 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00240 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00241 .get_kernel("zero2");
00242 viennacl::ocl::enqueue(kernel(
00243 A,
00244 B,
00245 static_cast<cl_uint>(ext_size)
00246 ));
00247
00248 }
00249 {
00250 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00251 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00252 .get_kernel("bluestein_pre");
00253 viennacl::ocl::enqueue(kernel(
00254 in,
00255 A,
00256 B,
00257 static_cast<cl_uint>(size),
00258 static_cast<cl_uint>(ext_size)
00259 ));
00260 }
00261
00262 viennacl::linalg::convolve_i(A, B, Z);
00263
00264 {
00265 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00266 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00267 .get_kernel("bluestein_post");
00268 viennacl::ocl::enqueue(kernel(
00269 Z,
00270 out,
00271 static_cast<cl_uint>(size)
00272 ));
00273 }
00274 }
00275
00276 template<class SCALARTYPE, unsigned int ALIGNMENT>
00277 void multiply(viennacl::vector<SCALARTYPE, ALIGNMENT> const & input1,
00278 viennacl::vector<SCALARTYPE, ALIGNMENT> const & input2,
00279 viennacl::vector<SCALARTYPE, ALIGNMENT> & output)
00280 {
00281 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00282 std::size_t size = input1.size() >> 1;
00283 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00284 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00285 .get_kernel("fft_mult_vec");
00286 viennacl::ocl::enqueue(kernel(input1, input2, output, static_cast<cl_uint>(size)));
00287 }
00288
00289 template<class SCALARTYPE, unsigned int ALIGNMENT>
00290 void normalize(viennacl::vector<SCALARTYPE, ALIGNMENT> & input)
00291 {
00292 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00293 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00294 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00295 .get_kernel("fft_div_vec_scalar");
00296 std::size_t size = input.size() >> 1;
00297 SCALARTYPE norm_factor = static_cast<SCALARTYPE>(size);
00298 viennacl::ocl::enqueue(kernel(input, static_cast<cl_uint>(size), norm_factor));
00299 }
00300
00301 template<class SCALARTYPE, unsigned int ALIGNMENT>
00302 void transpose(viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> & input)
00303 {
00304 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00305 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00306 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00307 .get_kernel("transpose_inplace");
00308 viennacl::ocl::enqueue(kernel(input,
00309 static_cast<cl_uint>(input.internal_size1()),
00310 static_cast<cl_uint>(input.internal_size2()) >> 1));
00311 }
00312
00313 template<class SCALARTYPE, unsigned int ALIGNMENT>
00314 void transpose(viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> const & input,
00315 viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> & output)
00316 {
00317 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00318
00319 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00320 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00321 .get_kernel("transpose");
00322 viennacl::ocl::enqueue(kernel(input,
00323 output,
00324 static_cast<cl_uint>(input.internal_size1()),
00325 static_cast<cl_uint>(input.internal_size2() >> 1))
00326 );
00327 }
00328
00329 template<class SCALARTYPE, unsigned int ALIGNMENT>
00330 void real_to_complex(viennacl::vector<SCALARTYPE, ALIGNMENT> const & in,
00331 viennacl::vector<SCALARTYPE, ALIGNMENT> & out,
00332 std::size_t size)
00333 {
00334 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00335 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00336 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00337 .get_kernel("real_to_complex");
00338 viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
00339 }
00340
00341 template<class SCALARTYPE, unsigned int ALIGNMENT>
00342 void complex_to_real(viennacl::vector<SCALARTYPE, ALIGNMENT> const & in,
00343 viennacl::vector<SCALARTYPE, ALIGNMENT>& out,
00344 std::size_t size)
00345 {
00346 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00347 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00348 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00349 .get_kernel("complex_to_real");
00350 viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
00351 }
00352
00353 template<class SCALARTYPE, unsigned int ALIGNMENT>
00354 void reverse(viennacl::vector<SCALARTYPE, ALIGNMENT>& in)
00355 {
00356 viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
00357 std::size_t size = in.size();
00358 viennacl::ocl::kernel& kernel = viennacl::ocl::current_context()
00359 .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
00360 .get_kernel("reverse_inplace");
00361 viennacl::ocl::enqueue(kernel(in, static_cast<cl_uint>(size)));
00362 }
00363
00364
00365 }
00366 }
00367
00375 template<class SCALARTYPE, unsigned int ALIGNMENT>
00376 void inplace_fft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
00377 std::size_t batch_num = 1,
00378 SCALARTYPE sign = -1.0)
00379 {
00380 std::size_t size = (input.size() >> 1) / batch_num;
00381
00382 if(!detail::fft::is_radix2(size))
00383 {
00384 viennacl::vector<SCALARTYPE, ALIGNMENT> output(input.size());
00385 detail::fft::direct(input.handle(),
00386 output.handle(),
00387 size,
00388 size,
00389 batch_num,
00390 sign);
00391
00392 viennacl::copy(output, input);
00393 } else {
00394 detail::fft::radix2(input.handle(), size, size, batch_num, sign);
00395 }
00396 }
00397
00406 template<class SCALARTYPE, unsigned int ALIGNMENT>
00407 void fft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
00408 viennacl::vector<SCALARTYPE, ALIGNMENT>& output,
00409 std::size_t batch_num = 1,
00410 SCALARTYPE sign = -1.0
00411 )
00412 {
00413 std::size_t size = (input.size() >> 1) / batch_num;
00414
00415 if(detail::fft::is_radix2(size))
00416 {
00417 viennacl::copy(input, output);
00418 detail::fft::radix2(output.handle(), size, size, batch_num, sign);
00419 } else {
00420 detail::fft::direct(input.handle(),
00421 output.handle(),
00422 size,
00423 size,
00424 batch_num,
00425 sign);
00426 }
00427 }
00428
00435 template<class SCALARTYPE, unsigned int ALIGNMENT>
00436 void inplace_fft(viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>& input,
00437 SCALARTYPE sign = -1.0)
00438 {
00439 std::size_t rows_num = input.size1();
00440 std::size_t cols_num = input.size2() >> 1;
00441
00442 std::size_t cols_int = input.internal_size2() >> 1;
00443
00444
00445 if(detail::fft::is_radix2(cols_num))
00446 {
00447 detail::fft::radix2(input.handle(), cols_num, cols_int, rows_num, sign, detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
00448 }
00449 else
00450 {
00451 viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> output(input.size1(), input.size2());
00452
00453 detail::fft::direct(input.handle(),
00454 output.handle(),
00455 cols_num,
00456 cols_int,
00457 rows_num,
00458 sign,
00459 detail::fft::FFT_DATA_ORDER::ROW_MAJOR
00460 );
00461
00462 input = output;
00463 }
00464
00465
00466 if (detail::fft::is_radix2(rows_num)) {
00467 detail::fft::radix2(input.handle(), rows_num, cols_int, cols_num, sign, detail::fft::FFT_DATA_ORDER::COL_MAJOR);
00468 } else {
00469 viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> output(input.size1(), input.size2());
00470
00471 detail::fft::direct(input.handle(),
00472 output.handle(),
00473 rows_num,
00474 cols_int,
00475 cols_num,
00476 sign,
00477 detail::fft::FFT_DATA_ORDER::COL_MAJOR);
00478
00479 input = output;
00480 }
00481
00482 }
00483
00491 template<class SCALARTYPE, unsigned int ALIGNMENT>
00492 void fft(viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>& input,
00493 viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>& output,
00494 SCALARTYPE sign = -1.0)
00495 {
00496 std::size_t rows_num = input.size1();
00497 std::size_t cols_num = input.size2() >> 1;
00498
00499 std::size_t cols_int = input.internal_size2() >> 1;
00500
00501
00502 if(detail::fft::is_radix2(cols_num))
00503 {
00504 output = input;
00505 detail::fft::radix2(output.handle(), cols_num, cols_int, rows_num, sign, detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
00506 }
00507 else
00508 {
00509 detail::fft::direct(input.handle(),
00510 output.handle(),
00511 cols_num,
00512 cols_int,
00513 rows_num,
00514 sign,
00515 detail::fft::FFT_DATA_ORDER::ROW_MAJOR
00516 );
00517 }
00518
00519
00520 if(detail::fft::is_radix2(rows_num))
00521 {
00522 detail::fft::radix2(output.handle(), rows_num, cols_int, cols_num, sign, detail::fft::FFT_DATA_ORDER::COL_MAJOR);
00523 }
00524 else
00525 {
00526 viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> tmp(output.size1(), output.size2());
00527 tmp = output;
00528
00529 detail::fft::direct(tmp.handle(),
00530 output.handle(),
00531 rows_num,
00532 cols_int,
00533 cols_num,
00534 sign,
00535 detail::fft::FFT_DATA_ORDER::COL_MAJOR);
00536 }
00537 }
00538
00548 template<class SCALARTYPE, unsigned int ALIGNMENT>
00549 void inplace_ifft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
00550 std::size_t batch_num = 1)
00551 {
00552 viennacl::inplace_fft(input, batch_num, SCALARTYPE(1.0));
00553 detail::fft::normalize(input);
00554 }
00555
00566 template<class SCALARTYPE, unsigned int ALIGNMENT>
00567 void ifft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
00568 viennacl::vector<SCALARTYPE, ALIGNMENT>& output,
00569 std::size_t batch_num = 1
00570 )
00571 {
00572 viennacl::fft(input, output, batch_num, SCALARTYPE(1.0));
00573 detail::fft::normalize(output);
00574 }
00575
00576 namespace linalg
00577 {
00587 template<class SCALARTYPE, unsigned int ALIGNMENT>
00588 void convolve(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
00589 viennacl::vector<SCALARTYPE, ALIGNMENT>& input2,
00590 viennacl::vector<SCALARTYPE, ALIGNMENT>& output
00591 )
00592 {
00593 assert(input1.size() == input2.size());
00594 assert(input1.size() == output.size());
00595
00596 viennacl::vector<SCALARTYPE, ALIGNMENT> tmp1(input1.size());
00597 viennacl::vector<SCALARTYPE, ALIGNMENT> tmp2(input2.size());
00598 viennacl::vector<SCALARTYPE, ALIGNMENT> tmp3(output.size());
00599
00600
00601
00602 viennacl::fft(input1, tmp1);
00603 viennacl::fft(input2, tmp2);
00604
00605
00606 viennacl::detail::fft::multiply(tmp1, tmp2, tmp3);
00607
00608 viennacl::ifft(tmp3, output);
00609 }
00610
00620 template<class SCALARTYPE, unsigned int ALIGNMENT>
00621 void convolve_i(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
00622 viennacl::vector<SCALARTYPE, ALIGNMENT>& input2,
00623 viennacl::vector<SCALARTYPE, ALIGNMENT>& output
00624 )
00625 {
00626 assert(input1.size() == input2.size());
00627 assert(input1.size() == output.size());
00628
00629 viennacl::inplace_fft(input1);
00630 viennacl::inplace_fft(input2);
00631
00632 viennacl::detail::fft::multiply(input1, input2, output);
00633
00634 viennacl::inplace_ifft(output);
00635 }
00636 }
00637 }
00638
00640 #endif