ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
fft_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_FFT_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 //TODO openom Conditions
26 #include <viennacl/vector.hpp>
27 #include <viennacl/matrix.hpp>
28 
30 
31 #include <stdexcept>
32 #include <cmath>
33 #include <complex>
34 
35 namespace viennacl
36 {
37 namespace linalg
38 {
39 namespace host_based
40 {
41 namespace detail
42 {
43  namespace fft
44  {
46 
47  namespace FFT_DATA_ORDER
48  {
50  {
52  };
53  }
54 
56  {
57  vcl_size_t bits_datasize = 0;
58  vcl_size_t ds = 1;
59 
60  while (ds < size)
61  {
62  ds = ds << 1;
63  bits_datasize++;
64  }
65 
66  return bits_datasize;
67  }
68 
70  {
71  n = n - 1;
72 
73  vcl_size_t power = 1;
74 
75  while (power < sizeof(vcl_size_t) * 8)
76  {
77  n = n | (n >> power);
78  power *= 2;
79  }
80 
81  return n + 1;
82  }
83 
85  {
86  v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
87  v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
88  v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
89  v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
90  v = (v >> 16) | (v << 16);
91  v = v >> (32 - bit_size);
92  return v;
93  }
94 
95  template<typename NumericT, unsigned int AlignmentV>
96  void copy_to_complex_array(std::complex<NumericT> * input_complex,
98  {
99 #ifdef VIENNACL_WITH_OPENMP
100  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
101 #endif
102  for (long i2 = 0; i2 < long(size * 2); i2 += 2)
103  { //change array to complex array
104  vcl_size_t i = vcl_size_t(i2);
105  input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
106  }
107  }
108 
109  template<typename NumericT>
110  void copy_to_complex_array(std::complex<NumericT> * input_complex,
112  {
113 #ifdef VIENNACL_WITH_OPENMP
114  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
115 #endif
116  for (long i2 = 0; i2 < long(size * 2); i2 += 2)
117  { //change array to complex array
118  vcl_size_t i = vcl_size_t(i2);
119  input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
120  }
121  }
122 
123  template<typename NumericT, unsigned int AlignmentV>
124  void copy_to_vector(std::complex<NumericT> * input_complex,
126  {
127 #ifdef VIENNACL_WITH_OPENMP
128  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
129 #endif
130  for (long i2 = 0; i2 < long(size); i2++)
131  {
132  vcl_size_t i = vcl_size_t(i2);
133  in(i * 2) = static_cast<NumericT>(std::real(input_complex[i]));
134  in(i * 2 + 1) = static_cast<NumericT>(std::imag(input_complex[i]));
135  }
136  }
137 
138  template<typename NumericT>
139  void copy_to_complex_array(std::complex<NumericT> * input_complex,
140  NumericT const * in, vcl_size_t size)
141  {
142 #ifdef VIENNACL_WITH_OPENMP
143  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
144 #endif
145  for (long i2 = 0; i2 < long(size * 2); i2 += 2)
146  { //change array to complex array
147  vcl_size_t i = vcl_size_t(i2);
148  input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
149  }
150  }
151 
152  template<typename NumericT>
153  void copy_to_vector(std::complex<NumericT> * input_complex, NumericT * in, vcl_size_t size)
154  {
155 #ifdef VIENNACL_WITH_OPENMP
156  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
157 #endif
158  for (long i2 = 0; i2 < long(size); i2++)
159  {
160  vcl_size_t i = vcl_size_t(i2);
161  in[i * 2] = static_cast<NumericT>(std::real(input_complex[i]));
162  in[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i]));
163  }
164  }
165 
166  template<typename NumericT>
167  void copy_to_vector(std::complex<NumericT> * input_complex,
169  {
170  std::vector<NumericT> temp(2 * size);
171 #ifdef VIENNACL_WITH_OPENMP
172  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
173 #endif
174  for (long i2 = 0; i2 < long(size); i2++)
175  {
176  vcl_size_t i = vcl_size_t(i2);
177  temp[i * 2] = static_cast<NumericT>(std::real(input_complex[i]));
178  temp[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i]));
179  }
180  viennacl::copy(temp, in);
181  }
182 
183  template<typename NumericT>
184  void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
185  {
186 #ifdef VIENNACL_WITH_OPENMP
187  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
188 #endif
189  for (long i2 = 0; i2 < long(size); i2++)
190  {
191  vcl_size_t i = vcl_size_t(i2);
192  input1[i] = 0;
193  input2[i] = 0;
194  }
195  }
196 
197  } //namespace fft
198 
199 } //namespace detail
200 
204 template<typename NumericT>
205 void fft_direct(std::complex<NumericT> * input_complex, std::complex<NumericT> * output,
208 {
209  NumericT const NUM_PI = NumericT(3.14159265358979323846);
210 #ifdef VIENNACL_WITH_OPENMP
211  #pragma omp parallel
212 #endif
213  for (long batch_id2 = 0; batch_id2 < long(batch_num); batch_id2++)
214  {
215  vcl_size_t batch_id = vcl_size_t(batch_id2);
216  for (vcl_size_t k = 0; k < size; k += 1)
217  {
218  std::complex<NumericT> f = 0;
219  for (vcl_size_t n = 0; n < size; n++)
220  {
221  std::complex<NumericT> input;
222  if (!data_order)
223  input = input_complex[batch_id * stride + n]; //input index here
224  else
225  input = input_complex[n * stride + batch_id];
226  NumericT arg = sign * 2 * NUM_PI * NumericT(k) / NumericT(size * n);
227  NumericT sn = std::sin(arg);
228  NumericT cs = std::cos(arg);
229 
230  std::complex<NumericT> ex(cs, sn);
231  std::complex<NumericT> tmp(input.real() * ex.real() - input.imag() * ex.imag(),
232  input.real() * ex.imag() + input.imag() * ex.real());
233  f = f + tmp;
234  }
235  if (!data_order)
236  output[batch_id * stride + k] = f; // output index here
237  else
238  output[k * stride + batch_id] = f;
239  }
240  }
241 
242 }
243 
250 template<typename NumericT, unsigned int AlignmentV>
254  vcl_size_t batch_num, NumericT sign = NumericT(-1),
256 {
257  std::vector<std::complex<NumericT> > input_complex(size * batch_num);
258  std::vector<std::complex<NumericT> > output(size * batch_num);
259 
260  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
261 
262  fft_direct(&input_complex[0], &output[0], size, stride, batch_num, sign, data_order);
263 
264  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output[0], out, size * batch_num);
265 }
266 
273 template<typename NumericT, unsigned int AlignmentV>
276  vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1),
278 {
279  vcl_size_t row_num = in.internal_size1();
280  vcl_size_t col_num = in.internal_size2() >> 1;
281 
282  vcl_size_t size_mat = row_num * col_num;
283 
284  std::vector<std::complex<NumericT> > input_complex(size_mat);
285  std::vector<std::complex<NumericT> > output(size_mat);
286 
287  NumericT const * data_A = detail::extract_raw_pointer<NumericT>(in);
288  NumericT * data_B = detail::extract_raw_pointer<NumericT>(out);
289 
290  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data_A, size_mat);
291 
292  fft_direct(&input_complex[0], &output[0], size, stride, batch_num, sign, data_order);
293 
294  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output[0], data_B, size_mat);
295 }
296 
297 /*
298  * This function performs reorder of 1D input data. Indexes are sorted in bit-reversal order.
299  * Such reordering should be done before in-place FFT.
300  */
301 template<typename NumericT, unsigned int AlignmentV>
303  vcl_size_t bits_datasize, vcl_size_t batch_num,
305 {
306  std::vector<std::complex<NumericT> > input(size * batch_num);
308 #ifdef VIENNACL_WITH_OPENMP
309  #pragma omp parallel for
310 #endif
311  for (long batch_id2 = 0; batch_id2 < long(batch_num); batch_id2++)
312  {
313  vcl_size_t batch_id = vcl_size_t(batch_id2);
314  for (vcl_size_t i = 0; i < size; i++)
315  {
317  if (i < v)
318  {
319  if (!data_order)
320  {
321  std::complex<NumericT> tmp = input[batch_id * stride + i]; // index
322  input[batch_id * stride + i] = input[batch_id * stride + v]; //index
323  input[batch_id * stride + v] = tmp; //index
324  }
325  else
326  {
327  std::complex<NumericT> tmp = input[i * stride + batch_id]; // index
328  input[i * stride + batch_id] = input[v * stride + batch_id]; //index
329  input[v * stride + batch_id] = tmp; //index
330  }
331  }
332  }
333  }
334  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input[0], in, size * batch_num);
335 }
336 
337 /*
338  * This function performs reorder of 2D input data. Indexes are sorted in bit-reversal order.
339  * Such reordering should be done before in-place FFT.
340  */
341 template<typename NumericT, unsigned int AlignmentV>
343  vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num,
345 {
346 
347  NumericT * data = detail::extract_raw_pointer<NumericT>(in);
348  vcl_size_t row_num = in.internal_size1();
349  vcl_size_t col_num = in.internal_size2() >> 1;
350  vcl_size_t size_mat = row_num * col_num;
351 
352  std::vector<std::complex<NumericT> > input(size_mat);
353 
355 
356 #ifdef VIENNACL_WITH_OPENMP
357  #pragma omp parallel for
358 #endif
359  for (long batch_id2 = 0; batch_id2 < long(batch_num); batch_id2++)
360  {
361  vcl_size_t batch_id = vcl_size_t(batch_id2);
362  for (vcl_size_t i = 0; i < size; i++)
363  {
365  if (i < v)
366  {
367  if (!data_order)
368  {
369  std::complex<NumericT> tmp = input[batch_id * stride + i]; // index
370  input[batch_id * stride + i] = input[batch_id * stride + v]; //index
371  input[batch_id * stride + v] = tmp; //index
372  } else
373  {
374  std::complex<NumericT> tmp = input[i * stride + batch_id]; // index
375  input[i * stride + batch_id] = input[v * stride + batch_id]; //index
376  input[v * stride + batch_id] = tmp; //index
377  }
378  }
379  }
380  }
382 }
383 
388 template<typename NumericT>
389 void fft_radix2(std::complex<NumericT> * input_complex, vcl_size_t batch_num,
390  vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign,
392 {
393  NumericT const NUM_PI = NumericT(3.14159265358979323846);
394 
395  for (vcl_size_t step = 0; step < bit_size; step++)
396  {
397  vcl_size_t ss = 1 << step;
398  vcl_size_t half_size = size >> 1;
399 
400 #ifdef VIENNACL_WITH_OPENMP
401  #pragma omp parallel for
402 #endif
403  for (long batch_id2 = 0; batch_id2 < long(batch_num); batch_id2++)
404  {
405  vcl_size_t batch_id = vcl_size_t(batch_id2);
406  for (vcl_size_t tid = 0; tid < half_size; tid++)
407  {
408  vcl_size_t group = (tid & (ss - 1));
409  vcl_size_t pos = ((tid >> step) << (step + 1)) + group;
410  std::complex<NumericT> in1;
411  std::complex<NumericT> in2;
412  vcl_size_t offset;
413  if (!data_order)
414  {
415  offset = batch_id * stride + pos;
416  in1 = input_complex[offset];
417  in2 = input_complex[offset + ss];
418  }
419  else
420  {
421  offset = pos * stride + batch_id;
422  in1 = input_complex[offset];
423  in2 = input_complex[offset + ss * stride];
424  }
425  NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
426  NumericT sn = std::sin(arg);
427  NumericT cs = std::cos(arg);
428  std::complex<NumericT> ex(cs, sn);
429  std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
430  in2.real() * ex.imag() + in2.imag() * ex.real());
431  if (!data_order)
432  input_complex[offset + ss] = in1 - tmp;
433  else
434  input_complex[offset + ss * stride] = in1 - tmp;
435  input_complex[offset] = in1 + tmp;
436  }
437  }
438  }
439 
440 }
441 
446 template<typename NumericT>
447 void fft_radix2_local(std::complex<NumericT> * input_complex,
448  std::complex<NumericT> * lcl_input, vcl_size_t batch_num, vcl_size_t bit_size,
449  vcl_size_t size, vcl_size_t stride, NumericT sign,
451 {
452  NumericT const NUM_PI = NumericT(3.14159265358979323846);
453 
454  for (vcl_size_t batch_id = 0; batch_id < batch_num; batch_id++)
455  {
456 #ifdef VIENNACL_WITH_OPENMP
457  #pragma omp parallel for
458 #endif
459  for (long p2 = 0; p2 < long(size); p2 += 1)
460  {
461  vcl_size_t p = vcl_size_t(p2);
463 
464  if (!data_order)
465  lcl_input[v] = input_complex[batch_id * stride + p]; //index
466  else
467  lcl_input[v] = input_complex[p * stride + batch_id];
468  }
469 
470  for (vcl_size_t s = 0; s < bit_size; s++)
471  {
472  vcl_size_t ss = 1 << s;
473 #ifdef VIENNACL_WITH_OPENMP
474  #pragma omp parallel for
475 #endif
476  for (long tid2 = 0; tid2 < long(size)/2; tid2++)
477  {
478  vcl_size_t tid = vcl_size_t(tid2);
479  vcl_size_t group = (tid & (ss - 1));
480  vcl_size_t pos = ((tid >> s) << (s + 1)) + group;
481 
482  std::complex<NumericT> in1 = lcl_input[pos];
483  std::complex<NumericT> in2 = lcl_input[pos + ss];
484 
485  NumericT arg = NumericT(group) * sign * NUM_PI / NumericT(ss);
486 
487  NumericT sn = std::sin(arg);
488  NumericT cs = std::cos(arg);
489  std::complex<NumericT> ex(cs, sn);
490 
491  std::complex<NumericT> tmp(in2.real() * ex.real() - in2.imag() * ex.imag(),
492  in2.real() * ex.imag() + in2.imag() * ex.real());
493 
494  lcl_input[pos + ss] = in1 - tmp;
495  lcl_input[pos] = in1 + tmp;
496  }
497 
498  }
499 #ifdef VIENNACL_WITH_OPENMP
500  #pragma omp parallel for
501 #endif
502  //copy local array back to global memory
503  for (long p2 = 0; p2 < long(size); p2 += 1)
504  {
505  vcl_size_t p = vcl_size_t(p2);
506  if (!data_order)
507  input_complex[batch_id * stride + p] = lcl_input[p];
508  else
509  input_complex[p * stride + batch_id] = lcl_input[p];
510 
511  }
512 
513  }
514 
515 }
516 
524 template<typename NumericT, unsigned int AlignmentV>
526  vcl_size_t batch_num, NumericT sign = NumericT(-1),
528 {
529 
531 
532  std::vector<std::complex<NumericT> > input_complex(size * batch_num);
533  std::vector<std::complex<NumericT> > lcl_input(size * batch_num);
534  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
535 
537  {
538  viennacl::linalg::host_based::fft_radix2_local(&input_complex[0], &lcl_input[0], batch_num, bit_size, size, stride, sign, data_order);
539  }
540  else
541  {
542  viennacl::linalg::host_based::reorder<NumericT>(in, size, stride, bit_size, batch_num, data_order);
543  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], in, size * batch_num);
544  viennacl::linalg::host_based::fft_radix2(&input_complex[0], batch_num, bit_size, size, stride, sign, data_order);
545  }
546 
547  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], in, size * batch_num);
548 }
549 
557 template<typename NumericT, unsigned int AlignmentV>
559  vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1),
561 {
562 
564 
565  NumericT * data = detail::extract_raw_pointer<NumericT>(in);
566 
567  vcl_size_t row_num = in.internal_size1();
568  vcl_size_t col_num = in.internal_size2() >> 1;
569  vcl_size_t size_mat = row_num * col_num;
570 
571  std::vector<std::complex<NumericT> > input_complex(size_mat);
572 
573  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data, size_mat);
575  {
576  //std::cout<<bit_size<<","<<size<<","<<stride<<","<<batch_num<<","<<size<<","<<sign<<","<<data_order<<std::endl;
577  std::vector<std::complex<NumericT> > lcl_input(size_mat);
578  viennacl::linalg::host_based::fft_radix2_local(&input_complex[0], &lcl_input[0], batch_num, bit_size, size, stride, sign, data_order);
579  }
580  else
581  {
582  viennacl::linalg::host_based::reorder<NumericT>(in, size, stride, bit_size, batch_num, data_order);
583  viennacl::linalg::host_based::detail::fft::copy_to_complex_array(&input_complex[0], data, size_mat);
584  viennacl::linalg::host_based::fft_radix2(&input_complex[0], batch_num, bit_size, size, stride, sign, data_order);
585  }
586 
587  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], data, size_mat);
588 
589 }
590 
598 template<typename NumericT, unsigned int AlignmentV>
600 {
601 
602  vcl_size_t size = in.size() >> 1;
604 
608 
609  std::vector<std::complex<NumericT> > input_complex(size);
610  std::vector<std::complex<NumericT> > output_complex(size);
611 
612  std::vector<std::complex<NumericT> > A_complex(ext_size);
613  std::vector<std::complex<NumericT> > B_complex(ext_size);
614  std::vector<std::complex<NumericT> > Z_complex(ext_size);
615 
617 #ifdef VIENNACL_WITH_OPENMP
618  #pragma omp parallel for
619 #endif
620  for (long i2 = 0; i2 < long(ext_size); i2++)
621  {
622  vcl_size_t i = vcl_size_t(i2);
623  A_complex[i] = 0;
624  B_complex[i] = 0;
625  }
626 
627  vcl_size_t double_size = size << 1;
628 
629  NumericT const NUM_PI = NumericT(3.14159265358979323846);
630 #ifdef VIENNACL_WITH_OPENMP
631  #pragma omp parallel for
632 #endif
633  for (long i2 = 0; i2 < long(size); i2++)
634  {
635  vcl_size_t i = vcl_size_t(i2);
636  vcl_size_t rm = i * i % (double_size);
637  NumericT angle = NumericT(rm) / NumericT(size) * NumericT(NUM_PI);
638 
639  NumericT sn_a = std::sin(-angle);
640  NumericT cs_a = std::cos(-angle);
641 
642  std::complex<NumericT> a_i(cs_a, sn_a);
643  std::complex<NumericT> b_i(cs_a, -sn_a);
644 
645  A_complex[i] = std::complex<NumericT>(input_complex[i].real() * a_i.real() - input_complex[i].imag() * a_i.imag(),
646  input_complex[i].real() * a_i.imag() + input_complex[i].imag() * a_i.real());
647  B_complex[i] = b_i;
648 
649  // very bad instruction, to be fixed
650  if (i)
651  B_complex[ext_size - i] = b_i;
652  }
653 
657 
659 
661 
662 #ifdef VIENNACL_WITH_OPENMP
663  #pragma omp parallel for
664 #endif
665  for (long i2 = 0; i2 < long(size); i2++)
666  {
667  vcl_size_t i = vcl_size_t(i2);
668  vcl_size_t rm = i * i % (double_size);
669  NumericT angle = NumericT(rm) / NumericT(size) * NumericT(-NUM_PI);
670  NumericT sn_a = std::sin(angle);
671  NumericT cs_a = std::cos(angle);
672  std::complex<NumericT> b_i(cs_a, sn_a);
673  output_complex[i] = std::complex<NumericT>(Z_complex[i].real() * b_i.real() - Z_complex[i].imag() * b_i.imag(),
674  Z_complex[i].real() * b_i.imag() + Z_complex[i].imag() * b_i.real());
675  }
676  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], out, size);
677 
678 }
679 
683 template<typename NumericT, unsigned int AlignmentV>
685 {
686  vcl_size_t size = input.size() >> 1;
687  NumericT norm_factor = static_cast<NumericT>(size);
688  for (vcl_size_t i = 0; i < size * 2; i++)
689  input[i] /= norm_factor;
690 
691 }
692 
696 template<typename NumericT, unsigned int AlignmentV>
700 {
701  vcl_size_t size = input1.size() >> 1;
702 
703  std::vector<std::complex<NumericT> > input1_complex(size);
704  std::vector<std::complex<NumericT> > input2_complex(size);
705  std::vector<std::complex<NumericT> > output_complex(size);
708 
709 #ifdef VIENNACL_WITH_OPENMP
710  #pragma omp parallel for
711 #endif
712  for (long i2 = 0; i2 < long(size); i2++)
713  {
714  vcl_size_t i = vcl_size_t(i2);
715  std::complex<NumericT> in1 = input1_complex[i];
716  std::complex<NumericT> in2 = input2_complex[i];
717  output_complex[i] = std::complex<NumericT>(in1.real() * in2.real() - in1.imag() * in2.imag(),
718  in1.real() * in2.imag() + in1.imag() * in2.real());
719  }
720  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], output, size);
721 
722 }
726 template<typename NumericT, unsigned int AlignmentV>
728 {
729  vcl_size_t row_num = input.internal_size1() / 2;
730  vcl_size_t col_num = input.internal_size2() / 2;
731 
732  vcl_size_t size = row_num * col_num;
733 
734  NumericT * data = detail::extract_raw_pointer<NumericT>(input);
735 
736  std::vector<std::complex<NumericT> > input_complex(size);
737 
739 #ifdef VIENNACL_WITH_OPENMP
740  #pragma omp parallel for
741 #endif
742  for (long i2 = 0; i2 < long(size); i2++)
743  {
744  vcl_size_t i = vcl_size_t(i2);
745  vcl_size_t row = i / col_num;
746  vcl_size_t col = i - row * col_num;
747  vcl_size_t new_pos = col * row_num + row;
748 
749  if (i < new_pos)
750  {
751  std::complex<NumericT> val = input_complex[i];
752  input_complex[i] = input_complex[new_pos];
753  input_complex[new_pos] = val;
754  }
755  }
756  viennacl::linalg::host_based::detail::fft::copy_to_vector(&input_complex[0], data, size);
757 
758 }
759 
763 template<typename NumericT, unsigned int AlignmentV>
766 {
767 
768  vcl_size_t row_num = input.internal_size1() / 2;
769  vcl_size_t col_num = input.internal_size2() / 2;
770  vcl_size_t size = row_num * col_num;
771 
772  NumericT const * data_A = detail::extract_raw_pointer<NumericT>(input);
773  NumericT * data_B = detail::extract_raw_pointer<NumericT>(output);
774 
775  std::vector<std::complex<NumericT> > input_complex(size);
777 
778  std::vector<std::complex<NumericT> > output_complex(size);
779 #ifdef VIENNACL_WITH_OPENMP
780  #pragma omp parallel for
781 #endif
782  for (long i2 = 0; i2 < long(size); i2++)
783  {
784  vcl_size_t i = vcl_size_t(i2);
785  vcl_size_t row = i / col_num;
786  vcl_size_t col = i % col_num;
787  vcl_size_t new_pos = col * row_num + row;
788  output_complex[new_pos] = input_complex[i];
789  }
790  viennacl::linalg::host_based::detail::fft::copy_to_vector(&output_complex[0], data_B, size);
791 }
792 
796 template<typename NumericT>
799 {
800  NumericT const * data_in = detail::extract_raw_pointer<NumericT>(in);
801  NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
802 
803 #ifdef VIENNACL_WITH_OPENMP
804  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
805 #endif
806  for (long i2 = 0; i2 < long(size); i2++)
807  {
808  vcl_size_t i = static_cast<vcl_size_t>(i2);
809  data_out[2*i ] = data_in[i];
810  data_out[2*i+1] = NumericT(0);
811  }
812 }
813 
817 template<typename NumericT>
820 {
821  NumericT const * data_in = detail::extract_raw_pointer<NumericT>(in);
822  NumericT * data_out = detail::extract_raw_pointer<NumericT>(out);
823 
824 #ifdef VIENNACL_WITH_OPENMP
825 #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
826 #endif
827  for (long i = 0; i < long(size); i++)
828  data_out[i] = data_in[2*i];
829 }
830 
834 template<typename NumericT>
836 {
837  vcl_size_t size = in.size();
838 
839 #ifdef VIENNACL_WITH_OPENMP
840  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
841 #endif
842  for (long i2 = 0; i2 < long(size); i2++)
843  {
844  vcl_size_t i = vcl_size_t(i2);
845  NumericT val1 = in[i];
846  NumericT val2 = in[size - i - 1];
847  in[i] = val2;
848  in[size - i - 1] = val1;
849  }
850 }
851 
852 } //namespace host_based
853 } //namespace linalg
854 } //namespace viennacl
855 
856 #endif /* FFT_OPERATIONS_HPP_ */
void fft_radix2_local(std::complex< NumericT > *input_complex, std::complex< NumericT > *lcl_input, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing bigger amount of data...
Implementation of the dense matrix class.
void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
void reverse(viennacl::vector_base< NumericT > &in)
Reverse vector to opposite order and save it in input vector.
void radix2(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 1D algorithm for computing Fourier transformation.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
endcode *Final step
A dense matrix class.
Definition: forwards.h:375
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
void real_to_complex(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
Definition: blas3.hpp:36
void multiply_complex(viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output)
Complex multiplikation of two vectors.
void fft_direct(std::complex< NumericT > *input_complex, std::complex< NumericT > *output, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct algoritm kenrnel.
vcl_size_t get_reorder_num(vcl_size_t v, vcl_size_t bit_size)
void copy_to_complex_array(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > const &in, vcl_size_t size)
std::size_t vcl_size_t
Definition: forwards.h:75
void bluestein(viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t)
Bluestein's algorithm for computing Fourier transformation.
void direct(viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Direct 1D algorithm for computing Fourier transformation.
void copy_to_vector(std::complex< NumericT > *input_complex, viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Inplace transpose of matrix.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
vcl_size_t num_bits(vcl_size_t size)
void fft_radix2(std::complex< NumericT > *input_complex, vcl_size_t batch_num, vcl_size_t bit_size, vcl_size_t size, vcl_size_t stride, NumericT sign, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
Radix-2 algorithm for computing Fourier transformation. Kernel for computing smaller amount of data...
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
void normalize(viennacl::vector< NumericT, AlignmentV > &input)
Normalize vector with his own size.
void complex_to_real(viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size)
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imagina...
void reorder(viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)
Definition: fft_1d.cpp:719
SCALARTYPE sign(SCALARTYPE val)
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...