ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
fft_2d.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2015, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
22 #include <iostream>
23 #include <vector>
24 #include <cmath>
25 #include <complex>
26 #include <algorithm>
27 
28 //#define VIENNACL_BUILD_INFO
30 
31 #ifdef VIENNACL_WITH_OPENCL
34 #endif
35 
36 #ifdef VIENNACL_WITH_CUDA
38 #endif
40 #include "viennacl/fft.hpp"
41 
42 typedef float ScalarType;
43 
44 const ScalarType EPS = ScalarType(0.06f); //use smaller values in double precision
45 
46 typedef ScalarType (*test_function_ptr)(std::vector<ScalarType>&, std::vector<ScalarType>&,
47  unsigned int, unsigned int, unsigned int);
48 typedef void (*input_function_ptr)(std::vector<ScalarType>&, std::vector<ScalarType>&,
49  unsigned int&, unsigned int&, unsigned int&, const std::string&);
50 
51 struct testData
52 {
53  float input[2048];
54  float output[2048];
55  unsigned int batch_num;
56  unsigned int row_num;
57  unsigned int col_num;
58 };
59 
60 static testData direct_2d = { { 0.120294f, 0.839315f, 0.890936f, 0.775417f, 0.375051f, 0.775645f, 0.367671f, 0.309852f, 0.551154f, 0.166495f, 0.174865f, 0.340252f, 0.393914f, 0.439817f, 0.523974f, 0.291109f, 0.181803f,
61  0.811176f, 0.490668f, 0.234881f, 0.611783f, 0.098058f, 0.106492f, 0.399059f, 0.974164f, 0.403960f, 0.324111f, 0.772581f, 0.609412f, 0.917312f, 0.538254f, 0.729706f, 0.756627f, 0.429191f, 0.505123f, 0.131678f,
62  0.204836f, 0.872794f, 0.441530f, 0.755990f, 0.039289f, 0.616395f, 0.096242f, 0.433203f, 0.056212f, 0.620216f, 0.724312f, 0.238015f }, { 10.058718f, 12.402115f, 3.306907f, 0.570050f, -0.527832f, -1.052828f,
63  -0.309640f, 1.578631f, 0.027247f, 1.441292f, -2.396150f, 0.396048f, -2.490234f, -0.923666f, -0.890061f, 1.154475f, -2.485666f, -0.029132f, -1.617884f, -0.788678f, 0.008640f, -0.751211f, -0.245883f, 2.815872f,
64  2.316608f, 0.780692f, 0.437285f, -0.798080f, 0.304596f, -0.176831f, 1.481121f, -0.633767f, -0.177035f, 0.302556f, -1.388328f, 0.109418f, 0.034794f, 0.568763f, 0.053167f, -0.332043f, 0.074045f, -1.350742f,
65  -1.101494f, 1.267548f, -1.288304f, 2.578995f, -0.297569f, 1.014074f }, 1, 4, 6 };
66 
67 static testData radix2_2d = { { 0.860600f, 0.020071f, 0.756794f, 0.472348f, 0.604630f, 0.445387f, 0.738811f, 0.644715f, 0.840903f, 0.746019f, 0.629334f, 0.682880f, 0.516268f, 0.235386f, 0.800333f, 0.175785f, 0.974124f,
68  0.485907f, 0.492256f, 0.696148f, 0.230253f, 0.600575f, 0.138786f, 0.136737f, 0.114667f, 0.516912f, 0.173743f, 0.899410f, 0.891824f, 0.704459f, 0.450209f, 0.752424f, 0.724530f, 0.207003f, 0.224772f, 0.329161f,
69  0.652390f, 0.963583f, 0.973876f, 0.493293f, 0.709602f, 0.603211f, 0.176173f, 0.225870f, 0.838596f, 0.976507f, 0.401655f, 0.812721f, 0.462413f, 0.893911f, 0.508869f, 0.692667f, 0.494486f, 0.647656f, 0.829403f,
70  0.609152f, 0.164568f, 0.003146f, 0.508563f, 0.056392f, 0.707605f, 0.958771f, 0.808816f, 0.432136f }, { 18.399853f, 17.120342f, 1.194352f, 0.639568f, -0.086731f, -0.384759f, 1.241270f, -2.175158f, 1.175068f,
71  0.896665f, 0.753659f, 0.780709f, -0.082556f, -3.727531f, 1.578434f, -0.294704f, 1.544822f, -0.169894f, 0.570453f, -1.065756f, 1.432534f, -1.146827f, -1.713843f, 2.376111f, -2.141517f, -3.200578f, -1.061705f,
72  -1.680550f, 0.656694f, 2.493567f, -1.462913f, -3.195214f, 2.498683f, -1.052464f, -1.144435f, -4.022502f, 0.301723f, 0.550845f, -1.033154f, -0.872973f, 0.916475f, -0.175878f, 0.123236f, -1.495021f, 1.962570f,
73  -0.616791f, -2.436357f, -1.537166f, 0.547337f, -2.207615f, 1.563801f, -0.916862f, 2.013805f, 1.934075f, 0.940849f, -0.143010f, -0.361511f, 0.364330f, -0.161776f, 1.245928f, -1.553198f, 1.579960f, 1.363282f,
74  0.741429f }, 1, 4, 8 };
75 
76 static testData direct_2d_big = { { 0.475679f, 0.408864f, 0.313085f, 0.387599f, 0.767833f, 0.015767f, 0.832733f, 0.764867f, 0.850312f, 0.782744f, 0.355199f, 0.308463f, 0.496935f, 0.043339f, 0.309902f, 0.030681f, 0.497275f,
77  0.237185f, 0.229802f, 0.606489f, 0.720393f, 0.848826f, 0.704500f, 0.845834f, 0.451885f, 0.339276f, 0.523190f, 0.688469f, 0.646792f, 0.975192f, 0.933888f, 0.122471f, 0.384056f, 0.246973f, 0.510070f, 0.151889f,
78  0.262739f, 0.342803f, 0.916756f, 0.113051f, 0.125547f, 0.271954f, 0.421514f, 0.622482f, 0.315293f, 0.731416f, 0.653164f, 0.812568f, 0.968601f, 0.882965f, 0.419057f, 0.688994f, 0.731792f, 0.123557f, 0.534827f,
79  0.183676f, 0.462833f, 0.058017f, 0.872145f, 0.109626f, 0.033209f, 0.806033f, 0.232097f, 0.417265f, 0.053006f, 0.742167f, 0.569154f, 0.315745f, 0.084970f, 0.485910f, 0.428796f, 0.210517f, 0.757864f, 0.850311f,
80  0.832999f, 0.073158f, 0.581726f, 0.486163f, 0.885726f, 0.550328f, 0.369128f, 0.304783f, 0.239321f, 0.100920f }, { 21.755795f, 18.089336f, -1.248233f, -0.179035f, 1.307578f, 1.589876f, -1.680055f, 1.879153f,
81  0.500297f, 0.839735f, 0.046095f, -0.177522f, 0.742587f, -0.786261f, -3.427422f, -0.445572f, -1.376776f, 1.221333f, 0.334313f, -0.588123f, -2.070653f, 1.297694f, -1.879930f, -2.445690f, 1.692045f, 0.251480f,
82  0.435994f, 0.257269f, 1.513737f, 0.859310f, 0.538316f, -3.698363f, -3.243739f, 2.342074f, 1.255018f, -1.052454f, 0.450322f, 3.684811f, -0.951320f, 2.863686f, -0.170055f, 1.501932f, -0.800708f, 2.040001f,
83  -0.229112f, -0.175461f, -5.128507f, -2.872447f, -2.125049f, -2.656515f, 0.632609f, -2.080163f, 2.527745f, -1.830541f, 0.086613f, -1.402300f, -0.900261f, -1.355287f, -0.909127f, 2.822799f, 2.142723f, -0.882929f,
84  -3.627774f, 0.180693f, -0.073456f, 0.783774f, 2.144351f, -0.252458f, 0.090970f, -0.007880f, 3.457415f, 0.527979f, 0.505462f, 0.978198f, -1.807562f, -2.692160f, 2.556900f, -1.385276f, 3.526823f, 0.247212f,
85  1.879590f, 0.288942f, 1.504963f, -0.408566f }, 1, 7, 6 };
86 
87 static testData transposeMatrix= {{0.139420f,0.539278f,0.547922f,0.672097f,0.528360f,0.158671f,0.596258f,0.432662f,0.445432f,0.597279f,0.966011f,0.707923f,0.705743f,0.282214f,0.100677f,0.143657f,0.040120f,0.346660f,0.279002f,
88  0.568480f,0.505332f,0.875261f,0.001142f,0.237294f,0.673498f,0.699611f,0.990521f,0.379241f,0.981826f,0.091198f,0.522898f,0.637506f}, {0.13942f,0.539278f,0.445432f,0.597279f,0.04012f,0.34666f,0.673498f,0.699611f,
89  0.547922f,0.672097f,0.966011f,0.707923f,0.279002f,0.56848f,0.990521f,0.379241f,0.52836f,0.158671f,0.705743f,0.282214f,0.505332f,0.875261f,0.981826f,0.091198f,0.596258f,0.432662f,0.100677f,0.143657f,0.001142f,
90  0.237294f,0.522898f,0.637506f},1,4,4};
91 
92 void set_values_struct(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
93  unsigned int& rows, unsigned int& cols, unsigned int& batch_size, testData& data);
94 
95 void set_values_struct(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
96  unsigned int& rows, unsigned int& cols, unsigned int& batch_size, testData& data)
97 {
98  unsigned int size = data.col_num * data.batch_num * 2 * data.row_num;
99  input.resize(size);
100  output.resize(size);
101  rows = data.row_num;
102  cols = data.col_num;
103  batch_size = data.batch_num;
104  for (unsigned int i = 0; i < size; i++)
105  {
106  input[i] = data.input[i];
107  output[i] = data.output[i];
108  }
109 
110 }
111 
112 void read_matrices_pair(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
113  unsigned int& rows, unsigned int& cols, unsigned int& batch_size, const std::string& log_tag);
114 
115 void read_matrices_pair(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
116  unsigned int& rows, unsigned int& cols, unsigned int& batch_size, const std::string& log_tag)
117 {
118  if (log_tag == "fft:2d::direct::1_arg")
119  set_values_struct(input, output, rows, cols, batch_size, direct_2d);
120  if (log_tag == "fft:2d::radix2::1_arg")
121  set_values_struct(input, output, rows, cols, batch_size, radix2_2d);
122  if (log_tag == "fft:2d::direct::big::2_arg")
123  set_values_struct(input, output, rows, cols, batch_size, direct_2d_big);
124  if (log_tag == "fft::transpose" || log_tag == "fft::transpose_inplace")
125  set_values_struct(input, output, rows, cols, batch_size, transposeMatrix);
126 
127 }
128 
129 template<typename ScalarType>
130 ScalarType diff(std::vector<ScalarType>& vec, std::vector<ScalarType>& ref)
131 {
132  ScalarType df = 0.0;
133  ScalarType norm_ref = 0;
134 
135  for (std::size_t i = 0; i < vec.size(); i++)
136  {
137  df = df + pow(vec[i] - ref[i], 2);
138  norm_ref += ref[i] * ref[i];
139  }
140 
141  return sqrt(df / norm_ref);
142 }
143 
144 template<typename ScalarType>
145 ScalarType diff_max(std::vector<ScalarType>& vec, std::vector<ScalarType>& ref)
146 {
147  ScalarType df = 0.0;
148  ScalarType mx = 0.0;
149  ScalarType norm_max = 0;
150 
151  for (std::size_t i = 0; i < vec.size(); i++)
152  {
153  df = std::max<ScalarType>(std::fabs(vec[i] - ref[i]), df);
154  mx = std::max<ScalarType>(std::fabs(vec[i]), mx);
155 
156  if (mx > 0)
157  {
158  if (norm_max < df / mx)
159  norm_max = df / mx;
160  }
161  }
162 
163  return norm_max;
164 }
165 
166 
167 void copy_vector_to_matrix(viennacl::matrix<ScalarType> & input, std::vector<ScalarType> & in,
168  unsigned int row, unsigned int col);
169 
170 void copy_vector_to_matrix(viennacl::matrix<ScalarType> & input, std::vector<ScalarType> & in,
171  unsigned int row, unsigned int col)
172 {
173  std::vector<std::vector<ScalarType> > my_matrix(row, std::vector<ScalarType>(col * 2));
174  for (unsigned int i = 0; i < row; i++)
175  for (unsigned int j = 0; j < col * 2; j++)
176  my_matrix[i][j] = in[i * col * 2 + j];
177  viennacl::copy(my_matrix, input);
178 
179 }
180 
181 void copy_matrix_to_vector(viennacl::matrix<ScalarType> & input, std::vector<ScalarType> & in,
182  unsigned int row, unsigned int col);
183 
184 void copy_matrix_to_vector(viennacl::matrix<ScalarType> & input, std::vector<ScalarType> & in,
185  unsigned int row, unsigned int col)
186 {
187  std::vector<std::vector<ScalarType> > my_matrix(row, std::vector<ScalarType>(col * 2));
188  viennacl::copy(input, my_matrix);
189  for (unsigned int i = 0; i < row; i++)
190  for (unsigned int j = 0; j < col * 2; j++)
191  in[i * col * 2 + j] = my_matrix[i][j];
192 }
193 
194 ScalarType fft_2d_1arg(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
195  unsigned int col, unsigned int /*batch_size*/);
196 
197 ScalarType fft_2d_1arg(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
198  unsigned int col, unsigned int /*batch_size*/)
199 {
200  viennacl::matrix<ScalarType> input(row, 2 * col);
201 
202  std::vector<ScalarType> res(in.size());
203 
204  copy_vector_to_matrix(input, in, row, col);
205 
206  viennacl::inplace_fft(input);
207  //std::cout << input << "\n";
209 
210  copy_matrix_to_vector(input, res, row, col);
211 
212  return diff_max(res, out);
213 }
214 
215 ScalarType transpose_inplace(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
216  unsigned int col, unsigned int /*batch_size*/);
217 
218 ScalarType transpose_inplace(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
219  unsigned int col, unsigned int /*batch_size*/)
220 {
221  viennacl::matrix<ScalarType> input(row, 2 * col);
222 
223  std::vector<ScalarType> res(in.size());
224 
225  copy_vector_to_matrix(input, in, row, col);
226 
228 
230 
231  copy_matrix_to_vector(input, res, row, col);
232 
233  return diff_max(res, out);
234 }
235 
236 ScalarType transpose(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
237  unsigned int col, unsigned int /*batch_size*/);
238 
239 ScalarType transpose(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
240  unsigned int col, unsigned int /*batch_size*/)
241 {
242  viennacl::matrix<ScalarType> input(row, 2 * col);
243  viennacl::matrix<ScalarType> output(row, 2 * col);
244 
245 
246  std::vector<ScalarType> res(in.size());
247 
248  copy_vector_to_matrix(input, in, row, col);
249 
250  viennacl::linalg::transpose(input,output);
251 
253 
254  copy_matrix_to_vector(output, res, row, col);
255 
256  return diff_max(res, out);
257 }
258 
259 
260 ScalarType fft_2d_2arg(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
261  unsigned int col, unsigned int /*batch_size*/);
262 
263 ScalarType fft_2d_2arg(std::vector<ScalarType>& in, std::vector<ScalarType>& out, unsigned int row,
264  unsigned int col, unsigned int /*batch_size*/)
265 {
266  viennacl::matrix<ScalarType> input(row, 2 * col);
267  viennacl::matrix<ScalarType> output(row, 2 * col);
268 
269  std::vector<ScalarType> res(in.size());
270 
271  copy_vector_to_matrix(input, in, row, col);
272 
273  //std::cout << input << "\n";
274  viennacl::fft(input, output);
275  //std::cout << input << "\n";
277 
278  copy_matrix_to_vector(output, res, row, col);
279 
280  return diff_max(res, out);
281 }
282 
283 int test_correctness(const std::string& log_tag, input_function_ptr input_function,
284  test_function_ptr func);
285 
286 int test_correctness(const std::string& log_tag, input_function_ptr input_function,
287  test_function_ptr func)
288 {
289 
290  std::vector<ScalarType> input;
291  std::vector<ScalarType> output;
292 
293  std::cout << std::endl;
294  std::cout << "*****************" << log_tag << "***************************\n";
295 
296  unsigned int batch_size;
297  unsigned int rows_num, cols_num;
298 
299  input_function(input, output, rows_num, cols_num, batch_size, log_tag);
300  ScalarType df = func(input, output, rows_num, cols_num, batch_size);
301  printf("%7s ROWS=%6d COLS=%6d; BATCH=%3d; DIFF=%3.15f;\n", ((fabs(df) < EPS) ? "[Ok]" : "[Fail]"),
302  rows_num, cols_num, batch_size, df);
303  std::cout << std::endl;
304 
305  if (df > EPS)
306  return EXIT_FAILURE;
307 
308  return EXIT_SUCCESS;
309 }
310 
311 int main()
312 {
313  std::cout << "*" << std::endl;
314  std::cout << "* ViennaCL test: FFT" << std::endl;
315  std::cout << "*" << std::endl;
316 
317  //2D FFT tests
318  if (test_correctness("fft:2d::radix2::1_arg", read_matrices_pair, &fft_2d_1arg) == EXIT_FAILURE)
319  return EXIT_FAILURE;
320  if (test_correctness("fft:2d::direct::1_arg", read_matrices_pair, &fft_2d_1arg) == EXIT_FAILURE)
321  return EXIT_FAILURE;
322  if (test_correctness("fft:2d::direct::big::2_arg", read_matrices_pair,
323  &fft_2d_2arg) == EXIT_FAILURE)
324  return EXIT_FAILURE;
325  if (test_correctness("fft::transpose_inplace", read_matrices_pair, &transpose_inplace) == EXIT_FAILURE)
326  return EXIT_FAILURE;
327  if (test_correctness("fft::transpose", read_matrices_pair, &transpose) == EXIT_FAILURE)
328  return EXIT_FAILURE;
329 
330  std::cout << std::endl;
331  std::cout << "------- Test completed --------" << std::endl;
332  std::cout << std::endl;
333 
334  return EXIT_SUCCESS;
335 }
336 
OpenCL kernel file for FFT operations.
void copy_matrix_to_vector(viennacl::matrix< ScalarType > &input, std::vector< ScalarType > &in, unsigned int row, unsigned int col)
Definition: fft_2d.cpp:184
Implementations of Fast Furier Transformation using OpenCL.
unsigned int row_num
Definition: fft_1d.cpp:57
ScalarType diff(std::vector< ScalarType > &vec, std::vector< ScalarType > &ref)
Definition: fft_2d.cpp:130
ScalarType(* test_function_ptr)(std::vector< ScalarType > &, std::vector< ScalarType > &, unsigned int, unsigned int, unsigned int)
Definition: fft_2d.cpp:46
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A dense matrix class.
Definition: forwards.h:375
void(* input_function_ptr)(std::vector< ScalarType > &, std::vector< ScalarType > &, unsigned int &, unsigned int &, unsigned int &, const std::string &)
Definition: fft_2d.cpp:48
Implementations of Fast Furier Transformation.
float output[2048]
Definition: fft_1d.cpp:54
ScalarType diff_max(std::vector< ScalarType > &vec, std::vector< ScalarType > &ref)
Definition: fft_2d.cpp:145
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
unsigned int col_num
Definition: fft_1d.cpp:58
int test_correctness(const std::string &log_tag, input_function_ptr input_function, test_function_ptr func)
Definition: fft_2d.cpp:286
ScalarType transpose_inplace(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
Definition: fft_2d.cpp:218
int main()
Definition: fft_2d.cpp:311
unsigned int batch_num
Definition: fft_1d.cpp:56
float input[2048]
Definition: fft_1d.cpp:53
float ScalarType
Definition: fft_2d.cpp:42
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Inplace_transpose matrix.
ScalarType transpose(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
Definition: fft_2d.cpp:239
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementations of Fast Furier Transformation using cuda.
void set_values_struct(std::vector< ScalarType > &input, std::vector< ScalarType > &output, unsigned int &rows, unsigned int &cols, unsigned int &batch_size, testData &data)
Definition: fft_2d.cpp:95
float ScalarType
Definition: fft_1d.cpp:42
All routines related to the Fast Fourier Transform. Experimental.
Implementations of Fast Furier Transformation using a plain single-threaded or OpenMP-enabled executi...
void copy_vector_to_matrix(viennacl::matrix< ScalarType > &input, std::vector< ScalarType > &in, unsigned int row, unsigned int col)
Definition: fft_2d.cpp:170
ScalarType fft_2d_2arg(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
Definition: fft_2d.cpp:263
void read_matrices_pair(std::vector< ScalarType > &input, std::vector< ScalarType > &output, unsigned int &rows, unsigned int &cols, unsigned int &batch_size, const std::string &log_tag)
Definition: fft_2d.cpp:115
ScalarType fft_2d_1arg(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
Definition: fft_2d.cpp:197
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)
Definition: fft_1d.cpp:719
const ScalarType EPS
Definition: fft_2d.cpp:44