31 #ifdef VIENNACL_WITH_OPENCL
36 #ifdef VIENNACL_WITH_CUDA
47 unsigned int,
unsigned int,
unsigned int);
49 unsigned int&,
unsigned int&,
unsigned int&,
const std::string&);
60 static testData direct_2d = { { 0.120294f, 0.839315f, 0.890936f, 0.775417f, 0.375051f, 0.775645f, 0.367671f, 0.309852f, 0.551154f, 0.166495f, 0.174865f, 0.340252f, 0.393914f, 0.439817f, 0.523974f, 0.291109f, 0.181803f,
61 0.811176f, 0.490668f, 0.234881f, 0.611783f, 0.098058f, 0.106492f, 0.399059f, 0.974164f, 0.403960f, 0.324111f, 0.772581f, 0.609412f, 0.917312f, 0.538254f, 0.729706f, 0.756627f, 0.429191f, 0.505123f, 0.131678f,
62 0.204836f, 0.872794f, 0.441530f, 0.755990f, 0.039289f, 0.616395f, 0.096242f, 0.433203f, 0.056212f, 0.620216f, 0.724312f, 0.238015f }, { 10.058718f, 12.402115f, 3.306907f, 0.570050f, -0.527832f, -1.052828f,
63 -0.309640f, 1.578631f, 0.027247f, 1.441292f, -2.396150f, 0.396048f, -2.490234f, -0.923666f, -0.890061f, 1.154475f, -2.485666f, -0.029132f, -1.617884f, -0.788678f, 0.008640f, -0.751211f, -0.245883f, 2.815872f,
64 2.316608f, 0.780692f, 0.437285f, -0.798080f, 0.304596f, -0.176831f, 1.481121f, -0.633767f, -0.177035f, 0.302556f, -1.388328f, 0.109418f, 0.034794f, 0.568763f, 0.053167f, -0.332043f, 0.074045f, -1.350742f,
65 -1.101494f, 1.267548f, -1.288304f, 2.578995f, -0.297569f, 1.014074f }, 1, 4, 6 };
67 static testData radix2_2d = { { 0.860600f, 0.020071f, 0.756794f, 0.472348f, 0.604630f, 0.445387f, 0.738811f, 0.644715f, 0.840903f, 0.746019f, 0.629334f, 0.682880f, 0.516268f, 0.235386f, 0.800333f, 0.175785f, 0.974124f,
68 0.485907f, 0.492256f, 0.696148f, 0.230253f, 0.600575f, 0.138786f, 0.136737f, 0.114667f, 0.516912f, 0.173743f, 0.899410f, 0.891824f, 0.704459f, 0.450209f, 0.752424f, 0.724530f, 0.207003f, 0.224772f, 0.329161f,
69 0.652390f, 0.963583f, 0.973876f, 0.493293f, 0.709602f, 0.603211f, 0.176173f, 0.225870f, 0.838596f, 0.976507f, 0.401655f, 0.812721f, 0.462413f, 0.893911f, 0.508869f, 0.692667f, 0.494486f, 0.647656f, 0.829403f,
70 0.609152f, 0.164568f, 0.003146f, 0.508563f, 0.056392f, 0.707605f, 0.958771f, 0.808816f, 0.432136f }, { 18.399853f, 17.120342f, 1.194352f, 0.639568f, -0.086731f, -0.384759f, 1.241270f, -2.175158f, 1.175068f,
71 0.896665f, 0.753659f, 0.780709f, -0.082556f, -3.727531f, 1.578434f, -0.294704f, 1.544822f, -0.169894f, 0.570453f, -1.065756f, 1.432534f, -1.146827f, -1.713843f, 2.376111f, -2.141517f, -3.200578f, -1.061705f,
72 -1.680550f, 0.656694f, 2.493567f, -1.462913f, -3.195214f, 2.498683f, -1.052464f, -1.144435f, -4.022502f, 0.301723f, 0.550845f, -1.033154f, -0.872973f, 0.916475f, -0.175878f, 0.123236f, -1.495021f, 1.962570f,
73 -0.616791f, -2.436357f, -1.537166f, 0.547337f, -2.207615f, 1.563801f, -0.916862f, 2.013805f, 1.934075f, 0.940849f, -0.143010f, -0.361511f, 0.364330f, -0.161776f, 1.245928f, -1.553198f, 1.579960f, 1.363282f,
74 0.741429f }, 1, 4, 8 };
76 static testData direct_2d_big = { { 0.475679f, 0.408864f, 0.313085f, 0.387599f, 0.767833f, 0.015767f, 0.832733f, 0.764867f, 0.850312f, 0.782744f, 0.355199f, 0.308463f, 0.496935f, 0.043339f, 0.309902f, 0.030681f, 0.497275f,
77 0.237185f, 0.229802f, 0.606489f, 0.720393f, 0.848826f, 0.704500f, 0.845834f, 0.451885f, 0.339276f, 0.523190f, 0.688469f, 0.646792f, 0.975192f, 0.933888f, 0.122471f, 0.384056f, 0.246973f, 0.510070f, 0.151889f,
78 0.262739f, 0.342803f, 0.916756f, 0.113051f, 0.125547f, 0.271954f, 0.421514f, 0.622482f, 0.315293f, 0.731416f, 0.653164f, 0.812568f, 0.968601f, 0.882965f, 0.419057f, 0.688994f, 0.731792f, 0.123557f, 0.534827f,
79 0.183676f, 0.462833f, 0.058017f, 0.872145f, 0.109626f, 0.033209f, 0.806033f, 0.232097f, 0.417265f, 0.053006f, 0.742167f, 0.569154f, 0.315745f, 0.084970f, 0.485910f, 0.428796f, 0.210517f, 0.757864f, 0.850311f,
80 0.832999f, 0.073158f, 0.581726f, 0.486163f, 0.885726f, 0.550328f, 0.369128f, 0.304783f, 0.239321f, 0.100920f }, { 21.755795f, 18.089336f, -1.248233f, -0.179035f, 1.307578f, 1.589876f, -1.680055f, 1.879153f,
81 0.500297f, 0.839735f, 0.046095f, -0.177522f, 0.742587f, -0.786261f, -3.427422f, -0.445572f, -1.376776f, 1.221333f, 0.334313f, -0.588123f, -2.070653f, 1.297694f, -1.879930f, -2.445690f, 1.692045f, 0.251480f,
82 0.435994f, 0.257269f, 1.513737f, 0.859310f, 0.538316f, -3.698363f, -3.243739f, 2.342074f, 1.255018f, -1.052454f, 0.450322f, 3.684811f, -0.951320f, 2.863686f, -0.170055f, 1.501932f, -0.800708f, 2.040001f,
83 -0.229112f, -0.175461f, -5.128507f, -2.872447f, -2.125049f, -2.656515f, 0.632609f, -2.080163f, 2.527745f, -1.830541f, 0.086613f, -1.402300f, -0.900261f, -1.355287f, -0.909127f, 2.822799f, 2.142723f, -0.882929f,
84 -3.627774f, 0.180693f, -0.073456f, 0.783774f, 2.144351f, -0.252458f, 0.090970f, -0.007880f, 3.457415f, 0.527979f, 0.505462f, 0.978198f, -1.807562f, -2.692160f, 2.556900f, -1.385276f, 3.526823f, 0.247212f,
85 1.879590f, 0.288942f, 1.504963f, -0.408566f }, 1, 7, 6 };
87 static testData transposeMatrix= {{0.139420f,0.539278f,0.547922f,0.672097f,0.528360f,0.158671f,0.596258f,0.432662f,0.445432f,0.597279f,0.966011f,0.707923f,0.705743f,0.282214f,0.100677f,0.143657f,0.040120f,0.346660f,0.279002f,
88 0.568480f,0.505332f,0.875261f,0.001142f,0.237294f,0.673498f,0.699611f,0.990521f,0.379241f,0.981826f,0.091198f,0.522898f,0.637506f}, {0.13942f,0.539278f,0.445432f,0.597279f,0.04012f,0.34666f,0.673498f,0.699611f,
89 0.547922f,0.672097f,0.966011f,0.707923f,0.279002f,0.56848f,0.990521f,0.379241f,0.52836f,0.158671f,0.705743f,0.282214f,0.505332f,0.875261f,0.981826f,0.091198f,0.596258f,0.432662f,0.100677f,0.143657f,0.001142f,
90 0.237294f,0.522898f,0.637506f},1,4,4};
92 void set_values_struct(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
93 unsigned int& rows,
unsigned int& cols,
unsigned int& batch_size,
testData& data);
96 unsigned int& rows,
unsigned int& cols,
unsigned int& batch_size,
testData& data)
104 for (
unsigned int i = 0; i <
size; i++)
106 input[i] = data.
input[i];
107 output[i] = data.
output[i];
112 void read_matrices_pair(std::vector<ScalarType>& input, std::vector<ScalarType>& output,
113 unsigned int& rows,
unsigned int& cols,
unsigned int& batch_size,
const std::string& log_tag);
116 unsigned int& rows,
unsigned int& cols,
unsigned int& batch_size,
const std::string& log_tag)
118 if (log_tag ==
"fft:2d::direct::1_arg")
120 if (log_tag ==
"fft:2d::radix2::1_arg")
122 if (log_tag ==
"fft:2d::direct::big::2_arg")
124 if (log_tag ==
"fft::transpose" || log_tag ==
"fft::transpose_inplace")
129 template<
typename ScalarType>
135 for (std::size_t i = 0; i < vec.size(); i++)
137 df = df + pow(vec[i] - ref[i], 2);
138 norm_ref += ref[i] * ref[i];
141 return sqrt(df / norm_ref);
144 template<
typename ScalarType>
151 for (std::size_t i = 0; i < vec.size(); i++)
153 df = std::max<ScalarType>(std::fabs(vec[i] - ref[i]), df);
154 mx = std::max<ScalarType>(std::fabs(vec[i]), mx);
158 if (norm_max < df / mx)
168 unsigned int row,
unsigned int col);
171 unsigned int row,
unsigned int col)
173 std::vector<std::vector<ScalarType> > my_matrix(row, std::vector<ScalarType>(col * 2));
174 for (
unsigned int i = 0; i <
row; i++)
175 for (
unsigned int j = 0; j < col * 2; j++)
176 my_matrix[i][j] = in[i * col * 2 + j];
182 unsigned int row,
unsigned int col);
185 unsigned int row,
unsigned int col)
187 std::vector<std::vector<ScalarType> > my_matrix(row, std::vector<ScalarType>(col * 2));
189 for (
unsigned int i = 0; i <
row; i++)
190 for (
unsigned int j = 0; j < col * 2; j++)
191 in[i * col * 2 + j] = my_matrix[i][j];
195 unsigned int col,
unsigned int );
198 unsigned int col,
unsigned int )
202 std::vector<ScalarType> res(in.size());
206 viennacl::inplace_fft(input);
216 unsigned int col,
unsigned int );
219 unsigned int col,
unsigned int )
223 std::vector<ScalarType> res(in.size());
237 unsigned int col,
unsigned int );
240 unsigned int col,
unsigned int )
246 std::vector<ScalarType> res(in.size());
261 unsigned int col,
unsigned int );
264 unsigned int col,
unsigned int )
269 std::vector<ScalarType> res(in.size());
290 std::vector<ScalarType> input;
291 std::vector<ScalarType> output;
293 std::cout << std::endl;
294 std::cout <<
"*****************" << log_tag <<
"***************************\n";
296 unsigned int batch_size;
297 unsigned int rows_num, cols_num;
299 input_function(input, output, rows_num, cols_num, batch_size, log_tag);
300 ScalarType df = func(input, output, rows_num, cols_num, batch_size);
301 printf(
"%7s ROWS=%6d COLS=%6d; BATCH=%3d; DIFF=%3.15f;\n", ((fabs(df) <
EPS) ?
"[Ok]" :
"[Fail]"),
302 rows_num, cols_num, batch_size, df);
303 std::cout << std::endl;
313 std::cout <<
"*" << std::endl;
314 std::cout <<
"* ViennaCL test: FFT" << std::endl;
315 std::cout <<
"*" << std::endl;
330 std::cout << std::endl;
331 std::cout <<
"------- Test completed --------" << std::endl;
332 std::cout << std::endl;
OpenCL kernel file for FFT operations.
void copy_matrix_to_vector(viennacl::matrix< ScalarType > &input, std::vector< ScalarType > &in, unsigned int row, unsigned int col)
Implementations of Fast Furier Transformation using OpenCL.
ScalarType diff(std::vector< ScalarType > &vec, std::vector< ScalarType > &ref)
ScalarType(* test_function_ptr)(std::vector< ScalarType > &, std::vector< ScalarType > &, unsigned int, unsigned int, unsigned int)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
void(* input_function_ptr)(std::vector< ScalarType > &, std::vector< ScalarType > &, unsigned int &, unsigned int &, unsigned int &, const std::string &)
Implementations of Fast Furier Transformation.
ScalarType diff_max(std::vector< ScalarType > &vec, std::vector< ScalarType > &ref)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
int test_correctness(const std::string &log_tag, input_function_ptr input_function, test_function_ptr func)
ScalarType transpose_inplace(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
void transpose(viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input)
Inplace_transpose matrix.
ScalarType transpose(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementations of Fast Furier Transformation using cuda.
void set_values_struct(std::vector< ScalarType > &input, std::vector< ScalarType > &output, unsigned int &rows, unsigned int &cols, unsigned int &batch_size, testData &data)
All routines related to the Fast Fourier Transform. Experimental.
Implementations of Fast Furier Transformation using a plain single-threaded or OpenMP-enabled executi...
void copy_vector_to_matrix(viennacl::matrix< ScalarType > &input, std::vector< ScalarType > &in, unsigned int row, unsigned int col)
ScalarType fft_2d_2arg(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
void read_matrices_pair(std::vector< ScalarType > &input, std::vector< ScalarType > &output, unsigned int &rows, unsigned int &cols, unsigned int &batch_size, const std::string &log_tag)
ScalarType fft_2d_1arg(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int row, unsigned int col, unsigned int)
ScalarType fft(std::vector< ScalarType > &in, std::vector< ScalarType > &out, unsigned int, unsigned int, unsigned int batch_size)