Go to the documentation of this file.00001 #ifndef VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
00002 #define VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
00003
00004 namespace viennacl
00005 {
00006 namespace linalg
00007 {
00008 namespace kernels
00009 {
00010 const char * const fft_align1_real_to_complex =
00011 "// embedd a real-valued vector into a complex one\n"
00012 "__kernel void real_to_complex(__global float* in,\n"
00013 " __global float2* out,\n"
00014 " unsigned int size) {\n"
00015 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
00016 " float2 val = 0;\n"
00017 " val.x = in[i];\n"
00018 " out[i] = val;\n"
00019 " }\n"
00020 "}\n"
00021 ;
00022
00023 const char * const fft_align1_fft_mult_vec =
00024 "// elementwise product of two complex vectors\n"
00025 "__kernel void fft_mult_vec(__global const float2* input1,\n"
00026 " __global const float2* input2,\n"
00027 " __global float2* output,\n"
00028 " unsigned int size) {\n"
00029 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
00030 " float2 in1 = input1[i];\n"
00031 " float2 in2 = input2[i];\n"
00032 " output[i] = (float2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x);\n"
00033 " }\n"
00034 "}\n"
00035 ;
00036
00037 const char * const fft_align1_fft_div_vec_scalar =
00038 "// divide a vector by a scalar (to be removed...)\n"
00039 "__kernel void fft_div_vec_scalar(__global float2* input1, unsigned int size, float factor) {\n"
00040 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
00041 " input1[i] /= factor;\n"
00042 " }\n"
00043 "}\n"
00044 ;
00045
00046 const char * const fft_align1_zero2 =
00047 "// Zero two complex vectors (to avoid kernel launch overhead)\n"
00048 "__kernel void zero2(__global float2* input1,\n"
00049 " __global float2* input2,\n"
00050 " unsigned int size) {\n"
00051 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
00052 " input1[i] = 0;\n"
00053 " input2[i] = 0;\n"
00054 " }\n"
00055 "}\n"
00056 ;
00057
00058 const char * const fft_align1_vandermonde_prod =
00059 "// computes the matrix vector product with a Vandermonde matrix\n"
00060 "__kernel void vandermonde_prod(__global float* vander,\n"
00061 " __global float* vector,\n"
00062 " __global float* result,\n"
00063 " uint size) {\n"
00064 " for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
00065 " float mul = vander[i];\n"
00066 " float pwr = 1;\n"
00067 " float val = 0;\n"
00068 " for(uint j = 0; j < size; j++) {\n"
00069 " val = val + pwr * vector[j];\n"
00070 " pwr *= mul;\n"
00071 " }\n"
00072 " \n"
00073 " result[i] = val;\n"
00074 " }\n"
00075 "}\n"
00076 ;
00077
00078 const char * const fft_align1_bluestein_pre =
00079 "// Preprocessing phase of Bluestein algorithm\n"
00080 "__kernel void bluestein_pre(__global float2* input,\n"
00081 " __global float2* A,\n"
00082 " __global float2* B,\n"
00083 " unsigned int size,\n"
00084 " unsigned int ext_size\n"
00085 " ) {\n"
00086 " unsigned int glb_id = get_global_id(0);\n"
00087 " unsigned int glb_sz = get_global_size(0);\n"
00088 " unsigned int double_size = size << 1;\n"
00089 " float sn_a, cs_a;\n"
00090 " const float NUM_PI = 3.14159265358979323846;\n"
00091 " for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
00092 " unsigned int rm = i * i % (double_size);\n"
00093 " float angle = (float)rm / size * NUM_PI;\n"
00094 " sn_a = sincos(-angle, &cs_a);\n"
00095 " float2 a_i = (float2)(cs_a, sn_a);\n"
00096 " float2 b_i = (float2)(cs_a, -sn_a);\n"
00097 " A[i] = (float2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x);\n"
00098 " B[i] = b_i;\n"
00099 " // very bad instruction, to be fixed\n"
00100 " if(i) \n"
00101 " B[ext_size - i] = b_i;\n"
00102 " }\n"
00103 "}\n"
00104 ;
00105
00106 const char * const fft_align1_transpose =
00107 "// simplistic matrix transpose function\n"
00108 "__kernel void transpose(__global float2* input,\n"
00109 " __global float2* output,\n"
00110 " unsigned int row_num,\n"
00111 " unsigned int col_num) {\n"
00112 " unsigned int size = row_num * col_num;\n"
00113 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
00114 " unsigned int row = i / col_num;\n"
00115 " unsigned int col = i - row*col_num;\n"
00116 " unsigned int new_pos = col * row_num + row;\n"
00117 " output[new_pos] = input[i];\n"
00118 " }\n"
00119 "}\n"
00120 ;
00121
00122 const char * const fft_align1_complex_to_real =
00123 "__kernel void complex_to_real(__global float2* in,\n"
00124 " __global float* out,\n"
00125 " unsigned int size) {\n"
00126 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
00127 " out[i] = in[i].x;\n"
00128 " }\n"
00129 "}\n"
00130 ;
00131
00132 const char * const fft_align1_reverse_inplace =
00133 "// reverses the entries in a vector\n"
00134 "__kernel void reverse_inplace(__global float* vec, uint size) {\n"
00135 " for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) {\n"
00136 " float val1 = vec[i];\n"
00137 " float val2 = vec[size - i - 1];\n"
00138 " vec[i] = val2;\n"
00139 " vec[size - i - 1] = val1;\n"
00140 " }\n"
00141 "}\n"
00142 ;
00143
00144 const char * const fft_align1_transpose_inplace =
00145 "// inplace-transpose of a matrix\n"
00146 "__kernel void transpose_inplace(__global float2* input,\n"
00147 " unsigned int row_num,\n"
00148 " unsigned int col_num) {\n"
00149 " unsigned int size = row_num * col_num;\n"
00150 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
00151 " unsigned int row = i / col_num;\n"
00152 " unsigned int col = i - row*col_num;\n"
00153 " unsigned int new_pos = col * row_num + row;\n"
00154 " //new_pos = col < row?0:1;\n"
00155 " //input[i] = new_pos;\n"
00156 " if(i < new_pos) {\n"
00157 " float2 val = input[i];\n"
00158 " input[i] = input[new_pos];\n"
00159 " input[new_pos] = val;\n"
00160 " }\n"
00161 " }\n"
00162 "}\n"
00163 ;
00164
00165 const char * const fft_align1_bluestein_post =
00166 "// Postprocessing phase of Bluestein algorithm\n"
00167 "__kernel void bluestein_post(__global float2* Z,\n"
00168 " __global float2* out,\n"
00169 " unsigned int size) \n"
00170 "{\n"
00171 " unsigned int glb_id = get_global_id(0);\n"
00172 " unsigned int glb_sz = get_global_size(0);\n"
00173 " unsigned int double_size = size << 1;\n"
00174 " float sn_a, cs_a;\n"
00175 " const float NUM_PI = 3.14159265358979323846;\n"
00176 " for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
00177 " unsigned int rm = i * i % (double_size);\n"
00178 " float angle = (float)rm / size * (-NUM_PI);\n"
00179 " sn_a = sincos(angle, &cs_a);\n"
00180 " float2 b_i = (float2)(cs_a, sn_a);\n"
00181 " out[i] = (float2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x);\n"
00182 " }\n"
00183 "}\n"
00184 ;
00185
00186 }
00187 }
00188 }
00189 #endif