00001 #ifndef _VIENNACL_MATRIX_ROW_KERNELS_HPP_
00002 #define _VIENNACL_MATRIX_ROW_KERNELS_HPP_
00003 #include "viennacl/tools/tools.hpp"
00004 #include "viennacl/ocl/kernel.hpp"
00005 #include "viennacl/ocl/platform.hpp"
00006 #include "viennacl/ocl/utils.hpp"
00007 #include "viennacl/linalg/kernels/matrix_row_source.h"
00008
00009
00010 namespace viennacl
00011 {
00012 namespace linalg
00013 {
00014 namespace kernels
00015 {
/** @brief Primary template of the kernel-program holder for row-major matrices.
 *
 * Only declared here; the usable definitions are the explicit specializations
 * for a given scalar TYPE and alignment.
 */
template <typename TYPE, unsigned int alignment>
struct matrix_row;
00018
00019
00021 template <>
00022 struct matrix_row<float, 16>
00023 {
00024 static std::string program_name()
00025 {
00026 return "f_matrix_row_16";
00027 }
00028 static void init()
00029 {
00030 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00031 static std::map<cl_context, bool> init_done;
00032 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00033 if (!init_done[context_.handle()])
00034 {
00035 std::string source;
00036 source.append(matrix_row_align1_unit_lower_triangular_substitute_inplace);
00037 source.append(matrix_row_align1_inplace_sub);
00038 source.append(matrix_row_align1_lower_triangular_substitute_inplace);
00039 source.append(matrix_row_align1_trans_vec_mul);
00040 source.append(matrix_row_align1_rank1_update);
00041 source.append(matrix_row_align1_sub);
00042 source.append(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace);
00043 source.append(matrix_row_align1_lu_factorize);
00044 source.append(matrix_row_align1_add);
00045 source.append(matrix_row_align1_fft_direct);
00046 source.append(matrix_row_align1_vec_mul);
00047 source.append(matrix_row_align1_fft_radix2_local);
00048 source.append(matrix_row_align1_trans_lower_triangular_substitute_inplace);
00049 source.append(matrix_row_align1_inplace_divide);
00050 source.append(matrix_row_align1_trans_upper_triangular_substitute_inplace);
00051 source.append(matrix_row_align1_unit_upper_triangular_substitute_inplace);
00052 source.append(matrix_row_align1_inplace_add);
00053 source.append(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace);
00054 source.append(matrix_row_align1_scaled_rank1_update);
00055 source.append(matrix_row_align1_clear);
00056 source.append(matrix_row_align1_fft_radix2);
00057 source.append(matrix_row_align1_cpu_inplace_mult);
00058 source.append(matrix_row_align1_fft_reorder);
00059 source.append(matrix_row_align1_inplace_mult);
00060 source.append(matrix_row_align1_upper_triangular_substitute_inplace);
00061 std::string prog_name = program_name();
00062 #ifdef VIENNACL_BUILD_INFO
00063 std::cout << "Creating program " << prog_name << std::endl;
00064 #endif
00065 context_.add_program(source, prog_name);
00066 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00067 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00068 prog_.add_kernel("inplace_sub");
00069 prog_.add_kernel("lower_triangular_substitute_inplace");
00070 prog_.add_kernel("trans_vec_mul");
00071 prog_.add_kernel("rank1_update");
00072 prog_.add_kernel("sub");
00073 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00074 prog_.add_kernel("lu_factorize");
00075 prog_.add_kernel("add");
00076 prog_.add_kernel("fft_direct");
00077 prog_.add_kernel("vec_mul");
00078 prog_.add_kernel("fft_radix2_local");
00079 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00080 prog_.add_kernel("inplace_divide");
00081 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00082 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00083 prog_.add_kernel("inplace_add");
00084 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00085 prog_.add_kernel("scaled_rank1_update");
00086 prog_.add_kernel("clear");
00087 prog_.add_kernel("fft_radix2");
00088 prog_.add_kernel("cpu_inplace_mult");
00089 prog_.add_kernel("fft_reorder");
00090 prog_.add_kernel("inplace_mult");
00091 prog_.add_kernel("upper_triangular_substitute_inplace");
00092 init_done[context_.handle()] = true;
00093 }
00094 }
00095 };
00096
00097 template <>
00098 struct matrix_row<float, 1>
00099 {
00100 static std::string program_name()
00101 {
00102 return "f_matrix_row_1";
00103 }
00104 static void init()
00105 {
00106 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00107 static std::map<cl_context, bool> init_done;
00108 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00109 if (!init_done[context_.handle()])
00110 {
00111 std::string source;
00112 source.append(matrix_row_align1_unit_lower_triangular_substitute_inplace);
00113 source.append(matrix_row_align1_inplace_sub);
00114 source.append(matrix_row_align1_lower_triangular_substitute_inplace);
00115 source.append(matrix_row_align1_trans_vec_mul);
00116 source.append(matrix_row_align1_rank1_update);
00117 source.append(matrix_row_align1_sub);
00118 source.append(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace);
00119 source.append(matrix_row_align1_lu_factorize);
00120 source.append(matrix_row_align1_add);
00121 source.append(matrix_row_align1_fft_direct);
00122 source.append(matrix_row_align1_vec_mul);
00123 source.append(matrix_row_align1_fft_radix2_local);
00124 source.append(matrix_row_align1_trans_lower_triangular_substitute_inplace);
00125 source.append(matrix_row_align1_inplace_divide);
00126 source.append(matrix_row_align1_trans_upper_triangular_substitute_inplace);
00127 source.append(matrix_row_align1_unit_upper_triangular_substitute_inplace);
00128 source.append(matrix_row_align1_inplace_add);
00129 source.append(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace);
00130 source.append(matrix_row_align1_scaled_rank1_update);
00131 source.append(matrix_row_align1_clear);
00132 source.append(matrix_row_align1_fft_radix2);
00133 source.append(matrix_row_align1_cpu_inplace_mult);
00134 source.append(matrix_row_align1_fft_reorder);
00135 source.append(matrix_row_align1_inplace_mult);
00136 source.append(matrix_row_align1_upper_triangular_substitute_inplace);
00137 std::string prog_name = program_name();
00138 #ifdef VIENNACL_BUILD_INFO
00139 std::cout << "Creating program " << prog_name << std::endl;
00140 #endif
00141 context_.add_program(source, prog_name);
00142 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00143 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00144 prog_.add_kernel("inplace_sub");
00145 prog_.add_kernel("lower_triangular_substitute_inplace");
00146 prog_.add_kernel("trans_vec_mul");
00147 prog_.add_kernel("rank1_update");
00148 prog_.add_kernel("sub");
00149 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00150 prog_.add_kernel("lu_factorize");
00151 prog_.add_kernel("add");
00152 prog_.add_kernel("fft_direct");
00153 prog_.add_kernel("vec_mul");
00154 prog_.add_kernel("fft_radix2_local");
00155 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00156 prog_.add_kernel("inplace_divide");
00157 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00158 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00159 prog_.add_kernel("inplace_add");
00160 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00161 prog_.add_kernel("scaled_rank1_update");
00162 prog_.add_kernel("clear");
00163 prog_.add_kernel("fft_radix2");
00164 prog_.add_kernel("cpu_inplace_mult");
00165 prog_.add_kernel("fft_reorder");
00166 prog_.add_kernel("inplace_mult");
00167 prog_.add_kernel("upper_triangular_substitute_inplace");
00168 init_done[context_.handle()] = true;
00169 }
00170 }
00171 };
00172
00173
00174
00176 template <>
00177 struct matrix_row<double, 16>
00178 {
00179 static std::string program_name()
00180 {
00181 return "d_matrix_row_16";
00182 }
00183 static void init()
00184 {
00185 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00186 static std::map<cl_context, bool> init_done;
00187 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00188 if (!init_done[context_.handle()])
00189 {
00190 std::string source;
00191 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00192 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_lower_triangular_substitute_inplace, fp64_ext));
00193 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_sub, fp64_ext));
00194 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lower_triangular_substitute_inplace, fp64_ext));
00195 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_vec_mul, fp64_ext));
00196 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_rank1_update, fp64_ext));
00197 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_sub, fp64_ext));
00198 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace, fp64_ext));
00199 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lu_factorize, fp64_ext));
00200 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_add, fp64_ext));
00201 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_direct, fp64_ext));
00202 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_vec_mul, fp64_ext));
00203 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_radix2_local, fp64_ext));
00204 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_lower_triangular_substitute_inplace, fp64_ext));
00205 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_divide, fp64_ext));
00206 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_upper_triangular_substitute_inplace, fp64_ext));
00207 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_upper_triangular_substitute_inplace, fp64_ext));
00208 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_add, fp64_ext));
00209 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace, fp64_ext));
00210 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_scaled_rank1_update, fp64_ext));
00211 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_clear, fp64_ext));
00212 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_radix2, fp64_ext));
00213 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_cpu_inplace_mult, fp64_ext));
00214 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_reorder, fp64_ext));
00215 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_mult, fp64_ext));
00216 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_upper_triangular_substitute_inplace, fp64_ext));
00217 std::string prog_name = program_name();
00218 #ifdef VIENNACL_BUILD_INFO
00219 std::cout << "Creating program " << prog_name << std::endl;
00220 #endif
00221 context_.add_program(source, prog_name);
00222 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00223 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00224 prog_.add_kernel("inplace_sub");
00225 prog_.add_kernel("lower_triangular_substitute_inplace");
00226 prog_.add_kernel("trans_vec_mul");
00227 prog_.add_kernel("rank1_update");
00228 prog_.add_kernel("sub");
00229 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00230 prog_.add_kernel("lu_factorize");
00231 prog_.add_kernel("add");
00232 prog_.add_kernel("fft_direct");
00233 prog_.add_kernel("vec_mul");
00234 prog_.add_kernel("fft_radix2_local");
00235 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00236 prog_.add_kernel("inplace_divide");
00237 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00238 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00239 prog_.add_kernel("inplace_add");
00240 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00241 prog_.add_kernel("scaled_rank1_update");
00242 prog_.add_kernel("clear");
00243 prog_.add_kernel("fft_radix2");
00244 prog_.add_kernel("cpu_inplace_mult");
00245 prog_.add_kernel("fft_reorder");
00246 prog_.add_kernel("inplace_mult");
00247 prog_.add_kernel("upper_triangular_substitute_inplace");
00248 init_done[context_.handle()] = true;
00249 }
00250 }
00251 };
00252
00253 template <>
00254 struct matrix_row<double, 1>
00255 {
00256 static std::string program_name()
00257 {
00258 return "d_matrix_row_1";
00259 }
00260 static void init()
00261 {
00262 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00263 static std::map<cl_context, bool> init_done;
00264 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00265 if (!init_done[context_.handle()])
00266 {
00267 std::string source;
00268 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00269 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_lower_triangular_substitute_inplace, fp64_ext));
00270 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_sub, fp64_ext));
00271 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lower_triangular_substitute_inplace, fp64_ext));
00272 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_vec_mul, fp64_ext));
00273 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_rank1_update, fp64_ext));
00274 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_sub, fp64_ext));
00275 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace, fp64_ext));
00276 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lu_factorize, fp64_ext));
00277 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_add, fp64_ext));
00278 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_direct, fp64_ext));
00279 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_vec_mul, fp64_ext));
00280 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_radix2_local, fp64_ext));
00281 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_lower_triangular_substitute_inplace, fp64_ext));
00282 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_divide, fp64_ext));
00283 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_upper_triangular_substitute_inplace, fp64_ext));
00284 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_upper_triangular_substitute_inplace, fp64_ext));
00285 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_add, fp64_ext));
00286 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace, fp64_ext));
00287 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_scaled_rank1_update, fp64_ext));
00288 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_clear, fp64_ext));
00289 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_radix2, fp64_ext));
00290 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_cpu_inplace_mult, fp64_ext));
00291 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_fft_reorder, fp64_ext));
00292 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_mult, fp64_ext));
00293 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_upper_triangular_substitute_inplace, fp64_ext));
00294 std::string prog_name = program_name();
00295 #ifdef VIENNACL_BUILD_INFO
00296 std::cout << "Creating program " << prog_name << std::endl;
00297 #endif
00298 context_.add_program(source, prog_name);
00299 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00300 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00301 prog_.add_kernel("inplace_sub");
00302 prog_.add_kernel("lower_triangular_substitute_inplace");
00303 prog_.add_kernel("trans_vec_mul");
00304 prog_.add_kernel("rank1_update");
00305 prog_.add_kernel("sub");
00306 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00307 prog_.add_kernel("lu_factorize");
00308 prog_.add_kernel("add");
00309 prog_.add_kernel("fft_direct");
00310 prog_.add_kernel("vec_mul");
00311 prog_.add_kernel("fft_radix2_local");
00312 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00313 prog_.add_kernel("inplace_divide");
00314 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00315 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00316 prog_.add_kernel("inplace_add");
00317 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00318 prog_.add_kernel("scaled_rank1_update");
00319 prog_.add_kernel("clear");
00320 prog_.add_kernel("fft_radix2");
00321 prog_.add_kernel("cpu_inplace_mult");
00322 prog_.add_kernel("fft_reorder");
00323 prog_.add_kernel("inplace_mult");
00324 prog_.add_kernel("upper_triangular_substitute_inplace");
00325 init_done[context_.handle()] = true;
00326 }
00327 }
00328 };
00329
00330
00331 }
00332 }
00333 }
00334 #endif