00001 #ifndef _VIENNACL_VECTOR_KERNELS_HPP_
00002 #define _VIENNACL_VECTOR_KERNELS_HPP_
00003 #include "viennacl/tools/tools.hpp"
00004 #include "viennacl/ocl/kernel.hpp"
00005 #include "viennacl/ocl/platform.hpp"
00006 #include "viennacl/ocl/utils.hpp"
00007 #include "viennacl/linalg/kernels/vector_source.h"
00008
00009
00010 namespace viennacl
00011 {
00012 namespace linalg
00013 {
00014 namespace kernels
00015 {
/**
 * Primary template for the vector kernel program holders.
 *
 * Only declared here, never defined: this header supplies explicit
 * specializations for float and double with alignment 1, 4 and 16.
 */
template <typename TYPE, unsigned int alignment>
struct vector;
00018
00019
00021 template <>
00022 struct vector<float, 16>
00023 {
00024 static std::string program_name()
00025 {
00026 return "f_vector_16";
00027 }
00028 static void init()
00029 {
00030 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00031 static std::map<cl_context, bool> init_done;
00032 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00033 if (!init_done[context_.handle()])
00034 {
00035 std::string source;
00036 source.append(vector_align16_inplace_sub);
00037 source.append(vector_align1_norm_inf);
00038 source.append(vector_align1_index_norm_inf);
00039 source.append(vector_align16_mult);
00040 source.append(vector_align1_swap);
00041 source.append(vector_align4_inplace_div_add);
00042 source.append(vector_align1_norm_2);
00043 source.append(vector_align16_sub);
00044 source.append(vector_align4_cpu_mul_add);
00045 source.append(vector_align1_vmax);
00046 source.append(vector_align1_inner_prod);
00047 source.append(vector_align16_add);
00048 source.append(vector_align1_plane_rotation);
00049 source.append(vector_align4_inplace_mul_sub);
00050 source.append(vector_align4_inplace_mul_add);
00051 source.append(vector_align4_mul_add);
00052 source.append(vector_align16_cpu_mult);
00053 source.append(vector_align16_inplace_divide);
00054 source.append(vector_align1_sqrt_sum);
00055 source.append(vector_align4_cpu_inplace_mul_add);
00056 source.append(vector_align16_inplace_add);
00057 source.append(vector_align16_divide);
00058 source.append(vector_align1_norm_1);
00059 source.append(vector_align1_clear);
00060 source.append(vector_align1_cpu_inplace_mult);
00061 source.append(vector_align16_inplace_mult);
00062 source.append(vector_align1_sum);
00063 source.append(vector_align4_inplace_div_sub);
00064 source.append(vector_align1_diag_precond);
00065 source.append(vector_align1_mul_sub);
00066 std::string prog_name = program_name();
00067 #ifdef VIENNACL_BUILD_INFO
00068 std::cout << "Creating program " << prog_name << std::endl;
00069 #endif
00070 context_.add_program(source, prog_name);
00071 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00072 prog_.add_kernel("inplace_sub");
00073 prog_.add_kernel("norm_inf");
00074 prog_.add_kernel("index_norm_inf");
00075 prog_.add_kernel("mult");
00076 prog_.add_kernel("swap");
00077 prog_.add_kernel("inplace_div_add");
00078 prog_.add_kernel("norm_2");
00079 prog_.add_kernel("sub");
00080 prog_.add_kernel("cpu_mul_add");
00081 prog_.add_kernel("vmax");
00082 prog_.add_kernel("inner_prod");
00083 prog_.add_kernel("add");
00084 prog_.add_kernel("plane_rotation");
00085 prog_.add_kernel("inplace_mul_sub");
00086 prog_.add_kernel("inplace_mul_add");
00087 prog_.add_kernel("mul_add");
00088 prog_.add_kernel("cpu_mult");
00089 prog_.add_kernel("inplace_divide");
00090 prog_.add_kernel("sqrt_sum");
00091 prog_.add_kernel("cpu_inplace_mul_add");
00092 prog_.add_kernel("inplace_add");
00093 prog_.add_kernel("divide");
00094 prog_.add_kernel("norm_1");
00095 prog_.add_kernel("clear");
00096 prog_.add_kernel("cpu_inplace_mult");
00097 prog_.add_kernel("inplace_mult");
00098 prog_.add_kernel("sum");
00099 prog_.add_kernel("inplace_div_sub");
00100 prog_.add_kernel("diag_precond");
00101 prog_.add_kernel("mul_sub");
00102 init_done[context_.handle()] = true;
00103 }
00104 }
00105 };
00106
00107 template <>
00108 struct vector<float, 4>
00109 {
00110 static std::string program_name()
00111 {
00112 return "f_vector_4";
00113 }
00114 static void init()
00115 {
00116 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00117 static std::map<cl_context, bool> init_done;
00118 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00119 if (!init_done[context_.handle()])
00120 {
00121 std::string source;
00122 source.append(vector_align1_inplace_sub);
00123 source.append(vector_align1_norm_inf);
00124 source.append(vector_align1_index_norm_inf);
00125 source.append(vector_align1_mult);
00126 source.append(vector_align1_swap);
00127 source.append(vector_align4_inplace_div_add);
00128 source.append(vector_align1_norm_2);
00129 source.append(vector_align1_sub);
00130 source.append(vector_align4_cpu_mul_add);
00131 source.append(vector_align1_vmax);
00132 source.append(vector_align1_inner_prod);
00133 source.append(vector_align1_add);
00134 source.append(vector_align1_plane_rotation);
00135 source.append(vector_align4_inplace_mul_sub);
00136 source.append(vector_align4_inplace_mul_add);
00137 source.append(vector_align4_mul_add);
00138 source.append(vector_align1_cpu_mult);
00139 source.append(vector_align1_inplace_divide);
00140 source.append(vector_align1_sqrt_sum);
00141 source.append(vector_align4_cpu_inplace_mul_add);
00142 source.append(vector_align1_inplace_add);
00143 source.append(vector_align1_divide);
00144 source.append(vector_align1_norm_1);
00145 source.append(vector_align1_clear);
00146 source.append(vector_align1_cpu_inplace_mult);
00147 source.append(vector_align1_inplace_mult);
00148 source.append(vector_align1_sum);
00149 source.append(vector_align4_inplace_div_sub);
00150 source.append(vector_align1_diag_precond);
00151 source.append(vector_align1_mul_sub);
00152 std::string prog_name = program_name();
00153 #ifdef VIENNACL_BUILD_INFO
00154 std::cout << "Creating program " << prog_name << std::endl;
00155 #endif
00156 context_.add_program(source, prog_name);
00157 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00158 prog_.add_kernel("inplace_sub");
00159 prog_.add_kernel("norm_inf");
00160 prog_.add_kernel("index_norm_inf");
00161 prog_.add_kernel("mult");
00162 prog_.add_kernel("swap");
00163 prog_.add_kernel("inplace_div_add");
00164 prog_.add_kernel("norm_2");
00165 prog_.add_kernel("sub");
00166 prog_.add_kernel("cpu_mul_add");
00167 prog_.add_kernel("vmax");
00168 prog_.add_kernel("inner_prod");
00169 prog_.add_kernel("add");
00170 prog_.add_kernel("plane_rotation");
00171 prog_.add_kernel("inplace_mul_sub");
00172 prog_.add_kernel("inplace_mul_add");
00173 prog_.add_kernel("mul_add");
00174 prog_.add_kernel("cpu_mult");
00175 prog_.add_kernel("inplace_divide");
00176 prog_.add_kernel("sqrt_sum");
00177 prog_.add_kernel("cpu_inplace_mul_add");
00178 prog_.add_kernel("inplace_add");
00179 prog_.add_kernel("divide");
00180 prog_.add_kernel("norm_1");
00181 prog_.add_kernel("clear");
00182 prog_.add_kernel("cpu_inplace_mult");
00183 prog_.add_kernel("inplace_mult");
00184 prog_.add_kernel("sum");
00185 prog_.add_kernel("inplace_div_sub");
00186 prog_.add_kernel("diag_precond");
00187 prog_.add_kernel("mul_sub");
00188 init_done[context_.handle()] = true;
00189 }
00190 }
00191 };
00192
00193 template <>
00194 struct vector<float, 1>
00195 {
00196 static std::string program_name()
00197 {
00198 return "f_vector_1";
00199 }
00200 static void init()
00201 {
00202 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00203 static std::map<cl_context, bool> init_done;
00204 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00205 if (!init_done[context_.handle()])
00206 {
00207 std::string source;
00208 source.append(vector_align1_inplace_sub);
00209 source.append(vector_align1_norm_inf);
00210 source.append(vector_align1_index_norm_inf);
00211 source.append(vector_align1_mult);
00212 source.append(vector_align1_swap);
00213 source.append(vector_align1_inplace_div_add);
00214 source.append(vector_align1_norm_2);
00215 source.append(vector_align1_sub);
00216 source.append(vector_align1_cpu_mul_add);
00217 source.append(vector_align1_vmax);
00218 source.append(vector_align1_inner_prod);
00219 source.append(vector_align1_add);
00220 source.append(vector_align1_plane_rotation);
00221 source.append(vector_align1_inplace_mul_sub);
00222 source.append(vector_align1_inplace_mul_add);
00223 source.append(vector_align1_mul_add);
00224 source.append(vector_align1_cpu_mult);
00225 source.append(vector_align1_inplace_divide);
00226 source.append(vector_align1_sqrt_sum);
00227 source.append(vector_align1_cpu_inplace_mul_add);
00228 source.append(vector_align1_inplace_add);
00229 source.append(vector_align1_divide);
00230 source.append(vector_align1_norm_1);
00231 source.append(vector_align1_clear);
00232 source.append(vector_align1_cpu_inplace_mult);
00233 source.append(vector_align1_inplace_mult);
00234 source.append(vector_align1_sum);
00235 source.append(vector_align1_inplace_div_sub);
00236 source.append(vector_align1_diag_precond);
00237 source.append(vector_align1_mul_sub);
00238 std::string prog_name = program_name();
00239 #ifdef VIENNACL_BUILD_INFO
00240 std::cout << "Creating program " << prog_name << std::endl;
00241 #endif
00242 context_.add_program(source, prog_name);
00243 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00244 prog_.add_kernel("inplace_sub");
00245 prog_.add_kernel("norm_inf");
00246 prog_.add_kernel("index_norm_inf");
00247 prog_.add_kernel("mult");
00248 prog_.add_kernel("swap");
00249 prog_.add_kernel("inplace_div_add");
00250 prog_.add_kernel("norm_2");
00251 prog_.add_kernel("sub");
00252 prog_.add_kernel("cpu_mul_add");
00253 prog_.add_kernel("vmax");
00254 prog_.add_kernel("inner_prod");
00255 prog_.add_kernel("add");
00256 prog_.add_kernel("plane_rotation");
00257 prog_.add_kernel("inplace_mul_sub");
00258 prog_.add_kernel("inplace_mul_add");
00259 prog_.add_kernel("mul_add");
00260 prog_.add_kernel("cpu_mult");
00261 prog_.add_kernel("inplace_divide");
00262 prog_.add_kernel("sqrt_sum");
00263 prog_.add_kernel("cpu_inplace_mul_add");
00264 prog_.add_kernel("inplace_add");
00265 prog_.add_kernel("divide");
00266 prog_.add_kernel("norm_1");
00267 prog_.add_kernel("clear");
00268 prog_.add_kernel("cpu_inplace_mult");
00269 prog_.add_kernel("inplace_mult");
00270 prog_.add_kernel("sum");
00271 prog_.add_kernel("inplace_div_sub");
00272 prog_.add_kernel("diag_precond");
00273 prog_.add_kernel("mul_sub");
00274 init_done[context_.handle()] = true;
00275 }
00276 }
00277 };
00278
00279
00280
00282 template <>
00283 struct vector<double, 16>
00284 {
00285 static std::string program_name()
00286 {
00287 return "d_vector_16";
00288 }
00289 static void init()
00290 {
00291 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00292 static std::map<cl_context, bool> init_done;
00293 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00294 if (!init_done[context_.handle()])
00295 {
00296 std::string source;
00297 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00298 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_sub, fp64_ext));
00299 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext));
00300 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext));
00301 source.append(viennacl::tools::make_double_kernel(vector_align16_mult, fp64_ext));
00302 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext));
00303 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_add, fp64_ext));
00304 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext));
00305 source.append(viennacl::tools::make_double_kernel(vector_align16_sub, fp64_ext));
00306 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_mul_add, fp64_ext));
00307 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext));
00308 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext));
00309 source.append(viennacl::tools::make_double_kernel(vector_align16_add, fp64_ext));
00310 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext));
00311 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_sub, fp64_ext));
00312 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_add, fp64_ext));
00313 source.append(viennacl::tools::make_double_kernel(vector_align4_mul_add, fp64_ext));
00314 source.append(viennacl::tools::make_double_kernel(vector_align16_cpu_mult, fp64_ext));
00315 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_divide, fp64_ext));
00316 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext));
00317 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_inplace_mul_add, fp64_ext));
00318 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_add, fp64_ext));
00319 source.append(viennacl::tools::make_double_kernel(vector_align16_divide, fp64_ext));
00320 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext));
00321 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext));
00322 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext));
00323 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_mult, fp64_ext));
00324 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext));
00325 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_sub, fp64_ext));
00326 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext));
00327 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext));
00328 std::string prog_name = program_name();
00329 #ifdef VIENNACL_BUILD_INFO
00330 std::cout << "Creating program " << prog_name << std::endl;
00331 #endif
00332 context_.add_program(source, prog_name);
00333 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00334 prog_.add_kernel("inplace_sub");
00335 prog_.add_kernel("norm_inf");
00336 prog_.add_kernel("index_norm_inf");
00337 prog_.add_kernel("mult");
00338 prog_.add_kernel("swap");
00339 prog_.add_kernel("inplace_div_add");
00340 prog_.add_kernel("norm_2");
00341 prog_.add_kernel("sub");
00342 prog_.add_kernel("cpu_mul_add");
00343 prog_.add_kernel("vmax");
00344 prog_.add_kernel("inner_prod");
00345 prog_.add_kernel("add");
00346 prog_.add_kernel("plane_rotation");
00347 prog_.add_kernel("inplace_mul_sub");
00348 prog_.add_kernel("inplace_mul_add");
00349 prog_.add_kernel("mul_add");
00350 prog_.add_kernel("cpu_mult");
00351 prog_.add_kernel("inplace_divide");
00352 prog_.add_kernel("sqrt_sum");
00353 prog_.add_kernel("cpu_inplace_mul_add");
00354 prog_.add_kernel("inplace_add");
00355 prog_.add_kernel("divide");
00356 prog_.add_kernel("norm_1");
00357 prog_.add_kernel("clear");
00358 prog_.add_kernel("cpu_inplace_mult");
00359 prog_.add_kernel("inplace_mult");
00360 prog_.add_kernel("sum");
00361 prog_.add_kernel("inplace_div_sub");
00362 prog_.add_kernel("diag_precond");
00363 prog_.add_kernel("mul_sub");
00364 init_done[context_.handle()] = true;
00365 }
00366 }
00367 };
00368
00369 template <>
00370 struct vector<double, 4>
00371 {
00372 static std::string program_name()
00373 {
00374 return "d_vector_4";
00375 }
00376 static void init()
00377 {
00378 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00379 static std::map<cl_context, bool> init_done;
00380 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00381 if (!init_done[context_.handle()])
00382 {
00383 std::string source;
00384 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00385 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_sub, fp64_ext));
00386 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext));
00387 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext));
00388 source.append(viennacl::tools::make_double_kernel(vector_align1_mult, fp64_ext));
00389 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext));
00390 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_add, fp64_ext));
00391 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext));
00392 source.append(viennacl::tools::make_double_kernel(vector_align1_sub, fp64_ext));
00393 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_mul_add, fp64_ext));
00394 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext));
00395 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext));
00396 source.append(viennacl::tools::make_double_kernel(vector_align1_add, fp64_ext));
00397 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext));
00398 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_sub, fp64_ext));
00399 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_add, fp64_ext));
00400 source.append(viennacl::tools::make_double_kernel(vector_align4_mul_add, fp64_ext));
00401 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mult, fp64_ext));
00402 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_divide, fp64_ext));
00403 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext));
00404 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_inplace_mul_add, fp64_ext));
00405 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_add, fp64_ext));
00406 source.append(viennacl::tools::make_double_kernel(vector_align1_divide, fp64_ext));
00407 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext));
00408 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext));
00409 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext));
00410 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mult, fp64_ext));
00411 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext));
00412 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_sub, fp64_ext));
00413 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext));
00414 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext));
00415 std::string prog_name = program_name();
00416 #ifdef VIENNACL_BUILD_INFO
00417 std::cout << "Creating program " << prog_name << std::endl;
00418 #endif
00419 context_.add_program(source, prog_name);
00420 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00421 prog_.add_kernel("inplace_sub");
00422 prog_.add_kernel("norm_inf");
00423 prog_.add_kernel("index_norm_inf");
00424 prog_.add_kernel("mult");
00425 prog_.add_kernel("swap");
00426 prog_.add_kernel("inplace_div_add");
00427 prog_.add_kernel("norm_2");
00428 prog_.add_kernel("sub");
00429 prog_.add_kernel("cpu_mul_add");
00430 prog_.add_kernel("vmax");
00431 prog_.add_kernel("inner_prod");
00432 prog_.add_kernel("add");
00433 prog_.add_kernel("plane_rotation");
00434 prog_.add_kernel("inplace_mul_sub");
00435 prog_.add_kernel("inplace_mul_add");
00436 prog_.add_kernel("mul_add");
00437 prog_.add_kernel("cpu_mult");
00438 prog_.add_kernel("inplace_divide");
00439 prog_.add_kernel("sqrt_sum");
00440 prog_.add_kernel("cpu_inplace_mul_add");
00441 prog_.add_kernel("inplace_add");
00442 prog_.add_kernel("divide");
00443 prog_.add_kernel("norm_1");
00444 prog_.add_kernel("clear");
00445 prog_.add_kernel("cpu_inplace_mult");
00446 prog_.add_kernel("inplace_mult");
00447 prog_.add_kernel("sum");
00448 prog_.add_kernel("inplace_div_sub");
00449 prog_.add_kernel("diag_precond");
00450 prog_.add_kernel("mul_sub");
00451 init_done[context_.handle()] = true;
00452 }
00453 }
00454 };
00455
00456 template <>
00457 struct vector<double, 1>
00458 {
00459 static std::string program_name()
00460 {
00461 return "d_vector_1";
00462 }
00463 static void init()
00464 {
00465 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00466 static std::map<cl_context, bool> init_done;
00467 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00468 if (!init_done[context_.handle()])
00469 {
00470 std::string source;
00471 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00472 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_sub, fp64_ext));
00473 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext));
00474 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext));
00475 source.append(viennacl::tools::make_double_kernel(vector_align1_mult, fp64_ext));
00476 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext));
00477 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_div_add, fp64_ext));
00478 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext));
00479 source.append(viennacl::tools::make_double_kernel(vector_align1_sub, fp64_ext));
00480 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mul_add, fp64_ext));
00481 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext));
00482 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext));
00483 source.append(viennacl::tools::make_double_kernel(vector_align1_add, fp64_ext));
00484 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext));
00485 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mul_sub, fp64_ext));
00486 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mul_add, fp64_ext));
00487 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_add, fp64_ext));
00488 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mult, fp64_ext));
00489 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_divide, fp64_ext));
00490 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext));
00491 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mul_add, fp64_ext));
00492 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_add, fp64_ext));
00493 source.append(viennacl::tools::make_double_kernel(vector_align1_divide, fp64_ext));
00494 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext));
00495 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext));
00496 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext));
00497 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mult, fp64_ext));
00498 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext));
00499 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_div_sub, fp64_ext));
00500 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext));
00501 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext));
00502 std::string prog_name = program_name();
00503 #ifdef VIENNACL_BUILD_INFO
00504 std::cout << "Creating program " << prog_name << std::endl;
00505 #endif
00506 context_.add_program(source, prog_name);
00507 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00508 prog_.add_kernel("inplace_sub");
00509 prog_.add_kernel("norm_inf");
00510 prog_.add_kernel("index_norm_inf");
00511 prog_.add_kernel("mult");
00512 prog_.add_kernel("swap");
00513 prog_.add_kernel("inplace_div_add");
00514 prog_.add_kernel("norm_2");
00515 prog_.add_kernel("sub");
00516 prog_.add_kernel("cpu_mul_add");
00517 prog_.add_kernel("vmax");
00518 prog_.add_kernel("inner_prod");
00519 prog_.add_kernel("add");
00520 prog_.add_kernel("plane_rotation");
00521 prog_.add_kernel("inplace_mul_sub");
00522 prog_.add_kernel("inplace_mul_add");
00523 prog_.add_kernel("mul_add");
00524 prog_.add_kernel("cpu_mult");
00525 prog_.add_kernel("inplace_divide");
00526 prog_.add_kernel("sqrt_sum");
00527 prog_.add_kernel("cpu_inplace_mul_add");
00528 prog_.add_kernel("inplace_add");
00529 prog_.add_kernel("divide");
00530 prog_.add_kernel("norm_1");
00531 prog_.add_kernel("clear");
00532 prog_.add_kernel("cpu_inplace_mult");
00533 prog_.add_kernel("inplace_mult");
00534 prog_.add_kernel("sum");
00535 prog_.add_kernel("inplace_div_sub");
00536 prog_.add_kernel("diag_precond");
00537 prog_.add_kernel("mul_sub");
00538 init_done[context_.handle()] = true;
00539 }
00540 }
00541 };
00542
00543
00544 }
00545 }
00546 }
00547 #endif