00001 #ifndef VIENNACL_VECTOR_OPERATIONS_HPP_
00002 #define VIENNACL_VECTOR_OPERATIONS_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #include "viennacl/forwards.h"
00025 #include "viennacl/ocl/device.hpp"
00026 #include "viennacl/ocl/handle.hpp"
00027 #include "viennacl/ocl/kernel.hpp"
00028 #include "viennacl/scalar.hpp"
00029 #include "viennacl/tools/tools.hpp"
00030 #include "viennacl/linalg/kernels/vector_kernels.h"
00031 #include "viennacl/meta/predicate.hpp"
00032 #include "viennacl/meta/enable_if.hpp"
00033 #include "viennacl/traits/size.hpp"
00034 #include "viennacl/traits/start.hpp"
00035 #include "viennacl/traits/handle.hpp"
00036
00037 namespace viennacl
00038 {
00039 namespace linalg
00040 {
00047 template <typename V1, typename V2, typename V3>
00048 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00049 && viennacl::is_vector<V2>::value
00050 && viennacl::is_vector<V3>::value
00051 >::type
00052 add(const V1 & vec1,
00053 const V2 & vec2,
00054 V3 & result)
00055 {
00056 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00057
00058
00059 const unsigned int ALIGNMENT = V1::alignment;
00060
00061 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00062 && (viennacl::traits::size(vec1) == viennacl::traits::size(result))
00063 && "Incompatible vector sizes in add()!");
00064
00065
00066
00067
00068 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "add");
00069
00070 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00071 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00072 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)) )
00073 );
00074 }
00075
00083 template <typename V1, typename V2>
00084 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00085 && viennacl::is_vector<V2>::value
00086 >::type
00087 inplace_add(V1 & vec1,
00088 const V2 & vec2)
00089 {
00090 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00091
00092
00093 const unsigned int ALIGNMENT = V1::alignment;
00094
00095 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00096 && "Incompatible vector sizes in inplace_add()!");
00097
00098
00099
00100
00101 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_add");
00102
00103 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00104 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)))
00105 );
00106 }
00107
00108
00109
00118 template <typename V1, typename V2, typename V3>
00119 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00120 && viennacl::is_vector<V2>::value
00121 && viennacl::is_vector<V3>::value
00122 >::type
00123 sub(const V1 & vec1,
00124 const V2 & vec2,
00125 V3 & result)
00126 {
00127 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00128
00129
00130 const unsigned int ALIGNMENT = V1::alignment;
00131
00132 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00133 && (viennacl::traits::size(vec1) == viennacl::traits::size(result))
00134 && "Incompatible vector sizes in sub()!");
00135
00136
00137 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "sub");
00138
00139 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00140 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00141 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)) )
00142 );
00143 }
00144
00152 template <typename V1, typename V2>
00153 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00154 && viennacl::is_vector<V2>::value
00155 >::type
00156 inplace_sub(V1 & vec1,
00157 const V2 & vec2)
00158 {
00159 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00160
00161
00162 const unsigned int ALIGNMENT = V1::alignment;
00163
00164 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00165 && "Incompatible vector sizes in inplace_sub()!");
00166
00167
00168 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_sub");
00169
00170 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00171 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)))
00172 );
00173 }
00174
00175
00176
00185 template <typename V1, typename S2, typename V3>
00186 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00187 && viennacl::is_scalar<S2>::value
00188 && viennacl::is_vector<V3>::value
00189 >::type
00190 mult(const V1 & vec,
00191 S2 const & alpha,
00192 V3 & result)
00193 {
00194 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00195
00196
00197 const unsigned int ALIGNMENT = V1::alignment;
00198
00199 assert( (viennacl::traits::size(vec) == viennacl::traits::size(result))
00200 && "Incompatible vector sizes in mult()!");
00201
00202 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "mult");
00203
00204 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00205 alpha,
00206 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00207 );
00208 }
00209
00218 template <typename V1, typename SCALARTYPE, typename V3>
00219 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00220 && viennacl::is_cpu_scalar<SCALARTYPE>::value
00221 && viennacl::is_vector<V3>::value
00222 >::type
00223 mult(V1 const & vec,
00224 SCALARTYPE alpha,
00225 V3 & result)
00226 {
00227 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00228
00229
00230 const unsigned int ALIGNMENT = V1::alignment;
00231
00232 assert( (viennacl::traits::size(vec) == viennacl::traits::size(result))
00233 && "Incompatible vector sizes in mult()!");
00234
00235 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "cpu_mult");
00236
00237 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00238 static_cast<value_type>(alpha),
00239 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00240 );
00241 }
00242
00250 template <typename V1, typename S2>
00251 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00252 && viennacl::is_scalar<S2>::value
00253 >::type
00254 inplace_mult(V1 & vec,
00255 S2 const & alpha)
00256 {
00257 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00258
00259
00260 const unsigned int ALIGNMENT = V1::alignment;
00261
00262 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_mult");
00263
00264 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00265 alpha)
00266 );
00267 }
00268
00276 template <typename V1, typename S2>
00277 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00278 && viennacl::is_cpu_scalar<S2>::value
00279 >::type
00280 inplace_mult(V1 & vec,
00281 S2 alpha)
00282 {
00283 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00284
00285
00286 const unsigned int ALIGNMENT = V1::alignment;
00287
00288 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "cpu_inplace_mult");
00289
00290 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00291 static_cast<value_type>(alpha))
00292 );
00293 }
00294
00295
00304 template <typename V1, typename S2, typename V3>
00305 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00306 && viennacl::is_scalar<S2>::value
00307 && viennacl::is_vector<V3>::value
00308 >::type
00309 divide(V1 const & vec,
00310 S2 const & alpha,
00311 V3 & result)
00312 {
00313 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00314
00315
00316 const unsigned int ALIGNMENT = V1::alignment;
00317
00318 assert( (viennacl::traits::size(vec) == viennacl::traits::size(result))
00319 && "Incompatible vector sizes in divide()!");
00320
00321 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "divide");
00322
00323 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00324 alpha,
00325 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00326 );
00327 }
00328
00336 template <typename V1, typename S2>
00337 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00338 && viennacl::is_scalar<S2>::value
00339 >::type
00340 inplace_divide(V1 & vec,
00341 S2 const & alpha)
00342 {
00343 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00344
00345
00346 const unsigned int ALIGNMENT = V1::alignment;
00347
00348 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_divide");
00349
00350 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00351 alpha)
00352 );
00353 }
00354
00355
00365 template <typename V1, typename S2, typename V3, typename V4>
00366 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00367 && viennacl::is_scalar<S2>::value
00368 && viennacl::is_vector<V3>::value
00369 && viennacl::is_vector<V4>::value
00370 >::type
00371 mul_add(V1 const & vec1,
00372 S2 const & alpha,
00373 V3 const & vec2,
00374 V4 & result)
00375 {
00376 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00377
00378
00379 const unsigned int ALIGNMENT = V1::alignment;
00380
00381 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00382 && (viennacl::traits::size(vec1) == viennacl::traits::size(result))
00383 && "Incompatible vector sizes in mul_add()!");
00384
00385
00386 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "mul_add");
00387
00388
00389 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00390 alpha,
00391 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00392 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00393 );
00394 }
00395
00405 template <typename V1, typename SCALARTYPE, typename V3, typename V4>
00406 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00407 && viennacl::is_cpu_scalar<SCALARTYPE>::value
00408 && viennacl::is_vector<V3>::value
00409 && viennacl::is_vector<V4>::value
00410 >::type
00411 mul_add(V1 const & vec1,
00412 SCALARTYPE alpha,
00413 V3 const & vec2,
00414 V4 & result)
00415 {
00416 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00417
00418
00419 const unsigned int ALIGNMENT = V1::alignment;
00420
00421 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00422 && (viennacl::traits::size(vec1) == viennacl::traits::size(result))
00423 && "Incompatible vector sizes in mul_add()!");
00424
00425 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "cpu_mul_add");
00426
00427
00428 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00429 static_cast<value_type>(alpha),
00430 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00431 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00432 );
00433 }
00434
00435
00444 template <typename V1, typename V2, typename S3>
00445 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00446 && viennacl::is_vector<V2>::value
00447 && viennacl::is_scalar<S3>::value
00448 >::type
00449 inplace_mul_add(V1 & vec1,
00450 V2 const & vec2,
00451 S3 const & alpha)
00452 {
00453 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00454
00455
00456 const unsigned int ALIGNMENT = V1::alignment;
00457
00458 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00459 && "Incompatible vector sizes in inplace_mul_add()!");
00460
00461
00462
00463 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_mul_add");
00464
00465 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00466 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00467 alpha));
00468 }
00469
00478 template <typename V1, typename V2, typename SCALARTYPE>
00479 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00480 && viennacl::is_vector<V2>::value
00481 && viennacl::is_cpu_scalar<SCALARTYPE>::value
00482 >::type
00483 inplace_mul_add(V1 & vec1,
00484 V2 const & vec2,
00485 SCALARTYPE alpha)
00486 {
00487 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00488
00489
00490 const unsigned int ALIGNMENT = V1::alignment;
00491
00492 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00493 && "Incompatible vector sizes in inplace_mul_add()!");
00494
00495 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "cpu_inplace_mul_add");
00496
00497
00498 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00499 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00500 value_type(alpha)));
00501 }
00502
00512 template <typename V1, typename S2, typename V3, typename V4>
00513 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00514 && viennacl::is_scalar<S2>::value
00515 && viennacl::is_vector<V3>::value
00516 && viennacl::is_vector<V4>::value
00517 >::type
00518 mul_sub(V1 const & vec1,
00519 S2 const & alpha,
00520 V3 const & vec2,
00521 V4 & result)
00522 {
00523 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00524
00525
00526 const unsigned int ALIGNMENT = V1::alignment;
00527
00528 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00529 && (viennacl::traits::size(vec1) == viennacl::traits::size(result))
00530 && "Incompatible vector sizes in mul_sub()!");
00531
00532 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "mul_sub");
00533
00534
00535 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00536 alpha,
00537 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00538 viennacl::traits::handle(result), cl_uint(viennacl::traits::start(result)), cl_uint(viennacl::traits::size(result)))
00539 );
00540 }
00541
00542
00551 template <typename V1, typename V2, typename S3>
00552 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00553 && viennacl::is_vector<V2>::value
00554 && viennacl::is_scalar<S3>::value
00555 >::type
00556 inplace_mul_sub(V1 & vec1,
00557 V2 const & vec2,
00558 S3 const & alpha)
00559 {
00560 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00561
00562
00563 const unsigned int ALIGNMENT = V1::alignment;
00564
00565 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00566 && "Incompatible vector sizes in inplace_mul_sub()!");
00567
00568 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_mul_sub");
00569
00570
00571 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00572 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00573 alpha)
00574 );
00575 }
00576
00585 template <typename V1, typename V2, typename S3>
00586 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00587 && viennacl::is_vector<V2>::value
00588 && viennacl::is_scalar<S3>::value
00589 >::type
00590 inplace_div_add(V1 & vec1,
00591 V2 const & vec2,
00592 S3 const & alpha)
00593 {
00594 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00595
00596
00597 const unsigned int ALIGNMENT = V1::alignment;
00598
00599 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00600 && "Incompatible vector sizes in inplace_div_add()!");
00601
00602 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_div_add");
00603
00604
00605 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00606 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00607 alpha)
00608 );
00609 }
00610
00619 template <typename V1, typename V2, typename S3>
00620 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00621 && viennacl::is_vector<V2>::value
00622 && viennacl::is_scalar<S3>::value
00623 >::type
00624 inplace_div_sub(V1 & vec1,
00625 V2 const & vec2,
00626 S3 const & alpha)
00627 {
00628 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00629
00630
00631 const unsigned int ALIGNMENT = V1::alignment;
00632
00633 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00634 && "Incompatible vector sizes in inplace_div_sub()!");
00635
00636 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inplace_div_sub");
00637
00638
00639 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00640 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00641 alpha)
00642 );
00643 }
00644
00645
00647
00648
00649
00650
00657 template <typename V1, typename V2, typename S3>
00658 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00659 && viennacl::is_vector<V2>::value
00660 && viennacl::is_scalar<S3>::value
00661 >::type
00662 inner_prod_impl(V1 const & vec1,
00663 V2 const & vec2,
00664 S3 & result)
00665 {
00666 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00667
00668
00669 const unsigned int ALIGNMENT = V1::alignment;
00670
00671 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2))
00672 && "Incompatible vector sizes in inner_prod_impl()!");
00673
00674 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "inner_prod");
00675
00676 unsigned int work_groups = k.global_work_size() / k.local_work_size();
00677
00678 static viennacl::vector<value_type> temp(work_groups);
00679
00680
00681
00682 assert( work_groups * k.local_work_size() == k.global_work_size() );
00683 assert( (k.global_work_size() / k.local_work_size()) == 1
00684 || (k.global_work_size() / k.local_work_size()) == 2
00685 || (k.global_work_size() / k.local_work_size()) == 4
00686 || (k.global_work_size() / k.local_work_size()) == 8
00687 || (k.global_work_size() / k.local_work_size()) == 16
00688 || (k.global_work_size() / k.local_work_size()) == 32
00689 || (k.global_work_size() / k.local_work_size()) == 64
00690 || (k.global_work_size() / k.local_work_size()) == 128
00691 || (k.global_work_size() / k.local_work_size()) == 256
00692 || (k.global_work_size() / k.local_work_size()) == 512 );
00693
00694 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00695 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00696 viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size()),
00697 temp));
00698
00699 viennacl::ocl::kernel & ksum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "sum");
00700
00701 ksum.local_work_size(0, work_groups);
00702 ksum.global_work_size(0, work_groups);
00703 viennacl::ocl::enqueue(ksum(viennacl::traits::handle(temp), cl_uint(viennacl::traits::start(temp)), cl_uint(viennacl::traits::size(temp)),
00704 result)
00705 );
00706 }
00707
00708
00715 template <typename V1, typename V2>
00716 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00717 && viennacl::is_vector<V2>::value,
00718 viennacl::scalar_expression< const V1,
00719 const V2,
00720 viennacl::op_inner_prod >
00721 >::type
00722 inner_prod_impl(V1 const & vec1,
00723 V2 const & vec2)
00724 {
00725 return viennacl::scalar_expression< const V1,
00726 const V2,
00727 viennacl::op_inner_prod >(vec1, vec2);
00728 }
00729
00730
00731
00737 template <typename V1, typename S2>
00738 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00739 && viennacl::is_scalar<S2>::value
00740 >::type
00741 norm_1_impl(V1 const & vec,
00742 S2 & result)
00743 {
00744 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00745
00746
00747 const unsigned int ALIGNMENT = V1::alignment;
00748
00749 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "norm_1");
00750
00751
00752 if (k.local_work_size() != k.global_work_size())
00753 {
00754
00755 k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00756 k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00757 }
00758
00759 unsigned int work_groups = k.global_work_size() / k.local_work_size();
00760 viennacl::vector<value_type> temp(work_groups);
00761
00762
00763
00764 assert( work_groups * k.local_work_size() == k.global_work_size() );
00765 assert( (k.global_work_size() / k.local_work_size()) == 1
00766 || (k.global_work_size() / k.local_work_size()) == 2
00767 || (k.global_work_size() / k.local_work_size()) == 4
00768 || (k.global_work_size() / k.local_work_size()) == 8
00769 || (k.global_work_size() / k.local_work_size()) == 16
00770 || (k.global_work_size() / k.local_work_size()) == 32
00771 || (k.global_work_size() / k.local_work_size()) == 64
00772 || (k.global_work_size() / k.local_work_size()) == 128
00773 || (k.global_work_size() / k.local_work_size()) == 256
00774 || (k.global_work_size() / k.local_work_size()) == 512 );
00775
00776 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00777 viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size()),
00778 temp));
00779
00780 viennacl::ocl::kernel & ksum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "sum");
00781
00782 ksum.local_work_size(0, work_groups);
00783 ksum.global_work_size(0, work_groups);
00784 viennacl::ocl::enqueue(ksum(viennacl::traits::handle(temp), cl_uint(viennacl::traits::start(temp)), cl_uint(viennacl::traits::size(temp)),
00785 result)
00786 );
00787 }
00788
00794 template <typename V1, typename S2>
00795 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00796 && viennacl::is_scalar<S2>::value
00797 >::type
00798 norm_2_impl(V1 const & vec,
00799 S2 & result)
00800 {
00801 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00802
00803
00804 const unsigned int ALIGNMENT = V1::alignment;
00805
00806 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "norm_2");
00807
00808
00809 if (k.local_work_size() != k.global_work_size())
00810 {
00811
00812 k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00813 k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00814 }
00815
00816 unsigned int work_groups = k.global_work_size() / k.local_work_size();
00817 viennacl::vector<value_type> temp(work_groups);
00818
00819
00820
00821 assert( work_groups * k.local_work_size() == k.global_work_size() );
00822 assert( (k.global_work_size() / k.local_work_size()) == 1
00823 || (k.global_work_size() / k.local_work_size()) == 2
00824 || (k.global_work_size() / k.local_work_size()) == 4
00825 || (k.global_work_size() / k.local_work_size()) == 8
00826 || (k.global_work_size() / k.local_work_size()) == 16
00827 || (k.global_work_size() / k.local_work_size()) == 32
00828 || (k.global_work_size() / k.local_work_size()) == 64
00829 || (k.global_work_size() / k.local_work_size()) == 128
00830 || (k.global_work_size() / k.local_work_size()) == 256
00831 || (k.global_work_size() / k.local_work_size()) == 512 );
00832
00833 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00834 viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size()),
00835 temp)
00836 );
00837
00838 viennacl::ocl::kernel & sqrt_sum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "sqrt_sum");
00839
00840 sqrt_sum.local_work_size(0, work_groups);
00841 sqrt_sum.global_work_size(0, work_groups);
00842 viennacl::ocl::enqueue(
00843 sqrt_sum(viennacl::traits::handle(temp), cl_uint(viennacl::traits::start(temp)), cl_uint(viennacl::traits::size(temp)),
00844 result)
00845 );
00846 }
00847
00853 template <typename V1, typename S2>
00854 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00855 && viennacl::is_scalar<S2>::value
00856 >::type
00857 norm_inf_impl(V1 const & vec,
00858 S2 & result)
00859 {
00860 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00861
00862
00863 const unsigned int ALIGNMENT = V1::alignment;
00864
00865
00866 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "norm_inf");
00867
00868 if (k.local_work_size() != k.global_work_size())
00869 {
00870
00871 k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00872 k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
00873 }
00874
00875 unsigned int work_groups = k.global_work_size() / k.local_work_size();
00876 viennacl::vector<value_type> temp(work_groups);
00877
00878
00879
00880 assert( work_groups * k.local_work_size() == k.global_work_size() );
00881 assert( work_groups == 1
00882 || work_groups == 2
00883 || work_groups == 4
00884 || work_groups == 8
00885 || work_groups == 16
00886 || work_groups == 32
00887 || work_groups == 64
00888 || work_groups == 128
00889 || work_groups == 256
00890 || work_groups == 512 );
00891
00892 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00893 viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size()),
00894 temp));
00895
00896
00897
00898 viennacl::ocl::kernel & max_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "vmax");
00899 max_kernel.local_work_size(0, work_groups);
00900 max_kernel.global_work_size(0, work_groups);
00901
00902 viennacl::ocl::enqueue(
00903 max_kernel(viennacl::traits::handle(temp), cl_uint(viennacl::traits::start(temp)), cl_uint(viennacl::traits::size(temp)),
00904 result)
00905 );
00906 }
00907
00908
00909
00910
00916 template <typename V1>
00917 typename viennacl::enable_if< viennacl::is_vector<V1>::value,
00918 cl_uint
00919 >::type
00920 index_norm_inf(V1 const & vec)
00921 {
00922 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00923
00924
00925 const unsigned int ALIGNMENT = V1::alignment;
00926
00927 viennacl::ocl::handle<cl_mem> h = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint));
00928
00929 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<value_type, ALIGNMENT>::program_name(), "index_norm_inf");
00930
00931
00932 k.global_work_size(0, k.local_work_size());
00933 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec), cl_uint(viennacl::traits::start(vec)), cl_uint(viennacl::traits::size(vec)),
00934 viennacl::ocl::local_mem(sizeof(value_type) * k.local_work_size()),
00935 viennacl::ocl::local_mem(sizeof(cl_uint) * k.local_work_size()), h));
00936
00937
00938 cl_uint result;
00939 cl_int err;
00940 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), h, CL_TRUE, 0, sizeof(cl_uint), &result, 0, NULL, NULL);
00941 VIENNACL_ERR_CHECK(err);
00942 return result;
00943 }
00944
00945
00955 template <typename V1, typename V2, typename SCALARTYPE>
00956 typename viennacl::enable_if< viennacl::is_vector<V1>::value
00957 && viennacl::is_vector<V2>::value
00958 && viennacl::is_cpu_scalar<SCALARTYPE>::value
00959 >::type
00960 plane_rotation(V1 & vec1,
00961 V2 & vec2,
00962 SCALARTYPE alpha,
00963 SCALARTYPE beta)
00964 {
00965 typedef typename viennacl::result_of::cpu_value_type<V1>::type value_type;
00966
00967
00968 const unsigned int ALIGNMENT = V1::alignment;
00969
00970 assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2));
00971 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "plane_rotation");
00972
00973 viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
00974 viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)),
00975 alpha,
00976 beta)
00977 );
00978 }
00979
00980 }
00981 }
00982
00983
00984 #endif