1 #ifndef VIENNACL_OCL_KERNEL_HPP_
2 #define VIENNACL_OCL_KERNEL_HPP_
26 #include <OpenCL/cl.h>
60 template<
typename KernelType>
66 kernel() : handle_(), p_program_(NULL), p_context_(NULL), name_()
68 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
69 std::cout <<
"ViennaCL: Creating kernel object (default CTOR): " << name_ << std::endl;
74 : handle_(kernel_handle, kernel_context), p_program_(&kernel_program), p_context_(&kernel_context), name_(name)
76 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
77 std::cout <<
"ViennaCL: Creating kernel object (full CTOR): " << name_ << std::endl;
79 set_work_size_defaults();
83 : handle_(other.handle_), p_program_(other.p_program_), p_context_(other.p_context_), name_(other.name_)
85 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
86 std::cout <<
"ViennaCL: Creating kernel object (Copy CTOR): " << name_ << std::endl;
88 local_work_size_[0] = other.local_work_size_[0];
89 local_work_size_[1] = other.local_work_size_[1];
90 local_work_size_[2] = other.local_work_size_[2];
92 global_work_size_[0] = other.global_work_size_[0];
93 global_work_size_[1] = other.global_work_size_[1];
94 global_work_size_[2] = other.global_work_size_[2];
99 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
100 std::cout <<
"ViennaCL: Assigning kernel object: " << other.name_ << std::endl;
102 handle_ = other.handle_;
103 p_program_ = other.p_program_;
104 p_context_ = other.p_context_;
106 local_work_size_[0] = other.local_work_size_[0];
107 local_work_size_[1] = other.local_work_size_[1];
108 local_work_size_[2] = other.local_work_size_[2];
109 global_work_size_[0] = other.global_work_size_[0];
110 global_work_size_[1] = other.global_work_size_[1];
111 global_work_size_[2] = other.global_work_size_[2];
116 void arg(
unsigned int pos, cl_char val)
118 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
119 std::cout <<
"ViennaCL: Setting char kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
121 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_char), (
void*)&val);
126 void arg(
unsigned int pos, cl_uchar val)
128 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
129 std::cout <<
"ViennaCL: Setting unsigned char kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
131 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_uchar), (
void*)&val);
136 void arg(
unsigned int pos, cl_short val)
138 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
139 std::cout <<
"ViennaCL: Setting short kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
141 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_short), (
void*)&val);
146 void arg(
unsigned int pos, cl_ushort val)
148 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
149 std::cout <<
"ViennaCL: Setting unsigned short kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
151 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_ushort), (
void*)&val);
157 void arg(
unsigned int pos, cl_uint val)
159 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
160 std::cout <<
"ViennaCL: Setting unsigned int kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
162 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_uint), (
void*)&val);
169 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
170 std::cout <<
"ViennaCL: Setting packed_cl_uint kernel argument (" << val.
start <<
", " << val.
stride <<
", " << val.
size <<
", " << val.
internal_size <<
") at pos " << pos <<
" for kernel " << name_ << std::endl;
172 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(
packed_cl_uint), (
void*)&val);
177 void arg(
unsigned int pos,
float val)
179 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
180 std::cout <<
"ViennaCL: Setting floating point kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
182 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(float), (
void*)&val);
187 void arg(
unsigned int pos,
double val)
189 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
190 std::cout <<
"ViennaCL: Setting double precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
192 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(double), (
void*)&val);
197 void arg(
unsigned int pos, cl_int val)
199 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
200 std::cout <<
"ViennaCL: Setting int precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
202 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_int), (
void*)&val);
207 void arg(
unsigned int pos, cl_ulong val)
209 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
210 std::cout <<
"ViennaCL: Setting ulong precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
212 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_ulong), (
void*)&val);
217 void arg(
unsigned int pos, cl_long val)
219 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
220 std::cout <<
"ViennaCL: Setting long precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
222 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_long), (
void*)&val);
228 template<
class VCL_TYPE>
229 void arg(
unsigned int pos, VCL_TYPE
const & val)
231 assert(&val.handle().opencl_handle().context() == &handle_.
context() && bool(
"Kernel and memory object not in the same context!"));
233 cl_mem temp = val.handle().opencl_handle().get();
234 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
235 std::cout <<
"ViennaCL: Setting generic kernel argument " << temp <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
237 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_mem), (
void*)&temp);
243 template<
class CL_TYPE>
246 CL_TYPE temp = h.
get();
247 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
248 std::cout <<
"ViennaCL: Setting handle kernel argument " << temp <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
250 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(CL_TYPE), (
void*)&temp);
259 cl_uint
size =
static_cast<cl_uint
>(mem.
size());
260 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
261 std::cout <<
"ViennaCL: Setting local memory kernel argument of size " << size <<
" bytes at pos " << pos <<
" for kernel " << name_ << std::endl;
263 cl_int err = clSetKernelArg(handle_.
get(), pos,
size, 0);
270 template<
typename T0>
278 template<
typename T0,
typename T1>
286 template<
typename T0,
typename T1,
typename T2>
294 template<
typename T0,
typename T1,
typename T2,
typename T3>
302 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4>
303 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4)
310 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5>
311 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5)
318 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6>
319 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6)
326 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7>
327 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7)
334 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7,
typename T8>
335 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8)
342 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
343 typename T5,
typename T6,
typename T7,
typename T8,
typename T9>
344 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4,
345 T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9)
347 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
352 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
353 typename T6,
typename T7,
typename T8,
typename T9,
typename T10>
354 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
355 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10)
357 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
arg(10, t10);
362 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
363 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11>
364 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
365 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11)
373 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
374 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
typename T12>
375 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
376 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11, T12
const & t12)
384 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
385 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
386 typename T12,
typename T13>
387 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
388 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
389 T12
const & t12, T13
const & t13)
393 arg(12, t12);
arg(13, t13);
398 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
399 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
400 typename T12,
typename T13,
typename T14>
401 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
402 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
403 T12
const & t12, T13
const & t13, T14
const & t14)
407 arg(12, t12);
arg(13, t13);
arg(14, t14);
412 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
413 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
414 typename T12,
typename T13,
typename T14,
typename T15>
415 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
416 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
417 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15)
426 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
427 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
428 typename T12,
typename T13,
typename T14,
typename T15,
typename T16>
429 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
430 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
431 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16)
440 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
441 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
442 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17>
443 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
444 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
445 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17)
454 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
455 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
456 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
458 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
459 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
460 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
472 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
473 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
474 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
475 typename T18,
typename T19>
476 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
477 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
478 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
479 T18
const & t18, T19
const & t19
485 arg(18, t18);
arg(19, t19);
490 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
491 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
492 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
493 typename T18,
typename T19,
typename T20>
494 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
495 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
496 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
497 T18
const & t18, T19
const & t19, T20
const & t20
503 arg(18, t18);
arg(19, t19);
arg(20, t20);
508 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
509 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
510 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
511 typename T18,
typename T19,
typename T20,
typename T21>
512 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
513 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
514 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
515 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21
526 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
527 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
528 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
529 typename T18,
typename T19,
typename T20,
typename T21,
typename T22>
530 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
531 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
532 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
533 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22
544 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
545 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
546 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
547 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23>
548 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
549 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
550 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
551 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23
562 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
563 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
564 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
565 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
567 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
568 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
569 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
570 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
583 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
584 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
585 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
586 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
587 typename T24,
typename T25>
588 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
589 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
590 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
591 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
592 T24
const & t24, T25
const & t25
599 arg(24, t24);
arg(25, t25);
604 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
605 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
606 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
607 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
608 typename T24,
typename T25,
typename T26>
609 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
610 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
611 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
612 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
613 T24
const & t24, T25
const & t25, T26
const & t26
620 arg(24, t24);
arg(25, t25);
arg(26, t26);
625 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
626 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
627 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
628 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
629 typename T24,
typename T25,
typename T26,
typename T27>
630 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
631 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
632 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
633 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
634 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27
646 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
647 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
648 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
649 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
650 typename T24,
typename T25,
typename T26,
typename T27,
typename T28>
651 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
652 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
653 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
654 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
655 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28
667 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
668 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
669 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
670 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
671 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29>
672 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
673 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
674 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
675 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
676 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29
688 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
689 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
690 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
691 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
692 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29,
694 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
695 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
696 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
697 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
698 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29,
712 template<
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
713 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
714 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
715 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
716 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29,
717 typename T30,
typename T31>
718 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
719 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
720 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
721 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
722 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29,
723 T30
const & t30, T31
const & t31
731 arg(30, t30);
arg(31, t31);
744 assert(index < 3 &&
bool(
"Work size index out of bounds"));
745 return local_work_size_[index];
753 assert(index < 3 &&
bool(
"Work size index out of bounds"));
754 return global_work_size_[index];
764 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
765 std::cout <<
"ViennaCL: Setting local work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
767 assert(index < 3 &&
bool(
"Work size index out of bounds"));
768 local_work_size_[index] = s;
777 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
778 std::cout <<
"ViennaCL: Setting global work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
780 assert(index < 3 &&
bool(
"Work size index out of bounds"));
781 global_work_size_[index] = s;
/** @brief Returns a const reference to the kernel name stored in name_.
 *
 *  The function is const and returns the member by reference, so no copy
 *  of the string is made.
 */
784 std::string
const &
name()
const {
return name_; }
/** @brief Declaration only; the definition is not part of this excerpt.
 *
 *  Called from the full constructor (the one taking handle, program, context and name).
 *  NOTE(review): presumably fills local_work_size_ / global_work_size_ with
 *  default values (inferred from the name) -- confirm against the definition.
 */
792 inline void set_work_size_defaults();
/** Local work size, one entry per dimension; the accessors assert index < 3. */
798 size_type local_work_size_[3];
/** Global work size, one entry per dimension; the accessors assert index < 3. */
799 size_type global_work_size_[3];
cl_uint stride
Increment between integers.
kernel & operator()(T0 const &t0, T1 const &t1)
Convenience function for setting two kernel parameters.
This file provides the forward declarations for the OpenCL layer of ViennaCL.
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Represents an OpenCL device within ViennaCL.
void arg(unsigned int pos, cl_ulong val)
Sets an unsigned long argument at the provided position.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3)
Convenience function for setting four kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19)
Convenience function for setting twenty kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24)
Convenience function for setting 25 kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10)
Convenience function for setting eleven kernel parameters.
friend void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11)
Convenience function for setting twelve kernel parameters.
viennacl::ocl::handle< cl_kernel > const & handle() const
Represents an OpenCL kernel within ViennaCL.
kernel(kernel const &other)
cl_uint start
Starting value of the integer stride.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
void arg(unsigned int pos, float val)
Sets a single precision floating point argument at the provided position.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14)
Convenience function for setting fifteen kernel parameters.
void arg(unsigned int pos, cl_uchar val)
Sets an unsigned char argument at the provided position.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6)
Convenience function for setting seven kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22)
Convenience function for setting 23 kernel parameters.
std::string const & name() const
A class representing a command queue.
void arg(unsigned int pos, const local_mem &mem)
Sets an OpenCL local memory object at the provided position.
void local_work_size(int index, size_type s)
Sets the local work size at the respective dimension.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27)
Convenience function for setting 28 kernel parameters.
void arg(unsigned int pos, cl_ushort val)
Sets an argument of type unsigned short at the provided position.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
viennacl::ocl::context const & context() const
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15)
Convenience function for setting sixteen kernel parameters.
#define VIENNACL_ERR_CHECK(err)
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16)
Convenience function for setting seventeen kernel parameters.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9)
Convenience function for setting ten kernel parameters.
const OCL_TYPE & get() const
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29, T30 const &t30, T31 const &t31)
Convenience function for setting 32 kernel parameters.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7)
Convenience function for setting eight kernel parameters.
Implements an OpenCL program class for ViennaCL.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5)
Convenience function for setting six kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2)
Convenience function for setting three kernel parameters.
Implementation of a smart-pointer-like class for handling OpenCL handles.
viennacl::ocl::kernel & operator=(const kernel &other)
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20)
Convenience function for setting twenty-one kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17)
Convenience function for setting eighteen kernel parameters.
kernel(cl_kernel kernel_handle, viennacl::ocl::program const &kernel_program, viennacl::ocl::context const &kernel_context, std::string const &name)
void arg(unsigned int pos, packed_cl_uint val)
Sets four packed unsigned integers as argument at the provided position.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4)
Convenience function for setting five kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29, T30 const &t30)
Convenience function for setting 31 kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28)
Convenience function for setting 29 kernel parameters.
Wrapper class for an OpenCL program.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23)
Convenience function for setting 24 kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18)
Convenience function for setting nineteen kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26)
Convenience function for setting 27 kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21)
Convenience function for setting twenty-two kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29)
Convenience function for setting 30 kernel parameters.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13)
Convenience function for setting fourteen kernel parameters.
void arg(unsigned int pos, cl_int val)
Sets an int argument at the provided position.
void arg(unsigned int pos, VCL_TYPE const &val)
Sets an OpenCL memory object at the provided position.
viennacl::ocl::context const & context() const
kernel & operator()(T0 const &t0)
Convenience function for setting one kernel parameter.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
vcl_size_t size() const
Returns size in bytes.
void arg(unsigned int pos, viennacl::ocl::handle< CL_TYPE > const &h)
Sets an OpenCL object at the provided position.
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25)
Convenience function for setting 26 kernel parameters.
void arg(unsigned int pos, double val)
Sets a double precision floating point argument at the provided position.
void global_work_size(int index, size_type s)
Sets the global work size at the respective dimension.
A local (shared) memory object for OpenCL.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12)
Convenience function for setting thirteen kernel parameters.
void arg(unsigned int pos, cl_long val)
Sets a long argument at the provided position.
void arg(unsigned int pos, cl_uint val)
Sets an unsigned integer argument at the provided position.
void arg(unsigned int pos, cl_short val)
Sets an argument of type short at the provided position.
Handle class that effectively represents a smart pointer for OpenCL handles.
cl_uint size
Number of values in the stride.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8)
Convenience function for setting nine kernel parameters.