00001 #ifndef VIENNACL_OCL_KERNEL_HPP_
00002 #define VIENNACL_OCL_KERNEL_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include "viennacl/ocl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/ocl/handle.hpp"
#include "viennacl/ocl/program.hpp"
#include "viennacl/ocl/device.hpp"
#include "viennacl/ocl/local_mem.hpp"
00036
00037 namespace viennacl
00038 {
00039 namespace ocl
00040 {
00041
00043 class kernel
00044 {
00045 template <typename KernelType>
00046 friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue);
00047
00048
00049 public:
00050 kernel() : handle_(0)
00051 {
00052 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00053 std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl;
00054 #endif
00055 set_work_size_defaults();
00056 }
00057
00058 kernel(viennacl::ocl::handle<cl_program> const & prog, std::string const & name)
00059 : handle_(0), program_(prog), name_(name), init_done_(false)
00060 {
00061 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00062 std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl;
00063 #endif
00064 set_work_size_defaults();
00065 }
00066
00067 kernel(kernel const & other)
00068 : handle_(other.handle_), program_(other.program_), name_(other.name_), init_done_(other.init_done_)
00069 {
00070 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00071 std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
00072 #endif
00073 local_work_size_[0] = other.local_work_size_[0];
00074 local_work_size_[1] = other.local_work_size_[1];
00075
00076 global_work_size_[0] = other.global_work_size_[0];
00077 global_work_size_[1] = other.global_work_size_[1];
00078 }
00079
00080 viennacl::ocl::kernel & operator=(const kernel & other)
00081 {
00082 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00083 std::cout << "ViennaCL: Assigning kernel object" << std::endl;
00084 #endif
00085 handle_ = other.handle_;
00086 program_ = other.program_;
00087 name_ = other.name_;
00088 init_done_ = other.init_done_;
00089 local_work_size_[0] = other.local_work_size_[0];
00090 local_work_size_[1] = other.local_work_size_[1];
00091 global_work_size_[0] = other.global_work_size_[0];
00092 global_work_size_[1] = other.global_work_size_[1];
00093 return *this;
00094 }
00095
00096
00098 void arg(unsigned int pos, cl_uint val)
00099 {
00100 init();
00101 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00102 std::cout << "ViennaCL: Setting unsigned long kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00103 #endif
00104 cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_uint), (void*)&val);
00105 VIENNACL_ERR_CHECK(err);
00106 }
00107
00109 void arg(unsigned int pos, float val)
00110 {
00111 init();
00112 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00113 std::cout << "ViennaCL: Setting floating point kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00114 #endif
00115 cl_int err = clSetKernelArg(handle_, pos, sizeof(float), (void*)&val);
00116 VIENNACL_ERR_CHECK(err);
00117 }
00118
00120 void arg(unsigned int pos, double val)
00121 {
00122 init();
00123 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00124 std::cout << "ViennaCL: Setting double precision kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00125 #endif
00126 cl_int err = clSetKernelArg(handle_, pos, sizeof(double), (void*)&val);
00127 VIENNACL_ERR_CHECK(err);
00128 }
00129
00130
00132 template<class VCL_TYPE>
00133 void arg(unsigned int pos, VCL_TYPE const & val)
00134 {
00135 init();
00136 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00137 std::cout << "ViennaCL: Setting generic kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00138 #endif
00139 cl_mem temp = val.handle();
00140 cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_mem), (void*)&temp);
00141 VIENNACL_ERR_CHECK(err);
00142 }
00143
00144
00146 template<class CL_TYPE>
00147 void arg(unsigned int pos, viennacl::ocl::handle<CL_TYPE> const & h)
00148 {
00149
00150 init();
00151 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00152 std::cout << "ViennaCL: Setting handle kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00153 #endif
00154 CL_TYPE temp = h;
00155 cl_int err = clSetKernelArg(handle_, pos, sizeof(CL_TYPE), (void*)&temp);
00156 VIENNACL_ERR_CHECK(err);
00157 }
00158
00159
00160
00162 void arg(unsigned int pos, const local_mem & mem)
00163 {
00164 unsigned int size = mem.size();
00165 init();
00166 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00167 std::cout << "ViennaCL: Setting local memory kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00168 #endif
00169 cl_int err = clSetKernelArg(handle_, pos, size, 0);
00170 VIENNACL_ERR_CHECK(err);
00171 }
00172
00173
00174
00176 template <typename T0>
00177 kernel & operator()(T0 const & t0)
00178 {
00179 arg(0, t0);
00180 return *this;
00181 }
00182
00184 template <typename T0, typename T1>
00185 kernel & operator()(T0 const & t0, T1 const & t1)
00186 {
00187 arg(0, t0); arg(1, t1);
00188 return *this;
00189 }
00190
00192 template <typename T0, typename T1, typename T2>
00193 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2)
00194 {
00195 arg(0, t0); arg(1, t1); arg(2, t2);
00196 return *this;
00197 }
00198
00200 template <typename T0, typename T1, typename T2, typename T3>
00201 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3)
00202 {
00203 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3);
00204 return *this;
00205 }
00206
00208 template <typename T0, typename T1, typename T2, typename T3, typename T4>
00209 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4)
00210 {
00211 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4);
00212 return *this;
00213 }
00214
00216 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
00217 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5)
00218 {
00219 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00220 return *this;
00221 }
00222
00224 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
00225 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6)
00226 {
00227 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6);
00228 return *this;
00229 }
00230
00232 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
00233 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7)
00234 {
00235 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00236 return *this;
00237 }
00238
00240 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
00241 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8)
00242 {
00243 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8);
00244 return *this;
00245 }
00246
00248 template <typename T0, typename T1, typename T2, typename T3, typename T4,
00249 typename T5, typename T6, typename T7, typename T8, typename T9>
00250 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00251 T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9)
00252 {
00253 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9);
00254 return *this;
00255 }
00256
00258 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00259 typename T6, typename T7, typename T8, typename T9, typename T10>
00260 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00261 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10)
00262 {
00263 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10);
00264 return *this;
00265 }
00266
00268 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00269 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11>
00270 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00271 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11)
00272 {
00273 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00274 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00275 return *this;
00276 }
00277
00279 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00280 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11, typename T12>
00281 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00282 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12)
00283 {
00284 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00285 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12);
00286 return *this;
00287 }
00288
00290 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00291 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00292 typename T12, typename T13>
00293 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00294 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00295 T12 const & t12, T13 const & t13)
00296 {
00297 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00298 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00299 arg(12, t12); arg(13, t13);
00300 return *this;
00301 }
00302
00304 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00305 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00306 typename T12, typename T13, typename T14>
00307 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00308 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00309 T12 const & t12, T13 const & t13, T14 const & t14)
00310 {
00311 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00312 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00313 arg(12, t12); arg(13, t13); arg(14, t14);
00314 return *this;
00315 }
00316
00318 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00319 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00320 typename T12, typename T13, typename T14, typename T15>
00321 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00322 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00323 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15)
00324 {
00325 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00326 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00327 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00328 return *this;
00329 }
00330
00332 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00333 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00334 typename T12, typename T13, typename T14, typename T15, typename T16>
00335 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00336 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00337 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16)
00338 {
00339 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00340 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00341 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16);
00342 return *this;
00343 }
00344
00346 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00347 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00348 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17>
00349 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00350 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00351 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17)
00352 {
00353 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00354 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00355 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00356 return *this;
00357 }
00358
00360 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00361 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00362 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00363 typename T18>
00364 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00365 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00366 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00367 T18 const & t18
00368 )
00369 {
00370 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00371 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00372 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00373 arg(18, t18);
00374 return *this;
00375 }
00376
00378 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00379 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00380 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00381 typename T18, typename T19>
00382 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00383 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00384 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00385 T18 const & t18, T19 const & t19
00386 )
00387 {
00388 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00389 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00390 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00391 arg(18, t18); arg(19, t19);
00392 return *this;
00393 }
00394
00396 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00397 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00398 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00399 typename T18, typename T19, typename T20>
00400 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00401 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00402 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00403 T18 const & t18, T19 const & t19, T20 const & t20
00404 )
00405 {
00406 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00407 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00408 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00409 arg(18, t18); arg(19, t19); arg(20, t20);
00410 return *this;
00411 }
00412
00414 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00415 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00416 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00417 typename T18, typename T19, typename T20, typename T21>
00418 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00419 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00420 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00421 T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21
00422 )
00423 {
00424 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00425 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00426 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00427 arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);
00428 return *this;
00429 }
00430
00432 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00433 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00434 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00435 typename T18, typename T19, typename T20, typename T21, typename T22>
00436 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00437 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00438 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00439 T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22
00440 )
00441 {
00442 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00443 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00444 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00445 arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22);
00446 return *this;
00447 }
00448
00453 size_t local_work_size(int index = 0) const
00454 {
00455 assert(index == 0 || index == 1);
00456 return local_work_size_[index];
00457 }
00462 size_t global_work_size(int index = 0) const
00463 {
00464 assert(index == 0 || index == 1);
00465 return global_work_size_[index];
00466 }
00467
00473 void local_work_size(int index, size_t s)
00474 {
00475 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00476 std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00477 #endif
00478 assert(index == 0 || index == 1);
00479 local_work_size_[index] = s;
00480 }
00486 void global_work_size(int index, size_t s)
00487 {
00488 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00489 std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00490 #endif
00491 assert(index == 0 || index == 1);
00492 global_work_size_[index] = s;
00493 }
00494
00495 std::string const & name() const { return name_; }
00496
00497 viennacl::ocl::handle<cl_kernel> const & handle() const { return handle_; }
00498
00499
00500 private:
00501 void create_kernel()
00502 {
00503 cl_int err;
00504 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00505 std::cout << "ViennaCL: Building kernel " << name_ << std::endl;
00506 #endif
00507 handle_ = clCreateKernel(program_, name_.c_str(), &err);
00508
00509 if (err != CL_SUCCESS)
00510 {
00511 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00512 std::cout << "ViennaCL: Could not create kernel '" << name_ << "'." << std::endl;
00513 #endif
00514
00515 }
00516 VIENNACL_ERR_CHECK(err);
00517 }
00518
00519 void set_work_size_defaults()
00520 {
00521 if (viennacl::ocl::current_device().type() == CL_DEVICE_TYPE_GPU)
00522 {
00523 local_work_size_[0] = 128; local_work_size_[1] = 0;
00524 global_work_size_[0] = 128*128; global_work_size_[1] = 0;
00525 }
00526 else
00527 {
00528
00529 local_work_size_[0] = 1; local_work_size_[1] = 0;
00530 global_work_size_[0] = viennacl::ocl::current_device().max_compute_units(); global_work_size_[1] = 0;
00531 }
00532 }
00533
00534 void init()
00535 {
00536 if (!init_done_)
00537 {
00538 create_kernel();
00539 init_done_ = true;
00540 }
00541 }
00542
00543 viennacl::ocl::handle<cl_kernel> handle_;
00544 viennacl::ocl::handle<cl_program> program_;
00545 std::string name_;
00546 bool init_done_;
00547 size_t local_work_size_[2];
00548 size_t global_work_size_[2];
00549 };
00550
00551 }
00552 }
00553
00554 #endif