• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/dev/viennacl/ocl/kernel.hpp

Go to the documentation of this file.
00001 #ifndef VIENNACL_OCL_KERNEL_HPP_
00002 #define VIENNACL_OCL_KERNEL_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2011, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008 
00009                             -----------------
00010                   ViennaCL - The Vienna Computing Library
00011                             -----------------
00012 
00013    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00014                
00015    (A list of authors and contributors can be found in the PDF manual)
00016 
00017    License:         MIT (X11), see file LICENSE in the base directory
00018 ============================================================================= */
00019 
00024 #ifdef __APPLE__
00025 #include <OpenCL/cl.h>
00026 #else
00027 #include <CL/cl.h>
00028 #endif
00029 
00030 #include "viennacl/ocl/forwards.h"
00031 #include "viennacl/ocl/backend.hpp"
00032 #include "viennacl/ocl/handle.hpp"
00033 #include "viennacl/ocl/program.hpp"
00034 #include "viennacl/ocl/device.hpp"
00035 #include "viennacl/ocl/local_mem.hpp"
00036 
00037 namespace viennacl
00038 {
00039   namespace ocl
00040   {
00041     
00043     class kernel
00044     {
00045       template <typename KernelType>
00046       friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue);
00047       
00048       
00049     public:
00050       kernel() : handle_(0)
00051       {
00052         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00053         std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl;
00054         #endif
00055         set_work_size_defaults();
00056       }
00057       
00058       kernel(viennacl::ocl::handle<cl_program> const & prog, std::string const & name) 
00059        : handle_(0), program_(prog), name_(name), init_done_(false)
00060       {
00061         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00062         std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl;
00063         #endif
00064         set_work_size_defaults();
00065       }
00066       
00067       kernel(kernel const & other) 
00068        : handle_(other.handle_), program_(other.program_), name_(other.name_), init_done_(other.init_done_)
00069       {
00070         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00071         std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
00072         #endif
00073         local_work_size_[0] = other.local_work_size_[0];
00074         local_work_size_[1] = other.local_work_size_[1];
00075         
00076         global_work_size_[0] = other.global_work_size_[0];
00077         global_work_size_[1] = other.global_work_size_[1];
00078       }
00079       
00080       viennacl::ocl::kernel & operator=(const kernel & other)
00081       {
00082         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00083         std::cout << "ViennaCL: Assigning kernel object" << std::endl;
00084         #endif
00085         handle_ = other.handle_;
00086         program_ = other.program_;
00087         name_ = other.name_;
00088         init_done_ = other.init_done_;
00089         local_work_size_[0] = other.local_work_size_[0];
00090         local_work_size_[1] = other.local_work_size_[1];
00091         global_work_size_[0] = other.global_work_size_[0];
00092         global_work_size_[1] = other.global_work_size_[1];
00093         return *this;
00094       }
00095       
00096       
00098       void arg(unsigned int pos, cl_uint val)
00099       {
00100         init();
00101         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00102         std::cout << "ViennaCL: Setting unsigned long kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00103         #endif
00104         cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_uint), (void*)&val);
00105         VIENNACL_ERR_CHECK(err);
00106       }
00107 
00109       void arg(unsigned int pos, float val)
00110       {
00111         init();
00112         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00113         std::cout << "ViennaCL: Setting floating point kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00114         #endif
00115         cl_int err = clSetKernelArg(handle_, pos, sizeof(float), (void*)&val);
00116         VIENNACL_ERR_CHECK(err);
00117       }
00118 
00120       void arg(unsigned int pos, double val)
00121       {
00122         init();
00123         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00124         std::cout << "ViennaCL: Setting double precision kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00125         #endif
00126         cl_int err = clSetKernelArg(handle_, pos, sizeof(double), (void*)&val);
00127         VIENNACL_ERR_CHECK(err);
00128       }
00129 
00130       //generic handling: call .handle() member
00132       template<class VCL_TYPE>
00133       void arg(unsigned int pos, VCL_TYPE const & val)
00134       {
00135         init();
00136         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00137         std::cout << "ViennaCL: Setting generic kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00138         #endif
00139         cl_mem temp = val.handle();
00140         cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_mem), (void*)&temp);
00141         VIENNACL_ERR_CHECK(err);
00142       }
00143       
00144       //forward handles directly:
00146       template<class CL_TYPE>
00147       void arg(unsigned int pos, viennacl::ocl::handle<CL_TYPE> const & h)
00148       {
00149         //arg(pos, h);
00150         init();
00151         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00152         std::cout << "ViennaCL: Setting handle kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00153         #endif
00154         CL_TYPE temp = h;
00155         cl_int err = clSetKernelArg(handle_, pos, sizeof(CL_TYPE), (void*)&temp);
00156         VIENNACL_ERR_CHECK(err);
00157       }
00158       
00159       
00160       //local buffer argument:
00162       void arg(unsigned int pos, const local_mem & mem)
00163       {
00164         unsigned int size =  mem.size();
00165         init();
00166         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00167         std::cout << "ViennaCL: Setting local memory kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00168         #endif
00169         cl_int err = clSetKernelArg(handle_, pos, size, 0);
00170         VIENNACL_ERR_CHECK(err);
00171       }
00172       
00173       
00174       
00176       template <typename T0>
00177       kernel & operator()(T0 const & t0)
00178       {
00179          arg(0, t0);
00180          return *this;
00181       }     
00182 
00184       template <typename T0, typename T1>
00185       kernel & operator()(T0 const & t0, T1 const & t1)
00186       {
00187          arg(0, t0); arg(1, t1);
00188          return *this;
00189       }     
00190 
00192       template <typename T0, typename T1, typename T2>
00193       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2)
00194       {
00195          arg(0, t0); arg(1, t1); arg(2, t2);
00196          return *this;
00197       }     
00198 
00200       template <typename T0, typename T1, typename T2, typename T3>
00201       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3)
00202       {
00203          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3);
00204          return *this;
00205       }     
00206 
00208       template <typename T0, typename T1, typename T2, typename T3, typename T4>
00209       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4)
00210       {
00211          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4);
00212          return *this;
00213       }     
00214 
00216       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
00217       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5)
00218       {
00219          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00220          return *this;
00221       }     
00222 
00224       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
00225       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6)
00226       {
00227          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6);
00228          return *this;
00229       }     
00230 
00232       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
00233       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7)
00234       {
00235          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00236          return *this;
00237       }     
00238 
00240       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
00241       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8)
00242       {
00243          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8);
00244          return *this;
00245       }     
00246 
00248       template <typename T0, typename T1, typename T2, typename T3, typename T4,
00249                 typename T5, typename T6, typename T7, typename T8, typename T9>
00250       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00251                           T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9)
00252       {
00253          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9);
00254          return *this;
00255       }     
00256 
00258       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00259                 typename T6, typename T7, typename T8, typename T9, typename T10>
00260       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00261                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10)
00262       {
00263          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10);
00264          return *this;
00265       }     
00266 
00268       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00269                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11>
00270       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00271                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11)
00272       {
00273          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00274          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00275          return *this;
00276       }     
00277 
00279       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00280                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11, typename T12>
00281       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00282                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12)
00283       {
00284          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00285          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12);
00286          return *this;
00287       }     
00288 
00290       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00291                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00292                 typename T12, typename T13>
00293       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00294                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00295                           T12 const & t12, T13 const & t13)
00296       {
00297          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00298          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00299          arg(12, t12); arg(13, t13);
00300          return *this;
00301       }     
00302 
00304       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00305                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00306                 typename T12, typename T13, typename T14>
00307       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00308                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00309                           T12 const & t12, T13 const & t13, T14 const & t14)
00310       {
00311          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00312          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00313          arg(12, t12); arg(13, t13); arg(14, t14);
00314          return *this;
00315       }     
00316 
00318       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00319                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00320                 typename T12, typename T13, typename T14, typename T15>
00321       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00322                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00323                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15)
00324       {
00325          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00326          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00327          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00328          return *this;
00329       }     
00330 
00332       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00333                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00334                 typename T12, typename T13, typename T14, typename T15, typename T16>
00335       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00336                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00337                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16)
00338       {
00339          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00340          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00341          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16);
00342          return *this;
00343       }     
00344 
00346       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00347                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00348                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17>
00349       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00350                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00351                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17)
00352       {
00353          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00354          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00355          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00356          return *this;
00357       }     
00358 
00360       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00361                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00362                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00363                 typename T18>
00364       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00365                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00366                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00367                           T18 const & t18
00368                          )
00369       {
00370          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00371          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00372          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00373          arg(18, t18);
00374          return *this;
00375       }     
00376 
00378       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00379                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00380                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00381                 typename T18, typename T19>
00382       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00383                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00384                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00385                           T18 const & t18, T19 const & t19
00386                          )
00387       {
00388          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00389          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00390          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00391          arg(18, t18); arg(19, t19);
00392          return *this;
00393       }     
00394 
00396       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00397                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00398                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00399                 typename T18, typename T19, typename T20>
00400       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00401                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00402                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00403                           T18 const & t18, T19 const & t19, T20 const & t20
00404                          )
00405       {
00406          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00407          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00408          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00409          arg(18, t18); arg(19, t19); arg(20, t20);
00410          return *this;
00411       }     
00412 
00414       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00415                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00416                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00417                 typename T18, typename T19, typename T20, typename T21>
00418       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00419                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00420                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00421                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21
00422                          )
00423       {
00424          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00425          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00426          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00427          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);
00428          return *this;
00429       }     
00430 
00432       template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00433                 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00434                 typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
00435                 typename T18, typename T19, typename T20, typename T21, typename T22>
00436       kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00437                           T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00438                           T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
00439                           T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22
00440                          )
00441       {
00442          arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00443          arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00444          arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
00445          arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);  arg(22, t22);
00446          return *this;
00447       }     
00448 
00453       size_t local_work_size(int index = 0) const
00454       {
00455         assert(index == 0 || index == 1);
00456         return local_work_size_[index];
00457       }
00462       size_t global_work_size(int index = 0) const
00463       { 
00464         assert(index == 0 || index == 1);
00465         return global_work_size_[index];
00466       }
00467 
00473       void local_work_size(int index, size_t s)
00474       {
00475         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00476         std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00477         #endif
00478         assert(index == 0 || index == 1);
00479         local_work_size_[index] = s;
00480       }
00486       void global_work_size(int index, size_t s)
00487       { 
00488         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00489         std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00490         #endif
00491         assert(index == 0 || index == 1);
00492         global_work_size_[index] = s;
00493       }
00494 
00495       std::string const & name() const { return name_; }
00496 
00497       viennacl::ocl::handle<cl_kernel> const & handle() const { return handle_; }
00498 
00499 
00500     private:
00501       void create_kernel()
00502       {
00503         cl_int err;
00504         #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00505         std::cout << "ViennaCL: Building kernel " << name_ << std::endl;
00506         #endif
00507         handle_ = clCreateKernel(program_, name_.c_str(), &err);
00508         
00509         if (err != CL_SUCCESS)
00510         {
00511           #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00512           std::cout << "ViennaCL: Could not create kernel '" << name_ << "'." << std::endl;
00513           #endif
00514           //std::cerr << "Could not build kernel '" << name_ << "'." << std::endl;
00515         }
00516         VIENNACL_ERR_CHECK(err);
00517       }
00518 
00519       void set_work_size_defaults()
00520       {
00521         if (viennacl::ocl::current_device().type() == CL_DEVICE_TYPE_GPU)
00522         {
00523           local_work_size_[0] = 128; local_work_size_[1] = 0;
00524           global_work_size_[0] = 128*128; global_work_size_[1] = 0;
00525         }
00526         else //assume CPU type:
00527         {
00528           //conservative assumption: one thread per CPU core:
00529           local_work_size_[0] = 1; local_work_size_[1] = 0;
00530           global_work_size_[0] = viennacl::ocl::current_device().max_compute_units(); global_work_size_[1] = 0;
00531         }
00532       }
00533 
00534       void init()
00535       {
00536         if (!init_done_)
00537         {
00538           create_kernel();
00539           init_done_ = true;
00540         }
00541       }
00542       
00543       viennacl::ocl::handle<cl_kernel> handle_;
00544       viennacl::ocl::handle<cl_program> program_;
00545       std::string name_;
00546       bool init_done_;
00547       size_t local_work_size_[2];
00548       size_t global_work_size_[2];
00549     };
00550     
00551   } //namespace ocl
00552 } //namespace viennacl
00553 
00554 #endif

Generated on Fri Dec 30 2011 23:20:43 for ViennaCL - The Vienna Computing Library by  doxygen 1.7.1