Go to the documentation of this file.
00001 #ifndef VIENNACL_OCL_ENQUEUE_HPP_
00002 #define VIENNACL_OCL_ENQUEUE_HPP_
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00024 #ifdef __APPLE__
00025 #include <OpenCL/cl.h>
00026 #else
00027 #include <CL/cl.h>
00028 #endif
00029
00030 #include "viennacl/ocl/kernel.hpp"
00031 #include "viennacl/ocl/command_queue.hpp"
00032
00033 namespace viennacl
00034 {
00035 namespace ocl
00036 {
00037
00039 template <typename KernelType>
00040 void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue)
00041 {
00042
00043 if (k.local_work_size(1) == 0)
00044 {
00045 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00046 std::cout << "ViennaCL: Starting 1D-kernel '" << k.name() << "'..." << std::endl;
00047 std::cout << "ViennaCL: Global work size: '" << k.global_work_size() << "'..." << std::endl;
00048 std::cout << "ViennaCL: Local work size: '" << k.local_work_size() << "'..." << std::endl;
00049 #endif
00050
00051 size_t tmp_global = k.global_work_size();
00052 size_t tmp_local = k.local_work_size();
00053
00054 cl_int err;
00055 if (tmp_global == 1 && tmp_local == 1)
00056 err = clEnqueueTask(queue.handle(), k.handle(), 0, NULL, NULL);
00057 else
00058 err = clEnqueueNDRangeKernel(queue.handle(), k.handle(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);
00059
00060 if (err != CL_SUCCESS)
00061 {
00062
00063 while (err != CL_SUCCESS && tmp_local > 1)
00064 {
00065
00066
00067
00068 tmp_global /= 2;
00069 tmp_local /= 2;
00070
00071 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00072 std::cout << "ViennaCL: Kernel start failed for '" << k.name() << "'." << std::endl;
00073 std::cout << "ViennaCL: Global work size: '" << tmp_global << "'..." << std::endl;
00074 std::cout << "ViennaCL: Local work size: '" << tmp_local << "'..." << std::endl;
00075 #endif
00076
00077 queue.finish();
00078 err = clEnqueueNDRangeKernel(queue.handle(), k.handle(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL);
00079 }
00080
00081 if (err != CL_SUCCESS)
00082 {
00083
00084 std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
00085 std::cerr << "ViennaCL: Smaller work sizes could not solve the problem. " << std::endl;
00086 VIENNACL_ERR_CHECK(err);
00087 }
00088 else
00089 {
00090
00091 k.local_work_size(0, tmp_local);
00092 k.global_work_size(0, tmp_global);
00093 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00094 std::cout << "ViennaCL: Kernel '" << k.name() << "' now uses global work size " << tmp_global << " and local work size " << tmp_local << "." << std::endl;
00095 #endif
00096 }
00097 }
00098 }
00099 else
00100 {
00101 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00102 std::cout << "ViennaCL: Starting 2D-kernel '" << k.name() << "'..." << std::endl;
00103 std::cout << "ViennaCL: Global work size: '" << k.global_work_size(0) << ", " << k.global_work_size(1) << "'..." << std::endl;
00104 std::cout << "ViennaCL: Local work size: '" << k.local_work_size(0) << ", " << k.local_work_size(1) << "'..." << std::endl;
00105 #endif
00106
00107 size_t tmp_global[2];
00108 tmp_global[0] = k.global_work_size(0);
00109 tmp_global[1] = k.global_work_size(1);
00110
00111 size_t tmp_local[2];
00112 tmp_local[0] = k.local_work_size(0);
00113 tmp_local[1] = k.local_work_size(1);
00114
00115 cl_int err = clEnqueueNDRangeKernel(queue.handle(), k.handle(), 2, NULL, tmp_global, tmp_local, 0, NULL, NULL);
00116
00117 if (err != CL_SUCCESS)
00118 {
00119
00120 std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl;
00121 VIENNACL_ERR_CHECK(err);
00122 }
00123
00124 }
00125
00126 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00127 queue.finish();
00128 std::cout << "ViennaCL: Kernel " << k.name() << " finished!" << std::endl;
00129 #endif
00130 }
00131
00132
00134 template <typename KernelType>
00135 void enqueue(KernelType & k)
00136 {
00137 enqueue(k, viennacl::ocl::current_context().get_queue());
00138 }
00139 }
00140 }
00141 #endif