29 #ifndef VIENNACL_WITH_OPENCL
30 #define VIENNACL_WITH_OPENCL
55 static const char * my_compute_program =
56 "__kernel void elementwise_prod(\n"
57 " __global const float * vec1,\n"
58 " __global const float * vec2, \n"
59 " __global float * result,\n"
60 " unsigned int size) \n"
62 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
63 " result[i] = vec1[i] * vec2[i];\n"
83 std::vector<cl_device_id> device_id_array;
87 std::cout <<
"Platform info: " << pf.
info() << std::endl;
88 std::vector<viennacl::ocl::device> devices = pf.
devices(CL_DEVICE_TYPE_DEFAULT);
89 std::cout << devices[0].name() << std::endl;
90 std::cout <<
"Number of devices for custom context: " << devices.size() << std::endl;
93 for (std::size_t i=0; i<devices.size(); ++i)
95 device_id_array.push_back(devices[i].
id());
98 std::cout <<
"Creating context..." << std::endl;
100 cl_context my_context = clCreateContext(0, cl_uint(device_id_array.size()), &(device_id_array[0]), NULL, NULL, &err);
105 unsigned int vector_size = 10;
106 std::vector<ScalarType> vec1(vector_size);
107 std::vector<ScalarType> vec2(vector_size);
108 std::vector<ScalarType> result(vector_size);
113 for (
unsigned int i=0; i<vector_size; ++i)
115 vec1[i] =
static_cast<ScalarType
>(i);
116 vec2[i] =
static_cast<ScalarType
>(vector_size-i);
122 cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(vec1[0]), &err);
124 cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(vec2[0]), &err);
126 cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(result[0]), &err);
133 std::vector<cl_command_queue> queues(devices.size());
134 for (std::size_t i=0; i<devices.size(); ++i)
136 queues[i] = clCreateCommandQueue(my_context, devices[i].
id(), 0, &err);
143 std::size_t source_len = std::string(my_compute_program).length();
144 cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err);
145 err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL);
159 const char * kernel_name =
"elementwise_prod";
160 cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err);
167 err = clSetKernelArg(my_kernel, 0,
sizeof(cl_mem), (
void*)&mem_vec1);
169 err = clSetKernelArg(my_kernel, 1,
sizeof(cl_mem), (
void*)&mem_vec2);
171 err = clSetKernelArg(my_kernel, 2,
sizeof(cl_mem), (
void*)&mem_result);
173 err = clSetKernelArg(my_kernel, 3,
sizeof(
unsigned int), (
void*)&vector_size);
175 std::size_t global_size = vector_size;
176 std::size_t local_size = vector_size;
177 err = clEnqueueNDRangeKernel(queues[0], my_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
184 err = clEnqueueReadBuffer(queues[0], mem_vec1, CL_TRUE, 0,
sizeof(ScalarType)*vector_size, &(vec1[0]), 0, NULL, NULL);
186 err = clEnqueueReadBuffer(queues[0], mem_result, CL_TRUE, 0,
sizeof(ScalarType)*vector_size, &(result[0]), 0, NULL, NULL);
189 std::cout <<
"vec1 : ";
190 for (std::size_t i=0; i<vec1.size(); ++i)
191 std::cout << vec1[i] <<
" ";
192 std::cout << std::endl;
194 std::cout <<
"vec2 : ";
195 for (std::size_t i=0; i<vec2.size(); ++i)
196 std::cout << vec2[i] <<
" ";
197 std::cout << std::endl;
199 std::cout <<
"result: ";
200 for (std::size_t i=0; i<result.size(); ++i)
201 std::cout << result[i] <<
" ";
202 std::cout << std::endl;
216 std::cout <<
"Existing context: " << my_context << std::endl;
227 std::cout <<
"Standard vector operations within ViennaCL:" << std::endl;
228 vcl_result = vcl_s * vcl_vec1 + vcl_vec2;
230 std::cout <<
"vec1 : ";
231 std::cout << vcl_vec1 << std::endl;
233 std::cout <<
"vec2 : ";
234 std::cout << vcl_vec2 << std::endl;
236 std::cout <<
"result: ";
237 std::cout << vcl_result << std::endl;
244 std::cout <<
"Using existing kernel within the OpenCL backend of ViennaCL:" << std::endl;
247 viennacl::ocl::enqueue(my_vcl_kernel(vcl_vec1, vcl_vec2, vcl_result, static_cast<cl_uint>(vcl_vec1.size())));
249 std::cout <<
"vec1 : ";
250 std::cout << vcl_vec1 << std::endl;
252 std::cout <<
"vec2 : ";
253 std::cout << vcl_vec2 << std::endl;
255 std::cout <<
"result: ";
256 std::cout << vcl_result << std::endl;
267 vcl_result.resize(3);
270 std::cout <<
"result of matrix-vector product: ";
271 std::cout << vcl_result << std::endl;
277 std::cout <<
"!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
viennacl::ocl::kernel & add_kernel(cl_kernel kernel_handle, std::string const &kernel_name)
Adds a kernel to the program.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
Implementations of dense matrix related operations including matrix-vector products.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
#define VIENNACL_ERR_CHECK(err)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
const OCL_TYPE & get() const
Wrapper class for an OpenCL program.
Implementation of the OpenCL backend, in which all contexts are stored.
void switch_context(long i)
Convenience function for switching the current context.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
The vector type with operator overloads and proxy classes is defined here. Linear algebra operations ...
void setup_context(long i, std::vector< cl_device_id > const &devices)
Convenience function for setting devices for a context.