1 #ifndef VIENNACL_OCL_DEVICE_HPP_
2 #define VIENNACL_OCL_DEVICE_HPP_
26 #include <OpenCL/cl.h>
52 explicit device() : device_(0) { flush_cache(); }
54 explicit device(cl_device_id dev) : device_(dev)
56 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
57 std::cout <<
"ViennaCL: Creating device object (CTOR with cl_device_id)" << std::endl;
64 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
65 std::cout <<
"ViennaCL: Creating device object (Copy CTOR)" << std::endl;
67 if (device_ != other.device_)
69 device_ = other.device_;
77 if (!address_bits_valid_)
79 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ADDRESS_BITS,
sizeof(cl_uint), static_cast<void *>(&address_bits_), NULL);
81 address_bits_valid_ =
true;
89 if (!available_valid_)
91 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_AVAILABLE,
sizeof(cl_bool), static_cast<void *>(&available_), NULL);
93 available_valid_ =
true;
101 if (!compiler_available_valid_)
103 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_COMPILER_AVAILABLE ,
sizeof(cl_bool), static_cast<void *>(&compiler_available_), NULL);
105 compiler_available_valid_ =
true;
107 return compiler_available_;
110 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
124 cl_device_fp_config double_fp_config()
const
128 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_DOUBLE_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&double_fp_config_), NULL);
130 double_fp_config_valid_ =
true;
132 return double_fp_config_;
139 if (!endian_little_valid_)
141 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ENDIAN_LITTLE,
sizeof(cl_bool), static_cast<void *>(&endian_little_), NULL);
143 endian_little_valid_ =
true;
145 return endian_little_;
151 if (!error_correction_support_valid_)
153 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ERROR_CORRECTION_SUPPORT ,
sizeof(cl_bool), static_cast<void *>(&error_correction_support_), NULL);
155 error_correction_support_valid_ =
true;
157 return error_correction_support_;
169 if (!execution_capabilities_valid_)
171 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXECUTION_CAPABILITIES ,
sizeof(cl_device_exec_capabilities), static_cast<void *>(&execution_capabilities_), NULL);
173 execution_capabilities_valid_ =
true;
175 return execution_capabilities_;
191 if (!extensions_valid_)
193 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXTENSIONS,
sizeof(
char) * 2048, static_cast<void *>(&extensions_), NULL);
195 extensions_valid_ =
true;
203 if (!global_mem_cache_size_valid_)
205 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
sizeof(cl_ulong), static_cast<void *>(&global_mem_cache_size_), NULL);
207 global_mem_cache_size_valid_ =
true;
209 return global_mem_cache_size_;
215 if (!global_mem_cache_type_valid_)
217 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
sizeof(cl_device_mem_cache_type), static_cast<void *>(&global_mem_cache_type_), NULL);
219 global_mem_cache_type_valid_ =
true;
221 return global_mem_cache_type_;
227 if (!global_mem_cacheline_size_valid_)
229 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
sizeof(cl_uint), static_cast<void *>(&global_mem_cacheline_size_), NULL);
231 global_mem_cacheline_size_valid_ =
true;
233 return global_mem_cacheline_size_;
239 if (!global_mem_size_valid_)
241 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(cl_ulong), static_cast<void *>(&global_mem_size_), NULL);
243 global_mem_size_valid_ =
true;
245 return global_mem_size_;
248 #ifdef CL_DEVICE_HALF_FP_CONFIG
261 cl_device_fp_config half_fp_config()
const
263 if (!half_fp_config_valid_)
265 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HALF_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&half_fp_config_), NULL);
267 half_fp_config_valid_ =
true;
269 return half_fp_config_;
274 #ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
275 cl_bool host_unified_memory()
const
277 if (!host_unified_memory_valid_)
279 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HOST_UNIFIED_MEMORY,
sizeof(cl_bool), static_cast<void *>(&host_unified_memory_), NULL);
281 host_unified_memory_valid_ =
true;
283 return host_unified_memory_;
290 if (!image_support_valid_)
292 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE_SUPPORT,
sizeof(cl_bool), static_cast<void *>(&image_support_), NULL);
294 image_support_valid_ =
true;
296 return image_support_;
302 if (!image2d_max_height_valid_)
304 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
sizeof(
size_t), static_cast<void *>(&image2d_max_height_), NULL);
306 image2d_max_height_valid_ =
true;
308 return image2d_max_height_;
314 if (!image2d_max_width_valid_)
316 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_WIDTH,
sizeof(
size_t), static_cast<void *>(&image2d_max_width_), NULL);
318 image2d_max_width_valid_ =
true;
320 return image2d_max_width_;
326 if (!image3d_max_depth_valid_)
328 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_DEPTH,
sizeof(
size_t), static_cast<void *>(&image3d_max_depth_), NULL);
330 image3d_max_depth_valid_ =
true;
332 return image3d_max_depth_;
338 if (!image3d_max_height_valid_)
340 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
sizeof(
size_t), static_cast<void *>(&image3d_max_height_), NULL);
342 image3d_max_height_valid_ =
true;
344 return image3d_max_height_;
350 if (!image3d_max_width_valid_)
352 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_WIDTH,
sizeof(
size_t), static_cast<void *>(&image3d_max_width_), NULL);
354 image3d_max_width_valid_ =
true;
356 return image3d_max_width_;
362 if (!local_mem_size_valid_)
364 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(cl_ulong), static_cast<void *>(&local_mem_size_), NULL);
366 local_mem_size_valid_ =
true;
368 return local_mem_size_;
374 if (!local_mem_type_valid_)
376 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_TYPE,
sizeof(cl_device_local_mem_type), static_cast<void *>(&local_mem_type_), NULL);
378 local_mem_type_valid_ =
true;
380 return local_mem_type_;
386 if (!max_clock_frequency_valid_)
388 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(cl_uint), static_cast<void *>(&max_clock_frequency_), NULL);
390 max_clock_frequency_valid_ =
true;
392 return max_clock_frequency_;
398 if (!max_compute_units_valid_)
400 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), static_cast<void *>(&max_compute_units_), NULL);
402 max_compute_units_valid_ =
true;
404 return max_compute_units_;
410 if (!max_constant_args_valid_)
412 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_constant_args_), NULL);
414 max_constant_args_valid_ =
true;
416 return max_constant_args_;
422 if (!max_constant_buffer_size_valid_)
424 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(cl_ulong), static_cast<void *>(&max_constant_buffer_size_), NULL);
426 max_constant_buffer_size_valid_ =
true;
428 return max_constant_buffer_size_;
434 if (!max_mem_alloc_size_valid_)
436 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), static_cast<void *>(&max_mem_alloc_size_), NULL);
438 max_mem_alloc_size_valid_ =
true;
440 return max_mem_alloc_size_;
449 if (!max_parameter_size_valid_)
451 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_PARAMETER_SIZE,
sizeof(
size_t), static_cast<void *>(&max_parameter_size_), NULL);
453 max_parameter_size_valid_ =
true;
455 return max_parameter_size_;
461 if (!max_read_image_args_valid_)
463 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_READ_IMAGE_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_read_image_args_), NULL);
465 max_read_image_args_valid_ =
true;
467 return max_read_image_args_;
473 if (!max_samplers_valid_)
475 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_SAMPLERS,
sizeof(cl_uint), static_cast<void *>(&max_samplers_), NULL);
477 max_samplers_valid_ =
true;
479 return max_samplers_;
485 if (!max_work_group_size_valid_)
487 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(
size_t), static_cast<void *>(&max_work_group_size_), NULL);
489 max_work_group_size_valid_ =
true;
491 return max_work_group_size_;
497 if (!max_work_item_dimensions_valid_)
499 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(cl_uint), static_cast<void *>(&max_work_item_dimensions_), NULL);
501 max_work_item_dimensions_valid_ =
true;
503 return max_work_item_dimensions_;
514 assert(result.size() < 16 && bool(
"Supported work item dimensions exceed available capacity!"));
516 if (!max_work_item_sizes_valid_)
518 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(
size_t) * 16, static_cast<void *>(&max_work_item_sizes_), NULL);
520 max_work_item_sizes_valid_ =
true;
524 result[i] = max_work_item_sizes_[i];
532 if (!max_write_image_args_valid_)
534 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_write_image_args_), NULL);
536 max_write_image_args_valid_ =
true;
538 return max_write_image_args_;
544 if (!mem_base_addr_align_valid_)
546 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
sizeof(cl_uint), static_cast<void *>(&mem_base_addr_align_), NULL);
548 mem_base_addr_align_valid_ =
true;
550 return mem_base_addr_align_;
556 if (!min_data_type_align_size_valid_)
558 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
sizeof(cl_uint), static_cast<void *>(&min_data_type_align_size_), NULL);
560 min_data_type_align_size_valid_ =
true;
562 return min_data_type_align_size_;
570 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NAME,
sizeof(
char) * 256, static_cast<void *>(name_), NULL);
580 if ( !architecture_family_valid_)
583 architecture_family_valid_ =
true;
585 return architecture_family_;
588 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
590 cl_uint native_vector_width_char()
const
592 if (!native_vector_width_char_valid_)
594 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_char_), NULL);
596 native_vector_width_char_valid_ =
true;
598 return native_vector_width_char_;
602 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
604 cl_uint native_vector_width_short()
const
606 if (!native_vector_width_short_valid_)
608 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_short_), NULL);
610 native_vector_width_short_valid_ =
true;
612 return native_vector_width_short_;
616 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
618 cl_uint native_vector_width_int()
const
620 if (!native_vector_width_int_valid_)
622 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_int_), NULL);
624 native_vector_width_int_valid_ =
true;
626 return native_vector_width_int_;
630 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
632 cl_uint native_vector_width_long()
const
634 if (!native_vector_width_long_valid_)
636 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_long_), NULL);
638 native_vector_width_long_valid_ =
true;
640 return native_vector_width_long_;
644 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
646 cl_uint native_vector_width_float()
const
648 if (!native_vector_width_float_valid_)
650 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_float_), NULL);
652 native_vector_width_float_valid_ =
true;
654 return native_vector_width_float_;
658 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
663 cl_uint native_vector_width_double()
const
665 if (!native_vector_width_double_valid_)
667 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_double_), NULL);
669 native_vector_width_double_valid_ =
true;
671 return native_vector_width_double_;
675 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
680 cl_uint native_vector_width_half()
const
682 if (!native_vector_width_half_valid_)
684 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_half_), NULL);
686 native_vector_width_half_valid_ =
true;
688 return native_vector_width_half_;
692 #if CL_DEVICE_OPENCL_C_VERSION
701 std::string opencl_c_version()
const
703 if (!opencl_c_version_valid_)
705 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_OPENCL_C_VERSION,
sizeof(
char) * 128, static_cast<void *>(opencl_c_version_), NULL);
707 opencl_c_version_valid_ =
true;
709 return opencl_c_version_;
716 if (!platform_valid_)
718 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PLATFORM,
sizeof(cl_platform_id), static_cast<void *>(&platform_), NULL);
720 platform_valid_ =
true;
728 if (!preferred_vector_width_char_valid_)
730 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_char_), NULL);
732 preferred_vector_width_char_valid_ =
true;
734 return preferred_vector_width_char_;
740 if (!preferred_vector_width_short_valid_)
742 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_short_), NULL);
744 preferred_vector_width_short_valid_ =
true;
746 return preferred_vector_width_short_;
752 if (!preferred_vector_width_int_valid_)
754 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_int_), NULL);
756 preferred_vector_width_int_valid_ =
true;
758 return preferred_vector_width_int_;
764 if (!preferred_vector_width_long_valid_)
766 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_long_), NULL);
768 preferred_vector_width_long_valid_ =
true;
770 return preferred_vector_width_long_;
776 if (!preferred_vector_width_float_valid_)
778 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_float_), NULL);
780 preferred_vector_width_float_valid_ =
true;
782 return preferred_vector_width_float_;
791 if (!preferred_vector_width_double_valid_)
793 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_double_), NULL);
795 preferred_vector_width_double_valid_ =
true;
797 return preferred_vector_width_double_;
804 #ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
805 cl_uint preferred_vector_width_half()
const
807 if (!preferred_vector_width_half_valid_)
809 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_half_), NULL);
811 preferred_vector_width_half_valid_ =
true;
813 return preferred_vector_width_half_;
827 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILE,
sizeof(
char) * 32, static_cast<void *>(profile_), NULL);
829 profile_valid_ =
true;
837 if (!profiling_timer_resolution_valid_)
839 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILING_TIMER_RESOLUTION,
sizeof(
size_t), static_cast<void *>(&profiling_timer_resolution_), NULL);
841 profiling_timer_resolution_valid_ =
true;
843 return profiling_timer_resolution_;
856 if (!queue_properties_valid_)
858 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(cl_command_queue_properties), static_cast<void *>(&queue_properties_), NULL);
860 queue_properties_valid_ =
true;
862 return queue_properties_;
880 if (!single_fp_config_valid_)
882 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&single_fp_config_), NULL);
884 single_fp_config_valid_ =
true;
886 return single_fp_config_;
897 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_TYPE,
sizeof(cl_device_type), static_cast<void *>(&type_), NULL);
909 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR,
sizeof(
char) * 256, static_cast<void *>(vendor_), NULL);
911 vendor_valid_ =
true;
919 if (!vendor_id_valid_)
921 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR_ID,
sizeof(cl_uint), static_cast<void *>(&vendor_id_), NULL);
923 vendor_id_valid_ =
true;
933 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VERSION,
sizeof(
char) * 256, static_cast<void *>(version_), NULL);
935 version_valid_ =
true;
943 if (!driver_version_valid_)
945 cl_int err = clGetDeviceInfo(device_, CL_DRIVER_VERSION,
sizeof(
char) * 256, static_cast<void *>(driver_version_), NULL);
947 driver_version_valid_ =
true;
949 return driver_version_;
960 if (ext.find(
"cl_khr_fp64") != std::string::npos || ext.find(
"cl_amd_fp64") != std::string::npos)
971 if (ext.find(
"cl_amd_fp64") != std::string::npos)
972 return "cl_amd_fp64";
974 if (ext.find(
"cl_khr_fp64") != std::string::npos)
975 return "cl_khr_fp64";
981 cl_device_id
id()
const
983 assert(device_ != 0 &&
bool(
"Device ID invalid!"));
997 std::string line_indent(indent, indent_char);
998 std::ostringstream oss;
999 oss << line_indent <<
"Name: " <<
name() << std::endl;
1000 oss << line_indent <<
"Vendor: " <<
vendor() << std::endl;
1002 oss << line_indent <<
"Available: " <<
available() << std::endl;
1003 oss << line_indent <<
"Max Compute Units: " <<
max_compute_units() << std::endl;
1005 oss << line_indent <<
"Global Mem Size: " <<
global_mem_size() << std::endl;
1006 oss << line_indent <<
"Local Mem Size: " <<
local_mem_size() << std::endl;
1007 oss << line_indent <<
"Local Mem Type: " <<
local_mem_type() << std::endl;
1008 #ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
1009 oss << line_indent <<
"Host Unified Memory: " << host_unified_memory() << std::endl;
1022 std::string line_indent(indent, indent_char);
1023 std::ostringstream oss;
1024 oss << line_indent <<
"Address Bits: " <<
address_bits() << std::endl;
1025 oss << line_indent <<
"Available: " <<
available() << std::endl;
1027 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1028 oss << line_indent <<
"Double FP Config: " <<
fp_config_to_string(double_fp_config()) << std::endl;
1030 oss << line_indent <<
"Endian Little: " <<
endian_little() << std::endl;
1033 oss << line_indent <<
"Extensions: " <<
extensions() << std::endl;
1034 oss << line_indent <<
"Global Mem Cache Size: " <<
global_mem_cache_size() <<
" Bytes" << std::endl;
1037 oss << line_indent <<
"Global Mem Size: " <<
global_mem_size() <<
" Bytes" << std::endl;
1038 #ifdef CL_DEVICE_HALF_FP_CONFIG
1039 oss << line_indent <<
"Half PF Config: " <<
fp_config_to_string(half_fp_config()) << std::endl;
1041 #ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
1042 oss << line_indent <<
"Host Unified Memory: " << host_unified_memory() << std::endl;
1044 oss << line_indent <<
"Image Support: " <<
image_support() << std::endl;
1046 oss << line_indent <<
"Image2D Max Width: " <<
image2d_max_width() << std::endl;
1047 oss << line_indent <<
"Image3D Max Depth: " <<
image3d_max_depth() << std::endl;
1049 oss << line_indent <<
"Image3D Max Width: " <<
image3d_max_width() << std::endl;
1050 oss << line_indent <<
"Local Mem Size: " <<
local_mem_size() <<
" Bytes" << std::endl;
1052 oss << line_indent <<
"Max Clock Frequency: " <<
max_clock_frequency() <<
" MHz" << std::endl;
1053 oss << line_indent <<
"Max Compute Units: " <<
max_compute_units() << std::endl;
1054 oss << line_indent <<
"Max Constant Args: " <<
max_constant_args() << std::endl;
1056 oss << line_indent <<
"Max Mem Alloc Size: " <<
max_mem_alloc_size() <<
" Bytes" << std::endl;
1057 oss << line_indent <<
"Max Parameter Size: " <<
max_parameter_size() <<
" Bytes" << std::endl;
1059 oss << line_indent <<
"Max Samplers: " <<
max_samplers() << std::endl;
1066 oss << line_indent <<
"Name: " <<
name() << std::endl;
1067 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
1068 oss << line_indent <<
"Native Vector Width char: " << native_vector_width_char() << std::endl;
1070 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
1071 oss << line_indent <<
"Native Vector Width short: " << native_vector_width_short() << std::endl;
1073 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
1074 oss << line_indent <<
"Native Vector Width int: " << native_vector_width_int() << std::endl;
1076 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
1077 oss << line_indent <<
"Native Vector Width long: " << native_vector_width_long() << std::endl;
1079 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
1080 oss << line_indent <<
"Native Vector Width float: " << native_vector_width_float() << std::endl;
1082 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
1083 oss << line_indent <<
"Native Vector Width double: " << native_vector_width_double() << std::endl;
1085 #ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
1086 oss << line_indent <<
"Native Vector Width half: " << native_vector_width_half() << std::endl;
1088 #ifdef CL_DEVICE_OPENCL_C_VERSION
1089 oss << line_indent <<
"OpenCL C Version: " << opencl_c_version() << std::endl;
1091 oss << line_indent <<
"Platform: " <<
platform() << std::endl;
1098 #ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
1099 oss << line_indent <<
"Preferred Vector Width half: " << preferred_vector_width_half() << std::endl;
1101 oss << line_indent <<
"Profile: " <<
profile() << std::endl;
1106 oss << line_indent <<
"Vendor: " <<
vendor() << std::endl;
1107 oss << line_indent <<
"Vendor ID: " <<
vendor_id() << std::endl;
1108 oss << line_indent <<
"Version: " <<
version() << std::endl;
1109 oss << line_indent <<
"Driver Version: " <<
driver_version() << std::endl;
1116 return device_ == other.device_;
1121 return device_ == other;
1127 std::ostringstream oss;
1128 if (conf & CL_FP_DENORM)
1129 oss <<
"CL_FP_DENORM ";
1130 if (conf & CL_FP_INF_NAN)
1131 oss <<
"CL_FP_INF_NAN ";
1132 if (conf & CL_FP_ROUND_TO_NEAREST)
1133 oss <<
"CL_FP_ROUND_TO_NEAREST ";
1134 if (conf & CL_FP_ROUND_TO_ZERO)
1135 oss <<
"CL_FP_ROUND_TO_ZERO ";
1136 if (conf & CL_FP_ROUND_TO_INF)
1137 oss <<
"CL_FP_ROUND_TO_INF ";
1138 if (conf & CL_FP_FMA)
1139 oss <<
"CL_FP_FMA ";
1140 #ifdef CL_FP_SOFT_FLOAT
1141 if (conf & CL_FP_SOFT_FLOAT)
1142 oss <<
"CL_FP_SOFT_FLOAT ";
1150 std::ostringstream oss;
1151 if (cap & CL_EXEC_KERNEL)
1152 oss <<
"CL_EXEC_KERNEL ";
1153 if (cap & CL_EXEC_NATIVE_KERNEL)
1154 oss <<
"CL_EXEC_NATIVE_KERNEL ";
1161 std::ostringstream oss;
1162 if (cachetype == CL_NONE)
1164 else if (cachetype == CL_READ_ONLY_CACHE)
1165 oss <<
"CL_READ_ONLY_CACHE ";
1166 else if (cachetype == CL_READ_WRITE_CACHE)
1167 oss <<
"CL_READ_WRITE_CACHE ";
1174 std::ostringstream oss;
1175 if (loc_mem_type & CL_LOCAL)
1177 if (loc_mem_type & CL_GLOBAL)
1178 oss <<
"CL_GLOBAL ";
1185 std::ostringstream oss;
1187 oss << vec[i] <<
" ";
1194 std::ostringstream oss;
1195 if (queue_prop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
1196 oss <<
"CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ";
1197 if (queue_prop & CL_QUEUE_PROFILING_ENABLE)
1198 oss <<
"CL_QUEUE_PROFILING_ENABLE ";
1205 std::ostringstream oss;
1206 if (dev_type & CL_DEVICE_TYPE_GPU)
1208 if (dev_type & CL_DEVICE_TYPE_CPU)
1210 if (dev_type & CL_DEVICE_TYPE_ACCELERATOR)
1211 oss <<
"Accelerator ";
1212 if (dev_type & CL_DEVICE_TYPE_DEFAULT)
1222 address_bits_valid_ =
false;
1223 architecture_family_valid_ =
false;
1224 available_valid_ =
false;
1225 compiler_available_valid_ =
false;
1226 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1227 double_fp_config_valid_ =
false;
1229 endian_little_valid_ =
false;
1230 error_correction_support_valid_ =
false;
1231 execution_capabilities_valid_ =
false;
1232 extensions_valid_ =
false;
1233 global_mem_cache_size_valid_ =
false;
1234 global_mem_cache_type_valid_ =
false;
1235 global_mem_cacheline_size_valid_ =
false;
1236 global_mem_size_valid_ =
false;
1237 #ifdef CL_DEVICE_HALF_FP_CONFIG
1238 half_fp_config_valid_ =
false;
1240 host_unified_memory_valid_ =
false;
1241 image_support_valid_ =
false;
1242 image2d_max_height_valid_ =
false;
1243 image2d_max_width_valid_ =
false;
1244 image3d_max_depth_valid_ =
false;
1245 image3d_max_height_valid_ =
false;
1246 image3d_max_width_valid_ =
false;
1247 local_mem_size_valid_ =
false;
1248 local_mem_type_valid_ =
false;
1249 max_clock_frequency_valid_ =
false;
1250 max_compute_units_valid_ =
false;
1251 max_constant_args_valid_ =
false;
1252 max_constant_buffer_size_valid_ =
false;
1253 max_mem_alloc_size_valid_ =
false;
1254 max_parameter_size_valid_ =
false;
1255 max_read_image_args_valid_ =
false;
1256 max_samplers_valid_ =
false;
1257 max_work_group_size_valid_ =
false;
1258 max_work_item_dimensions_valid_ =
false;
1259 max_work_item_sizes_valid_ =
false;
1260 max_write_image_args_valid_ =
false;
1261 mem_base_addr_align_valid_ =
false;
1262 min_data_type_align_size_valid_ =
false;
1263 name_valid_ =
false;
1264 native_vector_width_char_valid_ =
false;
1265 native_vector_width_short_valid_ =
false;
1266 native_vector_width_int_valid_ =
false;
1267 native_vector_width_long_valid_ =
false;
1268 native_vector_width_float_valid_ =
false;
1269 native_vector_width_double_valid_ =
false;
1270 native_vector_width_half_valid_ =
false;
1271 opencl_c_version_valid_ =
false;
1272 platform_valid_ =
false;
1273 preferred_vector_width_char_valid_ =
false;
1274 preferred_vector_width_short_valid_ =
false;
1275 preferred_vector_width_int_valid_ =
false;
1276 preferred_vector_width_long_valid_ =
false;
1277 preferred_vector_width_float_valid_ =
false;
1278 preferred_vector_width_double_valid_ =
false;
1279 preferred_vector_width_half_valid_ =
false;
1280 profile_valid_ =
false;
1281 profiling_timer_resolution_valid_ =
false;
1282 queue_properties_valid_ =
false;
1283 single_fp_config_valid_ =
false;
1284 type_valid_ =
false;
1285 vendor_valid_ =
false;
1286 vendor_id_valid_ =
false;
1287 version_valid_ =
false;
1288 driver_version_valid_ =
false;
1291 cl_device_id device_;
1299 mutable bool address_bits_valid_;
1300 mutable cl_uint address_bits_;
1302 mutable bool available_valid_;
1303 mutable cl_bool available_;
1305 mutable bool compiler_available_valid_;
1306 mutable cl_bool compiler_available_;
1308 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1309 mutable bool double_fp_config_valid_;
1310 mutable cl_device_fp_config double_fp_config_;
1313 mutable bool endian_little_valid_;
1314 mutable cl_bool endian_little_;
1316 mutable bool error_correction_support_valid_;
1317 mutable cl_bool error_correction_support_;
1319 mutable bool execution_capabilities_valid_;
1320 mutable cl_device_exec_capabilities execution_capabilities_;
1322 mutable bool extensions_valid_;
1323 mutable char extensions_[2048];
1325 mutable bool global_mem_cache_size_valid_;
1326 mutable cl_ulong global_mem_cache_size_;
1328 mutable bool global_mem_cache_type_valid_;
1329 mutable cl_device_mem_cache_type global_mem_cache_type_;
1331 mutable bool global_mem_cacheline_size_valid_;
1332 mutable cl_uint global_mem_cacheline_size_;
1334 mutable bool global_mem_size_valid_;
1335 mutable cl_ulong global_mem_size_;
1337 #ifdef CL_DEVICE_HALF_FP_CONFIG
1338 mutable bool half_fp_config_valid_;
1339 mutable cl_device_fp_config half_fp_config_;
1342 mutable bool host_unified_memory_valid_;
1343 mutable cl_bool host_unified_memory_;
1345 mutable bool image_support_valid_;
1346 mutable cl_bool image_support_;
1348 mutable bool image2d_max_height_valid_;
1349 mutable size_t image2d_max_height_;
1351 mutable bool image2d_max_width_valid_;
1352 mutable size_t image2d_max_width_;
1354 mutable bool image3d_max_depth_valid_;
1355 mutable size_t image3d_max_depth_;
1357 mutable bool image3d_max_height_valid_;
1358 mutable size_t image3d_max_height_;
1360 mutable bool image3d_max_width_valid_;
1361 mutable size_t image3d_max_width_;
1363 mutable bool local_mem_size_valid_;
1364 mutable cl_ulong local_mem_size_;
1366 mutable bool local_mem_type_valid_;
1367 mutable cl_device_local_mem_type local_mem_type_;
1369 mutable bool max_clock_frequency_valid_;
1370 mutable cl_uint max_clock_frequency_;
1372 mutable bool max_compute_units_valid_;
1373 mutable cl_uint max_compute_units_;
1375 mutable bool max_constant_args_valid_;
1376 mutable cl_uint max_constant_args_;
1378 mutable bool max_constant_buffer_size_valid_;
1379 mutable cl_ulong max_constant_buffer_size_;
1381 mutable bool max_mem_alloc_size_valid_;
1382 mutable cl_ulong max_mem_alloc_size_;
1384 mutable bool max_parameter_size_valid_;
1385 mutable size_t max_parameter_size_;
1387 mutable bool max_read_image_args_valid_;
1388 mutable cl_uint max_read_image_args_;
1390 mutable bool max_samplers_valid_;
1391 mutable cl_uint max_samplers_;
1393 mutable bool max_work_group_size_valid_;
1394 mutable size_t max_work_group_size_;
1396 mutable bool max_work_item_dimensions_valid_;
1397 mutable cl_uint max_work_item_dimensions_;
1399 mutable bool max_work_item_sizes_valid_;
1400 mutable size_t max_work_item_sizes_[16];
1402 mutable bool max_write_image_args_valid_;
1403 mutable cl_uint max_write_image_args_;
1405 mutable bool mem_base_addr_align_valid_;
1406 mutable cl_uint mem_base_addr_align_;
1408 mutable bool min_data_type_align_size_valid_;
1409 mutable cl_uint min_data_type_align_size_;
1411 mutable bool name_valid_;
1412 mutable char name_[256];
1414 mutable bool native_vector_width_char_valid_;
1415 mutable cl_uint native_vector_width_char_;
1417 mutable bool native_vector_width_short_valid_;
1418 mutable cl_uint native_vector_width_short_;
1420 mutable bool native_vector_width_int_valid_;
1421 mutable cl_uint native_vector_width_int_;
1423 mutable bool native_vector_width_long_valid_;
1424 mutable cl_uint native_vector_width_long_;
1426 mutable bool native_vector_width_float_valid_;
1427 mutable cl_uint native_vector_width_float_;
1429 mutable bool native_vector_width_double_valid_;
1430 mutable cl_uint native_vector_width_double_;
1432 mutable bool native_vector_width_half_valid_;
1433 mutable cl_uint native_vector_width_half_;
1435 mutable bool opencl_c_version_valid_;
1436 mutable char opencl_c_version_[128];
1438 mutable bool platform_valid_;
1439 mutable cl_platform_id platform_;
1441 mutable bool preferred_vector_width_char_valid_;
1442 mutable cl_uint preferred_vector_width_char_;
1444 mutable bool preferred_vector_width_short_valid_;
1445 mutable cl_uint preferred_vector_width_short_;
1447 mutable bool preferred_vector_width_int_valid_;
1448 mutable cl_uint preferred_vector_width_int_;
1450 mutable bool preferred_vector_width_long_valid_;
1451 mutable cl_uint preferred_vector_width_long_;
1453 mutable bool preferred_vector_width_float_valid_;
1454 mutable cl_uint preferred_vector_width_float_;
1456 mutable bool preferred_vector_width_double_valid_;
1457 mutable cl_uint preferred_vector_width_double_;
1459 mutable bool preferred_vector_width_half_valid_;
1460 mutable cl_uint preferred_vector_width_half_;
1462 mutable bool profile_valid_;
1463 mutable char profile_[32];
1465 mutable bool profiling_timer_resolution_valid_;
1466 mutable size_t profiling_timer_resolution_;
1468 mutable bool queue_properties_valid_;
1469 mutable cl_command_queue_properties queue_properties_;
1471 mutable bool single_fp_config_valid_;
1472 mutable cl_device_fp_config single_fp_config_;
1474 mutable bool type_valid_;
1475 mutable cl_device_type type_;
1477 mutable bool vendor_valid_;
1478 mutable char vendor_[256];
1480 mutable bool vendor_id_valid_;
1481 mutable cl_uint vendor_id_;
1483 mutable bool version_valid_;
1484 mutable char version_[256];
1486 mutable bool driver_version_valid_;
1487 mutable char driver_version_[256];
1489 mutable bool architecture_family_valid_;
size_t image3d_max_depth() const
Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
cl_device_id id() const
Returns the OpenCL device id.
cl_uint min_data_type_align_size() const
The smallest alignment in bytes which can be used for any data type.
cl_device_exec_capabilities execution_capabilities() const
Describes the execution capabilities of the device.
std::string driver_version() const
OpenCL software driver version string.
cl_bool image_support() const
Is CL_TRUE if images are supported by the OpenCL device and CL_FALSE otherwise.
cl_uint max_read_image_args() const
Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if CL...
cl_device_mem_cache_type global_mem_cache_type() const
Type of global memory cache supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and CL_READ_WRITE_CACHE.
cl_uint preferred_vector_width_char() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
cl_bool compiler_available() const
Is CL_FALSE if the implementation does not have a compiler available to compile the program source...
cl_ulong max_constant_buffer_size() const
Max size in bytes of a constant buffer allocation. The minimum value is 64 KB.
bool operator==(device const &other) const
cl_uint max_write_image_args() const
Max number of simultaneous image objects that can be written to by a kernel. The minimum value is 8 i...
cl_ulong max_mem_alloc_size() const
Max size of memory object allocation in bytes. The minimum value is max(1/4th of CL_DEVICE_GLOBAL_MEM...
cl_uint preferred_vector_width_int() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
cl_uint preferred_vector_width_short() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
A class representing a compute device (e.g. a GPU)
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
std::string queue_properties_to_string(cl_command_queue_properties queue_prop) const
cl_device_type type() const
The OpenCL device type.
bool operator==(cl_device_id other) const
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
cl_uint preferred_vector_width_float() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
cl_bool error_correction_support() const
Is CL_TRUE if the device implements error correction for all accesses to compute device memory (globa...
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
#define VIENNACL_ERR_CHECK(err)
size_t image3d_max_width() const
Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
std::string fp_config_to_string(cl_device_fp_config conf) const
Helper function converting a floating point configuration to a string.
cl_device_local_mem_type local_mem_type() const
Type of local memory supported. This can be set to CL_LOCAL implying dedicated local memory storage s...
size_t image2d_max_height() const
Max height of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
size_t image3d_max_height() const
Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
cl_platform_id platform() const
The platform associated with this device.
Implementation of a smart-pointer-like class for handling OpenCL handles.
size_t image2d_max_width() const
Max width of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
cl_uint global_mem_cacheline_size() const
Size of global memory cache line in bytes.
Various utility implementations for dispatching with respect to the different devices available on th...
cl_ulong global_mem_cache_size() const
Size of global memory cache in bytes.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
cl_uint max_constant_args() const
Max number of arguments declared with the __constant qualifier in a kernel. The minimum value is 8...
device(const device &other)
cl_device_fp_config single_fp_config() const
Describes single precision floating-point capability of the OpenCL device.
device_architecture_family get_architecture_family(cl_uint vendor_id, std::string const &name)
std::string mem_cache_type_to_string(cl_device_mem_cache_type cachetype) const
std::string name() const
Device name string.
device_architecture_family architecture_family() const
Device architecture family.
cl_ulong global_mem_size() const
Size of global memory in bytes.
std::string double_support_extension() const
ViennaCL convenience function: Returns the device extension which enables double precision (usually c...
Error handling for the OpenCL layer of ViennaCL.
cl_bool available() const
Is CL_TRUE if the device is available and CL_FALSE if the device is not available.
std::string extensions() const
Returns a space-separated list of extension names (the extension names themselves do not contain any ...
std::string version() const
OpenCL version string. Returns the OpenCL version supported by the device.
cl_uint max_work_item_dimensions() const
Maximum dimensions that specify the global and local work-item IDs used by the data parallel executio...
std::string exec_capabilities_to_string(cl_device_exec_capabilities cap) const
cl_uint preferred_vector_width_double() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
cl_bool endian_little() const
Is CL_TRUE if the OpenCL device is a little endian device and CL_FALSE otherwise. ...
cl_uint address_bits() const
The default compute device address space size specified as an unsigned integer value in bits...
std::string vendor() const
Vendor name string.
cl_uint max_clock_frequency() const
Maximum configured clock frequency of the device in MHz.
device_architecture_family
std::string device_type_to_string(cl_device_type dev_type) const
std::string profile() const
OpenCL profile string. Returns the profile name supported by the device (FULL_PROFILE or EMBEDDED_PROFILE).
std::string convert_to_string(std::vector< size_t > const &vec) const
std::string local_mem_type_to_string(cl_device_local_mem_type loc_mem_type) const
cl_uint max_samplers() const
Maximum number of samplers that can be used in a kernel. The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
cl_uint max_compute_units() const
The number of parallel compute cores on the OpenCL device. The minimum value is 1.
size_t profiling_timer_resolution() const
Describes the resolution of device timer. This is measured in nanoseconds.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
std::string full_info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with all device properties defined in the OpenCL 1.1 standard, listed in alphabetical order. Use info() for a short overview.
cl_uint mem_base_addr_align() const
Describes the alignment in bits of the base address of any allocated memory object.
size_t max_parameter_size() const
Max size in bytes of the arguments that can be passed to a kernel. The minimum value is 1024...
cl_uint preferred_vector_width_long() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
cl_command_queue_properties queue_properties() const
Describes the command-queue properties supported by the device.