1 #ifndef VIENNACL_BACKEND_CUDA_HPP_
2 #define VIENNACL_BACKEND_CUDA_HPP_
36 #include <cuda_runtime.h>
38 #define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__)
62 cuda_exception(std::string
const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {}
67 cudaError_t error_code_;
75 if (cudaSuccess != error_code)
78 ss << file <<
"(" << line <<
"): " <<
": CUDA Runtime API error " << error_code <<
": " << cudaGetErrorString( error_code ) << std::endl;
105 void * dev_ptr = NULL;
116 cudaMemcpy(new_handle.
get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
131 handle_type & dst_buffer,
136 assert( (dst_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
137 assert( (src_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
139 cudaMemcpy(reinterpret_cast<void *>(dst_buffer.
get() + dst_offset),
140 reinterpret_cast<void *>(src_buffer.
get() + src_offset),
142 cudaMemcpyDeviceToDevice);
160 assert( (dst_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
163 cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.
get()) + dst_offset,
164 reinterpret_cast<const char *>(ptr),
166 cudaMemcpyHostToDevice);
168 cudaMemcpy(reinterpret_cast<char *>(dst_buffer.
get()) + dst_offset,
169 reinterpret_cast<const char *>(ptr),
171 cudaMemcpyHostToDevice);
189 assert( (src_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
192 cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
193 reinterpret_cast<char *>(src_buffer.
get()) + src_offset,
195 cudaMemcpyDeviceToHost);
197 cudaMemcpy(reinterpret_cast<char *>(ptr),
198 reinterpret_cast<char *>(src_buffer.
get()) + src_offset,
200 cudaMemcpyDeviceToHost);
void cuda_error_check(cudaError error_code, const char *file, const int line)
void operator()(U *p) const
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the CUDA device.
This file provides the forward declarations for the main types used within ViennaCL.
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
viennacl::tools::shared_ptr< char > handle_type
#define VIENNACL_CUDA_ERROR_CHECK(err)
Functor for deleting a CUDA handle. Used within the smart pointer class.
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided, 'size_in_bytes' bytes are copied from that host pointer into the newly created buffer.
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM.
cudaError_t error_code() const
cuda_exception(std::string const &what_arg, cudaError_t err_code)