ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
cuda.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_BACKEND_CUDA_HPP_
2 #define VIENNACL_BACKEND_CUDA_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
26 #include <iostream>
27 #include <vector>
28 #include <cassert>
29 #include <stdexcept>
30 #include <sstream>
31 
32 #include "viennacl/forwards.h"
34 
35 // includes CUDA
36 #include <cuda_runtime.h>
37 
38 #define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__)
39 
40 namespace viennacl
41 {
42 namespace backend
43 {
44 namespace cuda
45 {
46 
48 // Requirements for backend:
49 
50 // * memory_create(size, host_ptr)
51 // * memory_copy(src, dest, offset_src, offset_dest, size)
52 // * memory_write_from_main_memory(src, offset, size,
53 // dest, offset, size)
54 // * memory_read_to_main_memory(src, offset, size
55 // dest, offset, size)
56 // *
57 //
58 
59 class cuda_exception : public std::runtime_error
60 {
61 public:
62  cuda_exception(std::string const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {}
63 
64  cudaError_t error_code() const { return error_code_; }
65 
66 private:
67  cudaError_t error_code_;
68 };
69 
70 namespace detail
71 {
72 
73  inline void cuda_error_check(cudaError error_code, const char *file, const int line )
74  {
75  if (cudaSuccess != error_code)
76  {
77  std::stringstream ss;
78  ss << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl;
79  throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code);
80  }
81  }
82 
83 
85  template<typename U>
86  struct cuda_deleter
87  {
88  void operator()(U * p) const
89  {
90  //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
91  cudaFree(p);
92  }
93  };
94 
95 }
96 
103 inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
104 {
105  void * dev_ptr = NULL;
106  VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
107  //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;
108 
109  if (!host_ptr)
110  return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());
111 
112  handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>());
113 
114  // copy data:
115  //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
116  cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
117 
118  return new_handle;
119 }
120 
121 
130 inline void memory_copy(handle_type const & src_buffer,
131  handle_type & dst_buffer,
132  vcl_size_t src_offset,
133  vcl_size_t dst_offset,
134  vcl_size_t bytes_to_copy)
135 {
136  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
137  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
138 
139  cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
140  reinterpret_cast<void *>(src_buffer.get() + src_offset),
141  bytes_to_copy,
142  cudaMemcpyDeviceToDevice);
143 }
144 
145 
154 inline void memory_write(handle_type & dst_buffer,
155  vcl_size_t dst_offset,
156  vcl_size_t bytes_to_copy,
157  const void * ptr,
158  bool async = false)
159 {
160  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
161 
162  if (async)
163  cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
164  reinterpret_cast<const char *>(ptr),
165  bytes_to_copy,
166  cudaMemcpyHostToDevice);
167  else
168  cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
169  reinterpret_cast<const char *>(ptr),
170  bytes_to_copy,
171  cudaMemcpyHostToDevice);
172 }
173 
174 
183 inline void memory_read(handle_type const & src_buffer,
184  vcl_size_t src_offset,
185  vcl_size_t bytes_to_copy,
186  void * ptr,
187  bool async = false)
188 {
189  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
190 
191  if (async)
192  cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
193  reinterpret_cast<char *>(src_buffer.get()) + src_offset,
194  bytes_to_copy,
195  cudaMemcpyDeviceToHost);
196  else
197  cudaMemcpy(reinterpret_cast<char *>(ptr),
198  reinterpret_cast<char *>(src_buffer.get()) + src_offset,
199  bytes_to_copy,
200  cudaMemcpyDeviceToHost);
201 }
202 
203 } //cuda
204 } //backend
205 } //viennacl
206 #endif
void cuda_error_check(cudaError error_code, const char *file, const int line)
Definition: cuda.hpp:73
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
Definition: cuda.hpp:154
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory star...
Definition: cuda.hpp:130
This file provides the forward declarations for the main types used within ViennaCL.
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
Definition: blas3.hpp:36
viennacl::tools::shared_ptr< char > handle_type
Definition: cuda.hpp:47
std::size_t vcl_size_t
Definition: forwards.h:75
#define VIENNACL_CUDA_ERROR_CHECK(err)
Definition: cuda.hpp:38
Functor for deleting a CUDA handle. Used within the smart pointer class.
Definition: cuda.hpp:86
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided...
Definition: cuda.hpp:103
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM.
Definition: cuda.hpp:183
cudaError_t error_code() const
Definition: cuda.hpp:64
cuda_exception(std::string const &what_arg, cudaError_t err_code)
Definition: cuda.hpp:62