1 #ifndef VIENNACL_BACKEND_CUDA_HPP_
2 #define VIENNACL_BACKEND_CUDA_HPP_
32 #include <cuda_runtime.h>
34 #define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__)
61 if (cudaSuccess != error_code)
63 std::cerr << file <<
"(" << line <<
"): " <<
": CUDA Runtime API error " << error_code <<
": " << cudaGetErrorString( error_code ) << std::endl;
90 void * dev_ptr = NULL;
101 cudaMemcpy(new_handle.
get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
116 handle_type & dst_buffer,
121 assert( (dst_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
122 assert( (src_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
124 cudaMemcpy(reinterpret_cast<void *>(dst_buffer.
get() + dst_offset),
125 reinterpret_cast<void *>(src_buffer.
get() + src_offset),
127 cudaMemcpyDeviceToDevice);
145 assert( (dst_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
148 cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.
get()) + dst_offset,
149 reinterpret_cast<const char *>(ptr),
151 cudaMemcpyHostToDevice);
153 cudaMemcpy(reinterpret_cast<char *>(dst_buffer.
get()) + dst_offset,
154 reinterpret_cast<const char *>(ptr),
156 cudaMemcpyHostToDevice);
174 assert( (src_buffer.
get() != NULL) &&
bool(
"Memory not initialized!"));
177 cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
178 reinterpret_cast<char *>(src_buffer.
get()) + src_offset,
180 cudaMemcpyDeviceToHost);
182 cudaMemcpy(reinterpret_cast<char *>(ptr),
183 reinterpret_cast<char *>(src_buffer.
get()) + src_offset,
185 cudaMemcpyDeviceToHost);
void cuda_error_check(cudaError error_code, const char *file, const int line)
void operator()(U *p) const
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the CUDA device (device-to-device copy).
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
viennacl::tools::shared_ptr< char > handle_type
#define VIENNACL_CUDA_ERROR_CHECK(err)
Functor for deleting a CUDA handle. Used within the smart pointer class.
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided, the new buffer is initialized with 'size_in_bytes' bytes copied from that host pointer.
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM.