10namespace gtsam_points {
14void* cuda_malloc(
size_t size, CUstream_st* stream =
nullptr);
16void* cuda_malloc_and_upload(
const void* data,
size_t size, CUstream_st* stream =
nullptr);
18void cuda_free(
void* ptr, CUstream_st* stream =
nullptr);
20void cuda_host_to_device(
void* dst,
const void* src,
size_t size, CUstream_st* stream =
nullptr);
22void cuda_device_to_host(
void* dst,
const void* src,
size_t size, CUstream_st* stream =
nullptr);
/// Declaration only; the definition is not part of this header section.
/// NOTE(review): presumably reports free and total device memory through the two
/// out-pointers — confirm units and null-pointer handling against the implementation.
/// @param free   Output location for the "free" value.
/// @param total  Output location for the "total" value.
void cuda_mem_get_info(size_t* free, size_t* total);
27T* cuda_new(CUstream_st* stream =
nullptr) {
28 return cuda_malloc(
sizeof(T), stream);
32T* cuda_new(
const T& data, CUstream_st* stream =
nullptr) {
33 void* ptr = cuda_malloc_and_upload(
reinterpret_cast<const void*
>(&data),
sizeof(T), stream);
34 return reinterpret_cast<T*
>(ptr);
38void cuda_delete(T* ptr, CUstream_st* stream =
nullptr) {
39 cuda_free(
reinterpret_cast<void*
>(ptr), stream);
43void cuda_host_to_device(T* dst,
const T* src,
size_t size, CUstream_st* stream =
nullptr) {
44 cuda_host_to_device(
reinterpret_cast<void*
>(dst),
reinterpret_cast<const void*
>(src), size, stream);
48void cuda_device_to_host(T* dst,
const T* src,
size_t size, CUstream_st* stream =
nullptr) {
49 cuda_device_to_host(
reinterpret_cast<void*
>(dst),
reinterpret_cast<const void*
>(src), size, stream);
53std::shared_ptr<T> cuda_make_shared(CUstream_st* stream =
nullptr) {
54 T* ptr =
reinterpret_cast<T*
>(cuda_malloc(
sizeof(T), stream));
55 return std::shared_ptr<T>(ptr, [](T* ptr) { cuda_free(ptr); });
59std::shared_ptr<T> cuda_make_shared(
const T& data, CUstream_st* stream =
nullptr) {
60 T* ptr =
reinterpret_cast<T*
>(cuda_malloc_and_upload(&data,
sizeof(T), stream));
61 return std::shared_ptr<T>(ptr, [](T* ptr) { cuda_free(ptr); });
65auto cuda_make_unique(CUstream_st* stream =
nullptr) {
66 T* ptr =
reinterpret_cast<T*
>(cuda_malloc(
sizeof(T), stream));
67 const auto deleter = [](T* ptr) { cuda_free(ptr); };
68 return std::unique_ptr<T, decltype(deleter)>(ptr);
72auto cuda_make_unique(
const T& data, CUstream_st* stream =
nullptr) {
73 T* ptr = cuda_malloc_and_upload(data, stream);
74 const auto deleter = [](T* ptr) { cuda_free(ptr); };
75 return std::unique_ptr<T, decltype(deleter)>(ptr);
79T cuda_download(
const T* ptr, CUstream_st* stream =
nullptr) {
81 cuda_device_to_host(&data, ptr,
sizeof(T), stream);
86void cuda_upload(T* dst,
const T* src, CUstream_st* stream =
nullptr) {
87 cuda_host_to_device(dst, src,
sizeof(T), stream);