gtsam_points
Loading...
Searching...
No Matches
cuda_memory.hpp
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2021 Kenji Koide (k.koide@aist.go.jp)
3#pragma once
4
5#include <memory>
6#include <iostream>
7
// Forward declaration of the stream struct so that the functions below can take
// a CUstream_st* stream handle without this header including any CUDA header.
struct CUstream_st;
9
10namespace gtsam_points {
11
// Simple wrapper functions to manipulate objects on GPU memory from non-CUDA code
// Only the declarations are visible in this header; the definitions live elsewhere.
// The templates further down pass sizeof(T) as `size`, i.e. `size` is a byte count.
// NOTE(review): whether these calls are asynchronous with respect to `stream` is not
// visible from this header — confirm against the implementation.

// Allocates `size` bytes and returns a pointer to the allocation.
// `stream` defaults to nullptr.
void* cuda_malloc(size_t size, CUstream_st* stream = nullptr);

// Allocates `size` bytes and fills the allocation with the `size` bytes read from `data`.
// Presumably `data` is a host pointer and the result a device pointer — verify in the implementation.
void* cuda_malloc_and_upload(const void* data, size_t size, CUstream_st* stream = nullptr);

// Releases an allocation obtained from cuda_malloc / cuda_malloc_and_upload.
void cuda_free(void* ptr, CUstream_st* stream = nullptr);

// Copies `size` bytes from `src` (host) to `dst` (device), as the function name states.
void cuda_host_to_device(void* dst, const void* src, size_t size, CUstream_st* stream = nullptr);

// Copies `size` bytes from `src` (device) to `dst` (host), as the function name states.
void cuda_device_to_host(void* dst, const void* src, size_t size, CUstream_st* stream = nullptr);

// Writes memory statistics into `*free` and `*total`.
// Presumably byte counts of free and total device memory — TODO confirm units.
void cuda_mem_get_info(size_t* free, size_t* total);
25
26template <typename T>
27T* cuda_new(CUstream_st* stream = nullptr) {
28 return cuda_malloc(sizeof(T), stream);
29}
30
31template <typename T>
32T* cuda_new(const T& data, CUstream_st* stream = nullptr) {
33 void* ptr = cuda_malloc_and_upload(reinterpret_cast<const void*>(&data), sizeof(T), stream);
34 return reinterpret_cast<T*>(ptr);
35}
36
37template <typename T>
38void cuda_delete(T* ptr, CUstream_st* stream = nullptr) {
39 cuda_free(reinterpret_cast<void*>(ptr), stream);
40}
41
42template <typename T>
43void cuda_host_to_device(T* dst, const T* src, size_t size, CUstream_st* stream = nullptr) {
44 cuda_host_to_device(reinterpret_cast<void*>(dst), reinterpret_cast<const void*>(src), size, stream);
45}
46
47template <typename T>
48void cuda_device_to_host(T* dst, const T* src, size_t size, CUstream_st* stream = nullptr) {
49 cuda_device_to_host(reinterpret_cast<void*>(dst), reinterpret_cast<const void*>(src), size, stream);
50}
51
52template <typename T>
53std::shared_ptr<T> cuda_make_shared(CUstream_st* stream = nullptr) {
54 T* ptr = reinterpret_cast<T*>(cuda_malloc(sizeof(T), stream));
55 return std::shared_ptr<T>(ptr, [](T* ptr) { cuda_free(ptr); });
56}
57
58template <typename T>
59std::shared_ptr<T> cuda_make_shared(const T& data, CUstream_st* stream = nullptr) {
60 T* ptr = reinterpret_cast<T*>(cuda_malloc_and_upload(&data, sizeof(T), stream));
61 return std::shared_ptr<T>(ptr, [](T* ptr) { cuda_free(ptr); });
62}
63
64template <typename T>
65auto cuda_make_unique(CUstream_st* stream = nullptr) {
66 T* ptr = reinterpret_cast<T*>(cuda_malloc(sizeof(T), stream));
67 const auto deleter = [](T* ptr) { cuda_free(ptr); };
68 return std::unique_ptr<T, decltype(deleter)>(ptr);
69}
70
71template <typename T>
72auto cuda_make_unique(const T& data, CUstream_st* stream = nullptr) {
73 T* ptr = cuda_malloc_and_upload(data, stream);
74 const auto deleter = [](T* ptr) { cuda_free(ptr); };
75 return std::unique_ptr<T, decltype(deleter)>(ptr);
76}
77
78template <typename T>
79T cuda_download(const T* ptr, CUstream_st* stream = nullptr) {
80 T data;
81 cuda_device_to_host(&data, ptr, sizeof(T), stream);
82 return data;
83}
84
85template <typename T>
86void cuda_upload(T* dst, const T* src, CUstream_st* stream = nullptr) {
87 cuda_host_to_device(dst, src, sizeof(T), stream);
88}
89
90} // namespace gtsam_points