Actual source code: cupmallocator.hpp
1: #pragma once
3: #include <petsc/private/cpp/object_pool.hpp>
5: #include "../segmentedmempool.hpp"
6: #include "cupmthrustutility.hpp"
8: #include <thrust/device_ptr.h>
9: #include <thrust/fill.h>
11: #include <limits> // std::numeric_limits
13: namespace Petsc
14: {
16: namespace device
17: {
19: namespace cupm
20: {
22: // ==========================================================================================
23: // CUPM Host Allocator
24: // ==========================================================================================
26: template <DeviceType T, typename PetscType = char>
27: class HostAllocator;
29: // Allocator class to allocate pinned host memory for use with device
30: template <DeviceType T, typename PetscType>
31: class HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
32: public:
33: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
34: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
35: using real_value_type = typename base_type::real_value_type;
36: using size_type = typename base_type::size_type;
37: using value_type = typename base_type::value_type;
39: template <typename U>
40: static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
41: template <typename U>
42: static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
43: template <typename U>
44: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
45: };
47: template <DeviceType T, typename P>
48: template <typename U>
49: inline PetscErrorCode HostAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *) noexcept
50: {
51: PetscFunctionBegin;
52: PetscCall(PetscCUPMMallocHost(ptr, n));
53: PetscFunctionReturn(PETSC_SUCCESS);
54: }
56: template <DeviceType T, typename P>
57: template <typename U>
58: inline PetscErrorCode HostAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *) noexcept
59: {
60: PetscFunctionBegin;
61: PetscCallCUPM(cupmFreeHost(ptr));
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
65: template <DeviceType T, typename P>
66: template <typename U>
67: inline PetscErrorCode HostAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
68: {
69: PetscFunctionBegin;
70: PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true));
71: PetscFunctionReturn(PETSC_SUCCESS);
72: }
74: // ==========================================================================================
75: // CUPM Device Allocator
76: // ==========================================================================================
78: template <DeviceType T, typename PetscType = char>
79: class DeviceAllocator;
81: template <DeviceType T, typename PetscType>
82: class DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
83: public:
84: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
85: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
86: using real_value_type = typename base_type::real_value_type;
87: using size_type = typename base_type::size_type;
88: using value_type = typename base_type::value_type;
90: template <typename U>
91: static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
92: template <typename U>
93: static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
94: template <typename U>
95: static PetscErrorCode zero(value_type *, size_type, const StreamBase<U> *) noexcept;
96: template <typename U>
97: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
98: template <typename U>
99: static PetscErrorCode set_canary(value_type *, size_type, const StreamBase<U> *) noexcept;
100: };
102: template <DeviceType T, typename P>
103: template <typename U>
104: inline PetscErrorCode DeviceAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *stream) noexcept
105: {
106: PetscFunctionBegin;
107: PetscCall(PetscCUPMMallocAsync(ptr, n, stream->get_stream()));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: template <DeviceType T, typename P>
112: template <typename U>
113: inline PetscErrorCode DeviceAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *stream) noexcept
114: {
115: PetscFunctionBegin;
116: PetscCallCUPM(cupmFreeAsync(ptr, stream->get_stream()));
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: template <DeviceType T, typename P>
121: template <typename U>
122: inline PetscErrorCode DeviceAllocator<T, P>::zero(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
123: {
124: PetscFunctionBegin;
125: PetscCall(PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true));
126: PetscFunctionReturn(PETSC_SUCCESS);
127: }
129: template <DeviceType T, typename P>
130: template <typename U>
131: inline PetscErrorCode DeviceAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
132: {
133: PetscFunctionBegin;
134: PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true));
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
138: template <DeviceType T, typename P>
139: template <typename U>
140: inline PetscErrorCode DeviceAllocator<T, P>::set_canary(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
141: {
142: using limit_t = std::numeric_limits<real_value_type>;
143: const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max();
144: const auto xptr = thrust::device_pointer_cast(ptr);
146: PetscFunctionBegin;
147: PetscCallThrust(THRUST_CALL(thrust::fill, stream->get_stream(), xptr, xptr + n, canary));
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
151: } // namespace cupm
153: } // namespace device
155: } // namespace Petsc