Actual source code: segmentedmempool.hpp
1: #pragma once
3: #include <petsc/private/deviceimpl.h>
5: #include <petsc/private/cpp/macros.hpp>
6: #include <petsc/private/cpp/type_traits.hpp>
7: #include <petsc/private/cpp/utility.hpp>
8: #include <petsc/private/cpp/register_finalize.hpp>
9: #include <petsc/private/cpp/memory.hpp>
11: #include <limits>
12: #include <deque>
13: #include <vector>
15: namespace Petsc
16: {
18: namespace device
19: {
21: template <typename T>
22: class StreamBase {
23: public:
24: using id_type = int;
25: using derived_type = T;
27: static const id_type INVALID_ID;
29: // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
30: template <typename U = T>
31: PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());
33: PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }
35: template <typename E>
36: PetscErrorCode record_event(E &&event) const noexcept
37: {
38: return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
39: }
41: template <typename E>
42: PetscErrorCode wait_for_event(E &&event) const noexcept
43: {
44: return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
45: }
47: protected:
48: constexpr StreamBase() noexcept = default;
50: struct default_event_type { };
51: using default_stream_type = std::nullptr_t;
53: PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }
55: PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }
57: template <typename U = T>
58: static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
59: {
60: return PETSC_SUCCESS;
61: }
63: template <typename U = T>
64: static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
65: {
66: return PETSC_SUCCESS;
67: }
68: };
70: template <typename T>
71: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;
73: struct DefaultStream : StreamBase<DefaultStream> {
74: using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
75: using id_type = typename StreamBase<DefaultStream>::id_type;
76: using event_type = typename StreamBase<DefaultStream>::default_event_type;
77: };
79: } // namespace device
81: namespace memory
82: {
84: namespace impl
85: {
87: // ==========================================================================================
88: // MemoryChunk
89: //
90: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
91: // MemoryBlock and its size/capacity
92: // ==========================================================================================
94: template <typename EventType>
95: class MemoryChunk {
96: public:
97: using event_type = EventType;
98: using size_type = std::size_t;
100: MemoryChunk(size_type, size_type) noexcept;
101: explicit MemoryChunk(size_type) noexcept;
103: MemoryChunk(MemoryChunk &&) noexcept;
104: MemoryChunk &operator=(MemoryChunk &&) noexcept;
106: MemoryChunk(const MemoryChunk &) noexcept = delete;
107: MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;
109: PETSC_NODISCARD size_type start() const noexcept { return start_; }
110: PETSC_NODISCARD size_type size() const noexcept { return size_; }
111: // REVIEW ME:
112: // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
113: // theory only the last chunk needs to do this
114: PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
115: PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }
117: template <typename U>
118: PetscErrorCode release(const device::StreamBase<U> *) noexcept;
119: template <typename U>
120: PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
121: template <typename U>
122: PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
123: PetscErrorCode resize(size_type) noexcept;
124: PETSC_NODISCARD bool contains(size_type) const noexcept;
126: private:
127: // clang-format off
128: event_type event_{}; // event recorded when the chunk was released
129: bool open_ = true; // is this chunk open?
130: // id of the last stream to use the chunk, populated on release
131: int stream_id_ = device::DefaultStream::INVALID_ID;
132: size_type size_ = 0; // size of the chunk
133: const size_type start_ = 0; // offset from the start of the owning block
134: // clang-format on
136: template <typename U>
137: PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
138: };
140: // ==========================================================================================
141: // MemoryChunk - Private API
142: // ==========================================================================================
144: // asks and answers the question: can this stream claim this chunk without serializing?
145: template <typename E>
146: template <typename U>
147: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
148: {
149: return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
150: }
152: // ==========================================================================================
153: // MemoryChunk - Public API
154: // ==========================================================================================
156: template <typename E>
157: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
158: {
159: }
161: template <typename E>
162: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
163: {
164: }
166: template <typename E>
167: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
168: event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
169: {
170: }
172: template <typename E>
173: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
174: {
175: PetscFunctionBegin;
176: if (this != &other) {
177: event_ = std::move(other.event_);
178: open_ = util::exchange(other.open_, false);
179: stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
180: size_ = util::exchange(other.size_, 0);
181: start_ = std::move(other.start_);
182: }
183: PetscFunctionReturn(*this);
184: }
186: /*
187: MemoryChunk::release - release a chunk on a stream
189: Input Parameter:
190: . stream - the stream to release the chunk with
192: Notes:
193: Inserts a release operation on stream and records the state of stream at the time this
194: routine was called.
196: Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
197: the chunk without serialization.
199: If another stream attempts to claim the chunk they must wait for the recorded event before
200: claiming the chunk.
201: */
202: template <typename E>
203: template <typename U>
204: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
205: {
206: PetscFunctionBegin;
207: open_ = true;
208: stream_id_ = stream->get_id();
209: PetscCall(stream->record_event(event_));
210: PetscFunctionReturn(PETSC_SUCCESS);
211: }
213: /*
214: MemoryChunk::claim - attempt to claim a particular chunk
216: Input Parameters:
217: + stream - the stream on which to attempt to claim
218: . req_size - the requested size (in elements) to attempt to claim
219: - serialize - (optional, false) whether the claimant allows serialization
221: Output Parameter:
222: . success - true if the chunk was claimed, false otherwise
223: */
224: template <typename E>
225: template <typename U>
226: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
227: {
228: PetscFunctionBegin;
229: if ((*success = can_claim(stream, req_size, serialize))) {
230: if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
231: PetscCall(resize(req_size));
232: open_ = false;
233: }
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: /*
238: MemoryChunk::can_claim - test whether a particular chunk can be claimed
240: Input Parameters:
241: + stream - the stream on which to attempt to claim
242: . req_size - the requested size (in elements) to attempt to claim
243: - serialize - whether the claimant allows serialization
245: Output:
246: . [return] - true if the chunk is claimable given the configuration, false otherwise
247: */
248: template <typename E>
249: template <typename U>
250: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
251: {
252: if (open_ && (req_size <= capacity())) {
253: // fully compatible
254: if (stream_compat_(stream)) return true;
255: // stream wasn't compatible, but could claim if we serialized
256: if (serialize) return true;
257: // incompatible stream and did not want to serialize
258: }
259: return false;
260: }
262: /*
263: MemoryChunk::resize - grow a chunk to new size
265: Input Parameter:
266: . newsize - the new size Requested
268: Notes:
269: newsize cannot be larger than capacity
270: */
271: template <typename E>
272: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
273: {
274: PetscFunctionBegin;
275: PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
276: size_ = newsize;
277: PetscFunctionReturn(PETSC_SUCCESS);
278: }
280: /*
281: MemoryChunk::contains - query whether a memory chunk contains a particular offset
283: Input Parameters:
284: . offset - The offset from the MemoryBlock start
286: Notes:
287: Returns true if the chunk contains the offset, false otherwise
288: */
289: template <typename E>
290: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
291: {
292: return (offset >= start()) && (offset < total_offset());
293: }
295: // ==========================================================================================
296: // MemoryBlock
297: //
298: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
299: // and restoring a block is thread-safe (so may be used by multiple device streams).
300: // ==========================================================================================
302: template <typename T, typename AllocatorType, typename StreamType>
303: class MemoryBlock {
304: public:
305: using value_type = T;
306: using allocator_type = AllocatorType;
307: using stream_type = StreamType;
308: using event_type = typename stream_type::event_type;
309: using chunk_type = MemoryChunk<event_type>;
310: using size_type = typename chunk_type::size_type;
311: using chunk_list_type = std::vector<chunk_type>;
313: template <typename U>
314: MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;
316: ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);
318: MemoryBlock(MemoryBlock &&) noexcept;
319: MemoryBlock &operator=(MemoryBlock &&) noexcept;
321: // memory blocks are not copyable
322: MemoryBlock(const MemoryBlock &) = delete;
323: MemoryBlock &operator=(const MemoryBlock &) = delete;
325: /* --- actual functions --- */
326: PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
327: PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
328: PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept;
329: PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;
331: PETSC_NODISCARD size_type size() const noexcept { return size_; }
332: PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
333: PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }
335: private:
336: value_type *mem_{};
337: allocator_type *allocator_{};
338: size_type size_{};
339: chunk_list_type chunks_{};
341: PetscErrorCode clear_(const stream_type *) noexcept;
342: };
344: // ==========================================================================================
345: // MemoryBlock - Private API
346: // ==========================================================================================
348: // clear the memory block, called from destructors and move assignment/construction
349: template <typename T, typename A, typename S>
350: PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
351: {
352: PetscFunctionBegin;
353: if (PetscLikely(mem_)) {
354: PetscCall(allocator_->deallocate(mem_, stream));
355: mem_ = nullptr;
356: }
357: size_ = 0;
358: PetscCallCXX(chunks_.clear());
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
362: // ==========================================================================================
363: // MemoryBlock - Public API
364: // ==========================================================================================
366: // default constructor, allocates memory immediately
367: template <typename T, typename A, typename S>
368: template <typename U>
369: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
370: {
371: PetscFunctionBegin;
372: PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
373: PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
374: PetscFunctionReturnVoid();
375: }
377: template <typename T, typename A, typename S>
378: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
379: {
380: stream_type stream;
382: PetscFunctionBegin;
383: PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
384: PetscFunctionReturnVoid();
385: }
387: template <typename T, typename A, typename S>
388: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
389: {
390: }
392: template <typename T, typename A, typename S>
393: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
394: {
395: PetscFunctionBegin;
396: if (this != &other) {
397: stream_type stream;
399: PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
400: mem_ = util::exchange(other.mem_, nullptr);
401: allocator_ = other.allocator_;
402: size_ = util::exchange(other.size_, 0);
403: chunks_ = std::move(other.chunks_);
404: }
405: PetscFunctionReturn(*this);
406: }
408: /*
409: MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
410: */
411: template <typename T, typename A, typename S>
412: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
413: {
414: // each pool is linear in memory, so it suffices to check the bounds
415: return (ptr >= mem_) && (ptr < std::next(mem_, size()));
416: }
418: /*
419: MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock
421: Input Parameters:
422: + req_size - the requested size of the allocation (in elements)
423: . ptr - ptr to fill
424: - stream - stream to fill the pointer on
426: Output Parameter:
427: . success - true if chunk was gotten, false otherwise
429: Notes:
430: If the current memory could not satisfy the memory request, ptr is unchanged
431: */
432: template <typename T, typename A, typename S>
433: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
434: {
435: PetscFunctionBegin;
436: *success = false;
437: if (req_size <= size()) {
438: const auto try_create_chunk = [&]() {
439: const auto was_empty = chunks_.empty();
440: const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();
442: PetscFunctionBegin;
443: if (block_alloced + req_size <= size()) {
444: PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
445: PetscCall(chunks_.back().claim(stream, req_size, success));
446: *ptr = mem_ + block_alloced;
447: if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
448: }
449: PetscFunctionReturn(PETSC_SUCCESS);
450: };
451: const auto try_find_open_chunk = [&](bool serialize = false) {
452: PetscFunctionBegin;
453: for (auto &chunk : chunks_) {
454: PetscCall(chunk.claim(stream, req_size, success, serialize));
455: if (*success) {
456: *ptr = mem_ + chunk.start();
457: break;
458: }
459: }
460: PetscFunctionReturn(PETSC_SUCCESS);
461: };
462: const auto try_steal_other_stream_chunk = [&]() {
463: PetscFunctionBegin;
464: PetscCall(try_find_open_chunk(true));
465: PetscFunctionReturn(PETSC_SUCCESS);
466: };
468: // search previously distributed chunks, but only claim one if it is on the same stream
469: // as us
470: PetscCall(try_find_open_chunk());
472: // if we are here we couldn't reuse one of our own chunks so check first if the pool
473: // has room for a new one
474: if (!*success) PetscCall(try_create_chunk());
476: // try pruning dead chunks off the back, note we do this regardless of whether we are
477: // successful
478: while (chunks_.back().can_claim(stream, 0, false)) {
479: PetscCallCXX(chunks_.pop_back());
480: if (chunks_.empty()) {
481: // if chunks are empty it implies we have managed to claim (and subsequently destroy)
482: // our own chunk twice! something has gone wrong
483: PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
484: break;
485: }
486: }
488: // if previously unsuccessful see if enough space has opened up due to pruning. note that
489: // if the chunk list was emptied from the pruning this call must succeed in allocating a
490: // chunk, otherwise something is wrong
491: if (!*success) PetscCall(try_create_chunk());
493: // last resort, iterate over all chunks and see if we can steal one by waiting on the
494: // current owner to finish using it
495: if (!*success) PetscCall(try_steal_other_stream_chunk());
496: }
497: PetscFunctionReturn(PETSC_SUCCESS);
498: }
500: /*
501: MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock
503: Input Parameters:
504: + ptr - ptr to restore
505: - stream - stream to restore the pointer on
507: Output Parameter:
508: . success - true if chunk was restored, false otherwise
510: Notes:
511: ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
512: by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
513: without synchronization, but other streams may not do so until either serializing or the
514: stream is idle again.
515: */
516: template <typename T, typename A, typename S>
517: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
518: {
519: chunk_type *chunk = nullptr;
521: PetscFunctionBegin;
522: PetscCall(try_find_chunk(*ptr, &chunk));
523: if (chunk) {
524: PetscCall(chunk->release(stream));
525: *ptr = nullptr;
526: *success = true;
527: } else {
528: *success = false;
529: }
530: PetscFunctionReturn(PETSC_SUCCESS);
531: }
533: /*
534: MemoryBlock::try_find_chunk - try to find the chunk which owns ptr
536: Input Parameter:
537: . ptr - the pointer to look for
539: Output Parameter:
540: . ret_chunk - pointer to the owning chunk or nullptr if not found
541: */
542: template <typename T, typename A, typename S>
543: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
544: {
545: PetscFunctionBegin;
546: *ret_chunk = nullptr;
547: if (owns_pointer(ptr)) {
548: const auto offset = static_cast<size_type>(ptr - mem_);
550: for (auto &chunk : chunks_) {
551: if (chunk.contains(offset)) {
552: *ret_chunk = &chunk;
553: break;
554: }
555: }
557: PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
558: }
559: PetscFunctionReturn(PETSC_SUCCESS);
560: }
562: namespace detail
563: {
565: template <typename T>
566: struct real_type {
567: using type = T;
568: };
570: template <>
571: struct real_type<PetscScalar> {
572: using type = PetscReal;
573: };
575: } // namespace detail
577: template <typename T>
578: struct SegmentedMemoryPoolAllocatorBase {
579: using value_type = T;
580: using size_type = std::size_t;
581: using real_value_type = typename detail::real_type<T>::type;
583: template <typename U>
584: static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
585: template <typename U>
586: static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
587: template <typename U>
588: static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
589: template <typename U>
590: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
591: template <typename U>
592: static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
593: };
595: template <typename T>
596: template <typename U>
597: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
598: {
599: PetscFunctionBegin;
600: PetscCall(PetscMalloc1(n, ptr));
601: PetscFunctionReturn(PETSC_SUCCESS);
602: }
604: template <typename T>
605: template <typename U>
606: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
607: {
608: PetscFunctionBegin;
609: PetscCall(PetscFree(ptr));
610: PetscFunctionReturn(PETSC_SUCCESS);
611: }
613: template <typename T>
614: template <typename U>
615: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
616: {
617: PetscFunctionBegin;
618: PetscCall(PetscArrayzero(ptr, n));
619: PetscFunctionReturn(PETSC_SUCCESS);
620: }
622: template <typename T>
623: template <typename U>
624: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
625: {
626: PetscFunctionBegin;
627: PetscCall(PetscArraycpy(dest, src, n));
628: PetscFunctionReturn(PETSC_SUCCESS);
629: }
631: template <typename T>
632: template <typename U>
633: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
634: {
635: using limit_type = std::numeric_limits<real_value_type>;
636: constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();
638: PetscFunctionBegin;
639: for (size_type i = 0; i < n; ++i) ptr[i] = canary;
640: PetscFunctionReturn(PETSC_SUCCESS);
641: }
643: } // namespace impl
645: // ==========================================================================================
646: // SegmentedMemoryPool
647: //
648: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
649: // allocated buffer. This buffer is further split into memory "chunks" which control
650: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
651: //
652: // 1. Open:
653: // The chunk is free to be claimed by the next suitable allocation request. If the
654: // allocation request is made on the same stream as the chunk was deallocated on, no
655: // serialization needs to occur. If not, the allocating stream must wait for the
656: // event. Claiming the chunk "closes" the chunk.
657: //
658: // 2. Closed:
659: // The chunk has been claimed by an allocation request. It cannot be opened again until it
660: // is deallocated; doing so "opens" the chunk.
661: //
662: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
663: // an allocation request.
664: //
665: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
666: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
667: // request. This region exists _only_ at the end, as there are no gaps between chunks.
668: //
669: //
670: // |-----------------------------------------------------------------------------------------
671: // | SegmentedMemoryPool
672: // |
673: // | ||-------------||
674: // | || || -------------------------------------------------------------------
675: // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
676: // | || || | | | | | |
677: // | || || | x-----x-------x-----xx---------x---------x------x-----x
678: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
679: // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
680: // | || || | ---------------------------------------------------------
681: // | || || -------------------------------------------------------------------
682: // | ||-------------||
683: // | || ||
684: // | || ... ||
685: // | || ||
686: // ==========================================================================================
688: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
689: class SegmentedMemoryPool;
691: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
692: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
693: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
694: public:
695: using value_type = MemType;
696: using stream_type = StreamType;
697: using allocator_type = AllocType;
698: using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>;
699: using pool_type = std::deque<block_type>;
700: using size_type = typename block_type::size_type;
702: explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);
704: PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
705: PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
706: PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;
708: private:
709: pool_type pool_;
710: allocator_type allocator_;
711: size_type chunk_size_;
713: PetscErrorCode make_block_(size_type, const stream_type *) noexcept;
715: friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
716: PetscErrorCode register_finalize_(const stream_type *) noexcept;
717: PetscErrorCode finalize_() noexcept;
719: PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
720: };
722: // ==========================================================================================
723: // SegmentedMemoryPool - Private API
724: // ==========================================================================================
726: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
727: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
728: {
729: const auto block_size = std::max(size, chunk_size_);
731: PetscFunctionBegin;
732: PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
733: PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
734: PetscFunctionReturn(PETSC_SUCCESS);
735: }
737: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
738: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
739: {
740: PetscFunctionBegin;
741: PetscCall(make_block_(chunk_size_, stream));
742: PetscFunctionReturn(PETSC_SUCCESS);
743: }
745: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
746: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
747: {
748: PetscFunctionBegin;
749: PetscCallCXX(pool_.clear());
750: chunk_size_ = DefaultChunkSize;
751: PetscFunctionReturn(PETSC_SUCCESS);
752: }
754: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
755: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
756: {
757: auto found = false;
759: PetscFunctionBegin;
760: PetscCall(this->register_finalize(stream));
761: for (auto &block : pool_) {
762: PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
763: if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
764: }
766: PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
767: // if we are here we couldn't find an open block in the pool, so make a new block
768: PetscCall(make_block_(size, stream));
769: // and assign it
770: PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
771: PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
772: PetscFunctionReturn(PETSC_SUCCESS);
773: }
775: // ==========================================================================================
776: // SegmentedMemoryPool - Public API
777: // ==========================================================================================
779: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
780: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
781: {
782: }
784: /*
785: SegmentedMemoryPool::allocate - get an allocation from the memory pool
787: Input Parameters:
788: + req_size - size (in elements) to get
789: . ptr - the pointer to hold the allocation
790: - stream - the stream on which to get the allocation
792: Output Parameter:
793: . ptr - the pointer holding the allocation
795: Notes:
796: req_size cannot be negative. If req_size if zero, ptr is set to nullptr
797: */
798: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
799: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
800: {
801: value_type *ret_ptr = nullptr;
803: PetscFunctionBegin;
804: PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
805: PetscAssertPointer(ptr, 2);
806: PetscAssertPointer(stream, 3);
807: if (req_size) {
808: const auto size = static_cast<size_type>(req_size);
809: auto aligned_size = alignment == alignof(char) ? size : size + alignment;
810: void *vptr = nullptr;
812: PetscCall(allocate_(aligned_size, &ret_ptr, stream));
813: vptr = ret_ptr;
814: std::align(alignment, size, vptr, aligned_size);
815: ret_ptr = reinterpret_cast<value_type *>(vptr);
816: // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
817: // accesses.
818: if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
819: }
820: *ptr = ret_ptr;
821: PetscFunctionReturn(PETSC_SUCCESS);
822: }
824: /*
825: SegmentedMemoryPool::deallocate - release a pointer back to the memory pool
827: Input Parameters:
828: + ptr - the pointer to release
829: - stream - the stream to release it on
831: Notes:
832: If ptr is not owned by the pool it is unchanged.
833: */
834: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
835: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
836: {
837: PetscFunctionBegin;
838: PetscAssertPointer(ptr, 1);
839: PetscAssertPointer(stream, 2);
840: // nobody owns a nullptr, and if they do then they have bigger problems
841: if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
842: for (auto &block : pool_) {
843: auto found = false;
845: PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
846: if (PetscLikely(found)) break;
847: }
848: PetscFunctionReturn(PETSC_SUCCESS);
849: }
851: /*
852: SegmentedMemoryPool::reallocate - Resize an allocated buffer
854: Input Parameters:
855: + new_req_size - the new buffer size
856: . ptr - pointer to the buffer
857: - stream - stream to resize with
859: Output Parameter:
860: . ptr - pointer to the new region
862: Notes:
863: ptr must have been allocated by the pool.
865: It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
866: */
867: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
868: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
869: {
870: using chunk_type = typename block_type::chunk_type;
872: const auto new_size = static_cast<size_type>(new_req_size);
873: const auto old_ptr = *ptr;
874: chunk_type *chunk = nullptr;
876: PetscFunctionBegin;
877: PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
878: PetscAssertPointer(ptr, 2);
879: PetscAssertPointer(stream, 3);
881: // if reallocating to zero, just free
882: if (PetscUnlikely(new_size == 0)) {
883: PetscCall(deallocate(ptr, stream));
884: PetscFunctionReturn(PETSC_SUCCESS);
885: }
887: // search the blocks for the owning chunk
888: for (auto &block : pool_) {
889: PetscCall(block.try_find_chunk(old_ptr, &chunk));
890: if (chunk) break; // found
891: }
892: PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);
894: if (chunk->capacity() < new_size) {
895: // chunk does not have enough room, need to grab a fresh chunk and copy to it
896: *ptr = nullptr;
897: PetscCall(chunk->release(stream));
898: PetscCall(allocate(new_size, ptr, stream));
899: PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
900: } else {
901: // chunk had enough room we can simply grow (or shrink) to fit the new size
902: PetscCall(chunk->resize(new_size));
903: }
904: PetscFunctionReturn(PETSC_SUCCESS);
905: }
907: } // namespace memory
909: } // namespace Petsc