Actual source code: segmentedmempool.hpp

  1: #pragma once

  3: #include <petsc/private/deviceimpl.h>

  5: #include <petsc/private/cpp/macros.hpp>
  6: #include <petsc/private/cpp/type_traits.hpp>
  7: #include <petsc/private/cpp/utility.hpp>
  8: #include <petsc/private/cpp/register_finalize.hpp>
  9: #include <petsc/private/cpp/memory.hpp>

 11: #include <limits>
 12: #include <deque>
 13: #include <vector>

 15: namespace Petsc
 16: {

 18: namespace device
 19: {

 // StreamBase - static-polymorphism (CRTP) base class for stream types.
 //
 // The public members cast *this to the derived type T and forward to T's get_stream_(),
 // get_id_(), record_event_() and wait_for_(). The protected static members further down are
 // the versions found by those calls when T does not declare its own: a nullptr stream, an id
 // of 0 and event operations that do nothing and return PETSC_SUCCESS.
 21: template <typename T>
 22: class StreamBase {
 23: public:
 24:   using id_type      = int;
 25:   using derived_type = T;

       // sentinel id, defined out-of-class below the class body
 27:   static const id_type INVALID_ID;

 29:   // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
 30:   template <typename U = T>
 31:   PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());

       // id of the stream, as reported by the derived type's get_id_()
 33:   PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }

       // forwards event (perfectly) to the derived type's record_event_()
 35:   template <typename E>
 36:   PetscErrorCode record_event(E &&event) const noexcept
 37:   {
 38:     return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
 39:   }

       // forwards event (perfectly) to the derived type's wait_for_()
 41:   template <typename E>
 42:   PetscErrorCode wait_for_event(E &&event) const noexcept
 43:   {
 44:     return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
 45:   }

 47: protected:
       // protected so that only derived types can construct the base
 48:   constexpr StreamBase() noexcept = default;

       // placeholder event and stream types for derived types that have no real ones
 50:   struct default_event_type { };
 51:   using default_stream_type = std::nullptr_t;

       // default stream handle: nullptr
 53:   PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }

       // default stream id: 0
 55:   PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }

       // default event recording: nothing to do. U defaults to T so that T::event_type is only
       // looked up once the function is used
 57:   template <typename U = T>
 58:   static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
 59:   {
 60:     return PETSC_SUCCESS;
 61:   }

       // default event wait: nothing to do
 63:   template <typename U = T>
 64:   static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
 65:   {
 66:     return PETSC_SUCCESS;
 67:   }
 68: };

 // out-of-class definition of the static member StreamBase<T>::INVALID_ID. The value (-1)
 // differs from the default id (0) returned by StreamBase::get_id_()
 70: template <typename T>
 71: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;

 // DefaultStream - a stream type which declares no overrides of its own, so every StreamBase
 // call resolves to the defaults in StreamBase. It only re-exports the default stream, id and
 // event types under the names (stream_type, id_type, event_type) used by the memory pool
 73: struct DefaultStream : StreamBase<DefaultStream> {
 74:   using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
 75:   using id_type     = typename StreamBase<DefaultStream>::id_type;
 76:   using event_type  = typename StreamBase<DefaultStream>::default_event_type;
 77: };

 79: } // namespace device

 81: namespace memory
 82: {

 84: namespace impl
 85: {

 87: // ==========================================================================================
 88: // MemoryChunk
 89: //
 90: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
 91: // MemoryBlock and its size/capacity
 92: // ==========================================================================================

 94: template <typename EventType>
 95: class MemoryChunk {
 96: public:
 97:   using event_type = EventType;
 98:   using size_type  = std::size_t;

100:   MemoryChunk(size_type, size_type) noexcept;
101:   explicit MemoryChunk(size_type) noexcept;

103:   MemoryChunk(MemoryChunk &&) noexcept;
104:   MemoryChunk &operator=(MemoryChunk &&) noexcept;

106:   MemoryChunk(const MemoryChunk &) noexcept            = delete;
107:   MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;

109:   PETSC_NODISCARD size_type start() const noexcept { return start_; }
110:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
111:   // REVIEW ME:
112:   // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
113:   // theory only the last chunk needs to do this
114:   PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
115:   PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }

117:   template <typename U>
118:   PetscErrorCode release(const device::StreamBase<U> *) noexcept;
119:   template <typename U>
120:   PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
121:   template <typename U>
122:   PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
123:   PetscErrorCode       resize(size_type) noexcept;
124:   PETSC_NODISCARD bool contains(size_type) const noexcept;

126: private:
127:   // clang-format off
128:   event_type      event_{};          // event recorded when the chunk was released
129:   bool            open_      = true; // is this chunk open?
130:   // id of the last stream to use the chunk, populated on release
131:   int             stream_id_ = device::DefaultStream::INVALID_ID;
132:   size_type       size_      = 0;    // size of the chunk
133:   const size_type start_     = 0;    // offset from the start of the owning block
134:   // clang-format on

136:   template <typename U>
137:   PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
138: };

140: // ==========================================================================================
141: // MemoryChunk - Private API
142: // ==========================================================================================

144: // asks and answers the question: can this stream claim this chunk without serializing?
145: template <typename E>
146: template <typename U>
147: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
148: {
149:   return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
150: }

152: // ==========================================================================================
153: // MemoryChunk - Public API
154: // ==========================================================================================

156: template <typename E>
157: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
158: {
159: }

161: template <typename E>
162: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
163: {
164: }

166: template <typename E>
167: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
168:   event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
169: {
170: }

172: template <typename E>
173: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
174: {
175:   PetscFunctionBegin;
176:   if (this != &other) {
177:     event_     = std::move(other.event_);
178:     open_      = util::exchange(other.open_, false);
179:     stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
180:     size_      = util::exchange(other.size_, 0);
181:     start_     = std::move(other.start_);
182:   }
183:   PetscFunctionReturn(*this);
184: }

186: /*
187:   MemoryChunk::release - release a chunk on a stream

189:   Input Parameter:
190: . stream - the stream to release the chunk with

192:   Notes:
193:   Inserts a release operation on stream and records the state of stream at the time this
194:   routine was called.

196:   Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
197:   the chunk without serialization.

199:   If another stream attempts to claim the chunk they must wait for the recorded event before
200:   claiming the chunk.
201: */
202: template <typename E>
203: template <typename U>
204: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
205: {
206:   PetscFunctionBegin;
207:   open_      = true;
208:   stream_id_ = stream->get_id();
209:   PetscCall(stream->record_event(event_));
210:   PetscFunctionReturn(PETSC_SUCCESS);
211: }

213: /*
214:   MemoryChunk::claim - attempt to claim a particular chunk

216:   Input Parameters:
217: + stream    - the stream on which to attempt to claim
218: . req_size  - the requested size (in elements) to attempt to claim
219: - serialize - (optional, false) whether the claimant allows serialization

221:   Output Parameter:
222: . success - true if the chunk was claimed, false otherwise
223: */
224: template <typename E>
225: template <typename U>
226: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
227: {
228:   PetscFunctionBegin;
229:   if ((*success = can_claim(stream, req_size, serialize))) {
230:     if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
231:     PetscCall(resize(req_size));
232:     open_ = false;
233:   }
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }

237: /*
238:   MemoryChunk::can_claim - test whether a particular chunk can be claimed

240:   Input Parameters:
241: + stream    - the stream on which to attempt to claim
242: . req_size  - the requested size (in elements) to attempt to claim
243: - serialize - whether the claimant allows serialization

245:   Output:
246: . [return] - true if the chunk is claimable given the configuration, false otherwise
247: */
248: template <typename E>
249: template <typename U>
250: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
251: {
252:   if (open_ && (req_size <= capacity())) {
253:     // fully compatible
254:     if (stream_compat_(stream)) return true;
255:     // stream wasn't compatible, but could claim if we serialized
256:     if (serialize) return true;
257:     // incompatible stream and did not want to serialize
258:   }
259:   return false;
260: }

262: /*
263:   MemoryChunk::resize - grow a chunk to new size

265:   Input Parameter:
266: . newsize - the new size Requested

268:   Notes:
269:   newsize cannot be larger than capacity
270: */
271: template <typename E>
272: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
273: {
274:   PetscFunctionBegin;
275:   PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
276:   size_ = newsize;
277:   PetscFunctionReturn(PETSC_SUCCESS);
278: }

280: /*
281:   MemoryChunk::contains - query whether a memory chunk contains a particular offset

283:   Input Parameters:
284: . offset - The offset from the MemoryBlock start

286:   Notes:
287:   Returns true if the chunk contains the offset, false otherwise
288: */
289: template <typename E>
290: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
291: {
292:   return (offset >= start()) && (offset < total_offset());
293: }

295: // ==========================================================================================
296: // MemoryBlock
297: //
298: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
299: // and restoring a block is thread-safe (so may be used by multiple device streams).
300: // ==========================================================================================

// MemoryBlock - owns one buffer of size() elements of T obtained from an allocator_type, and
// the list of MemoryChunks which partition the front of that buffer
302: template <typename T, typename AllocatorType, typename StreamType>
303: class MemoryBlock {
304: public:
305:   using value_type      = T;
306:   using allocator_type  = AllocatorType;
307:   using stream_type     = StreamType;
308:   using event_type      = typename stream_type::event_type;
309:   using chunk_type      = MemoryChunk<event_type>;
310:   using size_type       = typename chunk_type::size_type;
311:   using chunk_list_type = std::vector<chunk_type>;

       // (allocator, size, stream): allocates the buffer immediately, aborts on failure. The
       // allocator pointer is stored (not copied), it is used again to free the buffer
313:   template <typename U>
314:   MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;

       // frees the buffer using a default-constructed stream_type
316:   ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);

       // moving transfers the buffer and the chunk list, the moved-from block owns nothing
318:   MemoryBlock(MemoryBlock &&) noexcept;
319:   MemoryBlock &operator=(MemoryBlock &&) noexcept;

321:   // memory blocks are not copyable
322:   MemoryBlock(const MemoryBlock &)            = delete;
323:   MemoryBlock &operator=(const MemoryBlock &) = delete;

325:   /* --- actual functions --- */
       // (req_size, ptr, stream, success): try to hand out req_size elements from this block
326:   PetscErrorCode       try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
       // (ptr, stream, success): release the chunk containing *ptr, if this block owns it
327:   PetscErrorCode       try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
       // (ptr, ret_chunk): look up the chunk containing ptr, nullptr if ptr is not in this block
328:   PetscErrorCode       try_find_chunk(const T *, chunk_type **) noexcept;
       // does the pointer lie inside the buffer of this block?
329:   PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;

       // size of the buffer in elements, in bytes, and the number of chunks currently tracked
331:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
332:   PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
333:   PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }

335: private:
       // start of the owned buffer, nullptr once cleared or moved from
336:   value_type     *mem_{};
       // allocator used to free mem_, not owned by the block
337:   allocator_type *allocator_{};
       // number of elements in the buffer
338:   size_type       size_{};
       // chunks handed out from the buffer so far
339:   chunk_list_type chunks_{};

       // frees the buffer (if any) on the given stream and resets size and chunk list
341:   PetscErrorCode clear_(const stream_type *) noexcept;
342: };

344: // ==========================================================================================
345: // MemoryBlock - Private API
346: // ==========================================================================================

348: // clear the memory block, called from destructors and move assignment/construction
349: template <typename T, typename A, typename S>
350: PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
351: {
352:   PetscFunctionBegin;
353:   if (PetscLikely(mem_)) {
354:     PetscCall(allocator_->deallocate(mem_, stream));
355:     mem_ = nullptr;
356:   }
357:   size_ = 0;
358:   PetscCallCXX(chunks_.clear());
359:   PetscFunctionReturn(PETSC_SUCCESS);
360: }

362: // ==========================================================================================
363: // MemoryBlock - Public API
364: // ==========================================================================================

366: // default constructor, allocates memory immediately
367: template <typename T, typename A, typename S>
368: template <typename U>
369: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
370: {
371:   PetscFunctionBegin;
372:   PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
373:   PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
374:   PetscFunctionReturnVoid();
375: }

377: template <typename T, typename A, typename S>
378: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
379: {
380:   stream_type stream;

382:   PetscFunctionBegin;
383:   PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
384:   PetscFunctionReturnVoid();
385: }

387: template <typename T, typename A, typename S>
388: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
389: {
390: }

392: template <typename T, typename A, typename S>
393: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
394: {
395:   PetscFunctionBegin;
396:   if (this != &other) {
397:     stream_type stream;

399:     PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
400:     mem_       = util::exchange(other.mem_, nullptr);
401:     allocator_ = other.allocator_;
402:     size_      = util::exchange(other.size_, 0);
403:     chunks_    = std::move(other.chunks_);
404:   }
405:   PetscFunctionReturn(*this);
406: }

408: /*
409:   MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
410: */
411: template <typename T, typename A, typename S>
412: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
413: {
414:   // each pool is linear in memory, so it suffices to check the bounds
415:   return (ptr >= mem_) && (ptr < std::next(mem_, size()));
416: }

418: /*
419:   MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock

421:   Input Parameters:
422: + req_size - the requested size of the allocation (in elements)
423: . ptr      - ptr to fill
424: - stream   - stream to fill the pointer on

426:   Output Parameter:
427: . success  - true if chunk was gotten, false otherwise

429:   Notes:
430:   If the current memory could not satisfy the memory request, ptr is unchanged

       The strategies below are tried in order, each one only if the previous ones failed:
       1. re-use an open chunk which is compatible with stream (no serialization)
       2. create a new chunk in the unused tail of the block
       3. (after pruning claimable chunks off the back of the chunk list) create a new chunk
          in the now possibly larger tail
       4. re-use any open chunk that is large enough, serializing with the stream that
          released it

       A request larger than the whole block fails immediately.
431: */
432: template <typename T, typename A, typename S>
433: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
434: {
435:   PetscFunctionBegin;
436:   *success = false;
437:   if (req_size <= size()) {
         // append a new chunk of req_size elements directly behind the last existing chunk (or
         // at offset 0 if there are none), provided the rest of the block is large enough
438:     const auto try_create_chunk = [&]() {
439:       const auto was_empty     = chunks_.empty();
440:       const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();

442:       PetscFunctionBegin;
443:       if (block_alloced + req_size <= size()) {
444:         PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
445:         PetscCall(chunks_.back().claim(stream, req_size, success));
446:         *ptr = mem_ + block_alloced;
447:         if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
448:       }
449:       PetscFunctionReturn(PETSC_SUCCESS);
450:     };
         // walk the existing chunks front to back and take the first one that accepts the claim,
         // serialize is passed straight through to MemoryChunk::claim()
451:     const auto try_find_open_chunk = [&](bool serialize = false) {
452:       PetscFunctionBegin;
453:       for (auto &chunk : chunks_) {
454:         PetscCall(chunk.claim(stream, req_size, success, serialize));
455:         if (*success) {
456:           *ptr = mem_ + chunk.start();
457:           break;
458:         }
459:       }
460:       PetscFunctionReturn(PETSC_SUCCESS);
461:     };
         // same search as above, but this time willing to wait on the releasing stream
462:     const auto try_steal_other_stream_chunk = [&]() {
463:       PetscFunctionBegin;
464:       PetscCall(try_find_open_chunk(true));
465:       PetscFunctionReturn(PETSC_SUCCESS);
466:     };

468:     // search previously distributed chunks, but only claim one if it is on the same stream
469:     // as us
470:     PetscCall(try_find_open_chunk());

472:     // if we are here we couldn't reuse one of our own chunks so check first if the pool
473:     // has room for a new one
474:     if (!*success) PetscCall(try_create_chunk());

476:     // try pruning dead chunks off the back, note we do this regardless of whether we are
477:     // successful
         //
         // chunks_ cannot be empty when the loop is entered: had it been empty on entry, the
         // search above found nothing and try_create_chunk() appended a chunk (an empty block
         // always has room since req_size <= size()). A request for 0 elements without
         // serialization is claimable exactly for open chunks that are compatible with stream,
         // so a chunk that was just claimed (and is therefore closed) stops the loop
478:     while (chunks_.back().can_claim(stream, 0, false)) {
479:       PetscCallCXX(chunks_.pop_back());
480:       if (chunks_.empty()) {
481:         // if chunks are empty it implies we have managed to claim (and subsequently destroy)
482:         // our own chunk twice! something has gone wrong
483:         PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
484:         break;
485:       }
486:     }

488:     // if previously unsuccessful see if enough space has opened up due to pruning. note that
489:     // if the chunk list was emptied from the pruning this call must succeed in allocating a
490:     // chunk, otherwise something is wrong
491:     if (!*success) PetscCall(try_create_chunk());

493:     // last resort, iterate over all chunks and see if we can steal one by waiting on the
494:     // current owner to finish using it
495:     if (!*success) PetscCall(try_steal_other_stream_chunk());
496:   }
497:   PetscFunctionReturn(PETSC_SUCCESS);
498: }

500: /*
501:   MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock

503:   Input Parameters:
504: + ptr     - ptr to restore
505: - stream  - stream to restore the pointer on

507:   Output Parameter:
508: . success - true if chunk was restored, false otherwise

510:   Notes:
511:   ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
512:   by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
513:   without synchronization, but other streams may not do so until either serializing or the
514:   stream is idle again.
515: */
516: template <typename T, typename A, typename S>
517: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
518: {
519:   chunk_type *chunk = nullptr;

521:   PetscFunctionBegin;
522:   PetscCall(try_find_chunk(*ptr, &chunk));
523:   if (chunk) {
524:     PetscCall(chunk->release(stream));
525:     *ptr     = nullptr;
526:     *success = true;
527:   } else {
528:     *success = false;
529:   }
530:   PetscFunctionReturn(PETSC_SUCCESS);
531: }

533: /*
534:   MemoryBlock::try_find_chunk - try to find the chunk which owns ptr

536:   Input Parameter:
537: . ptr - the pointer to look for

539:   Output Parameter:
540: . ret_chunk - pointer to the owning chunk or nullptr if not found
541: */
542: template <typename T, typename A, typename S>
543: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
544: {
545:   PetscFunctionBegin;
546:   *ret_chunk = nullptr;
547:   if (owns_pointer(ptr)) {
548:     const auto offset = static_cast<size_type>(ptr - mem_);

550:     for (auto &chunk : chunks_) {
551:       if (chunk.contains(offset)) {
552:         *ret_chunk = &chunk;
553:         break;
554:       }
555:     }

557:     PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
558:   }
559:   PetscFunctionReturn(PETSC_SUCCESS);
560: }

562: namespace detail
563: {

     // real_type - maps a value type to the type used to build its canary value (see
     // SegmentedMemoryPoolAllocatorBase::set_canary()). By default this is the type itself
565: template <typename T>
566: struct real_type {
567:   using type = T;
568: };

     // PetscScalar maps to PetscReal
570: template <>
571: struct real_type<PetscScalar> {
572:   using type = PetscReal;
573: };

575: } // namespace detail

// SegmentedMemoryPoolAllocatorBase - the default allocator of the memory pool. All operations
// are static and are implemented below on top of PetscMalloc1(), PetscFree(),
// PetscArrayzero() and PetscArraycpy(); the stream argument is accepted but not used by them
577: template <typename T>
578: struct SegmentedMemoryPoolAllocatorBase {
579:   using value_type      = T;
580:   using size_type       = std::size_t;
       // type whose numeric_limits are used to build the canary value
581:   using real_value_type = typename detail::real_type<T>::type;

       // (ptr, n, stream): allocate n elements, the result is stored in *ptr
583:   template <typename U>
584:   static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
       // (ptr, stream): free memory obtained from allocate()
585:   template <typename U>
586:   static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
       // (ptr, n, stream): zero the first n elements of ptr
587:   template <typename U>
588:   static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
       // (dest, src, n, stream): copy n elements from src to dest
589:   template <typename U>
590:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
       // (ptr, n, stream): fill the first n elements of ptr with the canary value
591:   template <typename U>
592:   static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
593: };

595: template <typename T>
596: template <typename U>
597: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
598: {
599:   PetscFunctionBegin;
600:   PetscCall(PetscMalloc1(n, ptr));
601:   PetscFunctionReturn(PETSC_SUCCESS);
602: }

604: template <typename T>
605: template <typename U>
606: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
607: {
608:   PetscFunctionBegin;
609:   PetscCall(PetscFree(ptr));
610:   PetscFunctionReturn(PETSC_SUCCESS);
611: }

613: template <typename T>
614: template <typename U>
615: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
616: {
617:   PetscFunctionBegin;
618:   PetscCall(PetscArrayzero(ptr, n));
619:   PetscFunctionReturn(PETSC_SUCCESS);
620: }

622: template <typename T>
623: template <typename U>
624: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
625: {
626:   PetscFunctionBegin;
627:   PetscCall(PetscArraycpy(dest, src, n));
628:   PetscFunctionReturn(PETSC_SUCCESS);
629: }

631: template <typename T>
632: template <typename U>
633: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
634: {
635:   using limit_type            = std::numeric_limits<real_value_type>;
636:   constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();

638:   PetscFunctionBegin;
639:   for (size_type i = 0; i < n; ++i) ptr[i] = canary;
640:   PetscFunctionReturn(PETSC_SUCCESS);
641: }

643: } // namespace impl

645: // ==========================================================================================
646: // SegmentedMemoryPool
647: //
648: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
649: // allocated buffer. This buffer is further split into memory "chunks" which control
650: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
651: //
652: // 1. Open:
653: //    The chunk is free to be claimed by the next suitable allocation request. If the
654: //    allocation request is made on the same stream as the chunk was deallocated on, no
655: //    serialization needs to occur. If not, the allocating stream must wait for the
656: //    event. Claiming the chunk "closes" the chunk.
657: //
658: // 2. Closed:
659: //    The chunk has been claimed by an allocation request. It cannot be opened again until it
660: //    is deallocated; doing so "opens" the chunk.
661: //
662: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
663: // an allocation request.
664: //
665: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
666: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
667: // request. This region exists _only_ at the end, as there are no gaps between chunks.
668: //
669: //
670: // |-----------------------------------------------------------------------------------------
671: // | SegmentedMemoryPool
672: // |
673: // | ||-------------||
674: // | ||             ||    -------------------------------------------------------------------
675: // | ||             ||    | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
676: // | ||             ||    | |             |      |                   |            |
677: // | ||             ||    | x-----x-------x-----xx---------x---------x------x-----x
678: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
679: // | ||             ||    | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
680: // | ||             ||    | ---------------------------------------------------------
681: // | ||             ||    -------------------------------------------------------------------
682: // | ||-------------||
683: // | ||             ||
684: // | ||     ...     ||
685: // | ||             ||
686: // ==========================================================================================

// forward declaration carrying the default template arguments: DefaultStream as the stream
// type, SegmentedMemoryPoolAllocatorBase<MemType> as the allocator and 256 as the default
// chunk size
688: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
689: class SegmentedMemoryPool;

691: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
692: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
693: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
694: public:
695:   using value_type     = MemType;
696:   using stream_type    = StreamType;
697:   using allocator_type = AllocType;
698:   using block_type     = impl::MemoryBlock<value_type, allocator_type, stream_type>;
       // blocks hold a pointer to allocator_ and are only ever appended via emplace_back()
699:   using pool_type      = std::deque<block_type>;
700:   using size_type      = typename block_type::size_type;

       // (allocator, chunk size): no memory is allocated by the constructor itself
702:   explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);

       // (req_size, ptr, stream, alignment = alignment of MemType)
704:   PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
705:   PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
706:   PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;

708: private:
       // the blocks making up the pool
709:   pool_type      pool_;
       // allocator shared (by pointer) with every block
710:   allocator_type allocator_;
       // minimum size (in elements) of a newly created block
711:   size_type      chunk_size_;

       // (size, stream): append a block of max(size, chunk_size_) elements to the pool
713:   PetscErrorCode make_block_(size_type, const stream_type *) noexcept;

715:   friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
       // creates the first block of chunk_size_ elements
716:   PetscErrorCode register_finalize_(const stream_type *) noexcept;
       // destroys all blocks and resets chunk_size_ to DefaultChunkSize
717:   PetscErrorCode finalize_() noexcept;

       // (size, ptr, stream): find room for size elements in an existing block or a new one
719:   PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
720: };

722: // ==========================================================================================
723: // SegmentedMemoryPool - Private API
724: // ==========================================================================================

726: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
727: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
728: {
729:   const auto block_size = std::max(size, chunk_size_);

731:   PetscFunctionBegin;
732:   PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
733:   PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
734:   PetscFunctionReturn(PETSC_SUCCESS);
735: }

737: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
738: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
739: {
740:   PetscFunctionBegin;
741:   PetscCall(make_block_(chunk_size_, stream));
742:   PetscFunctionReturn(PETSC_SUCCESS);
743: }

745: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
746: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
747: {
748:   PetscFunctionBegin;
749:   PetscCallCXX(pool_.clear());
750:   chunk_size_ = DefaultChunkSize;
751:   PetscFunctionReturn(PETSC_SUCCESS);
752: }

754: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
755: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
756: {
757:   auto found = false;

759:   PetscFunctionBegin;
760:   PetscCall(this->register_finalize(stream));
761:   for (auto &block : pool_) {
762:     PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
763:     if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
764:   }

766:   PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
767:   // if we are here we couldn't find an open block in the pool, so make a new block
768:   PetscCall(make_block_(size, stream));
769:   // and assign it
770:   PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
771:   PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
772:   PetscFunctionReturn(PETSC_SUCCESS);
773: }

775: // ==========================================================================================
776: // SegmentedMemoryPool - Public API
777: // ==========================================================================================

779: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
780: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
781: {
782: }

784: /*
785:   SegmentedMemoryPool::allocate - get an allocation from the memory pool

787:   Input Parameters:
788: + req_size - size (in elements) to get
789: . ptr      - the pointer to hold the allocation
790: - stream   - the stream on which to get the allocation

792:   Output Parameter:
793: . ptr - the pointer holding the allocation

795:   Notes:
796:   req_size cannot be negative. If req_size if zero, ptr is set to nullptr
797: */
798: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
799: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
800: {
801:   value_type *ret_ptr = nullptr;

803:   PetscFunctionBegin;
804:   PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
805:   PetscAssertPointer(ptr, 2);
806:   PetscAssertPointer(stream, 3);
807:   if (req_size) {
808:     const auto size         = static_cast<size_type>(req_size);
809:     auto       aligned_size = alignment == alignof(char) ? size : size + alignment;
810:     void      *vptr         = nullptr;

812:     PetscCall(allocate_(aligned_size, &ret_ptr, stream));
813:     vptr = ret_ptr;
814:     std::align(alignment, size, vptr, aligned_size);
815:     ret_ptr = reinterpret_cast<value_type *>(vptr);
816:     // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
817:     // accesses.
818:     if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
819:   }
820:   *ptr = ret_ptr;
821:   PetscFunctionReturn(PETSC_SUCCESS);
822: }

824: /*
825:   SegmentedMemoryPool::deallocate - release a pointer back to the memory pool

827:   Input Parameters:
828: + ptr    - the pointer to release
829: - stream - the stream to release it on

831:   Notes:
832:   If ptr is not owned by the pool it is unchanged.
833: */
834: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
835: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
836: {
837:   PetscFunctionBegin;
838:   PetscAssertPointer(ptr, 1);
839:   PetscAssertPointer(stream, 2);
840:   // nobody owns a nullptr, and if they do then they have bigger problems
841:   if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
842:   for (auto &block : pool_) {
843:     auto found = false;

845:     PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
846:     if (PetscLikely(found)) break;
847:   }
848:   PetscFunctionReturn(PETSC_SUCCESS);
849: }

851: /*
852:   SegmentedMemoryPool::reallocate - Resize an allocated buffer

854:   Input Parameters:
855: + new_req_size - the new buffer size
856: . ptr          - pointer to the buffer
857: - stream       - stream to resize with

859:   Output Parameter:
860: . ptr - pointer to the new region

862:   Notes:
863:   ptr must have been allocated by the pool.

865:   It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
866: */
867: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
868: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
869: {
870:   using chunk_type = typename block_type::chunk_type;

872:   const auto  new_size = static_cast<size_type>(new_req_size);
873:   const auto  old_ptr  = *ptr;
874:   chunk_type *chunk    = nullptr;

876:   PetscFunctionBegin;
877:   PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
878:   PetscAssertPointer(ptr, 2);
879:   PetscAssertPointer(stream, 3);

881:   // if reallocating to zero, just free
882:   if (PetscUnlikely(new_size == 0)) {
883:     PetscCall(deallocate(ptr, stream));
884:     PetscFunctionReturn(PETSC_SUCCESS);
885:   }

887:   // search the blocks for the owning chunk
888:   for (auto &block : pool_) {
889:     PetscCall(block.try_find_chunk(old_ptr, &chunk));
890:     if (chunk) break; // found
891:   }
892:   PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);

894:   if (chunk->capacity() < new_size) {
895:     // chunk does not have enough room, need to grab a fresh chunk and copy to it
896:     *ptr = nullptr;
897:     PetscCall(chunk->release(stream));
898:     PetscCall(allocate(new_size, ptr, stream));
899:     PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
900:   } else {
901:     // chunk had enough room we can simply grow (or shrink) to fit the new size
902:     PetscCall(chunk->resize(new_size));
903:   }
904:   PetscFunctionReturn(PETSC_SUCCESS);
905: }

907: } // namespace memory

909: } // namespace Petsc