Actual source code: cupmevent.hpp

  1: #pragma once

  3: #include <petsc/private/cupminterface.hpp>
  4: #include <petsc/private/cpp/memory.hpp>
  5: #include <petsc/private/cpp/object_pool.hpp>

  7: #include <stack>

  9: namespace Petsc
 10: {

 12: namespace device
 13: {

 15: namespace cupm
 16: {

 18: // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
 19: // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
 20: // speedup.
 21: template <DeviceType T, unsigned long flags>
 22: class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
 23: public:
 24:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);

 26:   PetscErrorCode allocate(cupmEvent_t *) noexcept;
 27:   PetscErrorCode deallocate(cupmEvent_t *) noexcept;

 29:   PetscErrorCode finalize_() noexcept;

 31: private:
 32:   std::stack<cupmEvent_t> pool_;
 33: };

 35: template <DeviceType T, unsigned long flags>
 36: inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
 37: {
 38:   PetscFunctionBegin;
 39:   while (!pool_.empty()) {
 40:     PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
 41:     PetscCallCXX(pool_.pop());
 42:   }
 43:   PetscFunctionReturn(PETSC_SUCCESS);
 44: }

 46: template <DeviceType T, unsigned long flags>
 47: inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
 48: {
 49:   PetscFunctionBegin;
 50:   PetscAssertPointer(event, 1);
 51:   if (pool_.empty()) {
 52:     PetscCall(this->register_finalize());
 53:     PetscCallCUPM(cupmEventCreateWithFlags(event, flags));
 54:   } else {
 55:     PetscCallCXX(*event = std::move(pool_.top()));
 56:     PetscCallCXX(pool_.pop());
 57:   }
 58:   PetscFunctionReturn(PETSC_SUCCESS);
 59: }

 61: template <DeviceType T, unsigned long flags>
 62: inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
 63: {
 64:   PetscFunctionBegin;
 65:   PetscAssertPointer(in_event, 1);
 66:   if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
 67:     if (this->registered()) {
 68:       PetscCallCXX(pool_.push(std::move(event)));
 69:     } else {
 70:       PetscCallCUPM(cupmEventDestroy(event));
 71:     }
 72:   }
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: template <DeviceType T, unsigned long flags>
 77: CUPMEventPool<T, flags> &cupm_event_pool() noexcept
 78: {
 79:   static CUPMEventPool<T, flags> pool;
 80:   return pool;
 81: }

 83: // pool of events with timing disabled
 84: template <DeviceType T>
 85: inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
 86: {
 87:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
 88: }

 90: // pool of events with timing enabled
 91: template <DeviceType T>
 92: inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
 93: {
 94:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
 95: }

 97: // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
 98: // event-stream pairing for the async allocator. It is also used as the data member of
 99: // PetscEvent.
100: template <DeviceType T>
101: class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated {
102:   using pool_type = memory::PoolAllocated;

104: public:
105:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);

107:   constexpr CUPMEvent() noexcept = default;
108:   ~CUPMEvent() noexcept;

110:   CUPMEvent(CUPMEvent &&) noexcept;
111:   CUPMEvent &operator=(CUPMEvent &&) noexcept;

113:   // event is not copyable
114:   CUPMEvent(const CUPMEvent &)            = delete;
115:   CUPMEvent &operator=(const CUPMEvent &) = delete;

117:   PETSC_NODISCARD cupmEvent_t get() noexcept;
118:   PetscErrorCode              record(cupmStream_t) noexcept;

120:   explicit operator bool() const noexcept;

122: private:
123:   cupmEvent_t event_{};
124: };

126: template <DeviceType T>
127: inline CUPMEvent<T>::~CUPMEvent() noexcept
128: {
129:   PetscFunctionBegin;
130:   PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
131:   PetscFunctionReturnVoid();
132: }

134: template <DeviceType T>
135: inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
136: {
137:   static_assert(std::is_empty<impl::Interface<T>>::value, "");
138: }

140: template <DeviceType T>
141: inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
142: {
143:   PetscFunctionBegin;
144:   if (this != &other) {
145:     pool_type::operator=(std::move(other));
146:     PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
147:     event_ = util::exchange(other.event_, cupmEvent_t{});
148:   }
149:   PetscFunctionReturn(*this);
150: }

152: template <DeviceType T>
153: inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
154: {
155:   PetscFunctionBegin;
156:   if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
157:   PetscFunctionReturn(event_);
158: }

160: template <DeviceType T>
161: inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
162: {
163:   PetscFunctionBegin;
164:   PetscCallCUPM(cupmEventRecord(get(), stream));
165:   PetscFunctionReturn(PETSC_SUCCESS);
166: }

168: template <DeviceType T>
169: inline CUPMEvent<T>::operator bool() const noexcept
170: {
171:   return event_ != cupmEvent_t{};
172: }

174: } // namespace cupm

176: } // namespace device

178: } // namespace Petsc