Actual source code: mpikok.kokkos.cxx

  1: /*
  2:    This file contains routines for Parallel vector operations.
  3:  */

  5: #include <petscvec_kokkos.hpp>
  6: #include <petsc/private/deviceimpl.h>
  7: #include <petsc/private/vecimpl.h>
  8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
 10: #include <petscsf.h>

 12: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
 13: {
 14:   PetscFunctionBegin;
 15:   delete static_cast<Vec_Kokkos *>(v->spptr);
 16:   PetscCall(VecDestroy_MPI(v));
 17:   PetscFunctionReturn(PETSC_SUCCESS);
 18: }

 20: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
 21: {
 22:   PetscFunctionBegin;
 23:   PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
 24:   PetscFunctionReturn(PETSC_SUCCESS);
 25: }

 27: static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
 28: {
 29:   PetscFunctionBegin;
 30:   PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: /* z = y^H x */
 35: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 36: {
 37:   PetscFunctionBegin;
 38:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
 39:   PetscFunctionReturn(PETSC_SUCCESS);
 40: }

 42: /* z = y^T x */
 43: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 44: {
 45:   PetscFunctionBegin;
 46:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 51: {
 52:   PetscFunctionBegin;
 53:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

 57: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 58: {
 59:   PetscFunctionBegin;
 60:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
 61:   PetscFunctionReturn(PETSC_SUCCESS);
 62: }

 64: static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 65: {
 66:   const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

 68:   PetscFunctionBegin;
 69:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
 70:   PetscFunctionReturn(PETSC_SUCCESS);
 71: }

 73: static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 74: {
 75:   const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

 77:   PetscFunctionBegin;
 78:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
 83: {
 84:   Vec         v;
 85:   Vec_MPI    *vecmpi;
 86:   Vec_Kokkos *veckok;

 88:   PetscFunctionBegin;
 89:   /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
 90:   PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI, with data zero'ed */
 91:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
 92:   v->ops[0] = win->ops[0];

 94:   /* Build the Vec_Kokkos struct */
 95:   vecmpi = static_cast<Vec_MPI *>(v->data);
 96:   veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
 97:   Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
 98:   v->spptr       = veckok;
 99:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
100:   *vv            = v;
101:   PetscFunctionReturn(PETSC_SUCCESS);
102: }

104: static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
105: {
106:   PetscFunctionBegin;
107:   PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }

111: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
112: {
113:   PetscFunctionBegin;
114:   PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
115:   PetscFunctionReturn(PETSC_SUCCESS);
116: }

118: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
119: {
120:   const auto vecmpi = static_cast<Vec_MPI *>(x->data);
121:   const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
122:   PetscInt   m;

124:   PetscFunctionBegin;
125:   PetscCall(VecGetLocalSize(x, &m));
126:   PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
127:   PetscCall(veckok->SetUpCOO(vecmpi, m));
128:   PetscFunctionReturn(PETSC_SUCCESS);
129: }

131: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
132: {
133:   const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
134:   const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
135:   const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
136:   const PetscCountKokkosView &perm1   = veckok->perm1_d;
137:   const PetscCountKokkosView &imap2   = veckok->imap2_d;
138:   const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
139:   const PetscCountKokkosView &perm2   = veckok->perm2_d;
140:   const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
141:   PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
142:   PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
143:   PetscScalarKokkosView       xv;
144:   ConstPetscScalarKokkosView  vv;
145:   PetscMemType                memtype;
146:   PetscInt                    m;

148:   PetscFunctionBegin;
149:   PetscCall(VecGetLocalSize(x, &m));
150:   PetscCall(PetscGetMemType(v, &memtype));
151:   if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we might need to copy it to device if any */
152:     vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
153:   } else {
154:     vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
155:   }

157:   /* Pack entries to be sent to remote */
158:   Kokkos::parallel_for(
159:     vecmpi->sendlen, KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
160:   PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

162:   if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
163:   else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */

165:   Kokkos::parallel_for(
166:     m, KOKKOS_LAMBDA(const PetscCount i) {
167:       PetscScalar sum = 0.0;
168:       for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
169:       xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
170:     });

172:   PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

174:   /* Add received remote entries */
175:   Kokkos::parallel_for(
176:     vecmpi->nnz2, KOKKOS_LAMBDA(PetscCount i) {
177:       for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
178:     });

180:   if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
181:   else PetscCall(VecRestoreKokkosView(x, &xv));
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
186: {
187:   PetscFunctionBegin;
188:   v->ops->abs             = VecAbs_SeqKokkos;
189:   v->ops->reciprocal      = VecReciprocal_SeqKokkos;
190:   v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
191:   v->ops->setrandom       = VecSetRandom_SeqKokkos;
192:   v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
193:   v->ops->waxpy           = VecWAXPY_SeqKokkos;
194:   v->ops->norm            = VecNorm_MPIKokkos;
195:   v->ops->min             = VecMin_MPIKokkos;
196:   v->ops->max             = VecMax_MPIKokkos;
197:   v->ops->sum             = VecSum_SeqKokkos;
198:   v->ops->shift           = VecShift_SeqKokkos;
199:   v->ops->scale           = VecScale_SeqKokkos;
200:   v->ops->copy            = VecCopy_SeqKokkos;
201:   v->ops->set             = VecSet_SeqKokkos;
202:   v->ops->swap            = VecSwap_SeqKokkos;
203:   v->ops->axpy            = VecAXPY_SeqKokkos;
204:   v->ops->axpby           = VecAXPBY_SeqKokkos;
205:   v->ops->maxpy           = VecMAXPY_SeqKokkos;
206:   v->ops->aypx            = VecAYPX_SeqKokkos;
207:   v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
208:   v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
209:   v->ops->placearray      = VecPlaceArray_SeqKokkos;
210:   v->ops->replacearray    = VecReplaceArray_SeqKokkos;
211:   v->ops->resetarray      = VecResetArray_SeqKokkos;

213:   v->ops->dot   = VecDot_MPIKokkos;
214:   v->ops->tdot  = VecTDot_MPIKokkos;
215:   v->ops->mdot  = VecMDot_MPIKokkos;
216:   v->ops->mtdot = VecMTDot_MPIKokkos;

218:   v->ops->dot_local   = VecDot_SeqKokkos;
219:   v->ops->tdot_local  = VecTDot_SeqKokkos;
220:   v->ops->mdot_local  = VecMDot_SeqKokkos;
221:   v->ops->mtdot_local = VecMTDot_SeqKokkos;

223:   v->ops->norm_local              = VecNorm_SeqKokkos;
224:   v->ops->duplicate               = VecDuplicate_MPIKokkos;
225:   v->ops->destroy                 = VecDestroy_MPIKokkos;
226:   v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
227:   v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
228:   v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
229:   v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
230:   v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
231:   v->ops->getarray                = VecGetArray_SeqKokkos;
232:   v->ops->restorearray            = VecRestoreArray_SeqKokkos;
233:   v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
234:   v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
235:   v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
236:   v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
237:   v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

239:   v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
240:   v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;

242:   v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
243:   PetscFunctionReturn(PETSC_SUCCESS);
244: }

246: PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
247: {
248:   Vec_MPI *vecmpi;

250:   PetscFunctionBegin;
251:   PetscCall(PetscKokkosInitializeCheck());
252:   PetscCall(PetscLayoutSetUp(v->map));
253:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
254:   PetscCall(VecSetOps_MPIKokkos(v));
255:   PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
256:   vecmpi = static_cast<Vec_MPI *>(v->data);
257:   PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
258:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
259:   PetscFunctionReturn(PETSC_SUCCESS);
260: }

262: /*MC
263:    VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

265:    Options Database Keys:
266: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

268:   Level: beginner

270: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
271: M*/
272: PetscErrorCode VecCreate_MPIKokkos(Vec v)
273: {
274:   Vec_MPI    *vecmpi;
275:   Vec_Kokkos *veckok;

277:   PetscFunctionBegin;
278:   PetscCall(PetscKokkosInitializeCheck());
279:   PetscCall(PetscLayoutSetUp(v->map));
280:   PetscCall(VecCreate_MPI(v)); /* Calloc host array */

282:   vecmpi = static_cast<Vec_MPI *>(v->data);
283:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
284:   PetscCall(VecSetOps_MPIKokkos(v));
285:   veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
286:   v->spptr       = static_cast<void *>(veckok);
287:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
288:   PetscFunctionReturn(PETSC_SUCCESS);
289: }

291: /*@C
292:   VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
293:   where the user provides the GPU array space to store the vector values.

295:   Collective

297:   Input Parameters:
298: + comm   - the MPI communicator to use
299: . bs     - block size, same meaning as VecSetBlockSize()
300: . n      - local vector length, cannot be PETSC_DECIDE
301: . N      - global vector length (or PETSC_DECIDE to have calculated)
302: - darray - the user provided GPU array to store the vector values

304:   Output Parameter:
305: . v - the vector

307:   Notes:
308:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
309:   same type as an existing vector.

311:   If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
312:   at a later stage to SET the array for storing the vector values.

314:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
315:   The user should not free the array until the vector is destroyed.

317:   Level: intermediate

319: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
320:           `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
321:           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`

323: @*/
324: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
325: {
326:   Vec          w;
327:   Vec_Kokkos  *veckok;
328:   Vec_MPI     *vecmpi;
329:   PetscScalar *harray;

331:   PetscFunctionBegin;
332:   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
333:   PetscCall(PetscKokkosInitializeCheck());
334:   PetscCall(PetscSplitOwnership(comm, &n, &N));
335:   PetscCall(VecCreate(comm, &w));
336:   PetscCall(VecSetSizes(w, n, N));
337:   PetscCall(VecSetBlockSize(w, bs));
338:   PetscCall(PetscLayoutSetUp(w->map));

340:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
341:     harray = const_cast<PetscScalar *>(darray);
342:   } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */

344:   PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build a sequential vector with provided data */
345:   vecmpi = static_cast<Vec_MPI *>(w->data);

347:   if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */

349:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
350:   PetscCall(VecSetOps_MPIKokkos(w));
351:   veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
352:   veckok->v_dual.modify_device(); /* Mark the device is modified */
353:   w->spptr       = static_cast<void *>(veckok);
354:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
355:   *v             = w;
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: /*
360:    VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
361:    with user-provided arrays on host and device.

363:    Collective

365:    Input Parameter:
366: +  comm - the communicator
367: .  bs - the block size
368: .  n - the local vector length
369: .  N - the global vector length
370: -  harray - host memory where the vector elements are to be stored.
371: -  darray - device memory where the vector elements are to be stored.

373:    Output Parameter:
374: .  v - the vector

376:    Notes:
377:    If there is no device, then harray and darray must be the same.
378:    If n is not zero, then harray and darray must be allocated.
379:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
380:    we suppose harray and darray have the same data.

382:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
383:    The user should not free the array until the vector is destroyed.
384: */
385: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
386: {
387:   Vec w;

389:   PetscFunctionBegin;
390:   PetscCall(PetscKokkosInitializeCheck());
391:   if (n) {
392:     PetscAssertPointer(harray, 5);
393:     PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
394:   }
395:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
396:   PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
397:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change it to Kokkos */
398:   PetscCall(VecSetOps_MPIKokkos(w));
399:   PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
400:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
401:   *v             = w;
402:   PetscFunctionReturn(PETSC_SUCCESS);
403: }

405: /*MC
406:    VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

408:    Options Database Keys:
409: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

411:   Level: beginner

413: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
414: M*/
415: PetscErrorCode VecCreate_Kokkos(Vec v)
416: {
417:   PetscMPIInt size;

419:   PetscFunctionBegin;
420:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
421:   if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
422:   else PetscCall(VecSetType(v, VECMPIKOKKOS));
423:   PetscFunctionReturn(PETSC_SUCCESS);
424: }