Actual source code: math2opusutils.cu

  1: #include <petsc/private/matimpl.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petscsf.h>
  4: #if defined(PETSC_HAVE_CUDA)
  5:   #include <thrust/for_each.h>
  6:   #include <thrust/device_vector.h>
  7:   #include <thrust/execution_policy.h>
  8: #endif

 10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
 11: {
 12:   PetscSF            rankssf;
 13:   const PetscSFNode *iremote;
 14:   PetscSFNode       *viremote, *rremotes;
 15:   const PetscInt    *ilocal;
 16:   PetscInt          *vilocal = NULL, *ldrs;
 17:   const PetscMPIInt *ranks;
 18:   PetscMPIInt       *sranks;
 19:   PetscInt           nranks, nr, nl, vnr, vnl, i, v, j, maxl;
 20:   MPI_Comm           comm;

 22:   PetscFunctionBegin;
 25:   PetscAssertPointer(vsf, 5);
 26:   if (nv == 1) {
 27:     PetscCall(PetscObjectReference((PetscObject)sf));
 28:     *vsf = sf;
 29:     PetscFunctionReturn(PETSC_SUCCESS);
 30:   }
 31:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
 32:   PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
 33:   PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
 34:   maxl += 1;
 35:   if (ldl == PETSC_DECIDE) ldl = maxl;
 36:   if (ldr == PETSC_DECIDE) ldr = nr;
 37:   PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldr, nr);
 38:   PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldl, maxl);
 39:   vnr = nr * nv;
 40:   vnl = nl * nv;
 41:   PetscCall(PetscMalloc1(vnl, &viremote));
 42:   if (ilocal) PetscCall(PetscMalloc1(vnl, &vilocal));

 44:   /* TODO: Should this special SF be available, e.g.
 45:      PetscSFGetRanksSF or similar? */
 46:   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
 47:   PetscCall(PetscMalloc1(nranks, &sranks));
 48:   PetscCall(PetscArraycpy(sranks, ranks, nranks));
 49:   PetscCall(PetscSortMPIInt(nranks, sranks));
 50:   PetscCall(PetscMalloc1(nranks, &rremotes));
 51:   for (i = 0; i < nranks; i++) {
 52:     rremotes[i].rank  = sranks[i];
 53:     rremotes[i].index = 0;
 54:   }
 55:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf));
 56:   PetscCall(PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
 57:   PetscCall(PetscMalloc1(nranks, &ldrs));
 58:   PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
 59:   PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
 60:   PetscCall(PetscSFDestroy(&rankssf));

 62:   j = -1;
 63:   for (i = 0; i < nl; i++) {
 64:     const PetscInt r  = iremote[i].rank;
 65:     const PetscInt ii = iremote[i].index;

 67:     if (j < 0 || sranks[j] != r) PetscCall(PetscFindMPIInt(r, nranks, sranks, &j));
 68:     PetscCheck(j >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
 69:     for (v = 0; v < nv; v++) {
 70:       viremote[v * nl + i].rank  = r;
 71:       viremote[v * nl + i].index = v * ldrs[j] + ii;
 72:       if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i];
 73:     }
 74:   }
 75:   PetscCall(PetscFree(sranks));
 76:   PetscCall(PetscFree(ldrs));
 77:   PetscCall(PetscSFCreate(comm, vsf));
 78:   PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf)
 83: {
 84:   PetscSF asf;

 86:   PetscFunctionBegin;
 89:   PetscAssertPointer(osf, 3);
 90:   PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf));
 91:   if (!asf) {
 92:     PetscInt lda;

 94:     PetscCall(MatDenseGetLDA(A, &lda));
 95:     PetscCall(PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
 96:     PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf));
 97:     PetscCall(PetscObjectDereference((PetscObject)asf));
 98:   }
 99:   *osf = asf;
100:   PetscFunctionReturn(PETSC_SUCCESS);
101: }

103: #if defined(PETSC_HAVE_CUDA)
104: struct SignVector_Functor {
105:   const PetscScalar *v;
106:   PetscScalar       *s;
107:   SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }

109:   __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
110: };
111: #endif

113: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
114: {
115:   const PetscScalar *av;
116:   PetscScalar       *as;
117:   PetscInt           i, n;
118: #if defined(PETSC_HAVE_CUDA)
119:   PetscBool viscuda, siscuda;
120: #endif

122:   PetscFunctionBegin;
125:   PetscCall(VecGetLocalSize(s, &n));
126:   PetscCall(VecGetLocalSize(v, &i));
127:   PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
128: #if defined(PETSC_HAVE_CUDA)
129:   PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
130:   PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
131:   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
132:   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
133:   if (viscuda && siscuda) {
134:     PetscCall(VecCUDAGetArrayRead(v, &av));
135:     PetscCall(VecCUDAGetArrayWrite(s, &as));
136:     SignVector_Functor sign_vector(av, as);
137:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
138:     PetscCall(VecCUDARestoreArrayWrite(s, &as));
139:     PetscCall(VecCUDARestoreArrayRead(v, &av));
140:   } else
141: #endif
142:   {
143:     PetscCall(VecGetArrayRead(v, &av));
144:     PetscCall(VecGetArrayWrite(s, &as));
145:     for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
146:     PetscCall(VecRestoreArrayWrite(s, &as));
147:     PetscCall(VecRestoreArrayRead(v, &av));
148:   }
149:   PetscFunctionReturn(PETSC_SUCCESS);
150: }

152: #if defined(PETSC_HAVE_CUDA)
153: struct StandardBasis_Functor {
154:   PetscScalar *v;
155:   PetscInt     j;

157:   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
158:   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
159: };
160: #endif

162: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
163: {
164: #if defined(PETSC_HAVE_CUDA)
165:   PetscBool iscuda;
166: #endif
167:   PetscInt st, en;

169:   PetscFunctionBegin;
170:   PetscCall(VecGetOwnershipRange(x, &st, &en));
171: #if defined(PETSC_HAVE_CUDA)
172:   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
173:   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
174:   if (iscuda) {
175:     PetscScalar *ax;
176:     PetscCall(VecCUDAGetArrayWrite(x, &ax));
177:     StandardBasis_Functor delta(ax, i - st);
178:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
179:     PetscCall(VecCUDARestoreArrayWrite(x, &ax));
180:   } else
181: #endif
182:   {
183:     PetscCall(VecSet(x, 0.));
184:     if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
185:     PetscCall(VecAssemblyBegin(x));
186:     PetscCall(VecAssemblyEnd(x));
187:   }
188:   PetscFunctionReturn(PETSC_SUCCESS);
189: }

191: /* these are approximate norms */
192: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
193:    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
194: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
195: {
196:   Vec         x, y, w, z;
197:   PetscReal   normz, adot;
198:   PetscScalar dot;
199:   PetscInt    i, j, N, jold = -1;
200:   PetscBool   boundtocpu = PETSC_TRUE;

202:   PetscFunctionBegin;
203: #if defined(PETSC_HAVE_DEVICE)
204:   boundtocpu = A->boundtocpu;
205: #endif
206:   switch (normtype) {
207:   case NORM_INFINITY:
208:   case NORM_1:
209:     if (normsamples < 0) normsamples = 10; /* pure guess */
210:     if (normtype == NORM_INFINITY) {
211:       Mat B;
212:       PetscCall(MatCreateTranspose(A, &B));
213:       A = B;
214:     } else {
215:       PetscCall(PetscObjectReference((PetscObject)A));
216:     }
217:     PetscCall(MatCreateVecs(A, &x, &y));
218:     PetscCall(MatCreateVecs(A, &z, &w));
219:     PetscCall(VecBindToCPU(x, boundtocpu));
220:     PetscCall(VecBindToCPU(y, boundtocpu));
221:     PetscCall(VecBindToCPU(z, boundtocpu));
222:     PetscCall(VecBindToCPU(w, boundtocpu));
223:     PetscCall(VecGetSize(x, &N));
224:     PetscCall(VecSet(x, 1. / N));
225:     *n = 0.0;
226:     for (i = 0; i < normsamples; i++) {
227:       PetscCall(MatMult(A, x, y));
228:       PetscCall(VecSign(y, w));
229:       PetscCall(MatMultTranspose(A, w, z));
230:       PetscCall(VecNorm(z, NORM_INFINITY, &normz));
231:       PetscCall(VecDot(x, z, &dot));
232:       adot = PetscAbsScalar(dot);
233:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
234:       if (normz <= adot && i > 0) {
235:         PetscCall(VecNorm(y, NORM_1, n));
236:         break;
237:       }
238:       PetscCall(VecMax(z, &j, &normz));
239:       if (j == jold) {
240:         PetscCall(VecNorm(y, NORM_1, n));
241:         PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
242:         break;
243:       }
244:       jold = j;
245:       PetscCall(VecSetDelta(x, j));
246:     }
247:     PetscCall(MatDestroy(&A));
248:     PetscCall(VecDestroy(&x));
249:     PetscCall(VecDestroy(&w));
250:     PetscCall(VecDestroy(&y));
251:     PetscCall(VecDestroy(&z));
252:     break;
253:   case NORM_2:
254:     if (normsamples < 0) normsamples = 20; /* pure guess */
255:     PetscCall(MatCreateVecs(A, &x, &y));
256:     PetscCall(MatCreateVecs(A, &z, NULL));
257:     PetscCall(VecBindToCPU(x, boundtocpu));
258:     PetscCall(VecBindToCPU(y, boundtocpu));
259:     PetscCall(VecBindToCPU(z, boundtocpu));
260:     PetscCall(VecSetRandom(x, NULL));
261:     PetscCall(VecNormalize(x, NULL));
262:     *n = 0.0;
263:     for (i = 0; i < normsamples; i++) {
264:       PetscCall(MatMult(A, x, y));
265:       PetscCall(VecNormalize(y, n));
266:       PetscCall(MatMultTranspose(A, y, z));
267:       PetscCall(VecNorm(z, NORM_2, &normz));
268:       PetscCall(VecDot(x, z, &dot));
269:       adot = PetscAbsScalar(dot);
270:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
271:       if (normz <= adot) break;
272:       if (i < normsamples - 1) {
273:         Vec t;

275:         PetscCall(VecNormalize(z, NULL));
276:         t = x;
277:         x = z;
278:         z = t;
279:       }
280:     }
281:     PetscCall(VecDestroy(&x));
282:     PetscCall(VecDestroy(&y));
283:     PetscCall(VecDestroy(&z));
284:     break;
285:   default:
286:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
287:   }
288:   PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
289:   PetscFunctionReturn(PETSC_SUCCESS);
290: }