Actual source code: bddcprivate.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <petsc/private/pcbddcimpl.h>
  3: #include <petsc/private/pcbddcprivateimpl.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>
  5: #include <petscdmplex.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/sfimpl.h>
  8: #include <petsc/private/dmpleximpl.h>
  9: #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17:   PetscScalar *uwork, *data, *U, ds = 0.;
 18:   PetscReal   *sing;
 19:   PetscBLASInt bM, bN, lwork, lierr, di = 1;
 20:   PetscInt     ulw, i, nr, nc, n;
 21: #if defined(PETSC_USE_COMPLEX)
 22:   PetscReal *rwork2;
 23: #endif

 25:   PetscFunctionBegin;
 26:   PetscCall(MatGetSize(A, &nr, &nc));
 27:   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);

 29:   /* workspace */
 30:   if (!work) {
 31:     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
 32:     PetscCall(PetscMalloc1(ulw, &uwork));
 33:   } else {
 34:     ulw   = lw;
 35:     uwork = work;
 36:   }
 37:   n = PetscMin(nr, nc);
 38:   if (!rwork) {
 39:     PetscCall(PetscMalloc1(n, &sing));
 40:   } else {
 41:     sing = rwork;
 42:   }

 44:   /* SVD */
 45:   PetscCall(PetscMalloc1(nr * nr, &U));
 46:   PetscCall(PetscBLASIntCast(nr, &bM));
 47:   PetscCall(PetscBLASIntCast(nc, &bN));
 48:   PetscCall(PetscBLASIntCast(ulw, &lwork));
 49:   PetscCall(MatDenseGetArray(A, &data));
 50:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
 51: #if !defined(PETSC_USE_COMPLEX)
 52:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
 53: #else
 54:   PetscCall(PetscMalloc1(5 * n, &rwork2));
 55:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
 56:   PetscCall(PetscFree(rwork2));
 57: #endif
 58:   PetscCall(PetscFPTrapPop());
 59:   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
 60:   PetscCall(MatDenseRestoreArray(A, &data));
 61:   for (i = 0; i < n; i++)
 62:     if (sing[i] < PETSC_SMALL) break;
 63:   if (!rwork) PetscCall(PetscFree(sing));
 64:   if (!work) PetscCall(PetscFree(uwork));
 65:   /* create B */
 66:   if (!range) {
 67:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
 68:     PetscCall(MatDenseGetArray(*B, &data));
 69:     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
 70:   } else {
 71:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
 72:     PetscCall(MatDenseGetArray(*B, &data));
 73:     PetscCall(PetscArraycpy(data, U, i * nr));
 74:   }
 75:   PetscCall(MatDenseRestoreArray(*B, &data));
 76:   PetscCall(PetscFree(U));
 77:   PetscFunctionReturn(PETSC_SUCCESS);
 78: }

 80: /* TODO REMOVE */
 81: #if defined(PRINT_GDET)
 82: static int inc = 0;
 83: static int lev = 0;
 84: #endif

 86: static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 87: {
 88:   Mat          GE, GEd;
 89:   PetscInt     rsize, csize, esize;
 90:   PetscScalar *ptr;

 92:   PetscFunctionBegin;
 93:   PetscCall(ISGetSize(edge, &esize));
 94:   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
 95:   PetscCall(ISGetSize(extrow, &rsize));
 96:   PetscCall(ISGetSize(extcol, &csize));

 98:   /* gradients */
 99:   ptr = work + 5 * esize;
100:   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
101:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
102:   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
103:   PetscCall(MatDestroy(&GE));

105:   /* constants */
106:   ptr += rsize * csize;
107:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
108:   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
109:   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
110:   PetscCall(MatDestroy(&GE));
111:   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
112:   PetscCall(MatDestroy(&GEd));

114:   if (corners) {
115:     Mat                GEc;
116:     const PetscScalar *vals;
117:     PetscScalar        v;

119:     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
120:     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
121:     PetscCall(MatDenseGetArrayRead(GEd, &vals));
122:     /* v       = PetscAbsScalar(vals[0]); */
123:     v        = 1.;
124:     cvals[0] = vals[0] / v;
125:     cvals[1] = vals[1] / v;
126:     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
127:     PetscCall(MatScale(*GKins, 1. / v));
128: #if defined(PRINT_GDET)
129:     {
130:       PetscViewer viewer;
131:       char        filename[256];
132:       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
133:       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
134:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
135:       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
136:       PetscCall(MatView(GEc, viewer));
137:       PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK"));
138:       PetscCall(MatView(*GKins, viewer));
139:       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
140:       PetscCall(MatView(GEd, viewer));
141:       PetscCall(PetscViewerDestroy(&viewer));
142:     }
143: #endif
144:     PetscCall(MatDestroy(&GEd));
145:     PetscCall(MatDestroy(&GEc));
146:   }

148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }

151: PetscErrorCode PCBDDCNedelecSupport(PC pc)
152: {
153:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
154:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
155:   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
156:   Vec                    tvec;
157:   PetscSF                sfv;
158:   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
159:   MPI_Comm               comm;
160:   IS                     lned, primals, allprimals, nedfieldlocal;
161:   IS                    *eedges, *extrows, *extcols, *alleedges;
162:   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
163:   PetscScalar           *vals, *work;
164:   PetscReal             *rwork;
165:   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
166:   PetscInt               ne, nv, Lv, order, n, field;
167:   PetscInt               n_neigh, *neigh, *n_shared, **shared;
168:   PetscInt               i, j, extmem, cum, maxsize, nee;
169:   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170:   PetscInt              *sfvleaves, *sfvroots;
171:   PetscInt              *corners, *cedges;
172:   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173:   PetscInt              *emarks;
174:   PetscBool              print, eerr, done, lrc[2], conforming, global, singular, setprimal;

176:   PetscFunctionBegin;
177:   /* If the discrete gradient is defined for a subset of dofs and global is true,
178:      it assumes G is given in global ordering for all the dofs.
179:      Otherwise, the ordering is global for the Nedelec field */
180:   order      = pcbddc->nedorder;
181:   conforming = pcbddc->conforming;
182:   field      = pcbddc->nedfield;
183:   global     = pcbddc->nedglobal;
184:   setprimal  = PETSC_FALSE;
185:   print      = PETSC_FALSE;
186:   singular   = PETSC_FALSE;

188:   /* Command line customization */
189:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
190:   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
191:   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL));
192:   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
193:   /* print debug info TODO: to be removed */
194:   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
195:   PetscOptionsEnd();

197:   /* Return if there are no edges in the decomposition and the problem is not singular */
198:   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
199:   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
200:   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
201:   if (!singular) {
202:     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
203:     lrc[0] = PETSC_FALSE;
204:     for (i = 0; i < n; i++) {
205:       if (PetscRealPart(vals[i]) > 2.) {
206:         lrc[0] = PETSC_TRUE;
207:         break;
208:       }
209:     }
210:     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
211:     PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
212:     if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
213:   }

215:   /* Get Nedelec field */
216:   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
217:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
218:     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
219:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
220:     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
221:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
222:     ne            = n;
223:     nedfieldlocal = NULL;
224:     global        = PETSC_TRUE;
225:   } else if (field == PETSC_DECIDE) {
226:     PetscInt rst, ren, *idx;

228:     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
229:     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
230:     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
231:     for (i = rst; i < ren; i++) {
232:       PetscInt nc;

234:       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
235:       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
236:       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
237:     }
238:     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
239:     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
240:     PetscCall(PetscMalloc1(n, &idx));
241:     for (i = 0, ne = 0; i < n; i++)
242:       if (matis->sf_leafdata[i]) idx[ne++] = i;
243:     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
244:   } else {
245:     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
246:   }

248:   /* Sanity checks */
249:   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
250:   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
251:   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);

253:   /* Just set primal dofs and return */
254:   if (setprimal) {
255:     IS        enedfieldlocal;
256:     PetscInt *eidxs;

258:     PetscCall(PetscMalloc1(ne, &eidxs));
259:     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
260:     if (nedfieldlocal) {
261:       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
262:       for (i = 0, cum = 0; i < ne; i++) {
263:         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
264:       }
265:       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
266:     } else {
267:       for (i = 0, cum = 0; i < ne; i++) {
268:         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
269:       }
270:     }
271:     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
272:     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
273:     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
274:     PetscCall(PetscFree(eidxs));
275:     PetscCall(ISDestroy(&nedfieldlocal));
276:     PetscCall(ISDestroy(&enedfieldlocal));
277:     PetscFunctionReturn(PETSC_SUCCESS);
278:   }

280:   /* Compute some l2g maps */
281:   if (nedfieldlocal) {
282:     IS is;

284:     /* need to map from the local Nedelec field to local numbering */
285:     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
286:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
287:     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
288:     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
289:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
290:     if (global) {
291:       PetscCall(PetscObjectReference((PetscObject)al2g));
292:       el2g = al2g;
293:     } else {
294:       IS gis;

296:       PetscCall(ISRenumber(is, NULL, NULL, &gis));
297:       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
298:       PetscCall(ISDestroy(&gis));
299:     }
300:     PetscCall(ISDestroy(&is));
301:   } else {
302:     /* restore default */
303:     pcbddc->nedfield = -1;
304:     /* one ref for the destruction of al2g, one for el2g */
305:     PetscCall(PetscObjectReference((PetscObject)al2g));
306:     PetscCall(PetscObjectReference((PetscObject)al2g));
307:     el2g = al2g;
308:     fl2g = NULL;
309:   }

311:   /* Start communication to drop connections for interior edges (for cc analysis only) */
312:   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
313:   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
314:   if (nedfieldlocal) {
315:     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
316:     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
317:     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
318:   } else {
319:     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
320:   }
321:   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
322:   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));

324:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
325:     PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
326:     PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
327:     if (global) {
328:       PetscInt rst;

330:       PetscCall(MatGetOwnershipRange(G, &rst, NULL));
331:       for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
332:         if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
333:       }
334:       PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
335:       PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
336:     } else {
337:       PetscInt *tbz;

339:       PetscCall(PetscMalloc1(ne, &tbz));
340:       PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341:       PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
342:       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
343:       for (i = 0, cum = 0; i < ne; i++)
344:         if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
345:       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
346:       PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
347:       PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
348:       PetscCall(PetscFree(tbz));
349:     }
350:   } else { /* we need the entire G to infer the nullspace */
351:     PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient));
352:     G = pcbddc->discretegradient;
353:   }

355:   /* Extract subdomain relevant rows of G */
356:   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
357:   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
358:   PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall));
359:   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
360:   PetscCall(ISDestroy(&lned));
361:   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
362:   PetscCall(MatDestroy(&lGall));
363:   PetscCall(MatISGetLocalMat(lGis, &lG));

365:   /* SF for nodal dofs communications */
366:   PetscCall(MatGetLocalSize(G, NULL, &Lv));
367:   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
368:   PetscCall(PetscObjectReference((PetscObject)vl2g));
369:   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
370:   PetscCall(PetscSFCreate(comm, &sfv));
371:   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
372:   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
373:   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
374:   i = singular ? 2 : 1;
375:   PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots));

377:   /* Destroy temporary G created in MATIS format and modified G */
378:   PetscCall(PetscObjectReference((PetscObject)lG));
379:   PetscCall(MatDestroy(&lGis));
380:   PetscCall(MatDestroy(&G));

382:   if (print) {
383:     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
384:     PetscCall(MatView(lG, NULL));
385:   }

387:   /* Save lG for values insertion in change of basis */
388:   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));

390:   /* Analyze the edge-nodes connections (duplicate lG) */
391:   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
392:   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
393:   PetscCall(PetscBTCreate(nv, &btv));
394:   PetscCall(PetscBTCreate(ne, &bte));
395:   PetscCall(PetscBTCreate(ne, &btb));
396:   PetscCall(PetscBTCreate(ne, &btbd));
397:   PetscCall(PetscBTCreate(nv, &btvcand));
398:   /* need to import the boundary specification to ensure the
399:      proper detection of coarse edges' endpoints */
400:   if (pcbddc->DirichletBoundariesLocal) {
401:     IS is;

403:     if (fl2g) {
404:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
405:     } else {
406:       is = pcbddc->DirichletBoundariesLocal;
407:     }
408:     PetscCall(ISGetLocalSize(is, &cum));
409:     PetscCall(ISGetIndices(is, &idxs));
410:     for (i = 0; i < cum; i++) {
411:       if (idxs[i] >= 0) {
412:         PetscCall(PetscBTSet(btb, idxs[i]));
413:         PetscCall(PetscBTSet(btbd, idxs[i]));
414:       }
415:     }
416:     PetscCall(ISRestoreIndices(is, &idxs));
417:     if (fl2g) PetscCall(ISDestroy(&is));
418:   }
419:   if (pcbddc->NeumannBoundariesLocal) {
420:     IS is;

422:     if (fl2g) {
423:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
424:     } else {
425:       is = pcbddc->NeumannBoundariesLocal;
426:     }
427:     PetscCall(ISGetLocalSize(is, &cum));
428:     PetscCall(ISGetIndices(is, &idxs));
429:     for (i = 0; i < cum; i++) {
430:       if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i]));
431:     }
432:     PetscCall(ISRestoreIndices(is, &idxs));
433:     if (fl2g) PetscCall(ISDestroy(&is));
434:   }

436:   /* Count neighs per dof */
437:   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs));
438:   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs));

440:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
441:      for proper detection of coarse edges' endpoints */
442:   PetscCall(PetscBTCreate(ne, &btee));
443:   for (i = 0; i < ne; i++) {
444:     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
445:   }
446:   PetscCall(PetscMalloc1(ne, &marks));
447:   if (!conforming) {
448:     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
449:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
450:   }
451:   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
452:   PetscCall(MatSeqAIJGetArray(lGe, &vals));
453:   cum = 0;
454:   for (i = 0; i < ne; i++) {
455:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
456:     if (!PetscBTLookup(btee, i)) {
457:       marks[cum++] = i;
458:       continue;
459:     }
460:     /* set badly connected edge dofs as primal */
461:     if (!conforming) {
462:       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
463:         marks[cum++] = i;
464:         PetscCall(PetscBTSet(bte, i));
465:         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
466:       } else {
467:         /* every edge dofs should be connected through a certain number of nodal dofs
468:            to other edge dofs belonging to coarse edges
469:            - at most 2 endpoints
470:            - order-1 interior nodal dofs
471:            - no undefined nodal dofs (nconn < order)
472:         */
473:         PetscInt ends = 0, ints = 0, undef = 0;
474:         for (j = ii[i]; j < ii[i + 1]; j++) {
475:           PetscInt v     = jj[j], k;
476:           PetscInt nconn = iit[v + 1] - iit[v];
477:           for (k = iit[v]; k < iit[v + 1]; k++)
478:             if (!PetscBTLookup(btee, jjt[k])) nconn--;
479:           if (nconn > order) ends++;
480:           else if (nconn == order) ints++;
481:           else undef++;
482:         }
483:         if (undef || ends > 2 || ints != order - 1) {
484:           marks[cum++] = i;
485:           PetscCall(PetscBTSet(bte, i));
486:           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
487:         }
488:       }
489:     }
490:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
491:     if (!order && ii[i + 1] != ii[i]) {
492:       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
493:       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
494:     }
495:   }
496:   PetscCall(PetscBTDestroy(&btee));
497:   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
498:   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
499:   if (!conforming) {
500:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
501:     PetscCall(MatDestroy(&lGt));
502:   }
503:   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));

505:   /* identify splitpoints and corner candidates */
506:   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
507:   if (print) {
508:     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
509:     PetscCall(MatView(lGe, NULL));
510:     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
511:     PetscCall(MatView(lGt, NULL));
512:   }
513:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
514:   PetscCall(MatSeqAIJGetArray(lGt, &vals));
515:   for (i = 0; i < nv; i++) {
516:     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
517:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
518:     if (!order) { /* variable order */
519:       PetscReal vorder = 0.;

521:       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
522:       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
523:       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
524:       ord = 1;
525:     }
526:     PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord);
527:     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
528:       if (PetscBTLookup(btbd, jj[j])) {
529:         bdir = PETSC_TRUE;
530:         break;
531:       }
532:       if (vc != ecount[jj[j]]) {
533:         sneighs = PETSC_FALSE;
534:       } else {
535:         PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]];
536:         for (k = 0; k < vc; k++) {
537:           if (vn[k] != en[k]) {
538:             sneighs = PETSC_FALSE;
539:             break;
540:           }
541:         }
542:       }
543:     }
544:     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
545:       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
546:       PetscCall(PetscBTSet(btv, i));
547:     } else if (test == ord) {
548:       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
549:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
550:         PetscCall(PetscBTSet(btv, i));
551:       } else {
552:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
553:         PetscCall(PetscBTSet(btvcand, i));
554:       }
555:     }
556:   }
557:   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
558:   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
559:   PetscCall(PetscBTDestroy(&btbd));

561:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
562:   if (order != 1) {
563:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
564:     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
565:     for (i = 0; i < nv; i++) {
566:       if (PetscBTLookup(btvcand, i)) {
567:         PetscBool found = PETSC_FALSE;
568:         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
569:           PetscInt k, e = jj[j];
570:           if (PetscBTLookup(bte, e)) continue;
571:           for (k = iit[e]; k < iit[e + 1]; k++) {
572:             PetscInt v = jjt[k];
573:             if (v != i && PetscBTLookup(btvcand, v)) {
574:               found = PETSC_TRUE;
575:               break;
576:             }
577:           }
578:         }
579:         if (!found) {
580:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
581:           PetscCall(PetscBTClear(btvcand, i));
582:         } else {
583:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
584:         }
585:       }
586:     }
587:     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
588:   }
589:   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
590:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
591:   PetscCall(MatDestroy(&lGe));

593:   /* Get the local G^T explicitly */
594:   PetscCall(MatDestroy(&lGt));
595:   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
596:   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));

598:   /* Mark interior nodal dofs */
599:   PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));
600:   PetscCall(PetscBTCreate(nv, &btvi));
601:   for (i = 1; i < n_neigh; i++) {
602:     for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j]));
603:   }
604:   PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));

606:   /* communicate corners and splitpoints */
607:   PetscCall(PetscMalloc1(nv, &vmarks));
608:   PetscCall(PetscArrayzero(sfvleaves, nv));
609:   PetscCall(PetscArrayzero(sfvroots, Lv));
610:   for (i = 0; i < nv; i++)
611:     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;

613:   if (print) {
614:     IS tbz;

616:     cum = 0;
617:     for (i = 0; i < nv; i++)
618:       if (sfvleaves[i]) vmarks[cum++] = i;

620:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
621:     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
622:     PetscCall(ISView(tbz, NULL));
623:     PetscCall(ISDestroy(&tbz));
624:   }

626:   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
627:   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
628:   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
629:   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));

631:   /* Zero rows of lGt corresponding to identified corners
632:      and interior nodal dofs */
633:   cum = 0;
634:   for (i = 0; i < nv; i++) {
635:     if (sfvleaves[i]) {
636:       vmarks[cum++] = i;
637:       PetscCall(PetscBTSet(btv, i));
638:     }
639:     if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
640:   }
641:   PetscCall(PetscBTDestroy(&btvi));
642:   if (print) {
643:     IS tbz;

645:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
646:     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
647:     PetscCall(ISView(tbz, NULL));
648:     PetscCall(ISDestroy(&tbz));
649:   }
650:   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
651:   PetscCall(PetscFree(vmarks));
652:   PetscCall(PetscSFDestroy(&sfv));
653:   PetscCall(PetscFree2(sfvleaves, sfvroots));

655:   /* Recompute G */
656:   PetscCall(MatDestroy(&lG));
657:   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
658:   if (print) {
659:     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
660:     PetscCall(MatView(lG, NULL));
661:     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
662:     PetscCall(MatView(lGt, NULL));
663:   }

665:   /* Get primal dofs (if any) */
666:   cum = 0;
667:   for (i = 0; i < ne; i++) {
668:     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
669:   }
670:   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
671:   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
672:   if (print) {
673:     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
674:     PetscCall(ISView(primals, NULL));
675:   }
676:   PetscCall(PetscBTDestroy(&bte));
677:   /* TODO: what if the user passed in some of them ?  */
678:   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
679:   PetscCall(ISDestroy(&primals));

681:   /* Compute edge connectivity */
682:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));

684:   /* Symbolic conn = lG*lGt */
685:   PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
686:   PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
687:   PetscCall(MatProductSetAlgorithm(conn, "default"));
688:   PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
689:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
690:   PetscCall(MatProductSetFromOptions(conn));
691:   PetscCall(MatProductSymbolic(conn));

693:   PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
694:   if (fl2g) {
695:     PetscBT   btf;
696:     PetscInt *iia, *jja, *iiu, *jju;
697:     PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;

699:     /* create CSR for all local dofs */
700:     PetscCall(PetscMalloc1(n + 1, &iia));
701:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
702:       PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
703:       iiu = pcbddc->mat_graph->xadj;
704:       jju = pcbddc->mat_graph->adjncy;
705:     } else if (pcbddc->use_local_adj) {
706:       rest = PETSC_TRUE;
707:       PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
708:     } else {
709:       free = PETSC_TRUE;
710:       PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
711:       iiu[0] = 0;
712:       for (i = 0; i < n; i++) {
713:         iiu[i + 1] = i + 1;
714:         jju[i]     = -1;
715:       }
716:     }

718:     /* import sizes of CSR */
719:     iia[0] = 0;
720:     for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];

722:     /* overwrite entries corresponding to the Nedelec field */
723:     PetscCall(PetscBTCreate(n, &btf));
724:     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
725:     for (i = 0; i < ne; i++) {
726:       PetscCall(PetscBTSet(btf, idxs[i]));
727:       iia[idxs[i] + 1] = ii[i + 1] - ii[i];
728:     }

730:     /* iia in CSR */
731:     for (i = 0; i < n; i++) iia[i + 1] += iia[i];

733:     /* jja in CSR */
734:     PetscCall(PetscMalloc1(iia[n], &jja));
735:     for (i = 0; i < n; i++)
736:       if (!PetscBTLookup(btf, i))
737:         for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];

739:     /* map edge dofs connectivity */
740:     if (jj) {
741:       PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
742:       for (i = 0; i < ne; i++) {
743:         PetscInt e = idxs[i];
744:         for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
745:       }
746:     }
747:     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
748:     PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER));
749:     if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
750:     if (free) PetscCall(PetscFree2(iiu, jju));
751:     PetscCall(PetscBTDestroy(&btf));
752:   } else {
753:     PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER));
754:   }

756:   /* Analyze interface for edge dofs */
757:   PetscCall(PCBDDCAnalyzeInterface(pc));
758:   pcbddc->mat_graph->twodim = PETSC_FALSE;

760:   /* Get coarse edges in the edge space */
761:   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
762:   PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));

764:   if (fl2g) {
765:     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
766:     PetscCall(PetscMalloc1(nee, &eedges));
767:     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
768:   } else {
769:     eedges  = alleedges;
770:     primals = allprimals;
771:   }

773:   /* Mark fine edge dofs with their coarse edge id */
774:   PetscCall(PetscArrayzero(marks, ne));
775:   PetscCall(ISGetLocalSize(primals, &cum));
776:   PetscCall(ISGetIndices(primals, &idxs));
777:   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
778:   PetscCall(ISRestoreIndices(primals, &idxs));
779:   if (print) {
780:     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
781:     PetscCall(ISView(primals, NULL));
782:   }

784:   maxsize = 0;
785:   for (i = 0; i < nee; i++) {
786:     PetscInt size, mark = i + 1;

788:     PetscCall(ISGetLocalSize(eedges[i], &size));
789:     PetscCall(ISGetIndices(eedges[i], &idxs));
790:     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
791:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
792:     maxsize = PetscMax(maxsize, size);
793:   }

795:   /* Find coarse edge endpoints */
796:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
797:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
798:   for (i = 0; i < nee; i++) {
799:     PetscInt mark = i + 1, size;

801:     PetscCall(ISGetLocalSize(eedges[i], &size));
802:     if (!size && nedfieldlocal) continue;
803:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
804:     PetscCall(ISGetIndices(eedges[i], &idxs));
805:     if (print) {
806:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
807:       PetscCall(ISView(eedges[i], NULL));
808:     }
809:     for (j = 0; j < size; j++) {
810:       PetscInt k, ee = idxs[j];
811:       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
812:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
813:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
814:         if (PetscBTLookup(btv, jj[k])) {
815:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
816:         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
817:           PetscInt  k2;
818:           PetscBool corner = PETSC_FALSE;
819:           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
820:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
821:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
822:                if the edge dof lie on the natural part of the boundary */
823:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
824:               corner = PETSC_TRUE;
825:               break;
826:             }
827:           }
828:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
829:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
830:             PetscCall(PetscBTSet(btv, jj[k]));
831:           } else {
832:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
833:           }
834:         }
835:       }
836:     }
837:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
838:   }
839:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
840:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
841:   PetscCall(PetscBTDestroy(&btb));

843:   /* Reset marked primal dofs */
844:   PetscCall(ISGetLocalSize(primals, &cum));
845:   PetscCall(ISGetIndices(primals, &idxs));
846:   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
847:   PetscCall(ISRestoreIndices(primals, &idxs));

849:   /* Now use the initial lG */
850:   PetscCall(MatDestroy(&lG));
851:   PetscCall(MatDestroy(&lGt));
852:   lG = lGinit;
853:   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));

855:   /* Compute extended cols indices */
856:   PetscCall(PetscBTCreate(nv, &btvc));
857:   PetscCall(PetscBTCreate(nee, &bter));
858:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
859:   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
860:   i *= maxsize;
861:   PetscCall(PetscCalloc1(nee, &extcols));
862:   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
863:   eerr = PETSC_FALSE;
864:   for (i = 0; i < nee; i++) {
865:     PetscInt size, found = 0;

867:     cum = 0;
868:     PetscCall(ISGetLocalSize(eedges[i], &size));
869:     if (!size && nedfieldlocal) continue;
870:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
871:     PetscCall(ISGetIndices(eedges[i], &idxs));
872:     PetscCall(PetscBTMemzero(nv, btvc));
873:     for (j = 0; j < size; j++) {
874:       PetscInt k, ee = idxs[j];
875:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
876:         PetscInt vv = jj[k];
877:         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
878:         else if (!PetscBTLookupSet(btvc, vv)) found++;
879:       }
880:     }
881:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
882:     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
883:     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
884:     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
885:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
886:     /* it may happen that endpoints are not defined at this point
887:        if it is the case, mark this edge for a second pass */
888:     if (cum != size - 1 || found != 2) {
889:       PetscCall(PetscBTSet(bter, i));
890:       if (print) {
891:         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
892:         PetscCall(ISView(eedges[i], NULL));
893:         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
894:         PetscCall(ISView(extcols[i], NULL));
895:       }
896:       eerr = PETSC_TRUE;
897:     }
898:   }
899:   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
900:   PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
901:   if (done) {
902:     PetscInt *newprimals;

904:     PetscCall(PetscMalloc1(ne, &newprimals));
905:     PetscCall(ISGetLocalSize(primals, &cum));
906:     PetscCall(ISGetIndices(primals, &idxs));
907:     PetscCall(PetscArraycpy(newprimals, idxs, cum));
908:     PetscCall(ISRestoreIndices(primals, &idxs));
909:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
910:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
911:     for (i = 0; i < nee; i++) {
912:       PetscBool has_candidates = PETSC_FALSE;
913:       if (PetscBTLookup(bter, i)) {
914:         PetscInt size, mark = i + 1;

916:         PetscCall(ISGetLocalSize(eedges[i], &size));
917:         PetscCall(ISGetIndices(eedges[i], &idxs));
918:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
919:         for (j = 0; j < size; j++) {
920:           PetscInt k, ee = idxs[j];
921:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
922:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
923:             /* set all candidates located on the edge as corners */
924:             if (PetscBTLookup(btvcand, jj[k])) {
925:               PetscInt k2, vv = jj[k];
926:               has_candidates = PETSC_TRUE;
927:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
928:               PetscCall(PetscBTSet(btv, vv));
929:               /* set all edge dofs connected to candidate as primals */
930:               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
931:                 if (marks[jjt[k2]] == mark) {
932:                   PetscInt k3, ee2 = jjt[k2];
933:                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
934:                   newprimals[cum++] = ee2;
935:                   /* finally set the new corners */
936:                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
937:                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
938:                     PetscCall(PetscBTSet(btv, jj[k3]));
939:                   }
940:                 }
941:               }
942:             } else {
943:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
944:             }
945:           }
946:         }
947:         if (!has_candidates) { /* circular edge */
948:           PetscInt k, ee = idxs[0], *tmarks;

950:           PetscCall(PetscCalloc1(ne, &tmarks));
951:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
952:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
953:             PetscInt k2;
954:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
955:             PetscCall(PetscBTSet(btv, jj[k]));
956:             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
957:           }
958:           for (j = 0; j < size; j++) {
959:             if (tmarks[idxs[j]] > 1) {
960:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
961:               newprimals[cum++] = idxs[j];
962:             }
963:           }
964:           PetscCall(PetscFree(tmarks));
965:         }
966:         PetscCall(ISRestoreIndices(eedges[i], &idxs));
967:       }
968:       PetscCall(ISDestroy(&extcols[i]));
969:     }
970:     PetscCall(PetscFree(extcols));
971:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
972:     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
973:     if (fl2g) {
974:       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
975:       PetscCall(ISDestroy(&primals));
976:       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
977:       PetscCall(PetscFree(eedges));
978:     }
979:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
980:     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
981:     PetscCall(PetscFree(newprimals));
982:     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
983:     PetscCall(ISDestroy(&primals));
984:     PetscCall(PCBDDCAnalyzeInterface(pc));
985:     pcbddc->mat_graph->twodim = PETSC_FALSE;
986:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
987:     if (fl2g) {
988:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
989:       PetscCall(PetscMalloc1(nee, &eedges));
990:       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
991:     } else {
992:       eedges  = alleedges;
993:       primals = allprimals;
994:     }
995:     PetscCall(PetscCalloc1(nee, &extcols));

997:     /* Mark again */
998:     PetscCall(PetscArrayzero(marks, ne));
999:     for (i = 0; i < nee; i++) {
1000:       PetscInt size, mark = i + 1;

1002:       PetscCall(ISGetLocalSize(eedges[i], &size));
1003:       PetscCall(ISGetIndices(eedges[i], &idxs));
1004:       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1005:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1006:     }
1007:     if (print) {
1008:       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1009:       PetscCall(ISView(primals, NULL));
1010:     }

1012:     /* Recompute extended cols */
1013:     eerr = PETSC_FALSE;
1014:     for (i = 0; i < nee; i++) {
1015:       PetscInt size;

1017:       cum = 0;
1018:       PetscCall(ISGetLocalSize(eedges[i], &size));
1019:       if (!size && nedfieldlocal) continue;
1020:       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1021:       PetscCall(ISGetIndices(eedges[i], &idxs));
1022:       for (j = 0; j < size; j++) {
1023:         PetscInt k, ee = idxs[j];
1024:         for (k = ii[ee]; k < ii[ee + 1]; k++)
1025:           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1026:       }
1027:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1028:       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1029:       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1030:       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1031:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1032:       if (cum != size - 1) {
1033:         if (print) {
1034:           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1035:           PetscCall(ISView(eedges[i], NULL));
1036:           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1037:           PetscCall(ISView(extcols[i], NULL));
1038:         }
1039:         eerr = PETSC_TRUE;
1040:       }
1041:     }
1042:   }
1043:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1044:   PetscCall(PetscFree2(extrow, gidxs));
1045:   PetscCall(PetscBTDestroy(&bter));
1046:   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1047:   /* an error should not occur at this point */
1048:   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1050:   /* Check the number of endpoints */
1051:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1052:   PetscCall(PetscMalloc1(2 * nee, &corners));
1053:   PetscCall(PetscMalloc1(nee, &cedges));
1054:   for (i = 0; i < nee; i++) {
1055:     PetscInt size, found = 0, gc[2];

1057:     /* init with defaults */
1058:     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1059:     PetscCall(ISGetLocalSize(eedges[i], &size));
1060:     if (!size && nedfieldlocal) continue;
1061:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1062:     PetscCall(ISGetIndices(eedges[i], &idxs));
1063:     PetscCall(PetscBTMemzero(nv, btvc));
1064:     for (j = 0; j < size; j++) {
1065:       PetscInt k, ee = idxs[j];
1066:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1067:         PetscInt vv = jj[k];
1068:         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1069:           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1070:           corners[i * 2 + found++] = vv;
1071:         }
1072:       }
1073:     }
1074:     if (found != 2) {
1075:       PetscInt e;
1076:       if (fl2g) {
1077:         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1078:       } else {
1079:         e = idxs[0];
1080:       }
1081:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1082:     }

1084:     /* get primal dof index on this coarse edge */
1085:     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1086:     if (gc[0] > gc[1]) {
1087:       PetscInt swap      = corners[2 * i];
1088:       corners[2 * i]     = corners[2 * i + 1];
1089:       corners[2 * i + 1] = swap;
1090:     }
1091:     cedges[i] = idxs[size - 1];
1092:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1093:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1094:   }
1095:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1096:   PetscCall(PetscBTDestroy(&btvc));

1098:   if (PetscDefined(USE_DEBUG)) {
1099:     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1100:      not interfere with neighbouring coarse edges */
1101:     PetscCall(PetscMalloc1(nee + 1, &emarks));
1102:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1103:     for (i = 0; i < nv; i++) {
1104:       PetscInt emax = 0, eemax = 0;

1106:       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1107:       PetscCall(PetscArrayzero(emarks, nee + 1));
1108:       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1109:       for (j = 1; j < nee + 1; j++) {
1110:         if (emax < emarks[j]) {
1111:           emax  = emarks[j];
1112:           eemax = j;
1113:         }
1114:       }
1115:       /* not relevant for edges */
1116:       if (!eemax) continue;

1118:       for (j = ii[i]; j < ii[i + 1]; j++) {
1119:         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1120:       }
1121:     }
1122:     PetscCall(PetscFree(emarks));
1123:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1124:   }

1126:   /* Compute extended rows indices for edge blocks of the change of basis */
1127:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1128:   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1129:   extmem *= maxsize;
1130:   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1131:   PetscCall(PetscMalloc1(nee, &extrows));
1132:   PetscCall(PetscCalloc1(nee, &extrowcum));
1133:   for (i = 0; i < nv; i++) {
1134:     PetscInt mark = 0, size, start;

1136:     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1137:     for (j = ii[i]; j < ii[i + 1]; j++)
1138:       if (marks[jj[j]] && !mark) mark = marks[jj[j]];

1140:     /* not relevant */
1141:     if (!mark) continue;

1143:     /* import extended row */
1144:     mark--;
1145:     start = mark * extmem + extrowcum[mark];
1146:     size  = ii[i + 1] - ii[i];
1147:     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1148:     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1149:     extrowcum[mark] += size;
1150:   }
1151:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1152:   PetscCall(MatDestroy(&lGt));
1153:   PetscCall(PetscFree(marks));

1155:   /* Compress extrows */
1156:   cum = 0;
1157:   for (i = 0; i < nee; i++) {
1158:     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1159:     PetscCall(PetscSortRemoveDupsInt(&size, start));
1160:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1161:     cum = PetscMax(cum, size);
1162:   }
1163:   PetscCall(PetscFree(extrowcum));
1164:   PetscCall(PetscBTDestroy(&btv));
1165:   PetscCall(PetscBTDestroy(&btvcand));

1167:   /* Workspace for lapack inner calls and VecSetValues */
1168:   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));

1170:   /* Create change of basis matrix (preallocation can be improved) */
1171:   PetscCall(MatCreate(comm, &T));
1172:   PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N));
1173:   PetscCall(MatSetType(T, MATAIJ));
1174:   PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL));
1175:   PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL));
1176:   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1177:   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1178:   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1179:   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));

1181:   /* Defaults to identity */
1182:   PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
1183:   PetscCall(VecSet(tvec, 1.0));
1184:   PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES));
1185:   PetscCall(VecDestroy(&tvec));

1187:   /* Create discrete gradient for the coarser level if needed */
1188:   PetscCall(MatDestroy(&pcbddc->nedcG));
1189:   PetscCall(ISDestroy(&pcbddc->nedclocal));
1190:   if (pcbddc->current_level < pcbddc->max_levels) {
1191:     ISLocalToGlobalMapping cel2g, cvl2g;
1192:     IS                     wis, gwis;
1193:     PetscInt               cnv, cne;

1195:     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1196:     if (fl2g) {
1197:       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1198:     } else {
1199:       PetscCall(PetscObjectReference((PetscObject)wis));
1200:       pcbddc->nedclocal = wis;
1201:     }
1202:     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1203:     PetscCall(ISDestroy(&wis));
1204:     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1205:     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1206:     PetscCall(ISDestroy(&wis));
1207:     PetscCall(ISDestroy(&gwis));

1209:     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1210:     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1211:     PetscCall(ISDestroy(&wis));
1212:     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1213:     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1214:     PetscCall(ISDestroy(&wis));
1215:     PetscCall(ISDestroy(&gwis));

1217:     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1218:     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1219:     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1220:     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1221:     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1222:     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1223:     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1224:     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1225:   }
1226:   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));

1228: #if defined(PRINT_GDET)
1229:   inc = 0;
1230:   lev = pcbddc->current_level;
1231: #endif

1233:   /* Insert values in the change of basis matrix */
1234:   for (i = 0; i < nee; i++) {
1235:     Mat         Gins = NULL, GKins = NULL;
1236:     IS          cornersis = NULL;
1237:     PetscScalar cvals[2];

1239:     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1240:     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1241:     if (Gins && GKins) {
1242:       const PetscScalar *data;
1243:       const PetscInt    *rows, *cols;
1244:       PetscInt           nrh, nch, nrc, ncc;

1246:       PetscCall(ISGetIndices(eedges[i], &cols));
1247:       /* H1 */
1248:       PetscCall(ISGetIndices(extrows[i], &rows));
1249:       PetscCall(MatGetSize(Gins, &nrh, &nch));
1250:       PetscCall(MatDenseGetArrayRead(Gins, &data));
1251:       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1252:       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1253:       PetscCall(ISRestoreIndices(extrows[i], &rows));
1254:       /* complement */
1255:       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1256:       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1257:       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1258:       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1259:       PetscCall(MatDenseGetArrayRead(GKins, &data));
1260:       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1261:       PetscCall(MatDenseRestoreArrayRead(GKins, &data));

1263:       /* coarse discrete gradient */
1264:       if (pcbddc->nedcG) {
1265:         PetscInt cols[2];

1267:         cols[0] = 2 * i;
1268:         cols[1] = 2 * i + 1;
1269:         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1270:       }
1271:       PetscCall(ISRestoreIndices(eedges[i], &cols));
1272:     }
1273:     PetscCall(ISDestroy(&extrows[i]));
1274:     PetscCall(ISDestroy(&extcols[i]));
1275:     PetscCall(ISDestroy(&cornersis));
1276:     PetscCall(MatDestroy(&Gins));
1277:     PetscCall(MatDestroy(&GKins));
1278:   }
1279:   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));

1281:   /* Start assembling */
1282:   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1283:   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));

1285:   /* Free */
1286:   if (fl2g) {
1287:     PetscCall(ISDestroy(&primals));
1288:     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1289:     PetscCall(PetscFree(eedges));
1290:   }

1292:   /* hack mat_graph with primal dofs on the coarse edges */
1293:   {
1294:     PCBDDCGraph graph  = pcbddc->mat_graph;
1295:     PetscInt   *oqueue = graph->queue;
1296:     PetscInt   *ocptr  = graph->cptr;
1297:     PetscInt    ncc, *idxs;

1299:     /* find first primal edge */
1300:     if (pcbddc->nedclocal) {
1301:       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1302:     } else {
1303:       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1304:       idxs = cedges;
1305:     }
1306:     cum = 0;
1307:     while (cum < nee && cedges[cum] < 0) cum++;

1309:     /* adapt connected components */
1310:     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1311:     graph->cptr[0] = 0;
1312:     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1313:       PetscInt lc = ocptr[i + 1] - ocptr[i];
1314:       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1315:         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1316:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1317:         ncc++;
1318:         lc--;
1319:         cum++;
1320:         while (cum < nee && cedges[cum] < 0) cum++;
1321:       }
1322:       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1323:       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1324:       ncc++;
1325:     }
1326:     graph->ncc = ncc;
1327:     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1328:     PetscCall(PetscFree2(ocptr, oqueue));
1329:   }
1330:   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1331:   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1332:   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1333:   PetscCall(MatDestroy(&conn));

1335:   PetscCall(ISDestroy(&nedfieldlocal));
1336:   PetscCall(PetscFree(extrow));
1337:   PetscCall(PetscFree2(work, rwork));
1338:   PetscCall(PetscFree(corners));
1339:   PetscCall(PetscFree(cedges));
1340:   PetscCall(PetscFree(extrows));
1341:   PetscCall(PetscFree(extcols));
1342:   PetscCall(MatDestroy(&lG));

1344:   /* Complete assembling */
1345:   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1346:   if (pcbddc->nedcG) {
1347:     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1348: #if 0
1349:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G"));
1350:     PetscCall(MatView(pcbddc->nedcG,NULL));
1351: #endif
1352:   }

1354:   /* set change of basis */
1355:   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular));
1356:   PetscCall(MatDestroy(&T));

1358:   PetscFunctionReturn(PETSC_SUCCESS);
1359: }

1361: /* the near-null space of BDDC carries information on quadrature weights,
1362:    and these can be collinear -> so cheat with MatNullSpaceCreate
1363:    and create a suitable set of basis vectors first */
1364: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1365: {
1366:   PetscInt i;

1368:   PetscFunctionBegin;
1369:   for (i = 0; i < nvecs; i++) {
1370:     PetscInt first, last;

1372:     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1373:     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1374:     if (i >= first && i < last) {
1375:       PetscScalar *data;
1376:       PetscCall(VecGetArray(quad_vecs[i], &data));
1377:       if (!has_const) {
1378:         data[i - first] = 1.;
1379:       } else {
1380:         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1381:         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1382:       }
1383:       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1384:     }
1385:     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1386:   }
1387:   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1388:   for (i = 0; i < nvecs; i++) { /* reset vectors */
1389:     PetscInt first, last;
1390:     PetscCall(VecLockReadPop(quad_vecs[i]));
1391:     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1392:     if (i >= first && i < last) {
1393:       PetscScalar *data;
1394:       PetscCall(VecGetArray(quad_vecs[i], &data));
1395:       if (!has_const) {
1396:         data[i - first] = 0.;
1397:       } else {
1398:         data[2 * i - first]     = 0.;
1399:         data[2 * i - first + 1] = 0.;
1400:       }
1401:       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1402:     }
1403:     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1404:     PetscCall(VecLockReadPush(quad_vecs[i]));
1405:   }
1406:   PetscFunctionReturn(PETSC_SUCCESS);
1407: }

1409: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1410: {
1411:   Mat                    loc_divudotp;
1412:   Vec                    p, v, vins, quad_vec, *quad_vecs;
1413:   ISLocalToGlobalMapping map;
1414:   PetscScalar           *vals;
1415:   const PetscScalar     *array;
1416:   PetscInt               i, maxneighs = 0, maxsize, *gidxs;
1417:   PetscInt               n_neigh, *neigh, *n_shared, **shared;
1418:   PetscMPIInt            rank;

1420:   PetscFunctionBegin;
1421:   PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1422:   for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs);
1423:   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
1424:   if (!maxneighs) {
1425:     PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1426:     *nnsp = NULL;
1427:     PetscFunctionReturn(PETSC_SUCCESS);
1428:   }
1429:   maxsize = 0;
1430:   for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize);
1431:   PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals));
1432:   /* create vectors to hold quadrature weights */
1433:   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1434:   if (!transpose) {
1435:     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1436:   } else {
1437:     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1438:   }
1439:   PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs));
1440:   PetscCall(VecDestroy(&quad_vec));
1441:   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp));
1442:   for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i]));

1444:   /* compute local quad vec */
1445:   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1446:   if (!transpose) {
1447:     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1448:   } else {
1449:     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1450:   }
1451:   PetscCall(VecSet(p, 1.));
1452:   if (!transpose) {
1453:     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1454:   } else {
1455:     PetscCall(MatMult(loc_divudotp, p, v));
1456:   }
1457:   if (vl2l) {
1458:     Mat        lA;
1459:     VecScatter sc;

1461:     PetscCall(MatISGetLocalMat(A, &lA));
1462:     PetscCall(MatCreateVecs(lA, &vins, NULL));
1463:     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1464:     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1465:     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1466:     PetscCall(VecScatterDestroy(&sc));
1467:   } else {
1468:     vins = v;
1469:   }
1470:   PetscCall(VecGetArrayRead(vins, &array));
1471:   PetscCall(VecDestroy(&p));

1473:   /* insert in global quadrature vecs */
1474:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1475:   for (i = 1; i < n_neigh; i++) {
1476:     const PetscInt *idxs;
1477:     PetscInt        idx, nn, j;

1479:     idxs = shared[i];
1480:     nn   = n_shared[i];
1481:     for (j = 0; j < nn; j++) vals[j] = array[idxs[j]];
1482:     PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx));
1483:     idx = -(idx + 1);
1484:     PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs);
1485:     PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs));
1486:     PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES));
1487:   }
1488:   PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1489:   PetscCall(VecRestoreArrayRead(vins, &array));
1490:   if (vl2l) PetscCall(VecDestroy(&vins));
1491:   PetscCall(VecDestroy(&v));
1492:   PetscCall(PetscFree2(gidxs, vals));

1494:   /* assemble near null space */
1495:   for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i]));
1496:   for (i = 0; i < maxneighs; i++) {
1497:     PetscCall(VecAssemblyEnd(quad_vecs[i]));
1498:     PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view"));
1499:     PetscCall(VecLockReadPush(quad_vecs[i]));
1500:   }
1501:   PetscCall(VecDestroyVecs(maxneighs, &quad_vecs));
1502:   PetscFunctionReturn(PETSC_SUCCESS);
1503: }

1505: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1506: {
1507:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

1509:   PetscFunctionBegin;
1510:   if (primalv) {
1511:     if (pcbddc->user_primal_vertices_local) {
1512:       IS list[2], newp;

1514:       list[0] = primalv;
1515:       list[1] = pcbddc->user_primal_vertices_local;
1516:       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1517:       PetscCall(ISSortRemoveDups(newp));
1518:       PetscCall(ISDestroy(&list[1]));
1519:       pcbddc->user_primal_vertices_local = newp;
1520:     } else {
1521:       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1522:     }
1523:   }
1524:   PetscFunctionReturn(PETSC_SUCCESS);
1525: }

1527: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1528: {
1529:   PetscInt f, *comp = (PetscInt *)ctx;

1531:   PetscFunctionBegin;
1532:   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1533:   PetscFunctionReturn(PETSC_SUCCESS);
1534: }

1536: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1537: {
1538:   Vec       local, global;
1539:   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1540:   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1541:   PetscBool monolithic = PETSC_FALSE;

1543:   PetscFunctionBegin;
1544:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1545:   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1546:   PetscOptionsEnd();
1547:   /* need to convert from global to local topology information and remove references to information in global ordering */
1548:   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1549:   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1550:   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1551:   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1552:   if (monolithic) { /* just get block size to properly compute vertices */
1553:     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1554:     goto boundary;
1555:   }

1557:   if (pcbddc->user_provided_isfordofs) {
1558:     if (pcbddc->n_ISForDofs) {
1559:       PetscInt i;

1561:       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1562:       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1563:         PetscInt bs;

1565:         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1566:         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1567:         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1568:         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1569:       }
1570:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1571:       pcbddc->n_ISForDofs      = 0;
1572:       PetscCall(PetscFree(pcbddc->ISForDofs));
1573:     }
1574:   } else {
1575:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1576:       DM dm;

1578:       PetscCall(MatGetDM(pc->pmat, &dm));
1579:       if (!dm) PetscCall(PCGetDM(pc, &dm));
1580:       if (dm) {
1581:         IS      *fields;
1582:         PetscInt nf, i;

1584:         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1585:         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1586:         for (i = 0; i < nf; i++) {
1587:           PetscInt bs;

1589:           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1590:           PetscCall(ISGetBlockSize(fields[i], &bs));
1591:           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1592:           PetscCall(ISDestroy(&fields[i]));
1593:         }
1594:         PetscCall(PetscFree(fields));
1595:         pcbddc->n_ISForDofsLocal = nf;
1596:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1597:         PetscContainer c;

1599:         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1600:         if (c) {
1601:           MatISLocalFields lf;
1602:           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1603:           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1604:         } else { /* fallback, create the default fields if bs > 1 */
1605:           PetscInt i, n = matis->A->rmap->n;
1606:           PetscCall(MatGetBlockSize(pc->pmat, &i));
1607:           if (i > 1) {
1608:             pcbddc->n_ISForDofsLocal = i;
1609:             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1610:             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1611:           }
1612:         }
1613:       }
1614:     } else {
1615:       PetscInt i;
1616:       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1617:     }
1618:   }

1620: boundary:
1621:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1622:     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1623:   } else if (pcbddc->DirichletBoundariesLocal) {
1624:     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1625:   }
1626:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1627:     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1628:   } else if (pcbddc->NeumannBoundariesLocal) {
1629:     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1630:   }
1631:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1632:   PetscCall(VecDestroy(&global));
1633:   PetscCall(VecDestroy(&local));
1634:   /* detect local disconnected subdomains if requested (use matis->A) */
1635:   if (pcbddc->detect_disconnected) {
1636:     IS        primalv = NULL;
1637:     PetscInt  i;
1638:     PetscBool filter = pcbddc->detect_disconnected_filter;

1640:     for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1641:     PetscCall(PetscFree(pcbddc->local_subs));
1642:     PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1643:     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1644:     PetscCall(ISDestroy(&primalv));
1645:   }
1646:   /* early stage corner detection */
1647:   {
1648:     DM dm;

1650:     PetscCall(MatGetDM(pc->pmat, &dm));
1651:     if (!dm) PetscCall(PCGetDM(pc, &dm));
1652:     if (dm) {
1653:       PetscBool isda;

1655:       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1656:       if (isda) {
1657:         ISLocalToGlobalMapping l2l;
1658:         IS                     corners;
1659:         Mat                    lA;
1660:         PetscBool              gl, lo;

1662:         {
1663:           Vec                cvec;
1664:           const PetscScalar *coords;
1665:           PetscInt           dof, n, cdim;
1666:           PetscBool          memc = PETSC_TRUE;

1668:           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1669:           PetscCall(DMGetCoordinates(dm, &cvec));
1670:           PetscCall(VecGetLocalSize(cvec, &n));
1671:           PetscCall(VecGetBlockSize(cvec, &cdim));
1672:           n /= cdim;
1673:           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1674:           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1675:           PetscCall(VecGetArrayRead(cvec, &coords));
1676: #if defined(PETSC_USE_COMPLEX)
1677:           memc = PETSC_FALSE;
1678: #endif
1679:           if (dof != 1) memc = PETSC_FALSE;
1680:           if (memc) {
1681:             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1682:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1683:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1684:             PetscInt   i, b, d;

1686:             for (i = 0; i < n; i++) {
1687:               for (b = 0; b < dof; b++) {
1688:                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1689:               }
1690:             }
1691:           }
1692:           PetscCall(VecRestoreArrayRead(cvec, &coords));
1693:           pcbddc->mat_graph->cdim  = cdim;
1694:           pcbddc->mat_graph->cnloc = dof * n;
1695:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1696:         }
1697:         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1698:         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1699:         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1700:         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1701:         lo = (PetscBool)(l2l && corners);
1702:         PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1703:         if (gl) { /* From PETSc's DMDA */
1704:           const PetscInt *idx;
1705:           PetscInt        dof, bs, *idxout, n;

1707:           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1708:           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1709:           PetscCall(ISGetLocalSize(corners, &n));
1710:           PetscCall(ISGetIndices(corners, &idx));
1711:           if (bs == dof) {
1712:             PetscCall(PetscMalloc1(n, &idxout));
1713:             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1714:           } else { /* the original DMDA local-to-local map have been modified */
1715:             PetscInt i, d;

1717:             PetscCall(PetscMalloc1(dof * n, &idxout));
1718:             for (i = 0; i < n; i++)
1719:               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1720:             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));

1722:             bs = 1;
1723:             n *= dof;
1724:           }
1725:           PetscCall(ISRestoreIndices(corners, &idx));
1726:           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1727:           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1728:           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1729:           PetscCall(ISDestroy(&corners));
1730:           pcbddc->corner_selected  = PETSC_TRUE;
1731:           pcbddc->corner_selection = PETSC_TRUE;
1732:         }
1733:         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1734:       }
1735:     }
1736:   }
1737:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1738:     DM dm;

1740:     PetscCall(MatGetDM(pc->pmat, &dm));
1741:     if (!dm) PetscCall(PCGetDM(pc, &dm));
1742:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1743:       Vec          vcoords;
1744:       PetscSection section;
1745:       PetscReal   *coords;
1746:       PetscInt     d, cdim, nl, nf, **ctxs;
1747:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1748:       /* debug coordinates */
1749:       PetscViewer       viewer;
1750:       PetscBool         flg;
1751:       PetscViewerFormat format;
1752:       const char       *prefix;

1754:       PetscCall(DMGetCoordinateDim(dm, &cdim));
1755:       PetscCall(DMGetLocalSection(dm, &section));
1756:       PetscCall(PetscSectionGetNumFields(section, &nf));
1757:       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1758:       PetscCall(VecGetLocalSize(vcoords, &nl));
1759:       PetscCall(PetscMalloc1(nl * cdim, &coords));
1760:       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1761:       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1762:       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1763:       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;

1765:       /* debug coordinates */
1766:       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1767:       PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1768:       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1769:       for (d = 0; d < cdim; d++) {
1770:         PetscInt           i;
1771:         const PetscScalar *v;
1772:         char               name[16];

1774:         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1775:         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d));
1776:         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1777:         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1778:         if (flg) PetscCall(VecView(vcoords, viewer));
1779:         PetscCall(VecGetArrayRead(vcoords, &v));
1780:         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1781:         PetscCall(VecRestoreArrayRead(vcoords, &v));
1782:       }
1783:       PetscCall(VecDestroy(&vcoords));
1784:       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1785:       PetscCall(PetscFree(coords));
1786:       PetscCall(PetscFree(ctxs[0]));
1787:       PetscCall(PetscFree2(funcs, ctxs));
1788:       if (flg) {
1789:         PetscCall(PetscViewerPopFormat(viewer));
1790:         PetscCall(PetscViewerDestroy(&viewer));
1791:       }
1792:     }
1793:   }
1794:   PetscFunctionReturn(PETSC_SUCCESS);
1795: }

1797: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1798: {
1799:   Mat_IS         *matis = (Mat_IS *)(pc->pmat->data);
1800:   IS              nis;
1801:   const PetscInt *idxs;
1802:   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;

1804:   PetscFunctionBegin;
1805:   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1806:   if (mop == MPI_LAND) {
1807:     /* init rootdata with true */
1808:     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1809:   } else {
1810:     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1811:   }
1812:   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1813:   PetscCall(ISGetLocalSize(*is, &nd));
1814:   PetscCall(ISGetIndices(*is, &idxs));
1815:   for (i = 0; i < nd; i++)
1816:     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1817:   PetscCall(ISRestoreIndices(*is, &idxs));
1818:   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1819:   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1820:   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1821:   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1822:   if (mop == MPI_LAND) {
1823:     PetscCall(PetscMalloc1(nd, &nidxs));
1824:   } else {
1825:     PetscCall(PetscMalloc1(n, &nidxs));
1826:   }
1827:   for (i = 0, nnd = 0; i < n; i++)
1828:     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
1829:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis));
1830:   PetscCall(ISDestroy(is));
1831:   *is = nis;
1832:   PetscFunctionReturn(PETSC_SUCCESS);
1833: }

1835: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
1836: {
1837:   PC_IS   *pcis   = (PC_IS *)(pc->data);
1838:   PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);

1840:   PetscFunctionBegin;
1841:   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
1842:   if (pcbddc->ChangeOfBasisMatrix) {
1843:     Vec swap;

1845:     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
1846:     swap                = pcbddc->work_change;
1847:     pcbddc->work_change = r;
1848:     r                   = swap;
1849:   }
1850:   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1851:   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1852:   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1853:   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
1854:   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1855:   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
1856:   PetscCall(VecSet(z, 0.));
1857:   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1858:   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1859:   if (pcbddc->ChangeOfBasisMatrix) {
1860:     pcbddc->work_change = r;
1861:     PetscCall(VecCopy(z, pcbddc->work_change));
1862:     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
1863:   }
1864:   PetscFunctionReturn(PETSC_SUCCESS);
1865: }

1867: static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1868: {
1869:   PCBDDCBenignMatMult_ctx ctx;
1870:   PetscBool               apply_right, apply_left, reset_x;

1872:   PetscFunctionBegin;
1873:   PetscCall(MatShellGetContext(A, &ctx));
1874:   if (transpose) {
1875:     apply_right = ctx->apply_left;
1876:     apply_left  = ctx->apply_right;
1877:   } else {
1878:     apply_right = ctx->apply_right;
1879:     apply_left  = ctx->apply_left;
1880:   }
1881:   reset_x = PETSC_FALSE;
1882:   if (apply_right) {
1883:     const PetscScalar *ax;
1884:     PetscInt           nl, i;

1886:     PetscCall(VecGetLocalSize(x, &nl));
1887:     PetscCall(VecGetArrayRead(x, &ax));
1888:     PetscCall(PetscArraycpy(ctx->work, ax, nl));
1889:     PetscCall(VecRestoreArrayRead(x, &ax));
1890:     for (i = 0; i < ctx->benign_n; i++) {
1891:       PetscScalar     sum, val;
1892:       const PetscInt *idxs;
1893:       PetscInt        nz, j;
1894:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1895:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1896:       sum = 0.;
1897:       if (ctx->apply_p0) {
1898:         val = ctx->work[idxs[nz - 1]];
1899:         for (j = 0; j < nz - 1; j++) {
1900:           sum += ctx->work[idxs[j]];
1901:           ctx->work[idxs[j]] += val;
1902:         }
1903:       } else {
1904:         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
1905:       }
1906:       ctx->work[idxs[nz - 1]] -= sum;
1907:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1908:     }
1909:     PetscCall(VecPlaceArray(x, ctx->work));
1910:     reset_x = PETSC_TRUE;
1911:   }
1912:   if (transpose) {
1913:     PetscCall(MatMultTranspose(ctx->A, x, y));
1914:   } else {
1915:     PetscCall(MatMult(ctx->A, x, y));
1916:   }
1917:   if (reset_x) PetscCall(VecResetArray(x));
1918:   if (apply_left) {
1919:     PetscScalar *ay;
1920:     PetscInt     i;

1922:     PetscCall(VecGetArray(y, &ay));
1923:     for (i = 0; i < ctx->benign_n; i++) {
1924:       PetscScalar     sum, val;
1925:       const PetscInt *idxs;
1926:       PetscInt        nz, j;
1927:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1928:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1929:       val = -ay[idxs[nz - 1]];
1930:       if (ctx->apply_p0) {
1931:         sum = 0.;
1932:         for (j = 0; j < nz - 1; j++) {
1933:           sum += ay[idxs[j]];
1934:           ay[idxs[j]] += val;
1935:         }
1936:         ay[idxs[nz - 1]] += sum;
1937:       } else {
1938:         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
1939:         ay[idxs[nz - 1]] = 0.;
1940:       }
1941:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1942:     }
1943:     PetscCall(VecRestoreArray(y, &ay));
1944:   }
1945:   PetscFunctionReturn(PETSC_SUCCESS);
1946: }

1948: static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1949: {
1950:   PetscFunctionBegin;
1951:   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
1952:   PetscFunctionReturn(PETSC_SUCCESS);
1953: }

1955: static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1956: {
1957:   PetscFunctionBegin;
1958:   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
1959:   PetscFunctionReturn(PETSC_SUCCESS);
1960: }

1962: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1963: {
1964:   PC_IS                  *pcis   = (PC_IS *)pc->data;
1965:   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
1966:   PCBDDCBenignMatMult_ctx ctx;

1968:   PetscFunctionBegin;
1969:   if (!restore) {
1970:     Mat                A_IB, A_BI;
1971:     PetscScalar       *work;
1972:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

1974:     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
1975:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
1976:     PetscCall(PetscMalloc1(pcis->n, &work));
1977:     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
1978:     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
1979:     PetscCall(MatSetType(A_IB, MATSHELL));
1980:     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
1981:     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
1982:     PetscCall(PetscNew(&ctx));
1983:     PetscCall(MatShellSetContext(A_IB, ctx));
1984:     ctx->apply_left  = PETSC_TRUE;
1985:     ctx->apply_right = PETSC_FALSE;
1986:     ctx->apply_p0    = PETSC_FALSE;
1987:     ctx->benign_n    = pcbddc->benign_n;
1988:     if (reuse) {
1989:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1990:       ctx->free                 = PETSC_FALSE;
1991:     } else { /* TODO: could be optimized for successive solves */
1992:       ISLocalToGlobalMapping N_to_D;
1993:       PetscInt               i;

1995:       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
1996:       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
1997:       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
1998:       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
1999:       ctx->free = PETSC_TRUE;
2000:     }
2001:     ctx->A    = pcis->A_IB;
2002:     ctx->work = work;
2003:     PetscCall(MatSetUp(A_IB));
2004:     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2005:     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2006:     pcis->A_IB = A_IB;

2008:     /* A_BI as A_IB^T */
2009:     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2010:     pcbddc->benign_original_mat = pcis->A_BI;
2011:     pcis->A_BI                  = A_BI;
2012:   } else {
2013:     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2014:     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2015:     PetscCall(MatDestroy(&pcis->A_IB));
2016:     pcis->A_IB = ctx->A;
2017:     ctx->A     = NULL;
2018:     PetscCall(MatDestroy(&pcis->A_BI));
2019:     pcis->A_BI                  = pcbddc->benign_original_mat;
2020:     pcbddc->benign_original_mat = NULL;
2021:     if (ctx->free) {
2022:       PetscInt i;
2023:       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2024:       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2025:     }
2026:     PetscCall(PetscFree(ctx->work));
2027:     PetscCall(PetscFree(ctx));
2028:   }
2029:   PetscFunctionReturn(PETSC_SUCCESS);
2030: }

2032: /* used just in bddc debug mode */
2033: static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2034: {
2035:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2036:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2037:   Mat      An;

2039:   PetscFunctionBegin;
2040:   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2041:   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2042:   if (is1) {
2043:     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2044:     PetscCall(MatDestroy(&An));
2045:   } else {
2046:     *B = An;
2047:   }
2048:   PetscFunctionReturn(PETSC_SUCCESS);
2049: }

2051: /* TODO: add reuse flag */
2052: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2053: {
2054:   Mat             Bt;
2055:   PetscScalar    *a, *bdata;
2056:   const PetscInt *ii, *ij;
2057:   PetscInt        m, n, i, nnz, *bii, *bij;
2058:   PetscBool       flg_row;

2060:   PetscFunctionBegin;
2061:   PetscCall(MatGetSize(A, &n, &m));
2062:   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2063:   PetscCall(MatSeqAIJGetArray(A, &a));
2064:   nnz = n;
2065:   for (i = 0; i < ii[n]; i++) {
2066:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2067:   }
2068:   PetscCall(PetscMalloc1(n + 1, &bii));
2069:   PetscCall(PetscMalloc1(nnz, &bij));
2070:   PetscCall(PetscMalloc1(nnz, &bdata));
2071:   nnz    = 0;
2072:   bii[0] = 0;
2073:   for (i = 0; i < n; i++) {
2074:     PetscInt j;
2075:     for (j = ii[i]; j < ii[i + 1]; j++) {
2076:       PetscScalar entry = a[j];
2077:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2078:         bij[nnz]   = ij[j];
2079:         bdata[nnz] = entry;
2080:         nnz++;
2081:       }
2082:     }
2083:     bii[i + 1] = nnz;
2084:   }
2085:   PetscCall(MatSeqAIJRestoreArray(A, &a));
2086:   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2087:   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2088:   {
2089:     Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data);
2090:     b->free_a     = PETSC_TRUE;
2091:     b->free_ij    = PETSC_TRUE;
2092:   }
2093:   if (*B == A) PetscCall(MatDestroy(&A));
2094:   *B = Bt;
2095:   PetscFunctionReturn(PETSC_SUCCESS);
2096: }

2098: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2099: {
2100:   Mat                    B = NULL;
2101:   DM                     dm;
2102:   IS                     is_dummy, *cc_n;
2103:   ISLocalToGlobalMapping l2gmap_dummy;
2104:   PCBDDCGraph            graph;
2105:   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2106:   PetscInt               i, n;
2107:   PetscInt              *xadj, *adjncy;
2108:   PetscBool              isplex = PETSC_FALSE;

2110:   PetscFunctionBegin;
2111:   if (ncc) *ncc = 0;
2112:   if (cc) *cc = NULL;
2113:   if (primalv) *primalv = NULL;
2114:   PetscCall(PCBDDCGraphCreate(&graph));
2115:   PetscCall(MatGetDM(pc->pmat, &dm));
2116:   if (!dm) PetscCall(PCGetDM(pc, &dm));
2117:   if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
2118:   if (filter) isplex = PETSC_FALSE;

2120:   if (isplex) { /* this code has been modified from plexpartition.c */
2121:     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2122:     PetscInt       *adj = NULL;
2123:     IS              cellNumbering;
2124:     const PetscInt *cellNum;
2125:     PetscBool       useCone, useClosure;
2126:     PetscSection    section;
2127:     PetscSegBuffer  adjBuffer;
2128:     PetscSF         sfPoint;

2130:     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2131:     PetscCall(DMGetPointSF(dm, &sfPoint));
2132:     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2133:     /* Build adjacency graph via a section/segbuffer */
2134:     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2135:     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2136:     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2137:     /* Always use FVM adjacency to create partitioner graph */
2138:     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2139:     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2140:     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2141:     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2142:     for (n = 0, p = pStart; p < pEnd; p++) {
2143:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2144:       if (nroots > 0) {
2145:         if (cellNum[p] < 0) continue;
2146:       }
2147:       adjSize = PETSC_DETERMINE;
2148:       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2149:       for (a = 0; a < adjSize; ++a) {
2150:         const PetscInt point = adj[a];
2151:         if (pStart <= point && point < pEnd) {
2152:           PetscInt *PETSC_RESTRICT pBuf;
2153:           PetscCall(PetscSectionAddDof(section, p, 1));
2154:           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2155:           *pBuf = point;
2156:         }
2157:       }
2158:       n++;
2159:     }
2160:     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2161:     /* Derive CSR graph from section/segbuffer */
2162:     PetscCall(PetscSectionSetUp(section));
2163:     PetscCall(PetscSectionGetStorageSize(section, &size));
2164:     PetscCall(PetscMalloc1(n + 1, &xadj));
2165:     for (idx = 0, p = pStart; p < pEnd; p++) {
2166:       if (nroots > 0) {
2167:         if (cellNum[p] < 0) continue;
2168:       }
2169:       PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++])));
2170:     }
2171:     xadj[n] = size;
2172:     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2173:     /* Clean up */
2174:     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2175:     PetscCall(PetscSectionDestroy(&section));
2176:     PetscCall(PetscFree(adj));
2177:     graph->xadj   = xadj;
2178:     graph->adjncy = adjncy;
2179:   } else {
2180:     Mat       A;
2181:     PetscBool isseqaij, flg_row;

2183:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2184:     if (!A->rmap->N || !A->cmap->N) {
2185:       PetscCall(PCBDDCGraphDestroy(&graph));
2186:       PetscFunctionReturn(PETSC_SUCCESS);
2187:     }
2188:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2189:     if (!isseqaij && filter) {
2190:       PetscBool isseqdense;

2192:       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2193:       if (!isseqdense) {
2194:         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2195:       } else { /* TODO: rectangular case and LDA */
2196:         PetscScalar *array;
2197:         PetscReal    chop = 1.e-6;

2199:         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2200:         PetscCall(MatDenseGetArray(B, &array));
2201:         PetscCall(MatGetSize(B, &n, NULL));
2202:         for (i = 0; i < n; i++) {
2203:           PetscInt j;
2204:           for (j = i + 1; j < n; j++) {
2205:             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2206:             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2207:             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2208:           }
2209:         }
2210:         PetscCall(MatDenseRestoreArray(B, &array));
2211:         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2212:       }
2213:     } else {
2214:       PetscCall(PetscObjectReference((PetscObject)A));
2215:       B = A;
2216:     }
2217:     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));

2219:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2220:     if (filter) {
2221:       PetscScalar *data;
2222:       PetscInt     j, cum;

2224:       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2225:       PetscCall(MatSeqAIJGetArray(B, &data));
2226:       cum = 0;
2227:       for (i = 0; i < n; i++) {
2228:         PetscInt t;

2230:         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2231:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2232:           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2233:         }
2234:         t                = xadj_filtered[i];
2235:         xadj_filtered[i] = cum;
2236:         cum += t;
2237:       }
2238:       PetscCall(MatSeqAIJRestoreArray(B, &data));
2239:       graph->xadj   = xadj_filtered;
2240:       graph->adjncy = adjncy_filtered;
2241:     } else {
2242:       graph->xadj   = xadj;
2243:       graph->adjncy = adjncy;
2244:     }
2245:   }
2246:   /* compute local connected components using PCBDDCGraph */
2247:   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2248:   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2249:   PetscCall(ISDestroy(&is_dummy));
2250:   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT));
2251:   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2252:   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2253:   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));

2255:   /* partial clean up */
2256:   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2257:   if (B) {
2258:     PetscBool flg_row;
2259:     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2260:     PetscCall(MatDestroy(&B));
2261:   }
2262:   if (isplex) {
2263:     PetscCall(PetscFree(xadj));
2264:     PetscCall(PetscFree(adjncy));
2265:   }

2267:   /* get back data */
2268:   if (isplex) {
2269:     if (ncc) *ncc = graph->ncc;
2270:     if (cc || primalv) {
2271:       Mat          A;
2272:       PetscBT      btv, btvt;
2273:       PetscSection subSection;
2274:       PetscInt    *ids, cum, cump, *cids, *pids;

2276:       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2277:       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2278:       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2279:       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2280:       PetscCall(PetscBTCreate(A->rmap->n, &btvt));

2282:       cids[0] = 0;
2283:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2284:         PetscInt j;

2286:         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2287:         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2288:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2290:           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2291:           for (k = 0; k < 2 * size; k += 2) {
2292:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2294:             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2295:             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2296:             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2297:             for (s = 0; s < dof - cdof; s++) {
2298:               if (PetscBTLookupSet(btvt, off + s)) continue;
2299:               if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2300:               else pids[cump++] = off + s; /* cross-vertex */
2301:             }
2302:             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2303:             if (pp != p) {
2304:               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2305:               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2306:               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2307:               for (s = 0; s < dof - cdof; s++) {
2308:                 if (PetscBTLookupSet(btvt, off + s)) continue;
2309:                 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2310:                 else pids[cump++] = off + s; /* cross-vertex */
2311:               }
2312:             }
2313:           }
2314:           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2315:         }
2316:         cids[i + 1] = cum;
2317:         /* mark dofs as already assigned */
2318:         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2319:       }
2320:       if (cc) {
2321:         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2322:         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2323:         *cc = cc_n;
2324:       }
2325:       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2326:       PetscCall(PetscFree3(ids, cids, pids));
2327:       PetscCall(PetscBTDestroy(&btv));
2328:       PetscCall(PetscBTDestroy(&btvt));
2329:     }
2330:   } else {
2331:     if (ncc) *ncc = graph->ncc;
2332:     if (cc) {
2333:       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2334:       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2335:       *cc = cc_n;
2336:     }
2337:   }
2338:   /* clean up graph */
2339:   graph->xadj   = NULL;
2340:   graph->adjncy = NULL;
2341:   PetscCall(PCBDDCGraphDestroy(&graph));
2342:   PetscFunctionReturn(PETSC_SUCCESS);
2343: }

2345: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2346: {
2347:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2348:   PC_IS   *pcis   = (PC_IS *)(pc->data);
2349:   IS       dirIS  = NULL;
2350:   PetscInt i;

2352:   PetscFunctionBegin;
2353:   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2354:   if (zerodiag) {
2355:     Mat             A;
2356:     Vec             vec3_N;
2357:     PetscScalar    *vals;
2358:     const PetscInt *idxs;
2359:     PetscInt        nz, *count;

2361:     /* p0 */
2362:     PetscCall(VecSet(pcis->vec1_N, 0.));
2363:     PetscCall(PetscMalloc1(pcis->n, &vals));
2364:     PetscCall(ISGetLocalSize(zerodiag, &nz));
2365:     PetscCall(ISGetIndices(zerodiag, &idxs));
2366:     for (i = 0; i < nz; i++) vals[i] = 1.;
2367:     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2368:     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2369:     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2370:     /* v_I */
2371:     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2372:     for (i = 0; i < nz; i++) vals[i] = 0.;
2373:     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2374:     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2375:     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2376:     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2377:     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2378:     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2379:     if (dirIS) {
2380:       PetscInt n;

2382:       PetscCall(ISGetLocalSize(dirIS, &n));
2383:       PetscCall(ISGetIndices(dirIS, &idxs));
2384:       for (i = 0; i < n; i++) vals[i] = 0.;
2385:       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2386:       PetscCall(ISRestoreIndices(dirIS, &idxs));
2387:     }
2388:     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2389:     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2390:     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2391:     PetscCall(VecSet(vec3_N, 0.));
2392:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2393:     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2394:     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2395:     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2396:     PetscCall(PetscFree(vals));
2397:     PetscCall(VecDestroy(&vec3_N));

2399:     /* there should not be any pressure dofs lying on the interface */
2400:     PetscCall(PetscCalloc1(pcis->n, &count));
2401:     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2402:     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2403:     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2404:     PetscCall(ISGetIndices(zerodiag, &idxs));
2405:     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2406:     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2407:     PetscCall(PetscFree(count));
2408:   }
2409:   PetscCall(ISDestroy(&dirIS));

2411:   /* check PCBDDCBenignGetOrSetP0 */
2412:   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2413:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2414:   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2415:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2416:   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2417:   for (i = 0; i < pcbddc->benign_n; i++) {
2418:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2419:     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2420:   }
2421:   PetscFunctionReturn(PETSC_SUCCESS);
2422: }

2424: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2425: {
2426:   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2427:   Mat_IS   *matis     = (Mat_IS *)(pc->pmat->data);
2428:   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2429:   PetscInt  nz, n, benign_n, bsp = 1;
2430:   PetscInt *interior_dofs, n_interior_dofs, nneu;
2431:   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;

2433:   PetscFunctionBegin;
2434:   if (reuse) goto project_b0;
2435:   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2436:   PetscCall(MatDestroy(&pcbddc->benign_B0));
2437:   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2438:   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2439:   has_null_pressures = PETSC_TRUE;
2440:   have_null          = PETSC_TRUE;
2441:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2442:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2443:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2444:      If not, a change of basis on pressures is not needed
2445:      since the local Schur complements are already SPD
2446:   */
2447:   if (pcbddc->n_ISForDofsLocal) {
2448:     IS        iP = NULL;
2449:     PetscInt  p, *pp;
2450:     PetscBool flg;

2452:     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2453:     n = pcbddc->n_ISForDofsLocal;
2454:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2455:     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2456:     PetscOptionsEnd();
2457:     if (!flg) {
2458:       n     = 1;
2459:       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2460:     }

2462:     bsp = 0;
2463:     for (p = 0; p < n; p++) {
2464:       PetscInt bs;

2466:       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2467:       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2468:       bsp += bs;
2469:     }
2470:     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2471:     bsp = 0;
2472:     for (p = 0; p < n; p++) {
2473:       const PetscInt *idxs;
2474:       PetscInt        b, bs, npl, *bidxs;

2476:       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2477:       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2478:       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2479:       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2480:       for (b = 0; b < bs; b++) {
2481:         PetscInt i;

2483:         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2484:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2485:         bsp++;
2486:       }
2487:       PetscCall(PetscFree(bidxs));
2488:       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2489:     }
2490:     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));

2492:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2493:     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2494:     if (iP) {
2495:       IS newpressures;

2497:       PetscCall(ISDifference(pressures, iP, &newpressures));
2498:       PetscCall(ISDestroy(&pressures));
2499:       pressures = newpressures;
2500:     }
2501:     PetscCall(ISSorted(pressures, &sorted));
2502:     if (!sorted) PetscCall(ISSort(pressures));
2503:     PetscCall(PetscFree(pp));
2504:   }

2506:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2507:   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2508:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2509:   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2510:   PetscCall(ISSorted(zerodiag, &sorted));
2511:   if (!sorted) PetscCall(ISSort(zerodiag));
2512:   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2513:   zerodiag_save = zerodiag;
2514:   PetscCall(ISGetLocalSize(zerodiag, &nz));
2515:   if (!nz) {
2516:     if (n) have_null = PETSC_FALSE;
2517:     has_null_pressures = PETSC_FALSE;
2518:     PetscCall(ISDestroy(&zerodiag));
2519:   }
2520:   recompute_zerodiag = PETSC_FALSE;

2522:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2523:   zerodiag_subs   = NULL;
2524:   benign_n        = 0;
2525:   n_interior_dofs = 0;
2526:   interior_dofs   = NULL;
2527:   nneu            = 0;
2528:   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2529:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2530:   if (checkb) { /* need to compute interior nodes */
2531:     PetscInt  n, i, j;
2532:     PetscInt  n_neigh, *neigh, *n_shared, **shared;
2533:     PetscInt *iwork;

2535:     PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n));
2536:     PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2537:     PetscCall(PetscCalloc1(n, &iwork));
2538:     PetscCall(PetscMalloc1(n, &interior_dofs));
2539:     for (i = 1; i < n_neigh; i++)
2540:       for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1;
2541:     for (i = 0; i < n; i++)
2542:       if (!iwork[i]) interior_dofs[n_interior_dofs++] = i;
2543:     PetscCall(PetscFree(iwork));
2544:     PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2545:   }
2546:   if (has_null_pressures) {
2547:     IS             *subs;
2548:     PetscInt        nsubs, i, j, nl;
2549:     const PetscInt *idxs;
2550:     PetscScalar    *array;
2551:     Vec            *work;

2553:     subs  = pcbddc->local_subs;
2554:     nsubs = pcbddc->n_local_subs;
2555:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2556:     if (checkb) {
2557:       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2558:       PetscCall(ISGetLocalSize(zerodiag, &nl));
2559:       PetscCall(ISGetIndices(zerodiag, &idxs));
2560:       /* work[0] = 1_p */
2561:       PetscCall(VecSet(work[0], 0.));
2562:       PetscCall(VecGetArray(work[0], &array));
2563:       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2564:       PetscCall(VecRestoreArray(work[0], &array));
2565:       /* work[0] = 1_v */
2566:       PetscCall(VecSet(work[1], 1.));
2567:       PetscCall(VecGetArray(work[1], &array));
2568:       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2569:       PetscCall(VecRestoreArray(work[1], &array));
2570:       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2571:     }

2573:     if (nsubs > 1 || bsp > 1) {
2574:       IS      *is;
2575:       PetscInt b, totb;

2577:       totb  = bsp;
2578:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2579:       nsubs = PetscMax(nsubs, 1);
2580:       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2581:       for (b = 0; b < totb; b++) {
2582:         for (i = 0; i < nsubs; i++) {
2583:           ISLocalToGlobalMapping l2g;
2584:           IS                     t_zerodiag_subs;
2585:           PetscInt               nl;

2587:           if (subs) {
2588:             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2589:           } else {
2590:             IS tis;

2592:             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2593:             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2594:             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2595:             PetscCall(ISDestroy(&tis));
2596:           }
2597:           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2598:           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2599:           if (nl) {
2600:             PetscBool valid = PETSC_TRUE;

2602:             if (checkb) {
2603:               PetscCall(VecSet(matis->x, 0));
2604:               PetscCall(ISGetLocalSize(subs[i], &nl));
2605:               PetscCall(ISGetIndices(subs[i], &idxs));
2606:               PetscCall(VecGetArray(matis->x, &array));
2607:               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2608:               PetscCall(VecRestoreArray(matis->x, &array));
2609:               PetscCall(ISRestoreIndices(subs[i], &idxs));
2610:               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2611:               PetscCall(MatMult(matis->A, matis->x, matis->y));
2612:               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2613:               PetscCall(VecGetArray(matis->y, &array));
2614:               for (j = 0; j < n_interior_dofs; j++) {
2615:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2616:                   valid = PETSC_FALSE;
2617:                   break;
2618:                 }
2619:               }
2620:               PetscCall(VecRestoreArray(matis->y, &array));
2621:             }
2622:             if (valid && nneu) {
2623:               const PetscInt *idxs;
2624:               PetscInt        nzb;

2626:               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2627:               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2628:               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2629:               if (nzb) valid = PETSC_FALSE;
2630:             }
2631:             if (valid && pressures) {
2632:               IS       t_pressure_subs, tmp;
2633:               PetscInt i1, i2;

2635:               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2636:               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2637:               PetscCall(ISGetLocalSize(tmp, &i1));
2638:               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2639:               if (i2 != i1) valid = PETSC_FALSE;
2640:               PetscCall(ISDestroy(&t_pressure_subs));
2641:               PetscCall(ISDestroy(&tmp));
2642:             }
2643:             if (valid) {
2644:               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2645:               benign_n++;
2646:             } else recompute_zerodiag = PETSC_TRUE;
2647:           }
2648:           PetscCall(ISDestroy(&t_zerodiag_subs));
2649:           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2650:         }
2651:       }
2652:     } else { /* there's just one subdomain (or zero if they have not been detected */
2653:       PetscBool valid = PETSC_TRUE;

2655:       if (nneu) valid = PETSC_FALSE;
2656:       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2657:       if (valid && checkb) {
2658:         PetscCall(MatMult(matis->A, work[0], matis->x));
2659:         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2660:         PetscCall(VecGetArray(matis->x, &array));
2661:         for (j = 0; j < n_interior_dofs; j++) {
2662:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2663:             valid = PETSC_FALSE;
2664:             break;
2665:           }
2666:         }
2667:         PetscCall(VecRestoreArray(matis->x, &array));
2668:       }
2669:       if (valid) {
2670:         benign_n = 1;
2671:         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2672:         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2673:         zerodiag_subs[0] = zerodiag;
2674:       }
2675:     }
2676:     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2677:   }
2678:   PetscCall(PetscFree(interior_dofs));

2680:   if (!benign_n) {
2681:     PetscInt n;

2683:     PetscCall(ISDestroy(&zerodiag));
2684:     recompute_zerodiag = PETSC_FALSE;
2685:     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2686:     if (n) have_null = PETSC_FALSE;
2687:   }

2689:   /* final check for null pressures */
2690:   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));

2692:   if (recompute_zerodiag) {
2693:     PetscCall(ISDestroy(&zerodiag));
2694:     if (benign_n == 1) {
2695:       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2696:       zerodiag = zerodiag_subs[0];
2697:     } else {
2698:       PetscInt i, nzn, *new_idxs;

2700:       nzn = 0;
2701:       for (i = 0; i < benign_n; i++) {
2702:         PetscInt ns;
2703:         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2704:         nzn += ns;
2705:       }
2706:       PetscCall(PetscMalloc1(nzn, &new_idxs));
2707:       nzn = 0;
2708:       for (i = 0; i < benign_n; i++) {
2709:         PetscInt ns, *idxs;
2710:         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2711:         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2712:         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2713:         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2714:         nzn += ns;
2715:       }
2716:       PetscCall(PetscSortInt(nzn, new_idxs));
2717:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2718:     }
2719:     have_null = PETSC_FALSE;
2720:   }

2722:   /* determines if the coarse solver will be singular or not */
2723:   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));

2725:   /* Prepare matrix to compute no-net-flux */
2726:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2727:     Mat                    A, loc_divudotp;
2728:     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2729:     IS                     row, col, isused = NULL;
2730:     PetscInt               M, N, n, st, n_isused;

2732:     if (pressures) {
2733:       isused = pressures;
2734:     } else {
2735:       isused = zerodiag_save;
2736:     }
2737:     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2738:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2739:     PetscCall(MatGetLocalSize(A, &n, NULL));
2740:     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2741:     n_isused = 0;
2742:     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2743:     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2744:     st = st - n_isused;
2745:     if (n) {
2746:       const PetscInt *gidxs;

2748:       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2749:       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2750:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2751:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2752:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2753:       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2754:     } else {
2755:       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
2756:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2757:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
2758:     }
2759:     PetscCall(MatGetSize(pc->pmat, NULL, &N));
2760:     PetscCall(ISGetSize(row, &M));
2761:     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
2762:     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
2763:     PetscCall(ISDestroy(&row));
2764:     PetscCall(ISDestroy(&col));
2765:     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
2766:     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
2767:     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
2768:     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
2769:     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
2770:     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
2771:     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
2772:     PetscCall(MatDestroy(&loc_divudotp));
2773:     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2774:     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2775:   }
2776:   PetscCall(ISDestroy(&zerodiag_save));
2777:   PetscCall(ISDestroy(&pressures));
2778:   if (bzerodiag) {
2779:     PetscInt i;

2781:     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
2782:     PetscCall(PetscFree(bzerodiag));
2783:   }
2784:   pcbddc->benign_n             = benign_n;
2785:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2787:   /* determines if the problem has subdomains with 0 pressure block */
2788:   have_null = (PetscBool)(!!pcbddc->benign_n);
2789:   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));

2791: project_b0:
2792:   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2793:   /* change of basis and p0 dofs */
2794:   if (pcbddc->benign_n) {
2795:     PetscInt i, s, *nnz;

2797:     /* local change of basis for pressures */
2798:     PetscCall(MatDestroy(&pcbddc->benign_change));
2799:     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
2800:     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
2801:     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
2802:     PetscCall(PetscMalloc1(n, &nnz));
2803:     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
2804:     for (i = 0; i < pcbddc->benign_n; i++) {
2805:       const PetscInt *idxs;
2806:       PetscInt        nzs, j;

2808:       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
2809:       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2810:       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
2811:       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
2812:       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2813:     }
2814:     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
2815:     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2816:     PetscCall(PetscFree(nnz));
2817:     /* set identity by default */
2818:     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
2819:     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
2820:     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
2821:     /* set change on pressures */
2822:     for (s = 0; s < pcbddc->benign_n; s++) {
2823:       PetscScalar    *array;
2824:       const PetscInt *idxs;
2825:       PetscInt        nzs;

2827:       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
2828:       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2829:       for (i = 0; i < nzs - 1; i++) {
2830:         PetscScalar vals[2];
2831:         PetscInt    cols[2];

2833:         cols[0] = idxs[i];
2834:         cols[1] = idxs[nzs - 1];
2835:         vals[0] = 1.;
2836:         vals[1] = 1.;
2837:         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
2838:       }
2839:       PetscCall(PetscMalloc1(nzs, &array));
2840:       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
2841:       array[nzs - 1] = 1.;
2842:       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
2843:       /* store local idxs for p0 */
2844:       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
2845:       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2846:       PetscCall(PetscFree(array));
2847:     }
2848:     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
2849:     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));

2851:     /* project if needed */
2852:     if (pcbddc->benign_change_explicit) {
2853:       Mat M;

2855:       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
2856:       PetscCall(MatDestroy(&pcbddc->local_mat));
2857:       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
2858:       PetscCall(MatDestroy(&M));
2859:     }
2860:     /* store global idxs for p0 */
2861:     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
2862:   }
2863:   *zerodiaglocal = zerodiag;
2864:   PetscFunctionReturn(PETSC_SUCCESS);
2865: }

2867: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2868: {
2869:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
2870:   PetscScalar *array;

2872:   PetscFunctionBegin;
2873:   if (!pcbddc->benign_sf) {
2874:     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
2875:     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
2876:   }
2877:   if (get) {
2878:     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
2879:     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2880:     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2881:     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
2882:   } else {
2883:     PetscCall(VecGetArray(v, &array));
2884:     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2885:     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2886:     PetscCall(VecRestoreArray(v, &array));
2887:   }
2888:   PetscFunctionReturn(PETSC_SUCCESS);
2889: }

2891: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2892: {
2893:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

2895:   PetscFunctionBegin;
2896:   /* TODO: add error checking
2897:     - avoid nested pop (or push) calls.
2898:     - cannot push before pop.
2899:     - cannot call this if pcbddc->local_mat is NULL
2900:   */
2901:   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
2902:   if (pop) {
2903:     if (pcbddc->benign_change_explicit) {
2904:       IS       is_p0;
2905:       MatReuse reuse;

2907:       /* extract B_0 */
2908:       reuse = MAT_INITIAL_MATRIX;
2909:       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
2910:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
2911:       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
2912:       /* remove rows and cols from local problem */
2913:       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
2914:       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
2915:       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
2916:       PetscCall(ISDestroy(&is_p0));
2917:     } else {
2918:       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
2919:       PetscScalar *vals;
2920:       PetscInt     i, n, *idxs_ins;

2922:       PetscCall(VecGetLocalSize(matis->y, &n));
2923:       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
2924:       if (!pcbddc->benign_B0) {
2925:         PetscInt *nnz;
2926:         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
2927:         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
2928:         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
2929:         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
2930:         for (i = 0; i < pcbddc->benign_n; i++) {
2931:           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
2932:           nnz[i] = n - nnz[i];
2933:         }
2934:         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
2935:         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2936:         PetscCall(PetscFree(nnz));
2937:       }

2939:       for (i = 0; i < pcbddc->benign_n; i++) {
2940:         PetscScalar *array;
2941:         PetscInt    *idxs, j, nz, cum;

2943:         PetscCall(VecSet(matis->x, 0.));
2944:         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
2945:         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2946:         for (j = 0; j < nz; j++) vals[j] = 1.;
2947:         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
2948:         PetscCall(VecAssemblyBegin(matis->x));
2949:         PetscCall(VecAssemblyEnd(matis->x));
2950:         PetscCall(VecSet(matis->y, 0.));
2951:         PetscCall(MatMult(matis->A, matis->x, matis->y));
2952:         PetscCall(VecGetArray(matis->y, &array));
2953:         cum = 0;
2954:         for (j = 0; j < n; j++) {
2955:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2956:             vals[cum]     = array[j];
2957:             idxs_ins[cum] = j;
2958:             cum++;
2959:           }
2960:         }
2961:         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
2962:         PetscCall(VecRestoreArray(matis->y, &array));
2963:         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2964:       }
2965:       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2966:       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2967:       PetscCall(PetscFree2(idxs_ins, vals));
2968:     }
2969:   } else { /* push */

2971:     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
2972:     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
2973:       PetscScalar *B0_vals;
2974:       PetscInt    *B0_cols, B0_ncol;

2976:       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2977:       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
2978:       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
2979:       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
2980:       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2981:     }
2982:     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2983:     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2984:   }
2985:   PetscFunctionReturn(PETSC_SUCCESS);
2986: }

2988: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2989: {
2990:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
2991:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2992:   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
2993:   PetscBLASInt   *B_iwork, *B_ifail;
2994:   PetscScalar    *work, lwork;
2995:   PetscScalar    *St, *S, *eigv;
2996:   PetscScalar    *Sarray, *Starray;
2997:   PetscReal      *eigs, thresh, lthresh, uthresh;
2998:   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
2999:   PetscBool       allocated_S_St, upart;
3000: #if defined(PETSC_USE_COMPLEX)
3001:   PetscReal *rwork;
3002: #endif

3004:   PetscFunctionBegin;
3005:   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3006:   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3007:   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3008:   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3009:              sub_schurs->is_posdef);
3010:   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));

3012:   if (pcbddc->dbg_flag) {
3013:     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3014:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3015:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3016:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3017:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3018:   }

3020:   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));

3022:   /* max size of subsets */
3023:   mss = 0;
3024:   for (i = 0; i < sub_schurs->n_subs; i++) {
3025:     PetscInt subset_size;

3027:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3028:     mss = PetscMax(mss, subset_size);
3029:   }

3031:   /* min/max and threshold */
3032:   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3033:   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3034:   nmax           = PetscMax(nmin, nmax);
3035:   allocated_S_St = PETSC_FALSE;
3036:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3037:     allocated_S_St = PETSC_TRUE;
3038:   }

3040:   /* allocate lapack workspace */
3041:   cum = cum2 = 0;
3042:   maxneigs   = 0;
3043:   for (i = 0; i < sub_schurs->n_subs; i++) {
3044:     PetscInt n, subset_size;

3046:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3047:     n = PetscMin(subset_size, nmax);
3048:     cum += subset_size;
3049:     cum2 += subset_size * n;
3050:     maxneigs = PetscMax(maxneigs, n);
3051:   }
3052:   lwork = 0;
3053:   if (mss) {
3054:     PetscScalar  sdummy  = 0.;
3055:     PetscBLASInt B_itype = 1;
3056:     PetscBLASInt B_N = mss, idummy = 0;
3057:     PetscReal    rdummy = 0., zero = 0.0;
3058:     PetscReal    eps = 0.0; /* dlamch? */

3060:     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3061:     B_lwork = -1;
3062:     /* some implementations may complain about NULL pointers, even if we are querying */
3063:     S       = &sdummy;
3064:     St      = &sdummy;
3065:     eigs    = &rdummy;
3066:     eigv    = &sdummy;
3067:     B_iwork = &idummy;
3068:     B_ifail = &idummy;
3069: #if defined(PETSC_USE_COMPLEX)
3070:     rwork = &rdummy;
3071: #endif
3072:     thresh = 1.0;
3073:     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3074: #if defined(PETSC_USE_COMPLEX)
3075:     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3076: #else
3077:     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3078: #endif
3079:     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr);
3080:     PetscCall(PetscFPTrapPop());
3081:   }

3083:   nv = 0;
3084:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3085:     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3086:   }
3087:   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3088:   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3089:   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3090: #if defined(PETSC_USE_COMPLEX)
3091:   PetscCall(PetscMalloc1(7 * mss, &rwork));
3092: #endif
3093:   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3094:                          &pcbddc->adaptive_constraints_data));
3095:   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));

3097:   maxneigs = 0;
3098:   cum = cumarray                           = 0;
3099:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3100:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3101:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3102:     const PetscInt *idxs;

3104:     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3105:     for (cum = 0; cum < nv; cum++) {
3106:       pcbddc->adaptive_constraints_n[cum]            = 1;
3107:       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3108:       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3109:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3110:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3111:     }
3112:     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3113:   }

3115:   if (mss) { /* multilevel */
3116:     if (sub_schurs->gdsw) {
3117:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3118:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3119:     } else {
3120:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3121:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3122:     }
3123:   }

3125:   lthresh = pcbddc->adaptive_threshold[0];
3126:   uthresh = pcbddc->adaptive_threshold[1];
3127:   upart   = pcbddc->use_deluxe_scaling;
3128:   for (i = 0; i < sub_schurs->n_subs; i++) {
3129:     const PetscInt *idxs;
3130:     PetscReal       upper, lower;
3131:     PetscInt        j, subset_size, eigs_start = 0;
3132:     PetscBLASInt    B_N;
3133:     PetscBool       same_data = PETSC_FALSE;
3134:     PetscBool       scal      = PETSC_FALSE;

3136:     if (upart) {
3137:       upper = PETSC_MAX_REAL;
3138:       lower = uthresh;
3139:     } else {
3140:       if (sub_schurs->gdsw) {
3141:         upper = uthresh;
3142:         lower = PETSC_MIN_REAL;
3143:       } else {
3144:         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3145:         upper = 1. / uthresh;
3146:         lower = 0.;
3147:       }
3148:     }
3149:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3150:     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3151:     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3152:     /* this is experimental: we assume the dofs have been properly grouped to have
3153:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3154:     if (!sub_schurs->is_posdef) {
3155:       Mat T;

3157:       for (j = 0; j < subset_size; j++) {
3158:         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3159:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3160:           PetscCall(MatScale(T, -1.0));
3161:           PetscCall(MatDestroy(&T));
3162:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3163:           PetscCall(MatScale(T, -1.0));
3164:           PetscCall(MatDestroy(&T));
3165:           if (sub_schurs->change_primal_sub) {
3166:             PetscInt        nz, k;
3167:             const PetscInt *idxs;

3169:             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3170:             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3171:             for (k = 0; k < nz; k++) {
3172:               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3173:               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3174:             }
3175:             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3176:           }
3177:           scal = PETSC_TRUE;
3178:           break;
3179:         }
3180:       }
3181:     }

3183:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3184:       if (sub_schurs->is_symmetric) {
3185:         PetscInt j, k;
3186:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3187:           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3188:           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3189:         }
3190:         for (j = 0; j < subset_size; j++) {
3191:           for (k = j; k < subset_size; k++) {
3192:             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3193:             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3194:           }
3195:         }
3196:       } else {
3197:         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3198:         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3199:       }
3200:     } else {
3201:       S  = Sarray + cumarray;
3202:       St = Starray + cumarray;
3203:     }
3204:     /* see if we can save some work */
3205:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));

3207:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3208:       B_neigs = 0;
3209:     } else {
3210:       PetscBLASInt B_itype = 1;
3211:       PetscBLASInt B_IL, B_IU;
3212:       PetscReal    eps = -1.0; /* dlamch? */
3213:       PetscInt     nmin_s;
3214:       PetscBool    compute_range;

3216:       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3217:       B_neigs       = 0;
3218:       compute_range = (PetscBool)!same_data;
3219:       if (nmin >= subset_size) compute_range = PETSC_FALSE;

3221:       if (pcbddc->dbg_flag) {
3222:         PetscInt nc = 0;

3224:         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3225:         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i,
3226:                                                      sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc));
3227:       }

3229:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3230:       if (compute_range) {
3231:         /* ask for eigenvalues larger than thresh */
3232:         if (sub_schurs->is_posdef) {
3233: #if defined(PETSC_USE_COMPLEX)
3234:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3235: #else
3236:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3237: #endif
3238:           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3239:         } else { /* no theory so far, but it works nicely */
3240:           PetscInt  recipe = 0, recipe_m = 1;
3241:           PetscReal bb[2];

3243:           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3244:           switch (recipe) {
3245:           case 0:
3246:             if (scal) {
3247:               bb[0] = PETSC_MIN_REAL;
3248:               bb[1] = lthresh;
3249:             } else {
3250:               bb[0] = uthresh;
3251:               bb[1] = PETSC_MAX_REAL;
3252:             }
3253: #if defined(PETSC_USE_COMPLEX)
3254:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3255: #else
3256:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3257: #endif
3258:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3259:             break;
3260:           case 1:
3261:             bb[0] = PETSC_MIN_REAL;
3262:             bb[1] = lthresh * lthresh;
3263: #if defined(PETSC_USE_COMPLEX)
3264:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3265: #else
3266:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3267: #endif
3268:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3269:             if (!scal) {
3270:               PetscBLASInt B_neigs2 = 0;

3272:               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3273:               bb[1] = PETSC_MAX_REAL;
3274:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3275:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3276: #if defined(PETSC_USE_COMPLEX)
3277:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3278: #else
3279:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3280: #endif
3281:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3282:               B_neigs += B_neigs2;
3283:             }
3284:             break;
3285:           case 2:
3286:             if (scal) {
3287:               bb[0] = PETSC_MIN_REAL;
3288:               bb[1] = 0;
3289: #if defined(PETSC_USE_COMPLEX)
3290:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3291: #else
3292:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3293: #endif
3294:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3295:             } else {
3296:               PetscBLASInt B_neigs2 = 0;
3297:               PetscBool    do_copy  = PETSC_FALSE;

3299:               lthresh = PetscMax(lthresh, 0.0);
3300:               if (lthresh > 0.0) {
3301:                 bb[0] = PETSC_MIN_REAL;
3302:                 bb[1] = lthresh * lthresh;

3304:                 do_copy = PETSC_TRUE;
3305: #if defined(PETSC_USE_COMPLEX)
3306:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3307: #else
3308:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3309: #endif
3310:                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3311:               }
3312:               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3313:               bb[1] = PETSC_MAX_REAL;
3314:               if (do_copy) {
3315:                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3316:                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3317:               }
3318: #if defined(PETSC_USE_COMPLEX)
3319:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3320: #else
3321:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3322: #endif
3323:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3324:               B_neigs += B_neigs2;
3325:             }
3326:             break;
3327:           case 3:
3328:             if (scal) {
3329:               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3330:             } else {
3331:               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3332:             }
3333:             if (!scal) {
3334:               bb[0] = uthresh;
3335:               bb[1] = PETSC_MAX_REAL;
3336: #if defined(PETSC_USE_COMPLEX)
3337:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3338: #else
3339:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3340: #endif
3341:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3342:             }
3343:             if (recipe_m > 0 && B_N - B_neigs > 0) {
3344:               PetscBLASInt B_neigs2 = 0;

3346:               B_IL = 1;
3347:               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3348:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3349:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3350: #if defined(PETSC_USE_COMPLEX)
3351:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3352: #else
3353:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3354: #endif
3355:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3356:               B_neigs += B_neigs2;
3357:             }
3358:             break;
3359:           case 4:
3360:             bb[0] = PETSC_MIN_REAL;
3361:             bb[1] = lthresh;
3362: #if defined(PETSC_USE_COMPLEX)
3363:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3364: #else
3365:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3366: #endif
3367:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3368:             {
3369:               PetscBLASInt B_neigs2 = 0;

3371:               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3372:               bb[1] = PETSC_MAX_REAL;
3373:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3374:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3375: #if defined(PETSC_USE_COMPLEX)
3376:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3377: #else
3378:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3379: #endif
3380:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3381:               B_neigs += B_neigs2;
3382:             }
3383:             break;
3384:           case 5: /* same as before: first compute all eigenvalues, then filter */
3385: #if defined(PETSC_USE_COMPLEX)
3386:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3387: #else
3388:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3389: #endif
3390:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3391:             {
3392:               PetscInt e, k, ne;
3393:               for (e = 0, ne = 0; e < B_neigs; e++) {
3394:                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3395:                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3396:                   eigs[ne] = eigs[e];
3397:                   ne++;
3398:                 }
3399:               }
3400:               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3401:               B_neigs = ne;
3402:             }
3403:             break;
3404:           default:
3405:             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3406:           }
3407:         }
3408:       } else if (!same_data) { /* this is just to see all the eigenvalues */
3409:         B_IU = PetscMax(1, PetscMin(B_N, nmax));
3410:         B_IL = 1;
3411: #if defined(PETSC_USE_COMPLEX)
3412:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3413: #else
3414:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3415: #endif
3416:         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3417:       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3418:         PetscInt k;
3419:         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3420:         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3421:         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3422:         nmin = nmax;
3423:         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3424:         for (k = 0; k < nmax; k++) {
3425:           eigs[k]                     = 1. / PETSC_SMALL;
3426:           eigv[k * (subset_size + 1)] = 1.0;
3427:         }
3428:       }
3429:       PetscCall(PetscFPTrapPop());
3430:       if (B_ierr) {
3431:         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3432:         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3433:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3434:       }

3436:       if (B_neigs > nmax) {
3437:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3438:         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3439:         B_neigs = nmax;
3440:       }

3442:       nmin_s = PetscMin(nmin, B_N);
3443:       if (B_neigs < nmin_s) {
3444:         PetscBLASInt B_neigs2 = 0;

3446:         if (upart) {
3447:           if (scal) {
3448:             B_IU = nmin_s;
3449:             B_IL = B_neigs + 1;
3450:           } else {
3451:             B_IL = B_N - nmin_s + 1;
3452:             B_IU = B_N - B_neigs;
3453:           }
3454:         } else {
3455:           B_IL = B_neigs + 1;
3456:           B_IU = nmin_s;
3457:         }
3458:         if (pcbddc->dbg_flag) {
3459:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3460:         }
3461:         if (sub_schurs->is_symmetric) {
3462:           PetscInt j, k;
3463:           for (j = 0; j < subset_size; j++) {
3464:             for (k = j; k < subset_size; k++) {
3465:               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3466:               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3467:             }
3468:           }
3469:         } else {
3470:           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3471:           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3472:         }
3473:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3474: #if defined(PETSC_USE_COMPLEX)
3475:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3476: #else
3477:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3478: #endif
3479:         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3480:         PetscCall(PetscFPTrapPop());
3481:         B_neigs += B_neigs2;
3482:       }
3483:       if (B_ierr) {
3484:         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3485:         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3486:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3487:       }
3488:       if (pcbddc->dbg_flag) {
3489:         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3490:         for (j = 0; j < B_neigs; j++) {
3491:           if (!sub_schurs->gdsw) {
3492:             if (eigs[j] == 0.0) {
3493:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3494:             } else {
3495:               if (upart) {
3496:                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3497:               } else {
3498:                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1. / eigs[j + eigs_start])));
3499:               }
3500:             }
3501:           } else {
3502:             double pg = (double)eigs[j + eigs_start];
3503:             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3504:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3505:           }
3506:         }
3507:       }
3508:     }
3509:     /* change the basis back to the original one */
3510:     if (sub_schurs->change) {
3511:       Mat change, phi, phit;

3513:       if (pcbddc->dbg_flag > 2) {
3514:         PetscInt ii;
3515:         for (ii = 0; ii < B_neigs; ii++) {
3516:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3517:           for (j = 0; j < B_N; j++) {
3518: #if defined(PETSC_USE_COMPLEX)
3519:             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3520:             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3521:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3522: #else
3523:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3524: #endif
3525:           }
3526:         }
3527:       }
3528:       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3529:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3530:       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi));
3531:       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3532:       PetscCall(MatDestroy(&phit));
3533:       PetscCall(MatDestroy(&phi));
3534:     }
3535:     maxneigs                               = PetscMax(B_neigs, maxneigs);
3536:     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3537:     if (B_neigs) {
3538:       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));

3540:       if (pcbddc->dbg_flag > 1) {
3541:         PetscInt ii;
3542:         for (ii = 0; ii < B_neigs; ii++) {
3543:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3544:           for (j = 0; j < B_N; j++) {
3545: #if defined(PETSC_USE_COMPLEX)
3546:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3547:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3548:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3549: #else
3550:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3551: #endif
3552:           }
3553:         }
3554:       }
3555:       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3556:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3557:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3558:       cum++;
3559:     }
3560:     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3561:     /* shift for next computation */
3562:     cumarray += subset_size * subset_size;
3563:   }
3564:   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

3566:   if (mss) {
3567:     if (sub_schurs->gdsw) {
3568:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3569:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3570:     } else {
3571:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3572:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3573:       /* destroy matrices (junk) */
3574:       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3575:       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3576:     }
3577:   }
3578:   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3579:   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3580: #if defined(PETSC_USE_COMPLEX)
3581:   PetscCall(PetscFree(rwork));
3582: #endif
3583:   if (pcbddc->dbg_flag) {
3584:     PetscInt maxneigs_r;
3585:     PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3586:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3587:   }
3588:   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3589:   PetscFunctionReturn(PETSC_SUCCESS);
3590: }

3592: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3593: {
3594:   PetscScalar *coarse_submat_vals;

3596:   PetscFunctionBegin;
3597:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3598:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3599:   PetscCall(PCBDDCSetUpLocalScatters(pc));

3601:   /* Setup local neumann solver ksp_R */
3602:   /* PCBDDCSetUpLocalScatters should be called first! */
3603:   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));

3605:   /*
3606:      Setup local correction and local part of coarse basis.
3607:      Gives back the dense local part of the coarse matrix in column major ordering
3608:   */
3609:   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals));

3611:   /* Compute total number of coarse nodes and setup coarse solver */
3612:   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals));

3614:   /* free */
3615:   PetscCall(PetscFree(coarse_submat_vals));
3616:   PetscFunctionReturn(PETSC_SUCCESS);
3617: }

3619: PetscErrorCode PCBDDCResetCustomization(PC pc)
3620: {
3621:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3623:   PetscFunctionBegin;
3624:   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3625:   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3626:   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3627:   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3628:   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3629:   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3630:   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3631:   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3632:   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3633:   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3634:   PetscFunctionReturn(PETSC_SUCCESS);
3635: }

3637: PetscErrorCode PCBDDCResetTopography(PC pc)
3638: {
3639:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3640:   PetscInt i;

3642:   PetscFunctionBegin;
3643:   PetscCall(MatDestroy(&pcbddc->nedcG));
3644:   PetscCall(ISDestroy(&pcbddc->nedclocal));
3645:   PetscCall(MatDestroy(&pcbddc->discretegradient));
3646:   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3647:   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3648:   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3649:   PetscCall(VecDestroy(&pcbddc->work_change));
3650:   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3651:   PetscCall(MatDestroy(&pcbddc->divudotp));
3652:   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3653:   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3654:   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3655:   pcbddc->n_local_subs = 0;
3656:   PetscCall(PetscFree(pcbddc->local_subs));
3657:   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3658:   pcbddc->graphanalyzed        = PETSC_FALSE;
3659:   pcbddc->recompute_topography = PETSC_TRUE;
3660:   pcbddc->corner_selected      = PETSC_FALSE;
3661:   PetscFunctionReturn(PETSC_SUCCESS);
3662: }

3664: PetscErrorCode PCBDDCResetSolvers(PC pc)
3665: {
3666:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3668:   PetscFunctionBegin;
3669:   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3670:   if (pcbddc->coarse_phi_B) {
3671:     PetscScalar *array;
3672:     PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array));
3673:     PetscCall(PetscFree(array));
3674:   }
3675:   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3676:   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3677:   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3678:   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3679:   PetscCall(VecDestroy(&pcbddc->vec1_P));
3680:   PetscCall(VecDestroy(&pcbddc->vec1_C));
3681:   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3682:   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3683:   PetscCall(VecDestroy(&pcbddc->vec1_R));
3684:   PetscCall(VecDestroy(&pcbddc->vec2_R));
3685:   PetscCall(ISDestroy(&pcbddc->is_R_local));
3686:   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3687:   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3688:   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3689:   PetscCall(KSPReset(pcbddc->ksp_D));
3690:   PetscCall(KSPReset(pcbddc->ksp_R));
3691:   PetscCall(KSPReset(pcbddc->coarse_ksp));
3692:   PetscCall(MatDestroy(&pcbddc->local_mat));
3693:   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3694:   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3695:   PetscCall(PetscFree(pcbddc->global_primal_indices));
3696:   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3697:   PetscCall(MatDestroy(&pcbddc->benign_change));
3698:   PetscCall(VecDestroy(&pcbddc->benign_vec));
3699:   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3700:   PetscCall(MatDestroy(&pcbddc->benign_B0));
3701:   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3702:   if (pcbddc->benign_zerodiag_subs) {
3703:     PetscInt i;
3704:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3705:     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3706:   }
3707:   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3708:   PetscFunctionReturn(PETSC_SUCCESS);
3709: }

3711: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3712: {
3713:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3714:   PC_IS   *pcis   = (PC_IS *)pc->data;
3715:   VecType  impVecType;
3716:   PetscInt n_constraints, n_R, old_size;

3718:   PetscFunctionBegin;
3719:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3720:   n_R           = pcis->n - pcbddc->n_vertices;
3721:   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3722:   /* local work vectors (try to avoid unneeded work)*/
3723:   /* R nodes */
3724:   old_size = -1;
3725:   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3726:   if (n_R != old_size) {
3727:     PetscCall(VecDestroy(&pcbddc->vec1_R));
3728:     PetscCall(VecDestroy(&pcbddc->vec2_R));
3729:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3730:     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3731:     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3732:     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3733:   }
3734:   /* local primal dofs */
3735:   old_size = -1;
3736:   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3737:   if (pcbddc->local_primal_size != old_size) {
3738:     PetscCall(VecDestroy(&pcbddc->vec1_P));
3739:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3740:     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3741:     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3742:   }
3743:   /* local explicit constraints */
3744:   old_size = -1;
3745:   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3746:   if (n_constraints && n_constraints != old_size) {
3747:     PetscCall(VecDestroy(&pcbddc->vec1_C));
3748:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3749:     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3750:     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3751:   }
3752:   PetscFunctionReturn(PETSC_SUCCESS);
3753: }

3755: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3756: {
3757:   /* pointers to pcis and pcbddc */
3758:   PC_IS          *pcis       = (PC_IS *)pc->data;
3759:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3760:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3761:   /* submatrices of local problem */
3762:   Mat A_RV, A_VR, A_VV, local_auxmat2_R;
3763:   /* submatrices of local coarse problem */
3764:   Mat S_VV, S_CV, S_VC, S_CC;
3765:   /* working matrices */
3766:   Mat C_CR;
3767:   /* additional working stuff */
3768:   PC           pc_R;
3769:   Mat          F, Brhs = NULL;
3770:   Vec          dummy_vec;
3771:   PetscBool    isLU, isCHOL, need_benign_correction, sparserhs;
3772:   PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3773:   PetscScalar *work;
3774:   PetscInt    *idx_V_B;
3775:   PetscInt     lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I;
3776:   PetscInt     i, n_R, n_D, n_B;
3777:   PetscScalar  one = 1.0, m_one = -1.0;

3779:   PetscFunctionBegin;
3780:   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
3781:   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));

3783:   /* Set Non-overlapping dimensions */
3784:   n_vertices    = pcbddc->n_vertices;
3785:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3786:   n_B           = pcis->n_B;
3787:   n_D           = pcis->n - n_B;
3788:   n_R           = pcis->n - n_vertices;

3790:   /* vertices in boundary numbering */
3791:   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
3792:   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
3793:   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);

3795:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3796:   PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals));
3797:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV));
3798:   PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size));
3799:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV));
3800:   PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size));
3801:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC));
3802:   PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size));
3803:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC));
3804:   PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size));

3806:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3807:   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
3808:   PetscCall(PCSetUp(pc_R));
3809:   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
3810:   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
3811:   lda_rhs                = n_R;
3812:   need_benign_correction = PETSC_FALSE;
3813:   if (isLU || isCHOL) {
3814:     PetscCall(PCFactorGetMatrix(pc_R, &F));
3815:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3816:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3817:     MatFactorType      type;

3819:     F = reuse_solver->F;
3820:     PetscCall(MatGetFactorType(F, &type));
3821:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3822:     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3823:     PetscCall(MatGetSize(F, &lda_rhs, NULL));
3824:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3825:   } else F = NULL;

3827:   /* determine if we can use a sparse right-hand side */
3828:   sparserhs = PETSC_FALSE;
3829:   if (F) {
3830:     MatSolverType solver;

3832:     PetscCall(MatFactorGetSolverType(F, &solver));
3833:     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
3834:   }

3836:   /* allocate workspace */
3837:   n = 0;
3838:   if (n_constraints) n += lda_rhs * n_constraints;
3839:   if (n_vertices) {
3840:     n = PetscMax(2 * lda_rhs * n_vertices, n);
3841:     n = PetscMax((lda_rhs + n_B) * n_vertices, n);
3842:   }
3843:   if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n);
3844:   PetscCall(PetscMalloc1(n, &work));

3846:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3847:   dummy_vec = NULL;
3848:   if (need_benign_correction && lda_rhs != n_R && F) {
3849:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
3850:     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
3851:     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
3852:   }

3854:   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3855:   PetscCall(MatDestroy(&pcbddc->local_auxmat2));

3857:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3858:   if (n_constraints) {
3859:     Mat M3, C_B;
3860:     IS  is_aux;

3862:     /* Extract constraints on R nodes: C_{CR}  */
3863:     PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux));
3864:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
3865:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));

3867:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3868:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3869:     if (!sparserhs) {
3870:       PetscCall(PetscArrayzero(work, lda_rhs * n_constraints));
3871:       for (i = 0; i < n_constraints; i++) {
3872:         const PetscScalar *row_cmat_values;
3873:         const PetscInt    *row_cmat_indices;
3874:         PetscInt           size_of_constraint, j;

3876:         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3877:         for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j];
3878:         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3879:       }
3880:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs));
3881:     } else {
3882:       Mat tC_CR;

3884:       PetscCall(MatScale(C_CR, -1.0));
3885:       if (lda_rhs != n_R) {
3886:         PetscScalar *aa;
3887:         PetscInt     r, *ii, *jj;
3888:         PetscBool    done;

3890:         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3891:         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
3892:         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
3893:         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
3894:         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3895:         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
3896:       } else {
3897:         PetscCall(PetscObjectReference((PetscObject)C_CR));
3898:         tC_CR = C_CR;
3899:       }
3900:       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
3901:       PetscCall(MatDestroy(&tC_CR));
3902:     }
3903:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R));
3904:     if (F) {
3905:       if (need_benign_correction) {
3906:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3908:         /* rhs is already zero on interior dofs, no need to change the rhs */
3909:         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
3910:       }
3911:       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
3912:       if (need_benign_correction) {
3913:         PetscScalar       *marr;
3914:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3916:         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3917:         if (lda_rhs != n_R) {
3918:           for (i = 0; i < n_constraints; i++) {
3919:             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
3920:             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
3921:             PetscCall(VecResetArray(dummy_vec));
3922:           }
3923:         } else {
3924:           for (i = 0; i < n_constraints; i++) {
3925:             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
3926:             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
3927:             PetscCall(VecResetArray(pcbddc->vec1_R));
3928:           }
3929:         }
3930:         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3931:       }
3932:     } else {
3933:       PetscScalar *marr;

3935:       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3936:       for (i = 0; i < n_constraints; i++) {
3937:         PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
3938:         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
3939:         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
3940:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
3941:         PetscCall(VecResetArray(pcbddc->vec1_R));
3942:         PetscCall(VecResetArray(pcbddc->vec2_R));
3943:       }
3944:       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3945:     }
3946:     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
3947:     PetscCall(MatDestroy(&Brhs));
3948:     if (!pcbddc->switch_static) {
3949:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2));
3950:       for (i = 0; i < n_constraints; i++) {
3951:         Vec r, b;
3952:         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
3953:         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
3954:         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3955:         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3956:         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
3957:         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
3958:       }
3959:       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3960:     } else {
3961:       if (lda_rhs != n_R) {
3962:         IS dummy;

3964:         PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy));
3965:         PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
3966:         PetscCall(ISDestroy(&dummy));
3967:       } else {
3968:         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
3969:         pcbddc->local_auxmat2 = local_auxmat2_R;
3970:       }
3971:       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3972:     }
3973:     PetscCall(ISDestroy(&is_aux));
3974:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
3975:     PetscCall(MatScale(M3, m_one));
3976:     if (isCHOL) {
3977:       PetscCall(MatCholeskyFactor(M3, NULL, NULL));
3978:     } else {
3979:       PetscCall(MatLUFactor(M3, NULL, NULL, NULL));
3980:     }
3981:     PetscCall(MatSeqDenseInvertFactors_Private(M3));
3982:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3983:     PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1));
3984:     PetscCall(MatDestroy(&C_B));
3985:     PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3986:     PetscCall(MatDestroy(&M3));
3987:   }

3989:   /* Get submatrices from subdomain matrix */
3990:   if (n_vertices) {
3991: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3992:     PetscBool oldpin;
3993: #endif
3994:     PetscBool isaij;
3995:     IS        is_aux;

3997:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3998:       IS tis;

4000:       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4001:       PetscCall(ISSort(tis));
4002:       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4003:       PetscCall(ISDestroy(&tis));
4004:     } else {
4005:       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4006:     }
4007: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4008:     oldpin = pcbddc->local_mat->boundtocpu;
4009: #endif
4010:     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4011:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4012:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4013:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij));
4014:     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4015:       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4016:     }
4017:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4018: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4019:     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4020: #endif
4021:     PetscCall(ISDestroy(&is_aux));
4022:   }

4024:   /* Matrix of coarse basis functions (local) */
4025:   if (pcbddc->coarse_phi_B) {
4026:     PetscInt on_B, on_primal, on_D = n_D;
4027:     if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL));
4028:     PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal));
4029:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4030:       PetscScalar *marray;

4032:       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray));
4033:       PetscCall(PetscFree(marray));
4034:       PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4035:       PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4036:       PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4037:       PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4038:     }
4039:   }

4041:   if (!pcbddc->coarse_phi_B) {
4042:     PetscScalar *marr;

4044:     /* memory size */
4045:     n = n_B * pcbddc->local_primal_size;
4046:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size;
4047:     if (!pcbddc->symmetric_primal) n *= 2;
4048:     PetscCall(PetscCalloc1(n, &marr));
4049:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B));
4050:     marr += n_B * pcbddc->local_primal_size;
4051:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4052:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D));
4053:       marr += n_D * pcbddc->local_primal_size;
4054:     }
4055:     if (!pcbddc->symmetric_primal) {
4056:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B));
4057:       marr += n_B * pcbddc->local_primal_size;
4058:       if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D));
4059:     } else {
4060:       PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
4061:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4062:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4063:         PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
4064:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4065:       }
4066:     }
4067:   }

4069:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4070:   p0_lidx_I = NULL;
4071:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4072:     const PetscInt *idxs;

4074:     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4075:     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4076:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4077:     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4078:   }

4080:   /* vertices */
4081:   if (n_vertices) {
4082:     PetscBool restoreavr = PETSC_FALSE;

4084:     PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV));

4086:     if (n_R) {
4087:       Mat                A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */
4088:       PetscBLASInt       B_N, B_one            = 1;
4089:       const PetscScalar *x;
4090:       PetscScalar       *y;

4092:       PetscCall(MatScale(A_RV, m_one));
4093:       if (need_benign_correction) {
4094:         ISLocalToGlobalMapping RtoN;
4095:         IS                     is_p0;
4096:         PetscInt              *idxs_p0, n;

4098:         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4099:         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4100:         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4101:         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4102:         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4103:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4104:         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4105:         PetscCall(ISDestroy(&is_p0));
4106:       }

4108:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV));
4109:       if (!sparserhs || need_benign_correction) {
4110:         if (lda_rhs == n_R) {
4111:           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4112:         } else {
4113:           PetscScalar    *av, *array;
4114:           const PetscInt *xadj, *adjncy;
4115:           PetscInt        n;
4116:           PetscBool       flg_row;

4118:           array = work + lda_rhs * n_vertices;
4119:           PetscCall(PetscArrayzero(array, lda_rhs * n_vertices));
4120:           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4121:           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4122:           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4123:           for (i = 0; i < n; i++) {
4124:             PetscInt j;
4125:             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j];
4126:           }
4127:           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4128:           PetscCall(MatDestroy(&A_RV));
4129:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV));
4130:         }
4131:         if (need_benign_correction) {
4132:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4133:           PetscScalar       *marr;

4135:           PetscCall(MatDenseGetArray(A_RV, &marr));
4136:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4138:                  | 0 0  0 | (V)
4139:              L = | 0 0 -1 | (P-p0)
4140:                  | 0 0 -1 | (p0)

4142:           */
4143:           for (i = 0; i < reuse_solver->benign_n; i++) {
4144:             const PetscScalar *vals;
4145:             const PetscInt    *idxs, *idxs_zero;
4146:             PetscInt           n, j, nz;

4148:             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4149:             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4150:             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4151:             for (j = 0; j < n; j++) {
4152:               PetscScalar val = vals[j];
4153:               PetscInt    k, col = idxs[j];
4154:               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4155:             }
4156:             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4157:             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4158:           }
4159:           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4160:         }
4161:         PetscCall(PetscObjectReference((PetscObject)A_RV));
4162:         Brhs = A_RV;
4163:       } else {
4164:         Mat tA_RVT, A_RVT;

4166:         if (!pcbddc->symmetric_primal) {
4167:           /* A_RV already scaled by -1 */
4168:           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4169:         } else {
4170:           restoreavr = PETSC_TRUE;
4171:           PetscCall(MatScale(A_VR, -1.0));
4172:           PetscCall(PetscObjectReference((PetscObject)A_VR));
4173:           A_RVT = A_VR;
4174:         }
4175:         if (lda_rhs != n_R) {
4176:           PetscScalar *aa;
4177:           PetscInt     r, *ii, *jj;
4178:           PetscBool    done;

4180:           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4181:           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4182:           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4183:           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4184:           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4185:           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4186:         } else {
4187:           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4188:           tA_RVT = A_RVT;
4189:         }
4190:         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4191:         PetscCall(MatDestroy(&tA_RVT));
4192:         PetscCall(MatDestroy(&A_RVT));
4193:       }
4194:       if (F) {
4195:         /* need to correct the rhs */
4196:         if (need_benign_correction) {
4197:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4198:           PetscScalar       *marr;

4200:           PetscCall(MatDenseGetArray(Brhs, &marr));
4201:           if (lda_rhs != n_R) {
4202:             for (i = 0; i < n_vertices; i++) {
4203:               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4204:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4205:               PetscCall(VecResetArray(dummy_vec));
4206:             }
4207:           } else {
4208:             for (i = 0; i < n_vertices; i++) {
4209:               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4210:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4211:               PetscCall(VecResetArray(pcbddc->vec1_R));
4212:             }
4213:           }
4214:           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4215:         }
4216:         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4217:         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4218:         /* need to correct the solution */
4219:         if (need_benign_correction) {
4220:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4221:           PetscScalar       *marr;

4223:           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4224:           if (lda_rhs != n_R) {
4225:             for (i = 0; i < n_vertices; i++) {
4226:               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4227:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4228:               PetscCall(VecResetArray(dummy_vec));
4229:             }
4230:           } else {
4231:             for (i = 0; i < n_vertices; i++) {
4232:               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4233:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4234:               PetscCall(VecResetArray(pcbddc->vec1_R));
4235:             }
4236:           }
4237:           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4238:         }
4239:       } else {
4240:         PetscCall(MatDenseGetArray(Brhs, &y));
4241:         for (i = 0; i < n_vertices; i++) {
4242:           PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs));
4243:           PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs));
4244:           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4245:           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4246:           PetscCall(VecResetArray(pcbddc->vec1_R));
4247:           PetscCall(VecResetArray(pcbddc->vec2_R));
4248:         }
4249:         PetscCall(MatDenseRestoreArray(Brhs, &y));
4250:       }
4251:       PetscCall(MatDestroy(&A_RV));
4252:       PetscCall(MatDestroy(&Brhs));
4253:       /* S_VV and S_CV */
4254:       if (n_constraints) {
4255:         Mat B;

4257:         PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices));
4258:         for (i = 0; i < n_vertices; i++) {
4259:           PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
4260:           PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B));
4261:           PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4262:           PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4263:           PetscCall(VecResetArray(pcis->vec1_B));
4264:           PetscCall(VecResetArray(pcbddc->vec1_R));
4265:         }
4266:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B));
4267:         /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4268:         PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV));
4269:         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4270:         PetscCall(MatProductSetFromOptions(S_CV));
4271:         PetscCall(MatProductSymbolic(S_CV));
4272:         PetscCall(MatProductNumeric(S_CV));
4273:         PetscCall(MatProductClear(S_CV));

4275:         PetscCall(MatDestroy(&B));
4276:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B));
4277:         /* Reuse B = local_auxmat2_R * S_CV */
4278:         PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B));
4279:         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4280:         PetscCall(MatProductSetFromOptions(B));
4281:         PetscCall(MatProductSymbolic(B));
4282:         PetscCall(MatProductNumeric(B));

4284:         PetscCall(MatScale(S_CV, m_one));
4285:         PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N));
4286:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one));
4287:         PetscCall(MatDestroy(&B));
4288:       }
4289:       if (lda_rhs != n_R) {
4290:         PetscCall(MatDestroy(&A_RRmA_RV));
4291:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV));
4292:         PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs));
4293:       }
4294:       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt));
4295:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4296:       if (need_benign_correction) {
4297:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4298:         PetscScalar       *marr, *sums;

4300:         PetscCall(PetscMalloc1(n_vertices, &sums));
4301:         PetscCall(MatDenseGetArray(S_VVt, &marr));
4302:         for (i = 0; i < reuse_solver->benign_n; i++) {
4303:           const PetscScalar *vals;
4304:           const PetscInt    *idxs, *idxs_zero;
4305:           PetscInt           n, j, nz;

4307:           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4308:           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4309:           for (j = 0; j < n_vertices; j++) {
4310:             PetscInt k;
4311:             sums[j] = 0.;
4312:             for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs];
4313:           }
4314:           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4315:           for (j = 0; j < n; j++) {
4316:             PetscScalar val = vals[j];
4317:             PetscInt    k;
4318:             for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k];
4319:           }
4320:           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4321:           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4322:         }
4323:         PetscCall(PetscFree(sums));
4324:         PetscCall(MatDenseRestoreArray(S_VVt, &marr));
4325:         PetscCall(MatDestroy(&A_RV_bcorr));
4326:       }
4327:       PetscCall(MatDestroy(&A_RRmA_RV));
4328:       PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N));
4329:       PetscCall(MatDenseGetArrayRead(A_VV, &x));
4330:       PetscCall(MatDenseGetArray(S_VVt, &y));
4331:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one));
4332:       PetscCall(MatDenseRestoreArrayRead(A_VV, &x));
4333:       PetscCall(MatDenseRestoreArray(S_VVt, &y));
4334:       PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN));
4335:       PetscCall(MatDestroy(&S_VVt));
4336:     } else {
4337:       PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN));
4338:     }
4339:     PetscCall(MatDestroy(&A_VV));

4341:     /* coarse basis functions */
4342:     for (i = 0; i < n_vertices; i++) {
4343:       Vec         v;
4344:       PetscScalar one = 1.0, zero = 0.0;

4346:       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4347:       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v));
4348:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4349:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4350:       if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4351:         PetscMPIInt rank;
4352:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank));
4353:         PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4354:       }
4355:       PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4356:       PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4357:       PetscCall(VecAssemblyEnd(v));
4358:       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v));

4360:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4361:         PetscInt j;

4363:         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v));
4364:         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4365:         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4366:         if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4367:           PetscMPIInt rank;
4368:           PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank));
4369:           PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4370:         }
4371:         for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4372:         PetscCall(VecAssemblyBegin(v));
4373:         PetscCall(VecAssemblyEnd(v));
4374:         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v));
4375:       }
4376:       PetscCall(VecResetArray(pcbddc->vec1_R));
4377:     }
4378:     /* if n_R == 0 the object is not destroyed */
4379:     PetscCall(MatDestroy(&A_RV));
4380:   }
4381:   PetscCall(VecDestroy(&dummy_vec));

4383:   if (n_constraints) {
4384:     Mat B;

4386:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B));
4387:     PetscCall(MatScale(S_CC, m_one));
4388:     PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B));
4389:     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4390:     PetscCall(MatProductSetFromOptions(B));
4391:     PetscCall(MatProductSymbolic(B));
4392:     PetscCall(MatProductNumeric(B));

4394:     PetscCall(MatScale(S_CC, m_one));
4395:     if (n_vertices) {
4396:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4397:         PetscCall(MatTransposeSetPrecursor(S_CV, S_VC));
4398:         PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC));
4399:       } else {
4400:         Mat S_VCt;

4402:         if (lda_rhs != n_R) {
4403:           PetscCall(MatDestroy(&B));
4404:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B));
4405:           PetscCall(MatDenseSetLDA(B, lda_rhs));
4406:         }
4407:         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt));
4408:         PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN));
4409:         PetscCall(MatDestroy(&S_VCt));
4410:       }
4411:     }
4412:     PetscCall(MatDestroy(&B));
4413:     /* coarse basis functions */
4414:     for (i = 0; i < n_constraints; i++) {
4415:       Vec v;

4417:       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4418:       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4419:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4420:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4421:       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4422:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4423:         PetscInt    j;
4424:         PetscScalar zero = 0.0;
4425:         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4426:         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4427:         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4428:         for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4429:         PetscCall(VecAssemblyBegin(v));
4430:         PetscCall(VecAssemblyEnd(v));
4431:         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4432:       }
4433:       PetscCall(VecResetArray(pcbddc->vec1_R));
4434:     }
4435:   }
4436:   if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R));
4437:   PetscCall(PetscFree(p0_lidx_I));

4439:   /* coarse matrix entries relative to B_0 */
4440:   if (pcbddc->benign_n) {
4441:     Mat                B0_B, B0_BPHI;
4442:     IS                 is_dummy;
4443:     const PetscScalar *data;
4444:     PetscInt           j;

4446:     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4447:     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4448:     PetscCall(ISDestroy(&is_dummy));
4449:     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4450:     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4451:     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
4452:     for (j = 0; j < pcbddc->benign_n; j++) {
4453:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4454:       for (i = 0; i < pcbddc->local_primal_size; i++) {
4455:         coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j];
4456:         coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j];
4457:       }
4458:     }
4459:     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
4460:     PetscCall(MatDestroy(&B0_B));
4461:     PetscCall(MatDestroy(&B0_BPHI));
4462:   }

4464:   /* compute other basis functions for non-symmetric problems */
4465:   if (!pcbddc->symmetric_primal) {
4466:     Mat          B_V = NULL, B_C = NULL;
4467:     PetscScalar *marray;

4469:     if (n_constraints) {
4470:       Mat S_CCT, C_CRT;

4472:       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
4473:       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
4474:       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C));
4475:       PetscCall(MatDestroy(&S_CCT));
4476:       if (n_vertices) {
4477:         Mat S_VCT;

4479:         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
4480:         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V));
4481:         PetscCall(MatDestroy(&S_VCT));
4482:       }
4483:       PetscCall(MatDestroy(&C_CRT));
4484:     } else {
4485:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
4486:     }
4487:     if (n_vertices && n_R) {
4488:       PetscScalar    *av, *marray;
4489:       const PetscInt *xadj, *adjncy;
4490:       PetscInt        n;
4491:       PetscBool       flg_row;

4493:       /* B_V = B_V - A_VR^T */
4494:       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4495:       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4496:       PetscCall(MatSeqAIJGetArray(A_VR, &av));
4497:       PetscCall(MatDenseGetArray(B_V, &marray));
4498:       for (i = 0; i < n; i++) {
4499:         PetscInt j;
4500:         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
4501:       }
4502:       PetscCall(MatDenseRestoreArray(B_V, &marray));
4503:       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4504:       PetscCall(MatDestroy(&A_VR));
4505:     }

4507:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4508:     if (n_vertices) {
4509:       PetscCall(MatDenseGetArray(B_V, &marray));
4510:       for (i = 0; i < n_vertices; i++) {
4511:         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
4512:         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4513:         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4514:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4515:         PetscCall(VecResetArray(pcbddc->vec1_R));
4516:         PetscCall(VecResetArray(pcbddc->vec2_R));
4517:       }
4518:       PetscCall(MatDenseRestoreArray(B_V, &marray));
4519:     }
4520:     if (B_C) {
4521:       PetscCall(MatDenseGetArray(B_C, &marray));
4522:       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
4523:         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
4524:         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4525:         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4526:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4527:         PetscCall(VecResetArray(pcbddc->vec1_R));
4528:         PetscCall(VecResetArray(pcbddc->vec2_R));
4529:       }
4530:       PetscCall(MatDenseRestoreArray(B_C, &marray));
4531:     }
4532:     /* coarse basis functions */
4533:     for (i = 0; i < pcbddc->local_primal_size; i++) {
4534:       Vec v;

4536:       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
4537:       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
4538:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4539:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4540:       if (i < n_vertices) {
4541:         PetscScalar one = 1.0;
4542:         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4543:         PetscCall(VecAssemblyBegin(v));
4544:         PetscCall(VecAssemblyEnd(v));
4545:       }
4546:       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));

4548:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4549:         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
4550:         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4551:         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4552:         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
4553:       }
4554:       PetscCall(VecResetArray(pcbddc->vec1_R));
4555:     }
4556:     PetscCall(MatDestroy(&B_V));
4557:     PetscCall(MatDestroy(&B_C));
4558:   }

4560:   /* free memory */
4561:   PetscCall(PetscFree(idx_V_B));
4562:   PetscCall(MatDestroy(&S_VV));
4563:   PetscCall(MatDestroy(&S_CV));
4564:   PetscCall(MatDestroy(&S_VC));
4565:   PetscCall(MatDestroy(&S_CC));
4566:   PetscCall(PetscFree(work));
4567:   if (n_vertices) PetscCall(MatDestroy(&A_VR));
4568:   if (n_constraints) PetscCall(MatDestroy(&C_CR));
4569:   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));

4571:   /* Checking coarse_sub_mat and coarse basis functions */
4572:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4573:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4574:   if (pcbddc->dbg_flag) {
4575:     Mat       coarse_sub_mat;
4576:     Mat       AUXMAT, TM1, TM2, TM3, TM4;
4577:     Mat       coarse_phi_D, coarse_phi_B;
4578:     Mat       coarse_psi_D, coarse_psi_B;
4579:     Mat       A_II, A_BB, A_IB, A_BI;
4580:     Mat       C_B, CPHI;
4581:     IS        is_dummy;
4582:     Vec       mones;
4583:     MatType   checkmattype = MATSEQAIJ;
4584:     PetscReal real_value;

4586:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4587:       Mat A;
4588:       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
4589:       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
4590:       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
4591:       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
4592:       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
4593:       PetscCall(MatDestroy(&A));
4594:     } else {
4595:       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
4596:       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
4597:       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
4598:       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
4599:     }
4600:     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
4601:     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
4602:     if (!pcbddc->symmetric_primal) {
4603:       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
4604:       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
4605:     }
4606:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat));

4608:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
4609:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
4610:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4611:     if (!pcbddc->symmetric_primal) {
4612:       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4613:       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
4614:       PetscCall(MatDestroy(&AUXMAT));
4615:       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4616:       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
4617:       PetscCall(MatDestroy(&AUXMAT));
4618:       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4619:       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4620:       PetscCall(MatDestroy(&AUXMAT));
4621:       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4622:       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4623:       PetscCall(MatDestroy(&AUXMAT));
4624:     } else {
4625:       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
4626:       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
4627:       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4628:       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4629:       PetscCall(MatDestroy(&AUXMAT));
4630:       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4631:       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4632:       PetscCall(MatDestroy(&AUXMAT));
4633:     }
4634:     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
4635:     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
4636:     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
4637:     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
4638:     if (pcbddc->benign_n) {
4639:       Mat                B0_B, B0_BPHI;
4640:       const PetscScalar *data2;
4641:       PetscScalar       *data;
4642:       PetscInt           j;

4644:       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4645:       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4646:       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4647:       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4648:       PetscCall(MatDenseGetArray(TM1, &data));
4649:       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
4650:       for (j = 0; j < pcbddc->benign_n; j++) {
4651:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4652:         for (i = 0; i < pcbddc->local_primal_size; i++) {
4653:           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
4654:           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
4655:         }
4656:       }
4657:       PetscCall(MatDenseRestoreArray(TM1, &data));
4658:       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
4659:       PetscCall(MatDestroy(&B0_B));
4660:       PetscCall(ISDestroy(&is_dummy));
4661:       PetscCall(MatDestroy(&B0_BPHI));
4662:     }
4663: #if 0
4664:   {
4665:     PetscViewer viewer;
4666:     char filename[256];
4667:     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
4668:     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
4669:     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
4670:     PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed"));
4671:     PetscCall(MatView(coarse_sub_mat,viewer));
4672:     PetscCall(PetscObjectSetName((PetscObject)TM1,"projected"));
4673:     PetscCall(MatView(TM1,viewer));
4674:     if (pcbddc->coarse_phi_B) {
4675:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
4676:       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
4677:     }
4678:     if (pcbddc->coarse_phi_D) {
4679:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
4680:       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
4681:     }
4682:     if (pcbddc->coarse_psi_B) {
4683:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
4684:       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
4685:     }
4686:     if (pcbddc->coarse_psi_D) {
4687:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
4688:       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
4689:     }
4690:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
4691:     PetscCall(MatView(pcbddc->local_mat,viewer));
4692:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
4693:     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
4694:     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
4695:     PetscCall(ISView(pcis->is_I_local,viewer));
4696:     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
4697:     PetscCall(ISView(pcis->is_B_local,viewer));
4698:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
4699:     PetscCall(ISView(pcbddc->is_R_local,viewer));
4700:     PetscCall(PetscViewerDestroy(&viewer));
4701:   }
4702: #endif
4703:     PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN));
4704:     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
4705:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
4706:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));

4708:     /* check constraints */
4709:     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
4710:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4711:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4712:       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4713:     } else {
4714:       PetscScalar *data;
4715:       Mat          tmat;
4716:       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
4717:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
4718:       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
4719:       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4720:       PetscCall(MatDestroy(&tmat));
4721:     }
4722:     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
4723:     PetscCall(VecSet(mones, -1.0));
4724:     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4725:     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4726:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4727:     if (!pcbddc->symmetric_primal) {
4728:       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
4729:       PetscCall(VecSet(mones, -1.0));
4730:       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4731:       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4732:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4733:     }
4734:     PetscCall(MatDestroy(&C_B));
4735:     PetscCall(MatDestroy(&CPHI));
4736:     PetscCall(ISDestroy(&is_dummy));
4737:     PetscCall(VecDestroy(&mones));
4738:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4739:     PetscCall(MatDestroy(&A_II));
4740:     PetscCall(MatDestroy(&A_BB));
4741:     PetscCall(MatDestroy(&A_IB));
4742:     PetscCall(MatDestroy(&A_BI));
4743:     PetscCall(MatDestroy(&TM1));
4744:     PetscCall(MatDestroy(&TM2));
4745:     PetscCall(MatDestroy(&TM3));
4746:     PetscCall(MatDestroy(&TM4));
4747:     PetscCall(MatDestroy(&coarse_phi_D));
4748:     PetscCall(MatDestroy(&coarse_phi_B));
4749:     if (!pcbddc->symmetric_primal) {
4750:       PetscCall(MatDestroy(&coarse_psi_D));
4751:       PetscCall(MatDestroy(&coarse_psi_B));
4752:     }
4753:     PetscCall(MatDestroy(&coarse_sub_mat));
4754:   }
4755:   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4756:   {
4757:     PetscBool gpu;

4759:     PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu));
4760:     if (gpu) {
4761:       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
4762:       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
4763:       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
4764:       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
4765:       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
4766:       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
4767:     }
4768:   }
4769:   /* get back data */
4770:   *coarse_submat_vals_n = coarse_submat_vals;
4771:   PetscFunctionReturn(PETSC_SUCCESS);
4772: }

4774: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
4775: {
4776:   Mat      *work_mat;
4777:   IS        isrow_s, iscol_s;
4778:   PetscBool rsorted, csorted;
4779:   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;

4781:   PetscFunctionBegin;
4782:   PetscCall(ISSorted(isrow, &rsorted));
4783:   PetscCall(ISSorted(iscol, &csorted));
4784:   PetscCall(ISGetLocalSize(isrow, &rsize));
4785:   PetscCall(ISGetLocalSize(iscol, &csize));

4787:   if (!rsorted) {
4788:     const PetscInt *idxs;
4789:     PetscInt       *idxs_sorted, i;

4791:     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
4792:     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
4793:     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
4794:     PetscCall(ISGetIndices(isrow, &idxs));
4795:     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
4796:     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
4797:     PetscCall(ISRestoreIndices(isrow, &idxs));
4798:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
4799:   } else {
4800:     PetscCall(PetscObjectReference((PetscObject)isrow));
4801:     isrow_s = isrow;
4802:   }

4804:   if (!csorted) {
4805:     if (isrow == iscol) {
4806:       PetscCall(PetscObjectReference((PetscObject)isrow_s));
4807:       iscol_s = isrow_s;
4808:     } else {
4809:       const PetscInt *idxs;
4810:       PetscInt       *idxs_sorted, i;

4812:       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
4813:       PetscCall(PetscMalloc1(csize, &idxs_sorted));
4814:       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
4815:       PetscCall(ISGetIndices(iscol, &idxs));
4816:       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
4817:       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
4818:       PetscCall(ISRestoreIndices(iscol, &idxs));
4819:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
4820:     }
4821:   } else {
4822:     PetscCall(PetscObjectReference((PetscObject)iscol));
4823:     iscol_s = iscol;
4824:   }

4826:   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));

4828:   if (!rsorted || !csorted) {
4829:     Mat new_mat;
4830:     IS  is_perm_r, is_perm_c;

4832:     if (!rsorted) {
4833:       PetscInt *idxs_r, i;
4834:       PetscCall(PetscMalloc1(rsize, &idxs_r));
4835:       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
4836:       PetscCall(PetscFree(idxs_perm_r));
4837:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
4838:     } else {
4839:       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
4840:     }
4841:     PetscCall(ISSetPermutation(is_perm_r));

4843:     if (!csorted) {
4844:       if (isrow_s == iscol_s) {
4845:         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
4846:         is_perm_c = is_perm_r;
4847:       } else {
4848:         PetscInt *idxs_c, i;
4849:         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
4850:         PetscCall(PetscMalloc1(csize, &idxs_c));
4851:         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
4852:         PetscCall(PetscFree(idxs_perm_c));
4853:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
4854:       }
4855:     } else {
4856:       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
4857:     }
4858:     PetscCall(ISSetPermutation(is_perm_c));

4860:     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
4861:     PetscCall(MatDestroy(&work_mat[0]));
4862:     work_mat[0] = new_mat;
4863:     PetscCall(ISDestroy(&is_perm_r));
4864:     PetscCall(ISDestroy(&is_perm_c));
4865:   }

4867:   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
4868:   *B = work_mat[0];
4869:   PetscCall(MatDestroyMatrices(1, &work_mat));
4870:   PetscCall(ISDestroy(&isrow_s));
4871:   PetscCall(ISDestroy(&iscol_s));
4872:   PetscFunctionReturn(PETSC_SUCCESS);
4873: }

4875: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4876: {
4877:   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
4878:   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
4879:   Mat       new_mat, lA;
4880:   IS        is_local, is_global;
4881:   PetscInt  local_size;
4882:   PetscBool isseqaij, issym, isset;

4884:   PetscFunctionBegin;
4885:   PetscCall(MatDestroy(&pcbddc->local_mat));
4886:   PetscCall(MatGetSize(matis->A, &local_size, NULL));
4887:   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
4888:   PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
4889:   PetscCall(ISDestroy(&is_local));
4890:   PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
4891:   PetscCall(ISDestroy(&is_global));

4893:   if (pcbddc->dbg_flag) {
4894:     Vec       x, x_change;
4895:     PetscReal error;

4897:     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
4898:     PetscCall(VecSetRandom(x, NULL));
4899:     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
4900:     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4901:     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4902:     PetscCall(MatMult(new_mat, matis->x, matis->y));
4903:     if (!pcbddc->change_interior) {
4904:       const PetscScalar *x, *y, *v;
4905:       PetscReal          lerror = 0.;
4906:       PetscInt           i;

4908:       PetscCall(VecGetArrayRead(matis->x, &x));
4909:       PetscCall(VecGetArrayRead(matis->y, &y));
4910:       PetscCall(VecGetArrayRead(matis->counter, &v));
4911:       for (i = 0; i < local_size; i++)
4912:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
4913:       PetscCall(VecRestoreArrayRead(matis->x, &x));
4914:       PetscCall(VecRestoreArrayRead(matis->y, &y));
4915:       PetscCall(VecRestoreArrayRead(matis->counter, &v));
4916:       PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
4917:       if (error > PETSC_SMALL) {
4918:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4919:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
4920:         } else {
4921:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
4922:         }
4923:       }
4924:     }
4925:     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4926:     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4927:     PetscCall(VecAXPY(x, -1.0, x_change));
4928:     PetscCall(VecNorm(x, NORM_INFINITY, &error));
4929:     if (error > PETSC_SMALL) {
4930:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4931:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
4932:       } else {
4933:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
4934:       }
4935:     }
4936:     PetscCall(VecDestroy(&x));
4937:     PetscCall(VecDestroy(&x_change));
4938:   }

4940:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4941:   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));

4943:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4944:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
4945:   if (isseqaij) {
4946:     PetscCall(MatDestroy(&pcbddc->local_mat));
4947:     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4948:     if (lA) {
4949:       Mat work;
4950:       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4951:       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4952:       PetscCall(MatDestroy(&work));
4953:     }
4954:   } else {
4955:     Mat work_mat;

4957:     PetscCall(MatDestroy(&pcbddc->local_mat));
4958:     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4959:     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4960:     PetscCall(MatDestroy(&work_mat));
4961:     if (lA) {
4962:       Mat work;
4963:       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4964:       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4965:       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4966:       PetscCall(MatDestroy(&work));
4967:     }
4968:   }
4969:   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
4970:   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
4971:   PetscCall(MatDestroy(&new_mat));
4972:   PetscFunctionReturn(PETSC_SUCCESS);
4973: }

4975: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4976: {
4977:   PC_IS          *pcis        = (PC_IS *)(pc->data);
4978:   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
4979:   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
4980:   PetscInt       *idx_R_local = NULL;
4981:   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
4982:   PetscInt        vbs, bs;
4983:   PetscBT         bitmask = NULL;

4985:   PetscFunctionBegin;
4986:   /*
4987:     No need to setup local scatters if
4988:       - primal space is unchanged
4989:         AND
4990:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4991:         AND
4992:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4993:   */
4994:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
4995:   /* destroy old objects */
4996:   PetscCall(ISDestroy(&pcbddc->is_R_local));
4997:   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4998:   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4999:   /* Set Non-overlapping dimensions */
5000:   n_B        = pcis->n_B;
5001:   n_D        = pcis->n - n_B;
5002:   n_vertices = pcbddc->n_vertices;

5004:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

5006:   /* create auxiliary bitmask and allocate workspace */
5007:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5008:     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5009:     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5010:     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));

5012:     for (i = 0, n_R = 0; i < pcis->n; i++) {
5013:       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5014:     }
5015:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5016:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5018:     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5019:     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5020:   }

5022:   /* Block code */
5023:   vbs = 1;
5024:   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5025:   if (bs > 1 && !(n_vertices % bs)) {
5026:     PetscBool is_blocked = PETSC_TRUE;
5027:     PetscInt *vary;
5028:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5029:       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5030:       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5031:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5032:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5033:       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5034:       for (i = 0; i < pcis->n / bs; i++) {
5035:         if (vary[i] != 0 && vary[i] != bs) {
5036:           is_blocked = PETSC_FALSE;
5037:           break;
5038:         }
5039:       }
5040:       PetscCall(PetscFree(vary));
5041:     } else {
5042:       /* Verify directly the R set */
5043:       for (i = 0; i < n_R / bs; i++) {
5044:         PetscInt j, node = idx_R_local[bs * i];
5045:         for (j = 1; j < bs; j++) {
5046:           if (node != idx_R_local[bs * i + j] - j) {
5047:             is_blocked = PETSC_FALSE;
5048:             break;
5049:           }
5050:         }
5051:       }
5052:     }
5053:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5054:       vbs = bs;
5055:       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5056:     }
5057:   }
5058:   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5059:   if (sub_schurs && sub_schurs->reuse_solver) {
5060:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5062:     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5063:     PetscCall(ISDestroy(&reuse_solver->is_R));
5064:     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5065:     reuse_solver->is_R = pcbddc->is_R_local;
5066:   } else {
5067:     PetscCall(PetscFree(idx_R_local));
5068:   }

5070:   /* print some info if requested */
5071:   if (pcbddc->dbg_flag) {
5072:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5073:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5074:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5075:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5076:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5077:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5078:                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5079:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5080:   }

5082:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5083:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5084:     IS        is_aux1, is_aux2;
5085:     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;

5087:     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5088:     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5089:     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5090:     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5091:     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5092:     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5093:     for (i = 0, j = 0; i < n_R; i++) {
5094:       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5095:     }
5096:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5097:     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5098:     for (i = 0, j = 0; i < n_B; i++) {
5099:       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5100:     }
5101:     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5102:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5103:     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5104:     PetscCall(ISDestroy(&is_aux1));
5105:     PetscCall(ISDestroy(&is_aux2));

5107:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5108:       PetscCall(PetscMalloc1(n_D, &aux_array1));
5109:       for (i = 0, j = 0; i < n_R; i++) {
5110:         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5111:       }
5112:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5113:       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5114:       PetscCall(ISDestroy(&is_aux1));
5115:     }
5116:     PetscCall(PetscBTDestroy(&bitmask));
5117:     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5118:   } else {
5119:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5120:     IS                 tis;
5121:     PetscInt           schur_size;

5123:     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5124:     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5125:     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5126:     PetscCall(ISDestroy(&tis));
5127:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5128:       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5129:       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5130:       PetscCall(ISDestroy(&tis));
5131:     }
5132:   }
5133:   PetscFunctionReturn(PETSC_SUCCESS);
5134: }

5136: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5137: {
5138:   MatNullSpace   NullSpace;
5139:   Mat            dmat;
5140:   const Vec     *nullvecs;
5141:   Vec            v, v2, *nullvecs2;
5142:   VecScatter     sct = NULL;
5143:   PetscContainer c;
5144:   PetscScalar   *ddata;
5145:   PetscInt       k, nnsp_size, bsiz, bsiz2, n, N, bs;
5146:   PetscBool      nnsp_has_cnst;

5148:   PetscFunctionBegin;
5149:   if (!is && !B) { /* MATIS */
5150:     Mat_IS *matis = (Mat_IS *)A->data;

5152:     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5153:     sct = matis->cctx;
5154:     PetscCall(PetscObjectReference((PetscObject)sct));
5155:   } else {
5156:     PetscCall(MatGetNullSpace(B, &NullSpace));
5157:     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5158:     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5159:   }
5160:   PetscCall(MatGetNullSpace(A, &NullSpace));
5161:   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5162:   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);

5164:   PetscCall(MatCreateVecs(A, &v, NULL));
5165:   PetscCall(MatCreateVecs(B, &v2, NULL));
5166:   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5167:   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs));
5168:   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5169:   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5170:   PetscCall(VecGetBlockSize(v2, &bs));
5171:   PetscCall(VecGetSize(v2, &N));
5172:   PetscCall(VecGetLocalSize(v2, &n));
5173:   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5174:   for (k = 0; k < nnsp_size; k++) {
5175:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5176:     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5177:     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5178:   }
5179:   if (nnsp_has_cnst) {
5180:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5181:     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5182:   }
5183:   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5184:   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));

5186:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5187:   PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c));
5188:   PetscCall(PetscContainerSetPointer(c, ddata));
5189:   PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault));
5190:   PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c));
5191:   PetscCall(PetscContainerDestroy(&c));
5192:   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5193:   PetscCall(MatDestroy(&dmat));

5195:   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5196:   PetscCall(PetscFree(nullvecs2));
5197:   PetscCall(MatSetNearNullSpace(B, NullSpace));
5198:   PetscCall(MatNullSpaceDestroy(&NullSpace));
5199:   PetscCall(VecDestroy(&v));
5200:   PetscCall(VecDestroy(&v2));
5201:   PetscCall(VecScatterDestroy(&sct));
5202:   PetscFunctionReturn(PETSC_SUCCESS);
5203: }

5205: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5206: {
5207:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5208:   PC_IS       *pcis   = (PC_IS *)pc->data;
5209:   PC           pc_temp;
5210:   Mat          A_RR;
5211:   MatNullSpace nnsp;
5212:   MatReuse     reuse;
5213:   PetscScalar  m_one = -1.0;
5214:   PetscReal    value;
5215:   PetscInt     n_D, n_R;
5216:   PetscBool    issbaij, opts, isset, issym;
5217:   void (*f)(void) = NULL;
5218:   char   dir_prefix[256], neu_prefix[256], str_level[16];
5219:   size_t len;

5221:   PetscFunctionBegin;
5222:   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5223:   /* approximate solver, propagate NearNullSpace if needed */
5224:   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5225:     MatNullSpace gnnsp1, gnnsp2;
5226:     PetscBool    lhas, ghas;

5228:     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5229:     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5230:     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5231:     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5232:     PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5233:     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5234:   }

5236:   /* compute prefixes */
5237:   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5238:   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5239:   if (!pcbddc->current_level) {
5240:     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5241:     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5242:     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5243:     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5244:   } else {
5245:     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
5246:     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5247:     len -= 15;                                /* remove "pc_bddc_coarse_" */
5248:     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5249:     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5250:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5251:     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5252:     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5253:     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5254:     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5255:     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5256:     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5257:   }

5259:   /* DIRICHLET PROBLEM */
5260:   if (dirichlet) {
5261:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5262:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5263:       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5264:       if (pcbddc->dbg_flag) {
5265:         Mat A_IIn;

5267:         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5268:         PetscCall(MatDestroy(&pcis->A_II));
5269:         pcis->A_II = A_IIn;
5270:       }
5271:     }
5272:     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5273:     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));

5275:     /* Matrix for Dirichlet problem is pcis->A_II */
5276:     n_D  = pcis->n - pcis->n_B;
5277:     opts = PETSC_FALSE;
5278:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5279:       opts = PETSC_TRUE;
5280:       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5281:       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5282:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5283:       /* default */
5284:       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5285:       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5286:       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5287:       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5288:       if (issbaij) {
5289:         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5290:       } else {
5291:         PetscCall(PCSetType(pc_temp, PCLU));
5292:       }
5293:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5294:     }
5295:     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5296:     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5297:     /* Allow user's customization */
5298:     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5299:     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5300:     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5301:       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5302:     }
5303:     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5304:     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5305:     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5306:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5307:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5308:       const PetscInt *idxs;
5309:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

5311:       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5312:       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5313:       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5314:       for (i = 0; i < nl; i++) {
5315:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5316:       }
5317:       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5318:       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5319:       PetscCall(PetscFree(scoords));
5320:     }
5321:     if (sub_schurs && sub_schurs->reuse_solver) {
5322:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5324:       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5325:     }

5327:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5328:     if (!n_D) {
5329:       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5330:       PetscCall(PCSetType(pc_temp, PCNONE));
5331:     }
5332:     PetscCall(KSPSetUp(pcbddc->ksp_D));
5333:     /* set ksp_D into pcis data */
5334:     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5335:     PetscCall(KSPDestroy(&pcis->ksp_D));
5336:     pcis->ksp_D = pcbddc->ksp_D;
5337:   }

5339:   /* NEUMANN PROBLEM */
5340:   A_RR = NULL;
5341:   if (neumann) {
5342:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5343:     PetscInt        ibs, mbs;
5344:     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5345:     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;

5347:     reuse_neumann_solver = PETSC_FALSE;
5348:     if (sub_schurs && sub_schurs->reuse_solver) {
5349:       IS iP;

5351:       reuse_neumann_solver = PETSC_TRUE;
5352:       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5353:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5354:     }
5355:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5356:     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5357:     if (pcbddc->ksp_R) { /* already created ksp */
5358:       PetscInt nn_R;
5359:       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5360:       PetscCall(PetscObjectReference((PetscObject)A_RR));
5361:       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5362:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5363:         PetscCall(KSPReset(pcbddc->ksp_R));
5364:         PetscCall(MatDestroy(&A_RR));
5365:         reuse = MAT_INITIAL_MATRIX;
5366:       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5367:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5368:           PetscCall(MatDestroy(&A_RR));
5369:           reuse = MAT_INITIAL_MATRIX;
5370:         } else { /* safe to reuse the matrix */
5371:           reuse = MAT_REUSE_MATRIX;
5372:         }
5373:       }
5374:       /* last check */
5375:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5376:         PetscCall(MatDestroy(&A_RR));
5377:         reuse = MAT_INITIAL_MATRIX;
5378:       }
5379:     } else { /* first time, so we need to create the matrix */
5380:       reuse = MAT_INITIAL_MATRIX;
5381:     }
5382:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5383:        TODO: Get Rid of these conversions */
5384:     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5385:     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5386:     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5387:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5388:       if (matis->A == pcbddc->local_mat) {
5389:         PetscCall(MatDestroy(&pcbddc->local_mat));
5390:         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5391:       } else {
5392:         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5393:       }
5394:     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
5395:       if (matis->A == pcbddc->local_mat) {
5396:         PetscCall(MatDestroy(&pcbddc->local_mat));
5397:         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5398:       } else {
5399:         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5400:       }
5401:     }
5402:     /* extract A_RR */
5403:     if (reuse_neumann_solver) {
5404:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5406:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5407:         PetscCall(MatDestroy(&A_RR));
5408:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5409:           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
5410:         } else {
5411:           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
5412:         }
5413:       } else {
5414:         PetscCall(MatDestroy(&A_RR));
5415:         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
5416:         PetscCall(PetscObjectReference((PetscObject)A_RR));
5417:       }
5418:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5419:       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
5420:     }
5421:     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5422:     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
5423:     opts = PETSC_FALSE;
5424:     if (!pcbddc->ksp_R) { /* create object if not present */
5425:       opts = PETSC_TRUE;
5426:       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
5427:       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
5428:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
5429:       /* default */
5430:       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
5431:       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
5432:       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5433:       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
5434:       if (issbaij) {
5435:         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5436:       } else {
5437:         PetscCall(PCSetType(pc_temp, PCLU));
5438:       }
5439:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
5440:     }
5441:     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
5442:     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
5443:     if (opts) { /* Allow user's customization once */
5444:       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
5445:     }
5446:     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5447:     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5448:       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
5449:     }
5450:     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5451:     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5452:     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5453:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5454:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5455:       const PetscInt *idxs;
5456:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

5458:       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
5459:       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
5460:       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5461:       for (i = 0; i < nl; i++) {
5462:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5463:       }
5464:       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
5465:       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5466:       PetscCall(PetscFree(scoords));
5467:     }

5469:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5470:     if (!n_R) {
5471:       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5472:       PetscCall(PCSetType(pc_temp, PCNONE));
5473:     }
5474:     /* Reuse solver if it is present */
5475:     if (reuse_neumann_solver) {
5476:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5478:       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
5479:     }
5480:     PetscCall(KSPSetUp(pcbddc->ksp_R));
5481:   }

5483:   if (pcbddc->dbg_flag) {
5484:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5485:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5486:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5487:   }
5488:   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));

5490:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5491:   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
5492:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
5493:   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
5494:   /* check Dirichlet and Neumann solvers */
5495:   if (pcbddc->dbg_flag) {
5496:     if (dirichlet) { /* Dirichlet */
5497:       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
5498:       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
5499:       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
5500:       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
5501:       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
5502:       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
5503:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value));
5504:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5505:     }
5506:     if (neumann) { /* Neumann */
5507:       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
5508:       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
5509:       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
5510:       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5511:       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
5512:       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
5513:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value));
5514:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5515:     }
5516:   }
5517:   /* free Neumann problem's matrix */
5518:   PetscCall(MatDestroy(&A_RR));
5519:   PetscFunctionReturn(PETSC_SUCCESS);
5520: }

5522: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5523: {
5524:   PC_BDDC        *pcbddc       = (PC_BDDC *)(pc->data);
5525:   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
5526:   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;

5528:   PetscFunctionBegin;
5529:   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
5530:   if (!pcbddc->switch_static) {
5531:     if (applytranspose && pcbddc->local_auxmat1) {
5532:       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
5533:       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5534:     }
5535:     if (!reuse_solver) {
5536:       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5537:       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5538:     } else {
5539:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5541:       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5542:       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5543:     }
5544:   } else {
5545:     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5546:     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5547:     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5548:     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5549:     if (applytranspose && pcbddc->local_auxmat1) {
5550:       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
5551:       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5552:       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5553:       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5554:     }
5555:   }
5556:   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5557:   if (!reuse_solver || pcbddc->switch_static) {
5558:     if (applytranspose) {
5559:       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5560:     } else {
5561:       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5562:     }
5563:     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
5564:   } else {
5565:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5567:     if (applytranspose) {
5568:       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5569:     } else {
5570:       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5571:     }
5572:   }
5573:   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5574:   PetscCall(VecSet(inout_B, 0.));
5575:   if (!pcbddc->switch_static) {
5576:     if (!reuse_solver) {
5577:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5578:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5579:     } else {
5580:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5582:       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5583:       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5584:     }
5585:     if (!applytranspose && pcbddc->local_auxmat1) {
5586:       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5587:       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
5588:     }
5589:   } else {
5590:     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5591:     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5592:     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5593:     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5594:     if (!applytranspose && pcbddc->local_auxmat1) {
5595:       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5596:       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
5597:     }
5598:     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5599:     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5600:     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5601:     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5602:   }
5603:   PetscFunctionReturn(PETSC_SUCCESS);
5604: }

5606: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5607: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5608: {
5609:   PC_BDDC          *pcbddc = (PC_BDDC *)(pc->data);
5610:   PC_IS            *pcis   = (PC_IS *)(pc->data);
5611:   const PetscScalar zero   = 0.0;

5613:   PetscFunctionBegin;
5614:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5615:   if (!pcbddc->benign_apply_coarse_only) {
5616:     if (applytranspose) {
5617:       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
5618:       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5619:     } else {
5620:       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
5621:       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5622:     }
5623:   } else {
5624:     PetscCall(VecSet(pcbddc->vec1_P, zero));
5625:   }

5627:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5628:   if (pcbddc->benign_n) {
5629:     PetscScalar *array;
5630:     PetscInt     j;

5632:     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5633:     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
5634:     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5635:   }

5637:   /* start communications from local primal nodes to rhs of coarse solver */
5638:   PetscCall(VecSet(pcbddc->coarse_vec, zero));
5639:   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
5640:   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));

5642:   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5643:   if (pcbddc->coarse_ksp) {
5644:     Mat          coarse_mat;
5645:     Vec          rhs, sol;
5646:     MatNullSpace nullsp;
5647:     PetscBool    isbddc = PETSC_FALSE;

5649:     if (pcbddc->benign_have_null) {
5650:       PC coarse_pc;

5652:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5653:       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
5654:       /* we need to propagate to coarser levels the need for a possible benign correction */
5655:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5656:         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)(coarse_pc->data);
5657:         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
5658:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5659:       }
5660:     }
5661:     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
5662:     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
5663:     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
5664:     if (applytranspose) {
5665:       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
5666:       PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5667:       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
5668:       PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5669:       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5670:       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
5671:       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5672:     } else {
5673:       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
5674:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5675:         PC coarse_pc;

5677:         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
5678:         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5679:         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
5680:         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
5681:         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
5682:       } else {
5683:         PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5684:         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
5685:         PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5686:         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5687:         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5688:       }
5689:     }
5690:     /* we don't need the benign correction at coarser levels anymore */
5691:     if (pcbddc->benign_have_null && isbddc) {
5692:       PC       coarse_pc;
5693:       PC_BDDC *coarsepcbddc;

5695:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5696:       coarsepcbddc                           = (PC_BDDC *)(coarse_pc->data);
5697:       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
5698:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5699:     }
5700:   }

5702:   /* Local solution on R nodes */
5703:   if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
5704:   /* communications from coarse sol to local primal nodes */
5705:   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
5706:   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));

5708:   /* Sum contributions from the two levels */
5709:   if (!pcbddc->benign_apply_coarse_only) {
5710:     if (applytranspose) {
5711:       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5712:       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5713:     } else {
5714:       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5715:       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5716:     }
5717:     /* store p0 */
5718:     if (pcbddc->benign_n) {
5719:       PetscScalar *array;
5720:       PetscInt     j;

5722:       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5723:       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
5724:       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5725:     }
5726:   } else { /* expand the coarse solution */
5727:     if (applytranspose) {
5728:       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
5729:     } else {
5730:       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
5731:     }
5732:   }
5733:   PetscFunctionReturn(PETSC_SUCCESS);
5734: }

5736: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
5737: {
5738:   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5739:   Vec                from, to;
5740:   const PetscScalar *array;

5742:   PetscFunctionBegin;
5743:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5744:     from = pcbddc->coarse_vec;
5745:     to   = pcbddc->vec1_P;
5746:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5747:       Vec tvec;

5749:       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5750:       PetscCall(VecResetArray(tvec));
5751:       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
5752:       PetscCall(VecGetArrayRead(tvec, &array));
5753:       PetscCall(VecPlaceArray(from, array));
5754:       PetscCall(VecRestoreArrayRead(tvec, &array));
5755:     }
5756:   } else { /* from local to global -> put data in coarse right hand side */
5757:     from = pcbddc->vec1_P;
5758:     to   = pcbddc->coarse_vec;
5759:   }
5760:   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5761:   PetscFunctionReturn(PETSC_SUCCESS);
5762: }

5764: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5765: {
5766:   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5767:   Vec                from, to;
5768:   const PetscScalar *array;

5770:   PetscFunctionBegin;
5771:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5772:     from = pcbddc->coarse_vec;
5773:     to   = pcbddc->vec1_P;
5774:   } else { /* from local to global -> put data in coarse right hand side */
5775:     from = pcbddc->vec1_P;
5776:     to   = pcbddc->coarse_vec;
5777:   }
5778:   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5779:   if (smode == SCATTER_FORWARD) {
5780:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5781:       Vec tvec;

5783:       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5784:       PetscCall(VecGetArrayRead(to, &array));
5785:       PetscCall(VecPlaceArray(tvec, array));
5786:       PetscCall(VecRestoreArrayRead(to, &array));
5787:     }
5788:   } else {
5789:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5790:       PetscCall(VecResetArray(from));
5791:     }
5792:   }
5793:   PetscFunctionReturn(PETSC_SUCCESS);
5794: }

5796: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5797: {
5798:   PC_IS   *pcis   = (PC_IS *)(pc->data);
5799:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5800:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
5801:   /* one and zero */
5802:   PetscScalar one = 1.0, zero = 0.0;
5803:   /* space to store constraints and their local indices */
5804:   PetscScalar *constraints_data;
5805:   PetscInt    *constraints_idxs, *constraints_idxs_B;
5806:   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
5807:   PetscInt    *constraints_n;
5808:   /* iterators */
5809:   PetscInt i, j, k, total_counts, total_counts_cc, cum;
5810:   /* BLAS integers */
5811:   PetscBLASInt lwork, lierr;
5812:   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
5813:   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
5814:   /* reuse */
5815:   PetscInt  olocal_primal_size, olocal_primal_size_cc;
5816:   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
5817:   /* change of basis */
5818:   PetscBool qr_needed;
5819:   PetscBT   change_basis, qr_needed_idx;
5820:   /* auxiliary stuff */
5821:   PetscInt *nnz, *is_indices;
5822:   PetscInt  ncc;
5823:   /* some quantities */
5824:   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
5825:   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
5826:   PetscReal tol; /* tolerance for retaining eigenmodes */

5828:   PetscFunctionBegin;
5829:   tol = PetscSqrtReal(PETSC_SMALL);
5830:   /* Destroy Mat objects computed previously */
5831:   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
5832:   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
5833:   PetscCall(MatDestroy(&pcbddc->switch_static_change));
5834:   /* save info on constraints from previous setup (if any) */
5835:   olocal_primal_size    = pcbddc->local_primal_size;
5836:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5837:   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
5838:   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
5839:   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
5840:   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
5841:   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));

5843:   if (!pcbddc->adaptive_selection) {
5844:     IS           ISForVertices, *ISForFaces, *ISForEdges;
5845:     MatNullSpace nearnullsp;
5846:     const Vec   *nearnullvecs;
5847:     Vec         *localnearnullsp;
5848:     PetscScalar *array;
5849:     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
5850:     PetscBool    nnsp_has_cnst;
5851:     /* LAPACK working arrays for SVD or POD */
5852:     PetscBool    skip_lapack, boolforchange;
5853:     PetscScalar *work;
5854:     PetscReal   *singular_vals;
5855: #if defined(PETSC_USE_COMPLEX)
5856:     PetscReal *rwork;
5857: #endif
5858:     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
5859:     PetscBLASInt dummy_int    = 1;
5860:     PetscScalar  dummy_scalar = 1.;
5861:     PetscBool    use_pod      = PETSC_FALSE;

5863:     /* MKL SVD with same input gives different results on different processes! */
5864: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
5865:     use_pod = PETSC_TRUE;
5866: #endif
5867:     /* Get index sets for faces, edges and vertices from graph */
5868:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
5869:     o_nf       = n_ISForFaces;
5870:     o_ne       = n_ISForEdges;
5871:     n_vertices = 0;
5872:     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
5873:     /* print some info */
5874:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5875:       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
5876:       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
5877:       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5878:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
5879:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
5880:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
5881:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
5882:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5883:       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
5884:     }

5886:     if (!pcbddc->use_vertices) n_vertices = 0;
5887:     if (!pcbddc->use_edges) n_ISForEdges = 0;
5888:     if (!pcbddc->use_faces) n_ISForFaces = 0;

5890:     /* check if near null space is attached to global mat */
5891:     if (pcbddc->use_nnsp) {
5892:       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
5893:     } else nearnullsp = NULL;

5895:     if (nearnullsp) {
5896:       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
5897:       /* remove any stored info */
5898:       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
5899:       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
5900:       /* store information for BDDC solver reuse */
5901:       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
5902:       pcbddc->onearnullspace = nearnullsp;
5903:       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
5904:       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
5905:     } else { /* if near null space is not provided BDDC uses constants by default */
5906:       nnsp_size     = 0;
5907:       nnsp_has_cnst = PETSC_TRUE;
5908:     }
5909:     /* get max number of constraints on a single cc */
5910:     max_constraints = nnsp_size;
5911:     if (nnsp_has_cnst) max_constraints++;

5913:     /*
5914:          Evaluate maximum storage size needed by the procedure
5915:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5916:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5917:          There can be multiple constraints per connected component
5918:                                                                                                                                                            */
5919:     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
5920:     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));

5922:     total_counts = n_ISForFaces + n_ISForEdges;
5923:     total_counts *= max_constraints;
5924:     total_counts += n_vertices;
5925:     PetscCall(PetscBTCreate(total_counts, &change_basis));

5927:     total_counts           = 0;
5928:     max_size_of_constraint = 0;
5929:     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
5930:       IS used_is;
5931:       if (i < n_ISForEdges) {
5932:         used_is = ISForEdges[i];
5933:       } else {
5934:         used_is = ISForFaces[i - n_ISForEdges];
5935:       }
5936:       PetscCall(ISGetSize(used_is, &j));
5937:       total_counts += j;
5938:       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
5939:     }
5940:     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));

5942:     /* get local part of global near null space vectors */
5943:     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
5944:     for (k = 0; k < nnsp_size; k++) {
5945:       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
5946:       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5947:       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5948:     }

5950:     /* whether or not to skip lapack calls */
5951:     skip_lapack = PETSC_TRUE;
5952:     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5954:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5955:     if (!skip_lapack) {
5956:       PetscScalar temp_work;

5958:       if (use_pod) {
5959:         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5960:         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
5961:         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
5962:         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
5963: #if defined(PETSC_USE_COMPLEX)
5964:         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
5965: #endif
5966:         /* now we evaluate the optimal workspace using query with lwork=-1 */
5967:         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
5968:         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
5969:         lwork = -1;
5970:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
5971: #if !defined(PETSC_USE_COMPLEX)
5972:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
5973: #else
5974:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
5975: #endif
5976:         PetscCall(PetscFPTrapPop());
5977:         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr);
5978:       } else {
5979: #if !defined(PETSC_MISSING_LAPACK_GESVD)
5980:         /* SVD */
5981:         PetscInt max_n, min_n;
5982:         max_n = max_size_of_constraint;
5983:         min_n = max_constraints;
5984:         if (max_size_of_constraint < max_constraints) {
5985:           min_n = max_size_of_constraint;
5986:           max_n = max_constraints;
5987:         }
5988:         PetscCall(PetscMalloc1(min_n, &singular_vals));
5989:   #if defined(PETSC_USE_COMPLEX)
5990:         PetscCall(PetscMalloc1(5 * min_n, &rwork));
5991:   #endif
5992:         /* now we evaluate the optimal workspace using query with lwork=-1 */
5993:         lwork = -1;
5994:         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
5995:         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
5996:         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
5997:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
5998:   #if !defined(PETSC_USE_COMPLEX)
5999:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6000:   #else
6001:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6002:   #endif
6003:         PetscCall(PetscFPTrapPop());
6004:         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr);
6005: #else
6006:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6007: #endif /* on missing GESVD */
6008:       }
6009:       /* Allocate optimal workspace */
6010:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6011:       PetscCall(PetscMalloc1(lwork, &work));
6012:     }
6013:     /* Now we can loop on constraining sets */
6014:     total_counts            = 0;
6015:     constraints_idxs_ptr[0] = 0;
6016:     constraints_data_ptr[0] = 0;
6017:     /* vertices */
6018:     if (n_vertices) {
6019:       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6020:       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6021:       for (i = 0; i < n_vertices; i++) {
6022:         constraints_n[total_counts]            = 1;
6023:         constraints_data[total_counts]         = 1.0;
6024:         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6025:         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6026:         total_counts++;
6027:       }
6028:       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6029:     }

6031:     /* edges and faces */
6032:     total_counts_cc = total_counts;
6033:     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6034:       IS        used_is;
6035:       PetscBool idxs_copied = PETSC_FALSE;

6037:       if (ncc < n_ISForEdges) {
6038:         used_is       = ISForEdges[ncc];
6039:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6040:       } else {
6041:         used_is       = ISForFaces[ncc - n_ISForEdges];
6042:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6043:       }
6044:       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */

6046:       PetscCall(ISGetSize(used_is, &size_of_constraint));
6047:       if (!size_of_constraint) continue;
6048:       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6049:       /* change of basis should not be performed on local periodic nodes */
6050:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6051:       if (nnsp_has_cnst) {
6052:         PetscScalar quad_value;

6054:         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6055:         idxs_copied = PETSC_TRUE;

6057:         if (!pcbddc->use_nnsp_true) {
6058:           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6059:         } else {
6060:           quad_value = 1.0;
6061:         }
6062:         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6063:         temp_constraints++;
6064:         total_counts++;
6065:       }
6066:       for (k = 0; k < nnsp_size; k++) {
6067:         PetscReal    real_value;
6068:         PetscScalar *ptr_to_data;

6070:         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6071:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6072:         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6073:         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6074:         /* check if array is null on the connected component */
6075:         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6076:         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6077:         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6078:           temp_constraints++;
6079:           total_counts++;
6080:           if (!idxs_copied) {
6081:             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6082:             idxs_copied = PETSC_TRUE;
6083:           }
6084:         }
6085:       }
6086:       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6087:       valid_constraints = temp_constraints;
6088:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6089:         if (temp_constraints == 1) { /* just normalize the constraint */
6090:           PetscScalar norm, *ptr_to_data;

6092:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6093:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6094:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6095:           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6096:           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6097:         } else { /* perform SVD */
6098:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6100:           if (use_pod) {
6101:             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6102:                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6103:                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6104:                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6105:                   from that computed using LAPACKgesvd
6106:                -> This is due to a different computation of eigenvectors in LAPACKheev
6107:                -> The quality of the POD-computed basis will be the same */
6108:             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6109:             /* Store upper triangular part of correlation matrix */
6110:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6111:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6112:             for (j = 0; j < temp_constraints; j++) {
6113:               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6114:             }
6115:             /* compute eigenvalues and eigenvectors of correlation matrix */
6116:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6117:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6118: #if !defined(PETSC_USE_COMPLEX)
6119:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6120: #else
6121:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6122: #endif
6123:             PetscCall(PetscFPTrapPop());
6124:             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr);
6125:             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6126:             j = 0;
6127:             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6128:             total_counts      = total_counts - j;
6129:             valid_constraints = temp_constraints - j;
6130:             /* scale and copy POD basis into used quadrature memory */
6131:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6132:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6133:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6134:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6135:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6136:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6137:             if (j < temp_constraints) {
6138:               PetscInt ii;
6139:               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6140:               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6141:               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6142:               PetscCall(PetscFPTrapPop());
6143:               for (k = 0; k < temp_constraints - j; k++) {
6144:                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6145:               }
6146:             }
6147:           } else {
6148: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6149:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6150:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6151:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6152:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6153:   #if !defined(PETSC_USE_COMPLEX)
6154:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6155:   #else
6156:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6157:   #endif
6158:             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
6159:             PetscCall(PetscFPTrapPop());
6160:             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6161:             k = temp_constraints;
6162:             if (k > size_of_constraint) k = size_of_constraint;
6163:             j = 0;
6164:             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6165:             valid_constraints = k - j;
6166:             total_counts      = total_counts - temp_constraints + valid_constraints;
6167: #else
6168:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6169: #endif /* on missing GESVD */
6170:           }
6171:         }
6172:       }
6173:       /* update pointers information */
6174:       if (valid_constraints) {
6175:         constraints_n[total_counts_cc]            = valid_constraints;
6176:         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6177:         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6178:         /* set change_of_basis flag */
6179:         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6180:         total_counts_cc++;
6181:       }
6182:     }
6183:     /* free workspace */
6184:     if (!skip_lapack) {
6185:       PetscCall(PetscFree(work));
6186: #if defined(PETSC_USE_COMPLEX)
6187:       PetscCall(PetscFree(rwork));
6188: #endif
6189:       PetscCall(PetscFree(singular_vals));
6190:       PetscCall(PetscFree(correlation_mat));
6191:       PetscCall(PetscFree(temp_basis));
6192:     }
6193:     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6194:     PetscCall(PetscFree(localnearnullsp));
6195:     /* free index sets of faces, edges and vertices */
6196:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6197:   } else {
6198:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6200:     total_counts = 0;
6201:     n_vertices   = 0;
6202:     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6203:     max_constraints = 0;
6204:     total_counts_cc = 0;
6205:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6206:       total_counts += pcbddc->adaptive_constraints_n[i];
6207:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6208:       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6209:     }
6210:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6211:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6212:     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6213:     constraints_data     = pcbddc->adaptive_constraints_data;
6214:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6215:     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6216:     total_counts_cc = 0;
6217:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6218:       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6219:     }

6221:     max_size_of_constraint = 0;
6222:     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6223:     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6224:     /* Change of basis */
6225:     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6226:     if (pcbddc->use_change_of_basis) {
6227:       for (i = 0; i < sub_schurs->n_subs; i++) {
6228:         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6229:       }
6230:     }
6231:   }
6232:   pcbddc->local_primal_size = total_counts;
6233:   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));

6235:   /* map constraints_idxs in boundary numbering */
6236:   if (pcbddc->use_change_of_basis) {
6237:     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6238:     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6239:   }

6241:   /* Create constraint matrix */
6242:   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6243:   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6244:   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));

6246:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6247:   /* determine if a QR strategy is needed for change of basis */
6248:   qr_needed = pcbddc->use_qr_single;
6249:   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6250:   total_primal_vertices        = 0;
6251:   pcbddc->local_primal_size_cc = 0;
6252:   for (i = 0; i < total_counts_cc; i++) {
6253:     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6254:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6255:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6256:       pcbddc->local_primal_size_cc += 1;
6257:     } else if (PetscBTLookup(change_basis, i)) {
6258:       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6259:       pcbddc->local_primal_size_cc += constraints_n[i];
6260:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6261:         PetscCall(PetscBTSet(qr_needed_idx, i));
6262:         qr_needed = PETSC_TRUE;
6263:       }
6264:     } else {
6265:       pcbddc->local_primal_size_cc += 1;
6266:     }
6267:   }
6268:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6269:   pcbddc->n_vertices = total_primal_vertices;
6270:   /* permute indices in order to have a sorted set of vertices */
6271:   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6272:   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6273:   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6274:   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;

6276:   /* nonzero structure of constraint matrix */
6277:   /* and get reference dof for local constraints */
6278:   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6279:   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;

6281:   j            = total_primal_vertices;
6282:   total_counts = total_primal_vertices;
6283:   cum          = total_primal_vertices;
6284:   for (i = n_vertices; i < total_counts_cc; i++) {
6285:     if (!PetscBTLookup(change_basis, i)) {
6286:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6287:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6288:       cum++;
6289:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6290:       for (k = 0; k < constraints_n[i]; k++) {
6291:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6292:         nnz[j + k]                                        = size_of_constraint;
6293:       }
6294:       j += constraints_n[i];
6295:     }
6296:   }
6297:   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6298:   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6299:   PetscCall(PetscFree(nnz));

6301:   /* set values in constraint matrix */
6302:   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6303:   total_counts = total_primal_vertices;
6304:   for (i = n_vertices; i < total_counts_cc; i++) {
6305:     if (!PetscBTLookup(change_basis, i)) {
6306:       PetscInt *cols;

6308:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6309:       cols               = constraints_idxs + constraints_idxs_ptr[i];
6310:       for (k = 0; k < constraints_n[i]; k++) {
6311:         PetscInt     row = total_counts + k;
6312:         PetscScalar *vals;

6314:         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6315:         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6316:       }
6317:       total_counts += constraints_n[i];
6318:     }
6319:   }
6320:   /* assembling */
6321:   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6322:   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6323:   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));

6325:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6326:   if (pcbddc->use_change_of_basis) {
6327:     /* dual and primal dofs on a single cc */
6328:     PetscInt dual_dofs, primal_dofs;
6329:     /* working stuff for GEQRF */
6330:     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6331:     PetscBLASInt lqr_work;
6332:     /* working stuff for UNGQR */
6333:     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6334:     PetscBLASInt lgqr_work;
6335:     /* working stuff for TRTRS */
6336:     PetscScalar *trs_rhs = NULL;
6337:     PetscBLASInt Blas_NRHS;
6338:     /* pointers for values insertion into change of basis matrix */
6339:     PetscInt    *start_rows, *start_cols;
6340:     PetscScalar *start_vals;
6341:     /* working stuff for values insertion */
6342:     PetscBT   is_primal;
6343:     PetscInt *aux_primal_numbering_B;
6344:     /* matrix sizes */
6345:     PetscInt global_size, local_size;
6346:     /* temporary change of basis */
6347:     Mat localChangeOfBasisMatrix;
6348:     /* extra space for debugging */
6349:     PetscScalar *dbg_work = NULL;

6351:     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6352:     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6353:     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6354:     /* nonzeros for local mat */
6355:     PetscCall(PetscMalloc1(pcis->n, &nnz));
6356:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6357:       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6358:     } else {
6359:       const PetscInt *ii;
6360:       PetscInt        n;
6361:       PetscBool       flg_row;
6362:       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6363:       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6364:       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6365:     }
6366:     for (i = n_vertices; i < total_counts_cc; i++) {
6367:       if (PetscBTLookup(change_basis, i)) {
6368:         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6369:         if (PetscBTLookup(qr_needed_idx, i)) {
6370:           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6371:         } else {
6372:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6373:           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6374:         }
6375:       }
6376:     }
6377:     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6378:     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6379:     PetscCall(PetscFree(nnz));
6380:     /* Set interior change in the matrix */
6381:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6382:       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6383:     } else {
6384:       const PetscInt *ii, *jj;
6385:       PetscScalar    *aa;
6386:       PetscInt        n;
6387:       PetscBool       flg_row;
6388:       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6389:       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6390:       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6391:       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6392:       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6393:     }

6395:     if (pcbddc->dbg_flag) {
6396:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6397:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6398:     }

6400:     /* Now we loop on the constraints which need a change of basis */
6401:     /*
6402:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6403:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6405:        Basic blocks of change of basis matrix T computed:

6407:           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6409:             | 1        0   ...        0         s_1/S |
6410:             | 0        1   ...        0         s_2/S |
6411:             |              ...                        |
6412:             | 0        ...            1     s_{n-1}/S |
6413:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6415:             with S = \sum_{i=1}^n s_i^2
6416:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6417:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6419:           - QR decomposition of constraints otherwise
6420:     */
6421:     if (qr_needed && max_size_of_constraint) {
6422:       /* space to store Q */
6423:       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
6424:       /* array to store scaling factors for reflectors */
6425:       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
6426:       /* first we issue queries for optimal work */
6427:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6428:       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6429:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6430:       lqr_work = -1;
6431:       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
6432:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr);
6433:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
6434:       PetscCall(PetscMalloc1(lqr_work, &qr_work));
6435:       lgqr_work = -1;
6436:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6437:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
6438:       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
6439:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6440:       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
6441:       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
6442:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr);
6443:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
6444:       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
6445:       /* array to store rhs and solution of triangular solver */
6446:       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
6447:       /* allocating workspace for check */
6448:       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
6449:     }
6450:     /* array to store whether a node is primal or not */
6451:     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
6452:     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
6453:     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
6454:     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
6455:     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
6456:     PetscCall(PetscFree(aux_primal_numbering_B));

6458:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6459:     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
6460:       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
6461:       if (PetscBTLookup(change_basis, total_counts)) {
6462:         /* get constraint info */
6463:         primal_dofs = constraints_n[total_counts];
6464:         dual_dofs   = size_of_constraint - primal_dofs;

6466:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));

6468:         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */

6470:           /* copy quadrature constraints for change of basis check */
6471:           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6472:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6473:           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));

6475:           /* compute QR decomposition of constraints */
6476:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6477:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6478:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6479:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6480:           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
6481:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr);
6482:           PetscCall(PetscFPTrapPop());

6484:           /* explicitly compute R^-T */
6485:           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
6486:           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
6487:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6488:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
6489:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6490:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6491:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6492:           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
6493:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr);
6494:           PetscCall(PetscFPTrapPop());

6496:           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6497:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6498:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6499:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6500:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6501:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6502:           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
6503:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr);
6504:           PetscCall(PetscFPTrapPop());

6506:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6507:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6508:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6509:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6510:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6511:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6512:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6513:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6514:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6515:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6516:           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
6517:           PetscCall(PetscFPTrapPop());
6518:           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));

6520:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6521:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6522:           /* insert cols for primal dofs */
6523:           for (j = 0; j < primal_dofs; j++) {
6524:             start_vals = &qr_basis[j * size_of_constraint];
6525:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6526:             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6527:           }
6528:           /* insert cols for dual dofs */
6529:           for (j = 0, k = 0; j < dual_dofs; k++) {
6530:             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
6531:               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
6532:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6533:               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6534:               j++;
6535:             }
6536:           }

6538:           /* check change of basis */
6539:           if (pcbddc->dbg_flag) {
6540:             PetscInt  ii, jj;
6541:             PetscBool valid_qr = PETSC_TRUE;
6542:             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
6543:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6544:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
6545:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6546:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
6547:             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
6548:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6549:             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
6550:             PetscCall(PetscFPTrapPop());
6551:             for (jj = 0; jj < size_of_constraint; jj++) {
6552:               for (ii = 0; ii < primal_dofs; ii++) {
6553:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6554:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6555:               }
6556:             }
6557:             if (!valid_qr) {
6558:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
6559:               for (jj = 0; jj < size_of_constraint; jj++) {
6560:                 for (ii = 0; ii < primal_dofs; ii++) {
6561:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
6562:                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6563:                   }
6564:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
6565:                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6566:                   }
6567:                 }
6568:               }
6569:             } else {
6570:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
6571:             }
6572:           }
6573:         } else { /* simple transformation block */
6574:           PetscInt    row, col;
6575:           PetscScalar val, norm;

6577:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6578:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
6579:           for (j = 0; j < size_of_constraint; j++) {
6580:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
6581:             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6582:             if (!PetscBTLookup(is_primal, row_B)) {
6583:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6584:               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
6585:               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
6586:             } else {
6587:               for (k = 0; k < size_of_constraint; k++) {
6588:                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6589:                 if (row != col) {
6590:                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
6591:                 } else {
6592:                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
6593:                 }
6594:                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
6595:               }
6596:             }
6597:           }
6598:           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
6599:         }
6600:       } else {
6601:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
6602:       }
6603:     }

6605:     /* free workspace */
6606:     if (qr_needed) {
6607:       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
6608:       PetscCall(PetscFree(trs_rhs));
6609:       PetscCall(PetscFree(qr_tau));
6610:       PetscCall(PetscFree(qr_work));
6611:       PetscCall(PetscFree(gqr_work));
6612:       PetscCall(PetscFree(qr_basis));
6613:     }
6614:     PetscCall(PetscBTDestroy(&is_primal));
6615:     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
6616:     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));

6618:     /* assembling of global change of variable */
6619:     if (!pcbddc->fake_change) {
6620:       Mat      tmat;
6621:       PetscInt bs;

6623:       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
6624:       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
6625:       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
6626:       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
6627:       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
6628:       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
6629:       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
6630:       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
6631:       PetscCall(MatGetBlockSize(pc->pmat, &bs));
6632:       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
6633:       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
6634:       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
6635:       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
6636:       PetscCall(MatDestroy(&tmat));
6637:       PetscCall(VecSet(pcis->vec1_global, 0.0));
6638:       PetscCall(VecSet(pcis->vec1_N, 1.0));
6639:       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6640:       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6641:       PetscCall(VecReciprocal(pcis->vec1_global));
6642:       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));

6644:       /* check */
6645:       if (pcbddc->dbg_flag) {
6646:         PetscReal error;
6647:         Vec       x, x_change;

6649:         PetscCall(VecDuplicate(pcis->vec1_global, &x));
6650:         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
6651:         PetscCall(VecSetRandom(x, NULL));
6652:         PetscCall(VecCopy(x, pcis->vec1_global));
6653:         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6654:         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6655:         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
6656:         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6657:         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6658:         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
6659:         PetscCall(VecAXPY(x, -1.0, x_change));
6660:         PetscCall(VecNorm(x, NORM_INFINITY, &error));
6661:         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
6662:         PetscCall(VecDestroy(&x));
6663:         PetscCall(VecDestroy(&x_change));
6664:       }
6665:       /* adapt sub_schurs computed (if any) */
6666:       if (pcbddc->use_deluxe_scaling) {
6667:         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6669:         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6670:         if (sub_schurs && sub_schurs->S_Ej_all) {
6671:           Mat S_new, tmat;
6672:           IS  is_all_N, is_V_Sall = NULL;

6674:           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
6675:           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
6676:           if (pcbddc->deluxe_zerorows) {
6677:             ISLocalToGlobalMapping NtoSall;
6678:             IS                     is_V;
6679:             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
6680:             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
6681:             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
6682:             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
6683:             PetscCall(ISDestroy(&is_V));
6684:           }
6685:           PetscCall(ISDestroy(&is_all_N));
6686:           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6687:           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
6688:           PetscCall(PetscObjectReference((PetscObject)S_new));
6689:           if (pcbddc->deluxe_zerorows) {
6690:             const PetscScalar *array;
6691:             const PetscInt    *idxs_V, *idxs_all;
6692:             PetscInt           i, n_V;

6694:             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6695:             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
6696:             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
6697:             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
6698:             PetscCall(VecGetArrayRead(pcis->D, &array));
6699:             for (i = 0; i < n_V; i++) {
6700:               PetscScalar val;
6701:               PetscInt    idx;

6703:               idx = idxs_V[i];
6704:               val = array[idxs_all[idxs_V[i]]];
6705:               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
6706:             }
6707:             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
6708:             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
6709:             PetscCall(VecRestoreArrayRead(pcis->D, &array));
6710:             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
6711:             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
6712:           }
6713:           sub_schurs->S_Ej_all = S_new;
6714:           PetscCall(MatDestroy(&S_new));
6715:           if (sub_schurs->sum_S_Ej_all) {
6716:             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6717:             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
6718:             PetscCall(PetscObjectReference((PetscObject)S_new));
6719:             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6720:             sub_schurs->sum_S_Ej_all = S_new;
6721:             PetscCall(MatDestroy(&S_new));
6722:           }
6723:           PetscCall(ISDestroy(&is_V_Sall));
6724:           PetscCall(MatDestroy(&tmat));
6725:         }
6726:         /* destroy any change of basis context in sub_schurs */
6727:         if (sub_schurs && sub_schurs->change) {
6728:           PetscInt i;

6730:           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
6731:           PetscCall(PetscFree(sub_schurs->change));
6732:         }
6733:       }
6734:       if (pcbddc->switch_static) { /* need to save the local change */
6735:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6736:       } else {
6737:         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
6738:       }
6739:       /* determine if any process has changed the pressures locally */
6740:       pcbddc->change_interior = pcbddc->benign_have_null;
6741:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6742:       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6743:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6744:       pcbddc->use_qr_single    = qr_needed;
6745:     }
6746:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6747:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6748:       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
6749:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6750:     } else {
6751:       Mat benign_global = NULL;
6752:       if (pcbddc->benign_have_null) {
6753:         Mat M;

6755:         pcbddc->change_interior = PETSC_TRUE;
6756:         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
6757:         PetscCall(VecReciprocal(pcis->vec1_N));
6758:         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
6759:         if (pcbddc->benign_change) {
6760:           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
6761:           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
6762:         } else {
6763:           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
6764:           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
6765:         }
6766:         PetscCall(MatISSetLocalMat(benign_global, M));
6767:         PetscCall(MatDestroy(&M));
6768:         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
6769:         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
6770:       }
6771:       if (pcbddc->user_ChangeOfBasisMatrix) {
6772:         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix));
6773:         PetscCall(MatDestroy(&benign_global));
6774:       } else if (pcbddc->benign_have_null) {
6775:         pcbddc->ChangeOfBasisMatrix = benign_global;
6776:       }
6777:     }
6778:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6779:       IS              is_global;
6780:       const PetscInt *gidxs;

6782:       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
6783:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
6784:       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
6785:       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
6786:       PetscCall(ISDestroy(&is_global));
6787:     }
6788:   }
6789:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));

6791:   if (!pcbddc->fake_change) {
6792:     /* add pressure dofs to set of primal nodes for numbering purposes */
6793:     for (i = 0; i < pcbddc->benign_n; i++) {
6794:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
6795:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6796:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
6797:       pcbddc->local_primal_size_cc++;
6798:       pcbddc->local_primal_size++;
6799:     }

6801:     /* check if a new primal space has been introduced (also take into account benign trick) */
6802:     pcbddc->new_primal_space_local = PETSC_TRUE;
6803:     if (olocal_primal_size == pcbddc->local_primal_size) {
6804:       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6805:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6806:       if (!pcbddc->new_primal_space_local) {
6807:         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6808:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6809:       }
6810:     }
6811:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6812:     PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
6813:   }
6814:   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));

6816:   /* flush dbg viewer */
6817:   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

6819:   /* free workspace */
6820:   PetscCall(PetscBTDestroy(&qr_needed_idx));
6821:   PetscCall(PetscBTDestroy(&change_basis));
6822:   if (!pcbddc->adaptive_selection) {
6823:     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
6824:     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
6825:   } else {
6826:     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
6827:     PetscCall(PetscFree(constraints_n));
6828:     PetscCall(PetscFree(constraints_idxs_B));
6829:   }
6830:   PetscFunctionReturn(PETSC_SUCCESS);
6831: }

6833: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6834: {
6835:   ISLocalToGlobalMapping map;
6836:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
6837:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
6838:   PetscInt               i, N;
6839:   PetscBool              rcsr = PETSC_FALSE;

6841:   PetscFunctionBegin;
6842:   if (pcbddc->recompute_topography) {
6843:     pcbddc->graphanalyzed = PETSC_FALSE;
6844:     /* Reset previously computed graph */
6845:     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
6846:     /* Init local Graph struct */
6847:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
6848:     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
6849:     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));

6851:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
6852:     /* Check validity of the csr graph passed in by the user */
6853:     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
6854:                pcbddc->mat_graph->nvtxs);

6856:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6857:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6858:       PetscInt *xadj, *adjncy;
6859:       PetscInt  nvtxs;
6860:       PetscBool flg_row = PETSC_FALSE;

6862:       PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6863:       if (flg_row) {
6864:         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
6865:         pcbddc->computed_rowadj = PETSC_TRUE;
6866:       }
6867:       PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6868:       rcsr = PETSC_TRUE;
6869:     }
6870:     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

6872:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6873:       PetscReal   *lcoords;
6874:       PetscInt     n;
6875:       MPI_Datatype dimrealtype;

6877:       /* TODO: support for blocked */
6878:       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
6879:       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6880:       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
6881:       PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype));
6882:       PetscCallMPI(MPI_Type_commit(&dimrealtype));
6883:       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6884:       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6885:       PetscCallMPI(MPI_Type_free(&dimrealtype));
6886:       PetscCall(PetscFree(pcbddc->mat_graph->coords));

6888:       pcbddc->mat_graph->coords = lcoords;
6889:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6890:       pcbddc->mat_graph->cnloc  = n;
6891:     }
6892:     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
6893:                pcbddc->mat_graph->nvtxs);
6894:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);

6896:     /* Setup of Graph */
6897:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6898:     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));

6900:     /* attach info on disconnected subdomains if present */
6901:     if (pcbddc->n_local_subs) {
6902:       PetscInt *local_subs, n, totn;

6904:       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6905:       PetscCall(PetscMalloc1(n, &local_subs));
6906:       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
6907:       for (i = 0; i < pcbddc->n_local_subs; i++) {
6908:         const PetscInt *idxs;
6909:         PetscInt        nl, j;

6911:         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
6912:         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
6913:         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
6914:         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
6915:       }
6916:       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
6917:       pcbddc->mat_graph->n_local_subs = totn + 1;
6918:       pcbddc->mat_graph->local_subs   = local_subs;
6919:     }
6920:   }

6922:   if (!pcbddc->graphanalyzed) {
6923:     /* Graph's connected components analysis */
6924:     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
6925:     pcbddc->graphanalyzed   = PETSC_TRUE;
6926:     pcbddc->corner_selected = pcbddc->corner_selection;
6927:   }
6928:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6929:   PetscFunctionReturn(PETSC_SUCCESS);
6930: }

6932: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
6933: {
6934:   PetscInt     i, j, n;
6935:   PetscScalar *alphas;
6936:   PetscReal    norm, *onorms;

6938:   PetscFunctionBegin;
6939:   n = *nio;
6940:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
6941:   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
6942:   PetscCall(VecNormalize(vecs[0], &norm));
6943:   if (norm < PETSC_SMALL) {
6944:     onorms[0] = 0.0;
6945:     PetscCall(VecSet(vecs[0], 0.0));
6946:   } else {
6947:     onorms[0] = norm;
6948:   }

6950:   for (i = 1; i < n; i++) {
6951:     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
6952:     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
6953:     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
6954:     PetscCall(VecNormalize(vecs[i], &norm));
6955:     if (norm < PETSC_SMALL) {
6956:       onorms[i] = 0.0;
6957:       PetscCall(VecSet(vecs[i], 0.0));
6958:     } else {
6959:       onorms[i] = norm;
6960:     }
6961:   }
6962:   /* push nonzero vectors at the beginning */
6963:   for (i = 0; i < n; i++) {
6964:     if (onorms[i] == 0.0) {
6965:       for (j = i + 1; j < n; j++) {
6966:         if (onorms[j] != 0.0) {
6967:           PetscCall(VecCopy(vecs[j], vecs[i]));
6968:           onorms[j] = 0.0;
6969:         }
6970:       }
6971:     }
6972:   }
6973:   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
6974:   PetscCall(PetscFree2(alphas, onorms));
6975:   PetscFunctionReturn(PETSC_SUCCESS);
6976: }

6978: static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
6979: {
6980:   ISLocalToGlobalMapping mapping;
6981:   Mat                    A;
6982:   PetscInt               n_neighs, *neighs, *n_shared, **shared;
6983:   PetscMPIInt            size, rank, color;
6984:   PetscInt              *xadj, *adjncy;
6985:   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
6986:   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
6987:   PetscInt               void_procs, *procs_candidates = NULL;
6988:   PetscInt               xadj_count, *count;
6989:   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
6990:   PetscSubcomm           psubcomm;
6991:   MPI_Comm               subcomm;

6993:   PetscFunctionBegin;
6995:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
6996:   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
6999:   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);

7001:   if (have_void) *have_void = PETSC_FALSE;
7002:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7003:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7004:   PetscCall(MatISGetLocalMat(mat, &A));
7005:   PetscCall(MatGetLocalSize(A, &n, NULL));
7006:   im_active = !!n;
7007:   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7008:   void_procs = size - active_procs;
7009:   /* get ranks of of non-active processes in mat communicator */
7010:   if (void_procs) {
7011:     PetscInt ncand;

7013:     if (have_void) *have_void = PETSC_TRUE;
7014:     PetscCall(PetscMalloc1(size, &procs_candidates));
7015:     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7016:     for (i = 0, ncand = 0; i < size; i++) {
7017:       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7018:     }
7019:     /* force n_subdomains to be not greater that the number of non-active processes */
7020:     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7021:   }

7023:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7024:      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7025:   PetscCall(MatGetSize(mat, &N, NULL));
7026:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7027:     PetscInt issize, isidx, dest;
7028:     if (*n_subdomains == 1) dest = 0;
7029:     else dest = rank;
7030:     if (im_active) {
7031:       issize = 1;
7032:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7033:         isidx = procs_candidates[dest];
7034:       } else {
7035:         isidx = dest;
7036:       }
7037:     } else {
7038:       issize = 0;
7039:       isidx  = -1;
7040:     }
7041:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7042:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7043:     PetscCall(PetscFree(procs_candidates));
7044:     PetscFunctionReturn(PETSC_SUCCESS);
7045:   }
7046:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL));
7047:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL));
7048:   threshold = PetscMax(threshold, 2);

7050:   /* Get info on mapping */
7051:   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7052:   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));

7054:   /* build local CSR graph of subdomains' connectivity */
7055:   PetscCall(PetscMalloc1(2, &xadj));
7056:   xadj[0] = 0;
7057:   xadj[1] = PetscMax(n_neighs - 1, 0);
7058:   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7059:   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7060:   PetscCall(PetscCalloc1(n, &count));
7061:   for (i = 1; i < n_neighs; i++)
7062:     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;

7064:   xadj_count = 0;
7065:   for (i = 1; i < n_neighs; i++) {
7066:     for (j = 0; j < n_shared[i]; j++) {
7067:       if (count[shared[i][j]] < threshold) {
7068:         adjncy[xadj_count]     = neighs[i];
7069:         adjncy_wgt[xadj_count] = n_shared[i];
7070:         xadj_count++;
7071:         break;
7072:       }
7073:     }
7074:   }
7075:   xadj[1] = xadj_count;
7076:   PetscCall(PetscFree(count));
7077:   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7078:   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));

7080:   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));

7082:   /* Restrict work on active processes only */
7083:   PetscCall(PetscMPIIntCast(im_active, &color));
7084:   if (void_procs) {
7085:     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7086:     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7087:     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7088:     subcomm = PetscSubcommChild(psubcomm);
7089:   } else {
7090:     psubcomm = NULL;
7091:     subcomm  = PetscObjectComm((PetscObject)mat);
7092:   }

7094:   v_wgt = NULL;
7095:   if (!color) {
7096:     PetscCall(PetscFree(xadj));
7097:     PetscCall(PetscFree(adjncy));
7098:     PetscCall(PetscFree(adjncy_wgt));
7099:   } else {
7100:     Mat             subdomain_adj;
7101:     IS              new_ranks, new_ranks_contig;
7102:     MatPartitioning partitioner;
7103:     PetscInt        rstart = 0, rend = 0;
7104:     PetscInt       *is_indices, *oldranks;
7105:     PetscMPIInt     size;
7106:     PetscBool       aggregate;

7108:     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7109:     if (void_procs) {
7110:       PetscInt prank = rank;
7111:       PetscCall(PetscMalloc1(size, &oldranks));
7112:       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7113:       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7114:       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7115:     } else {
7116:       oldranks = NULL;
7117:     }
7118:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7119:     if (aggregate) { /* TODO: all this part could be made more efficient */
7120:       PetscInt     lrows, row, ncols, *cols;
7121:       PetscMPIInt  nrank;
7122:       PetscScalar *vals;

7124:       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7125:       lrows = 0;
7126:       if (nrank < redprocs) {
7127:         lrows = size / redprocs;
7128:         if (nrank < size % redprocs) lrows++;
7129:       }
7130:       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7131:       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7132:       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7133:       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7134:       row   = nrank;
7135:       ncols = xadj[1] - xadj[0];
7136:       cols  = adjncy;
7137:       PetscCall(PetscMalloc1(ncols, &vals));
7138:       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7139:       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7140:       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7141:       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7142:       PetscCall(PetscFree(xadj));
7143:       PetscCall(PetscFree(adjncy));
7144:       PetscCall(PetscFree(adjncy_wgt));
7145:       PetscCall(PetscFree(vals));
7146:       if (use_vwgt) {
7147:         Vec                v;
7148:         const PetscScalar *array;
7149:         PetscInt           nl;

7151:         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7152:         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7153:         PetscCall(VecAssemblyBegin(v));
7154:         PetscCall(VecAssemblyEnd(v));
7155:         PetscCall(VecGetLocalSize(v, &nl));
7156:         PetscCall(VecGetArrayRead(v, &array));
7157:         PetscCall(PetscMalloc1(nl, &v_wgt));
7158:         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7159:         PetscCall(VecRestoreArrayRead(v, &array));
7160:         PetscCall(VecDestroy(&v));
7161:       }
7162:     } else {
7163:       PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7164:       if (use_vwgt) {
7165:         PetscCall(PetscMalloc1(1, &v_wgt));
7166:         v_wgt[0] = n;
7167:       }
7168:     }
7169:     /* PetscCall(MatView(subdomain_adj,0)); */

7171:     /* Partition */
7172:     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7173: #if defined(PETSC_HAVE_PTSCOTCH)
7174:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7175: #elif defined(PETSC_HAVE_PARMETIS)
7176:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7177: #else
7178:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7179: #endif
7180:     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7181:     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7182:     *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7183:     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7184:     PetscCall(MatPartitioningSetFromOptions(partitioner));
7185:     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7186:     /* PetscCall(MatPartitioningView(partitioner,0)); */

7188:     /* renumber new_ranks to avoid "holes" in new set of processors */
7189:     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7190:     PetscCall(ISDestroy(&new_ranks));
7191:     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7192:     if (!aggregate) {
7193:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7194:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7195:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7196:       } else if (oldranks) {
7197:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7198:       } else {
7199:         ranks_send_to_idx[0] = is_indices[0];
7200:       }
7201:     } else {
7202:       PetscInt     idx = 0;
7203:       PetscMPIInt  tag;
7204:       MPI_Request *reqs;

7206:       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7207:       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7208:       for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7209:       PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7210:       PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE));
7211:       PetscCall(PetscFree(reqs));
7212:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7213:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7214:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7215:       } else if (oldranks) {
7216:         ranks_send_to_idx[0] = oldranks[idx];
7217:       } else {
7218:         ranks_send_to_idx[0] = idx;
7219:       }
7220:     }
7221:     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7222:     /* clean up */
7223:     PetscCall(PetscFree(oldranks));
7224:     PetscCall(ISDestroy(&new_ranks_contig));
7225:     PetscCall(MatDestroy(&subdomain_adj));
7226:     PetscCall(MatPartitioningDestroy(&partitioner));
7227:   }
7228:   PetscCall(PetscSubcommDestroy(&psubcomm));
7229:   PetscCall(PetscFree(procs_candidates));

7231:   /* assemble parallel IS for sends */
7232:   i = 1;
7233:   if (!color) i = 0;
7234:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7235:   PetscFunctionReturn(PETSC_SUCCESS);
7236: }

7238: typedef enum {
7239:   MATDENSE_PRIVATE = 0,
7240:   MATAIJ_PRIVATE,
7241:   MATBAIJ_PRIVATE,
7242:   MATSBAIJ_PRIVATE
7243: } MatTypePrivate;

7245: static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7246: {
7247:   Mat                    local_mat;
7248:   IS                     is_sends_internal;
7249:   PetscInt               rows, cols, new_local_rows;
7250:   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7251:   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7252:   ISLocalToGlobalMapping l2gmap;
7253:   PetscInt              *l2gmap_indices;
7254:   const PetscInt        *is_indices;
7255:   MatType                new_local_type;
7256:   /* buffers */
7257:   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7258:   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7259:   PetscInt          *recv_buffer_idxs_local;
7260:   PetscScalar       *ptr_vals, *recv_buffer_vals;
7261:   const PetscScalar *send_buffer_vals;
7262:   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7263:   /* MPI */
7264:   MPI_Comm     comm, comm_n;
7265:   PetscSubcomm subcomm;
7266:   PetscMPIInt  n_sends, n_recvs, size;
7267:   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7268:   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7269:   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7270:   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7271:   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;

7273:   PetscFunctionBegin;
7275:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7276:   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7283:   if (nvecs) {
7284:     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7286:   }
7287:   /* further checks */
7288:   PetscCall(MatISGetLocalMat(mat, &local_mat));
7289:   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7290:   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7291:   PetscCall(MatGetSize(local_mat, &rows, &cols));
7292:   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7293:   if (reuse && *mat_n) {
7294:     PetscInt mrows, mcols, mnrows, mncols;
7296:     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7297:     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7298:     PetscCall(MatGetSize(mat, &mrows, &mcols));
7299:     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7300:     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7301:     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7302:   }
7303:   PetscCall(MatGetBlockSize(local_mat, &bs));

7306:   /* prepare IS for sending if not provided */
7307:   if (!is_sends) {
7308:     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7309:     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7310:   } else {
7311:     PetscCall(PetscObjectReference((PetscObject)is_sends));
7312:     is_sends_internal = is_sends;
7313:   }

7315:   /* get comm */
7316:   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

7318:   /* compute number of sends */
7319:   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7320:   PetscCall(PetscMPIIntCast(i, &n_sends));

7322:   /* compute number of receives */
7323:   PetscCallMPI(MPI_Comm_size(comm, &size));
7324:   PetscCall(PetscMalloc1(size, &iflags));
7325:   PetscCall(PetscArrayzero(iflags, size));
7326:   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7327:   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7328:   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7329:   PetscCall(PetscFree(iflags));

7331:   /* restrict comm if requested */
7332:   subcomm     = NULL;
7333:   destroy_mat = PETSC_FALSE;
7334:   if (restrict_comm) {
7335:     PetscMPIInt color, subcommsize;

7337:     color = 0;
7338:     if (restrict_full) {
7339:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7340:     } else {
7341:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7342:     }
7343:     PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7344:     subcommsize = size - subcommsize;
7345:     /* check if reuse has been requested */
7346:     if (reuse) {
7347:       if (*mat_n) {
7348:         PetscMPIInt subcommsize2;
7349:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7350:         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7351:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7352:       } else {
7353:         comm_n = PETSC_COMM_SELF;
7354:       }
7355:     } else { /* MAT_INITIAL_MATRIX */
7356:       PetscMPIInt rank;

7358:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7359:       PetscCall(PetscSubcommCreate(comm, &subcomm));
7360:       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7361:       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7362:       comm_n = PetscSubcommChild(subcomm);
7363:     }
7364:     /* flag to destroy *mat_n if not significative */
7365:     if (color) destroy_mat = PETSC_TRUE;
7366:   } else {
7367:     comm_n = comm;
7368:   }

7370:   /* prepare send/receive buffers */
7371:   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7372:   PetscCall(PetscArrayzero(ilengths_idxs, size));
7373:   PetscCall(PetscMalloc1(size, &ilengths_vals));
7374:   PetscCall(PetscArrayzero(ilengths_vals, size));
7375:   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));

7377:   /* Get data from local matrices */
7378:   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
7379:   /* TODO: See below some guidelines on how to prepare the local buffers */
7380:   /*
7381:        send_buffer_vals should contain the raw values of the local matrix
7382:        send_buffer_idxs should contain:
7383:        - MatType_PRIVATE type
7384:        - PetscInt        size_of_l2gmap
7385:        - PetscInt        global_row_indices[size_of_l2gmap]
7386:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7387:     */
7388:   {
7389:     ISLocalToGlobalMapping mapping;

7391:     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7392:     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
7393:     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
7394:     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
7395:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7396:     send_buffer_idxs[1] = i;
7397:     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
7398:     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
7399:     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
7400:     PetscCall(PetscMPIIntCast(i, &len));
7401:     for (i = 0; i < n_sends; i++) {
7402:       ilengths_vals[is_indices[i]] = len * len;
7403:       ilengths_idxs[is_indices[i]] = len + 2;
7404:     }
7405:   }
7406:   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
7407:   /* additional is (if any) */
7408:   if (nis) {
7409:     PetscMPIInt psum;
7410:     PetscInt    j;
7411:     for (j = 0, psum = 0; j < nis; j++) {
7412:       PetscInt plen;
7413:       PetscCall(ISGetLocalSize(isarray[j], &plen));
7414:       PetscCall(PetscMPIIntCast(plen, &len));
7415:       psum += len + 1; /* indices + length */
7416:     }
7417:     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
7418:     for (j = 0, psum = 0; j < nis; j++) {
7419:       PetscInt        plen;
7420:       const PetscInt *is_array_idxs;
7421:       PetscCall(ISGetLocalSize(isarray[j], &plen));
7422:       send_buffer_idxs_is[psum] = plen;
7423:       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
7424:       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
7425:       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
7426:       psum += plen + 1; /* indices + length */
7427:     }
7428:     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
7429:     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
7430:   }
7431:   PetscCall(MatISRestoreLocalMat(mat, &local_mat));

7433:   buf_size_idxs    = 0;
7434:   buf_size_vals    = 0;
7435:   buf_size_idxs_is = 0;
7436:   buf_size_vecs    = 0;
7437:   for (i = 0; i < n_recvs; i++) {
7438:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7439:     buf_size_vals += (PetscInt)olengths_vals[i];
7440:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7441:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7442:   }
7443:   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
7444:   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
7445:   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
7446:   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));

7448:   /* get new tags for clean communications */
7449:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
7450:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
7451:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
7452:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));

7454:   /* allocate for requests */
7455:   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
7456:   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
7457:   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
7458:   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
7459:   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
7460:   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
7461:   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
7462:   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));

7464:   /* communications */
7465:   ptr_idxs    = recv_buffer_idxs;
7466:   ptr_vals    = recv_buffer_vals;
7467:   ptr_idxs_is = recv_buffer_idxs_is;
7468:   ptr_vecs    = recv_buffer_vecs;
7469:   for (i = 0; i < n_recvs; i++) {
7470:     source_dest = onodes[i];
7471:     PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]));
7472:     PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]));
7473:     ptr_idxs += olengths_idxs[i];
7474:     ptr_vals += olengths_vals[i];
7475:     if (nis) {
7476:       source_dest = onodes_is[i];
7477:       PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]));
7478:       ptr_idxs_is += olengths_idxs_is[i];
7479:     }
7480:     if (nvecs) {
7481:       source_dest = onodes[i];
7482:       PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]));
7483:       ptr_vecs += olengths_idxs[i] - 2;
7484:     }
7485:   }
7486:   for (i = 0; i < n_sends; i++) {
7487:     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
7488:     PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
7489:     PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
7490:     if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
7491:     if (nvecs) {
7492:       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7493:       PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
7494:     }
7495:   }
7496:   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
7497:   PetscCall(ISDestroy(&is_sends_internal));

7499:   /* assemble new l2g map */
7500:   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
7501:   ptr_idxs       = recv_buffer_idxs;
7502:   new_local_rows = 0;
7503:   for (i = 0; i < n_recvs; i++) {
7504:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7505:     ptr_idxs += olengths_idxs[i];
7506:   }
7507:   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
7508:   ptr_idxs       = recv_buffer_idxs;
7509:   new_local_rows = 0;
7510:   for (i = 0; i < n_recvs; i++) {
7511:     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
7512:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7513:     ptr_idxs += olengths_idxs[i];
7514:   }
7515:   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
7516:   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
7517:   PetscCall(PetscFree(l2gmap_indices));

7519:   /* infer new local matrix type from received local matrices type */
7520:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7521:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7522:   if (n_recvs) {
7523:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7524:     ptr_idxs                              = recv_buffer_idxs;
7525:     for (i = 0; i < n_recvs; i++) {
7526:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7527:         new_local_type_private = MATAIJ_PRIVATE;
7528:         break;
7529:       }
7530:       ptr_idxs += olengths_idxs[i];
7531:     }
7532:     switch (new_local_type_private) {
7533:     case MATDENSE_PRIVATE:
7534:       new_local_type = MATSEQAIJ;
7535:       bs             = 1;
7536:       break;
7537:     case MATAIJ_PRIVATE:
7538:       new_local_type = MATSEQAIJ;
7539:       bs             = 1;
7540:       break;
7541:     case MATBAIJ_PRIVATE:
7542:       new_local_type = MATSEQBAIJ;
7543:       break;
7544:     case MATSBAIJ_PRIVATE:
7545:       new_local_type = MATSEQSBAIJ;
7546:       break;
7547:     default:
7548:       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
7549:     }
7550:   } else { /* by default, new_local_type is seqaij */
7551:     new_local_type = MATSEQAIJ;
7552:     bs             = 1;
7553:   }

7555:   /* create MATIS object if needed */
7556:   if (!reuse) {
7557:     PetscCall(MatGetSize(mat, &rows, &cols));
7558:     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7559:   } else {
7560:     /* it also destroys the local matrices */
7561:     if (*mat_n) {
7562:       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
7563:     } else { /* this is a fake object */
7564:       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7565:     }
7566:   }
7567:   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
7568:   PetscCall(MatSetType(local_mat, new_local_type));

7570:   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));

7572:   /* Global to local map of received indices */
7573:   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
7574:   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
7575:   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));

7577:   /* restore attributes -> type of incoming data and its size */
7578:   buf_size_idxs = 0;
7579:   for (i = 0; i < n_recvs; i++) {
7580:     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
7581:     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
7582:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7583:   }
7584:   PetscCall(PetscFree(recv_buffer_idxs));

7586:   /* set preallocation */
7587:   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
7588:   if (!newisdense) {
7589:     PetscInt *new_local_nnz = NULL;

7591:     ptr_idxs = recv_buffer_idxs_local;
7592:     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
7593:     for (i = 0; i < n_recvs; i++) {
7594:       PetscInt j;
7595:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7596:         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
7597:       } else {
7598:         /* TODO */
7599:       }
7600:       ptr_idxs += olengths_idxs[i];
7601:     }
7602:     if (new_local_nnz) {
7603:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
7604:       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
7605:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
7606:       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7607:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
7608:       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7609:     } else {
7610:       PetscCall(MatSetUp(local_mat));
7611:     }
7612:     PetscCall(PetscFree(new_local_nnz));
7613:   } else {
7614:     PetscCall(MatSetUp(local_mat));
7615:   }

7617:   /* set values */
7618:   ptr_vals = recv_buffer_vals;
7619:   ptr_idxs = recv_buffer_idxs_local;
7620:   for (i = 0; i < n_recvs; i++) {
7621:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7622:       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
7623:       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
7624:       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
7625:       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
7626:       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
7627:     } else {
7628:       /* TODO */
7629:     }
7630:     ptr_idxs += olengths_idxs[i];
7631:     ptr_vals += olengths_vals[i];
7632:   }
7633:   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
7634:   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
7635:   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
7636:   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
7637:   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
7638:   PetscCall(PetscFree(recv_buffer_vals));

7640: #if 0
7641:   if (!restrict_comm) { /* check */
7642:     Vec       lvec,rvec;
7643:     PetscReal infty_error;

7645:     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
7646:     PetscCall(VecSetRandom(rvec,NULL));
7647:     PetscCall(MatMult(mat,rvec,lvec));
7648:     PetscCall(VecScale(lvec,-1.0));
7649:     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
7650:     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
7651:     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
7652:     PetscCall(VecDestroy(&rvec));
7653:     PetscCall(VecDestroy(&lvec));
7654:   }
7655: #endif

7657:   /* assemble new additional is (if any) */
7658:   if (nis) {
7659:     PetscInt **temp_idxs, *count_is, j, psum;

7661:     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
7662:     PetscCall(PetscCalloc1(nis, &count_is));
7663:     ptr_idxs = recv_buffer_idxs_is;
7664:     psum     = 0;
7665:     for (i = 0; i < n_recvs; i++) {
7666:       for (j = 0; j < nis; j++) {
7667:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7668:         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
7669:         psum += plen;
7670:         ptr_idxs += plen + 1; /* shift pointer to received data */
7671:       }
7672:     }
7673:     PetscCall(PetscMalloc1(nis, &temp_idxs));
7674:     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
7675:     for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1];
7676:     PetscCall(PetscArrayzero(count_is, nis));
7677:     ptr_idxs = recv_buffer_idxs_is;
7678:     for (i = 0; i < n_recvs; i++) {
7679:       for (j = 0; j < nis; j++) {
7680:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7681:         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
7682:         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
7683:         ptr_idxs += plen + 1; /* shift pointer to received data */
7684:       }
7685:     }
7686:     for (i = 0; i < nis; i++) {
7687:       PetscCall(ISDestroy(&isarray[i]));
7688:       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
7689:       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
7690:     }
7691:     PetscCall(PetscFree(count_is));
7692:     PetscCall(PetscFree(temp_idxs[0]));
7693:     PetscCall(PetscFree(temp_idxs));
7694:   }
7695:   /* free workspace */
7696:   PetscCall(PetscFree(recv_buffer_idxs_is));
7697:   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
7698:   PetscCall(PetscFree(send_buffer_idxs));
7699:   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
7700:   if (isdense) {
7701:     PetscCall(MatISGetLocalMat(mat, &local_mat));
7702:     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
7703:     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
7704:   } else {
7705:     /* PetscCall(PetscFree(send_buffer_vals)); */
7706:   }
7707:   if (nis) {
7708:     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
7709:     PetscCall(PetscFree(send_buffer_idxs_is));
7710:   }

7712:   if (nvecs) {
7713:     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
7714:     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
7715:     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7716:     PetscCall(VecDestroy(&nnsp_vec[0]));
7717:     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
7718:     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
7719:     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
7720:     /* set values */
7721:     ptr_vals = recv_buffer_vecs;
7722:     ptr_idxs = recv_buffer_idxs_local;
7723:     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7724:     for (i = 0; i < n_recvs; i++) {
7725:       PetscInt j;
7726:       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
7727:       ptr_idxs += olengths_idxs[i];
7728:       ptr_vals += olengths_idxs[i] - 2;
7729:     }
7730:     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7731:     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
7732:     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
7733:   }

7735:   PetscCall(PetscFree(recv_buffer_vecs));
7736:   PetscCall(PetscFree(recv_buffer_idxs_local));
7737:   PetscCall(PetscFree(recv_req_idxs));
7738:   PetscCall(PetscFree(recv_req_vals));
7739:   PetscCall(PetscFree(recv_req_vecs));
7740:   PetscCall(PetscFree(recv_req_idxs_is));
7741:   PetscCall(PetscFree(send_req_idxs));
7742:   PetscCall(PetscFree(send_req_vals));
7743:   PetscCall(PetscFree(send_req_vecs));
7744:   PetscCall(PetscFree(send_req_idxs_is));
7745:   PetscCall(PetscFree(ilengths_vals));
7746:   PetscCall(PetscFree(ilengths_idxs));
7747:   PetscCall(PetscFree(olengths_vals));
7748:   PetscCall(PetscFree(olengths_idxs));
7749:   PetscCall(PetscFree(onodes));
7750:   if (nis) {
7751:     PetscCall(PetscFree(ilengths_idxs_is));
7752:     PetscCall(PetscFree(olengths_idxs_is));
7753:     PetscCall(PetscFree(onodes_is));
7754:   }
7755:   PetscCall(PetscSubcommDestroy(&subcomm));
7756:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7757:     PetscCall(MatDestroy(mat_n));
7758:     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
7759:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7760:       PetscCall(VecDestroy(&nnsp_vec[0]));
7761:     }
7762:     *mat_n = NULL;
7763:   }
7764:   PetscFunctionReturn(PETSC_SUCCESS);
7765: }

7767: /* temporary hack into ksp private data structure */
7768: #include <petsc/private/kspimpl.h>

7770: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals)
7771: {
7772:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7773:   PC_IS                 *pcis   = (PC_IS *)pc->data;
7774:   Mat                    coarse_mat, coarse_mat_is, coarse_submat_dense;
7775:   Mat                    coarsedivudotp = NULL;
7776:   Mat                    coarseG, t_coarse_mat_is;
7777:   MatNullSpace           CoarseNullSpace = NULL;
7778:   ISLocalToGlobalMapping coarse_islg;
7779:   IS                     coarse_is, *isarray, corners;
7780:   PetscInt               i, im_active = -1, active_procs = -1;
7781:   PetscInt               nis, nisdofs, nisneu, nisvert;
7782:   PetscInt               coarse_eqs_per_proc;
7783:   PC                     pc_temp;
7784:   PCType                 coarse_pc_type;
7785:   KSPType                coarse_ksp_type;
7786:   PetscBool              multilevel_requested, multilevel_allowed;
7787:   PetscBool              coarse_reuse;
7788:   PetscInt               ncoarse, nedcfield;
7789:   PetscBool              compute_vecs = PETSC_FALSE;
7790:   PetscScalar           *array;
7791:   MatReuse               coarse_mat_reuse;
7792:   PetscBool              restr, full_restr, have_void;
7793:   PetscMPIInt            size;

7795:   PetscFunctionBegin;
7796:   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
7797:   /* Assign global numbering to coarse dofs */
7798:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7799:     PetscInt ocoarse_size;
7800:     compute_vecs = PETSC_TRUE;

7802:     pcbddc->new_primal_space = PETSC_TRUE;
7803:     ocoarse_size             = pcbddc->coarse_size;
7804:     PetscCall(PetscFree(pcbddc->global_primal_indices));
7805:     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
7806:     /* see if we can avoid some work */
7807:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7808:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7809:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7810:         PetscCall(KSPReset(pcbddc->coarse_ksp));
7811:         coarse_reuse = PETSC_FALSE;
7812:       } else { /* we can safely reuse already computed coarse matrix */
7813:         coarse_reuse = PETSC_TRUE;
7814:       }
7815:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7816:       coarse_reuse = PETSC_FALSE;
7817:     }
7818:     /* reset any subassembling information */
7819:     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
7820:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7821:     coarse_reuse = PETSC_TRUE;
7822:   }
7823:   if (coarse_reuse && pcbddc->coarse_ksp) {
7824:     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
7825:     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
7826:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7827:   } else {
7828:     coarse_mat       = NULL;
7829:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7830:   }

7832:   /* creates temporary l2gmap and IS for coarse indexes */
7833:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
7834:   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));

7836:   /* creates temporary MATIS object for coarse matrix */
7837:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense));
7838:   PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is));
7839:   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense));
7840:   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7841:   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7842:   PetscCall(MatDestroy(&coarse_submat_dense));

7844:   /* count "active" (i.e. with positive local size) and "void" processes */
7845:   im_active = !!(pcis->n);
7846:   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));

7848:   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7849:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7850:   /* full_restr : just use the receivers from the subassembling pattern */
7851:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
7852:   coarse_mat_is        = NULL;
7853:   multilevel_allowed   = PETSC_FALSE;
7854:   multilevel_requested = PETSC_FALSE;
7855:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
7856:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
7857:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7858:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7859:   if (multilevel_requested) {
7860:     ncoarse    = active_procs / pcbddc->coarsening_ratio;
7861:     restr      = PETSC_FALSE;
7862:     full_restr = PETSC_FALSE;
7863:   } else {
7864:     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
7865:     restr      = PETSC_TRUE;
7866:     full_restr = PETSC_TRUE;
7867:   }
7868:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7869:   ncoarse = PetscMax(1, ncoarse);
7870:   if (!pcbddc->coarse_subassembling) {
7871:     if (pcbddc->coarsening_ratio > 1) {
7872:       if (multilevel_requested) {
7873:         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7874:       } else {
7875:         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7876:       }
7877:     } else {
7878:       PetscMPIInt rank;

7880:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
7881:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7882:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
7883:     }
7884:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7885:     PetscInt psum;
7886:     if (pcbddc->coarse_ksp) psum = 1;
7887:     else psum = 0;
7888:     PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
7889:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7890:   }
7891:   /* determine if we can go multilevel */
7892:   if (multilevel_requested) {
7893:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7894:     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
7895:   }
7896:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7898:   /* dump subassembling pattern */
7899:   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
7900:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7901:   nedcfield = -1;
7902:   corners   = NULL;
7903:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
7904:     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
7905:     const PetscInt        *idxs;
7906:     ISLocalToGlobalMapping tmap;

7908:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7909:     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
7910:     /* allocate space for temporary storage */
7911:     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
7912:     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
7913:     /* allocate for IS array */
7914:     nisdofs = pcbddc->n_ISForDofsLocal;
7915:     if (pcbddc->nedclocal) {
7916:       if (pcbddc->nedfield > -1) {
7917:         nedcfield = pcbddc->nedfield;
7918:       } else {
7919:         nedcfield = 0;
7920:         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
7921:         nisdofs = 1;
7922:       }
7923:     }
7924:     nisneu  = !!pcbddc->NeumannBoundariesLocal;
7925:     nisvert = 0; /* nisvert is not used */
7926:     nis     = nisdofs + nisneu + nisvert;
7927:     PetscCall(PetscMalloc1(nis, &isarray));
7928:     /* dofs splitting */
7929:     for (i = 0; i < nisdofs; i++) {
7930:       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
7931:       if (nedcfield != i) {
7932:         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
7933:         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
7934:         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7935:         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
7936:       } else {
7937:         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
7938:         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
7939:         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7940:         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7941:         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
7942:       }
7943:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7944:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
7945:       /* PetscCall(ISView(isarray[i],0)); */
7946:     }
7947:     /* neumann boundaries */
7948:     if (pcbddc->NeumannBoundariesLocal) {
7949:       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
7950:       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
7951:       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7952:       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7953:       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7954:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7955:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
7956:       /* PetscCall(ISView(isarray[nisdofs],0)); */
7957:     }
7958:     /* coordinates */
7959:     if (pcbddc->corner_selected) {
7960:       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7961:       PetscCall(ISGetLocalSize(corners, &tsize));
7962:       PetscCall(ISGetIndices(corners, &idxs));
7963:       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7964:       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7965:       PetscCall(ISRestoreIndices(corners, &idxs));
7966:       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7967:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7968:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
7969:     }
7970:     PetscCall(PetscFree(tidxs));
7971:     PetscCall(PetscFree(tidxs2));
7972:     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
7973:   } else {
7974:     nis     = 0;
7975:     nisdofs = 0;
7976:     nisneu  = 0;
7977:     nisvert = 0;
7978:     isarray = NULL;
7979:   }
7980:   /* destroy no longer needed map */
7981:   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));

7983:   /* subassemble */
7984:   if (multilevel_allowed) {
7985:     Vec       vp[1];
7986:     PetscInt  nvecs = 0;
7987:     PetscBool reuse, reuser;

7989:     if (coarse_mat) reuse = PETSC_TRUE;
7990:     else reuse = PETSC_FALSE;
7991:     PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7992:     vp[0] = NULL;
7993:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7994:       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
7995:       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
7996:       PetscCall(VecSetType(vp[0], VECSTANDARD));
7997:       nvecs = 1;

7999:       if (pcbddc->divudotp) {
8000:         Mat      B, loc_divudotp;
8001:         Vec      v, p;
8002:         IS       dummy;
8003:         PetscInt np;

8005:         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8006:         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8007:         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8008:         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8009:         PetscCall(MatCreateVecs(B, &v, &p));
8010:         PetscCall(VecSet(p, 1.));
8011:         PetscCall(MatMultTranspose(B, p, v));
8012:         PetscCall(VecDestroy(&p));
8013:         PetscCall(MatDestroy(&B));
8014:         PetscCall(VecGetArray(vp[0], &array));
8015:         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8016:         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8017:         PetscCall(VecResetArray(pcbddc->vec1_P));
8018:         PetscCall(VecRestoreArray(vp[0], &array));
8019:         PetscCall(ISDestroy(&dummy));
8020:         PetscCall(VecDestroy(&v));
8021:       }
8022:     }
8023:     if (reuser) {
8024:       PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8025:     } else {
8026:       PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8027:     }
8028:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8029:       PetscScalar       *arraym;
8030:       const PetscScalar *arrayv;
8031:       PetscInt           nl;
8032:       PetscCall(VecGetLocalSize(vp[0], &nl));
8033:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8034:       PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8035:       PetscCall(VecGetArrayRead(vp[0], &arrayv));
8036:       PetscCall(PetscArraycpy(arraym, arrayv, nl));
8037:       PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8038:       PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8039:       PetscCall(VecDestroy(&vp[0]));
8040:     } else {
8041:       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8042:     }
8043:   } else {
8044:     PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8045:   }
8046:   if (coarse_mat_is || coarse_mat) {
8047:     if (!multilevel_allowed) {
8048:       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8049:     } else {
8050:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8051:       if (coarse_mat_is) {
8052:         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8053:         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8054:         coarse_mat = coarse_mat_is;
8055:       }
8056:     }
8057:   }
8058:   PetscCall(MatDestroy(&t_coarse_mat_is));
8059:   PetscCall(MatDestroy(&coarse_mat_is));

8061:   /* create local to global scatters for coarse problem */
8062:   if (compute_vecs) {
8063:     PetscInt lrows;
8064:     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8065:     if (coarse_mat) {
8066:       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8067:     } else {
8068:       lrows = 0;
8069:     }
8070:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8071:     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8072:     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8073:     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8074:     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8075:   }
8076:   PetscCall(ISDestroy(&coarse_is));

8078:   /* set defaults for coarse KSP and PC */
8079:   if (multilevel_allowed) {
8080:     coarse_ksp_type = KSPRICHARDSON;
8081:     coarse_pc_type  = PCBDDC;
8082:   } else {
8083:     coarse_ksp_type = KSPPREONLY;
8084:     coarse_pc_type  = PCREDUNDANT;
8085:   }

8087:   /* print some info if requested */
8088:   if (pcbddc->dbg_flag) {
8089:     if (!multilevel_allowed) {
8090:       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8091:       if (multilevel_requested) {
8092:         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio));
8093:       } else if (pcbddc->max_levels) {
8094:         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8095:       }
8096:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8097:     }
8098:   }

8100:   /* communicate coarse discrete gradient */
8101:   coarseG = NULL;
8102:   if (pcbddc->nedcG && multilevel_allowed) {
8103:     MPI_Comm ccomm;
8104:     if (coarse_mat) {
8105:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8106:     } else {
8107:       ccomm = MPI_COMM_NULL;
8108:     }
8109:     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8110:   }

8112:   /* create the coarse KSP object only once with defaults */
8113:   if (coarse_mat) {
8114:     PetscBool   isredundant, isbddc, force, valid;
8115:     PetscViewer dbg_viewer = NULL;
8116:     PetscBool   isset, issym, isher, isspd;

8118:     if (pcbddc->dbg_flag) {
8119:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8120:       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8121:     }
8122:     if (!pcbddc->coarse_ksp) {
8123:       char   prefix[256], str_level[16];
8124:       size_t len;

8126:       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8127:       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8128:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8129:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8130:       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1));
8131:       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8132:       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8133:       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8134:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8135:       /* TODO is this logic correct? should check for coarse_mat type */
8136:       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8137:       /* prefix */
8138:       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8139:       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8140:       if (!pcbddc->current_level) {
8141:         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8142:         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8143:       } else {
8144:         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8145:         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8146:         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8147:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8148:         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8149:         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
8150:         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8151:       }
8152:       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8153:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8154:       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8155:       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8156:       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8157:       /* allow user customization */
8158:       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8159:       /* get some info after set from options */
8160:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8161:       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8162:       force = PETSC_FALSE;
8163:       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8164:       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8165:       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8166:       if (multilevel_allowed && !force && !valid) {
8167:         isbddc = PETSC_TRUE;
8168:         PetscCall(PCSetType(pc_temp, PCBDDC));
8169:         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8170:         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8171:         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8172:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8173:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8174:           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8175:           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8176:           PetscOptionsEnd();
8177:           pc_temp->setfromoptionscalled++;
8178:         }
8179:       }
8180:     }
8181:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8182:     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8183:     if (nisdofs) {
8184:       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8185:       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8186:     }
8187:     if (nisneu) {
8188:       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8189:       PetscCall(ISDestroy(&isarray[nisdofs]));
8190:     }
8191:     if (nisvert) {
8192:       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8193:       PetscCall(ISDestroy(&isarray[nis - 1]));
8194:     }
8195:     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));

8197:     /* get some info after set from options */
8198:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));

8200:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8201:     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8202:     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8203:     force = PETSC_FALSE;
8204:     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8205:     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8206:     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8207:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8208:     if (isredundant) {
8209:       KSP inner_ksp;
8210:       PC  inner_pc;

8212:       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8213:       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8214:     }

8216:     /* parameters which miss an API */
8217:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8218:     if (isbddc) {
8219:       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;

8221:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8222:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8223:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8224:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8225:       if (pcbddc_coarse->benign_saddle_point) {
8226:         Mat                    coarsedivudotp_is;
8227:         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8228:         IS                     row, col;
8229:         const PetscInt        *gidxs;
8230:         PetscInt               n, st, M, N;

8232:         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8233:         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8234:         st = st - n;
8235:         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8236:         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8237:         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8238:         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8239:         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8240:         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8241:         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8242:         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8243:         PetscCall(ISGetSize(row, &M));
8244:         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8245:         PetscCall(ISDestroy(&row));
8246:         PetscCall(ISDestroy(&col));
8247:         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8248:         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8249:         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8250:         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8251:         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8252:         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8253:         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8254:         PetscCall(MatDestroy(&coarsedivudotp));
8255:         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8256:         PetscCall(MatDestroy(&coarsedivudotp_is));
8257:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8258:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8259:       }
8260:     }

8262:     /* propagate symmetry info of coarse matrix */
8263:     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8264:     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8265:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8266:     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8267:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8268:     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8269:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));

8271:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8272:     /* set operators */
8273:     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8274:     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8275:     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8276:     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8277:   }
8278:   PetscCall(MatDestroy(&coarseG));
8279:   PetscCall(PetscFree(isarray));
8280: #if 0
8281:   {
8282:     PetscViewer viewer;
8283:     char filename[256];
8284:     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8285:     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8286:     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8287:     PetscCall(MatView(coarse_mat,viewer));
8288:     PetscCall(PetscViewerPopFormat(viewer));
8289:     PetscCall(PetscViewerDestroy(&viewer));
8290:   }
8291: #endif

8293:   if (corners) {
8294:     Vec             gv;
8295:     IS              is;
8296:     const PetscInt *idxs;
8297:     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8298:     PetscScalar    *coords;

8300:     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8301:     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8302:     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8303:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8304:     PetscCall(VecSetBlockSize(gv, cdim));
8305:     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8306:     PetscCall(VecSetType(gv, VECSTANDARD));
8307:     PetscCall(VecSetFromOptions(gv));
8308:     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */

8310:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8311:     PetscCall(ISGetLocalSize(is, &n));
8312:     PetscCall(ISGetIndices(is, &idxs));
8313:     PetscCall(PetscMalloc1(n * cdim, &coords));
8314:     for (i = 0; i < n; i++) {
8315:       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8316:     }
8317:     PetscCall(ISRestoreIndices(is, &idxs));
8318:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));

8320:     PetscCall(ISGetLocalSize(corners, &n));
8321:     PetscCall(ISGetIndices(corners, &idxs));
8322:     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8323:     PetscCall(ISRestoreIndices(corners, &idxs));
8324:     PetscCall(PetscFree(coords));
8325:     PetscCall(VecAssemblyBegin(gv));
8326:     PetscCall(VecAssemblyEnd(gv));
8327:     PetscCall(VecGetArray(gv, &coords));
8328:     if (pcbddc->coarse_ksp) {
8329:       PC        coarse_pc;
8330:       PetscBool isbddc;

8332:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8333:       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8334:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8335:         PetscReal *realcoords;

8337:         PetscCall(VecGetLocalSize(gv, &n));
8338: #if defined(PETSC_USE_COMPLEX)
8339:         PetscCall(PetscMalloc1(n, &realcoords));
8340:         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8341: #else
8342:         realcoords = coords;
8343: #endif
8344:         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8345: #if defined(PETSC_USE_COMPLEX)
8346:         PetscCall(PetscFree(realcoords));
8347: #endif
8348:       }
8349:     }
8350:     PetscCall(VecRestoreArray(gv, &coords));
8351:     PetscCall(VecDestroy(&gv));
8352:   }
8353:   PetscCall(ISDestroy(&corners));

8355:   if (pcbddc->coarse_ksp) {
8356:     Vec crhs, csol;

8358:     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
8359:     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
8360:     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL));
8361:     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs)));
8362:   }
8363:   PetscCall(MatDestroy(&coarsedivudotp));

8365:   /* compute null space for coarse solver if the benign trick has been requested */
8366:   if (pcbddc->benign_null) {
8367:     PetscCall(VecSet(pcbddc->vec1_P, 0.));
8368:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
8369:     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
8370:     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
8371:     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8372:     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8373:     if (coarse_mat) {
8374:       Vec          nullv;
8375:       PetscScalar *array, *array2;
8376:       PetscInt     nl;

8378:       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
8379:       PetscCall(VecGetLocalSize(nullv, &nl));
8380:       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8381:       PetscCall(VecGetArray(nullv, &array2));
8382:       PetscCall(PetscArraycpy(array2, array, nl));
8383:       PetscCall(VecRestoreArray(nullv, &array2));
8384:       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8385:       PetscCall(VecNormalize(nullv, NULL));
8386:       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
8387:       PetscCall(VecDestroy(&nullv));
8388:     }
8389:   }
8390:   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));

8392:   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8393:   if (pcbddc->coarse_ksp) {
8394:     PetscBool ispreonly;

8396:     if (CoarseNullSpace) {
8397:       PetscBool isnull;

8399:       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
8400:       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
8401:       /* TODO: add local nullspaces (if any) */
8402:     }
8403:     /* setup coarse ksp */
8404:     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
8405:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8406:     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
8407:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8408:       KSP         check_ksp;
8409:       KSPType     check_ksp_type;
8410:       PC          check_pc;
8411:       Vec         check_vec, coarse_vec;
8412:       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
8413:       PetscInt    its;
8414:       PetscBool   compute_eigs;
8415:       PetscReal  *eigs_r, *eigs_c;
8416:       PetscInt    neigs;
8417:       const char *prefix;

8419:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8420:       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
8421:       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
8422:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
8423:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
8424:       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
8425:       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size));
8426:       /* prevent from setup unneeded object */
8427:       PetscCall(KSPGetPC(check_ksp, &check_pc));
8428:       PetscCall(PCSetType(check_pc, PCNONE));
8429:       if (ispreonly) {
8430:         check_ksp_type = KSPPREONLY;
8431:         compute_eigs   = PETSC_FALSE;
8432:       } else {
8433:         check_ksp_type = KSPGMRES;
8434:         compute_eigs   = PETSC_TRUE;
8435:       }
8436:       PetscCall(KSPSetType(check_ksp, check_ksp_type));
8437:       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
8438:       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
8439:       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
8440:       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
8441:       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
8442:       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
8443:       PetscCall(KSPSetFromOptions(check_ksp));
8444:       PetscCall(KSPSetUp(check_ksp));
8445:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
8446:       PetscCall(KSPSetPC(check_ksp, check_pc));
8447:       /* create random vec */
8448:       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
8449:       PetscCall(VecSetRandom(check_vec, NULL));
8450:       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8451:       /* solve coarse problem */
8452:       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
8453:       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
8454:       /* set eigenvalue estimation if preonly has not been requested */
8455:       if (compute_eigs) {
8456:         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
8457:         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
8458:         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
8459:         if (neigs) {
8460:           lambda_max = eigs_r[neigs - 1];
8461:           lambda_min = eigs_r[0];
8462:           if (pcbddc->use_coarse_estimates) {
8463:             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8464:               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
8465:               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
8466:             }
8467:           }
8468:         }
8469:       }

8471:       /* check coarse problem residual error */
8472:       if (pcbddc->dbg_flag) {
8473:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8474:         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8475:         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
8476:         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
8477:         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8478:         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
8479:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
8480:         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer));
8481:         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer));
8482:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
8483:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
8484:         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
8485:         if (compute_eigs) {
8486:           PetscReal          lambda_max_s, lambda_min_s;
8487:           KSPConvergedReason reason;
8488:           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
8489:           PetscCall(KSPGetIterationNumber(check_ksp, &its));
8490:           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
8491:           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
8492:           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
8493:           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
8494:         }
8495:         PetscCall(PetscViewerFlush(dbg_viewer));
8496:         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8497:       }
8498:       PetscCall(VecDestroy(&check_vec));
8499:       PetscCall(VecDestroy(&coarse_vec));
8500:       PetscCall(KSPDestroy(&check_ksp));
8501:       if (compute_eigs) {
8502:         PetscCall(PetscFree(eigs_r));
8503:         PetscCall(PetscFree(eigs_c));
8504:       }
8505:     }
8506:   }
8507:   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
8508:   /* print additional info */
8509:   if (pcbddc->dbg_flag) {
8510:     /* waits until all processes reaches this point */
8511:     PetscCall(PetscBarrier((PetscObject)pc));
8512:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
8513:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8514:   }

8516:   /* free memory */
8517:   PetscCall(MatDestroy(&coarse_mat));
8518:   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8519:   PetscFunctionReturn(PETSC_SUCCESS);
8520: }

8522: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
8523: {
8524:   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
8525:   PC_IS          *pcis   = (PC_IS *)pc->data;
8526:   Mat_IS         *matis  = (Mat_IS *)pc->pmat->data;
8527:   IS              subset, subset_mult, subset_n;
8528:   PetscInt        local_size, coarse_size = 0;
8529:   PetscInt       *local_primal_indices = NULL;
8530:   const PetscInt *t_local_primal_indices;

8532:   PetscFunctionBegin;
8533:   /* Compute global number of coarse dofs */
8534:   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
8535:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
8536:   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
8537:   PetscCall(ISDestroy(&subset_n));
8538:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
8539:   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
8540:   PetscCall(ISDestroy(&subset));
8541:   PetscCall(ISDestroy(&subset_mult));
8542:   PetscCall(ISGetLocalSize(subset_n, &local_size));
8543:   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
8544:   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
8545:   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
8546:   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
8547:   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
8548:   PetscCall(ISDestroy(&subset_n));

8550:   /* check numbering */
8551:   if (pcbddc->dbg_flag) {
8552:     PetscScalar coarsesum, *array, *array2;
8553:     PetscInt    i;
8554:     PetscBool   set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE;

8556:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8557:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8558:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n"));
8559:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8560:     /* counter */
8561:     PetscCall(VecSet(pcis->vec1_global, 0.0));
8562:     PetscCall(VecSet(pcis->vec1_N, 1.0));
8563:     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8564:     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8565:     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8566:     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8567:     PetscCall(VecSet(pcis->vec1_N, 0.0));
8568:     for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES));
8569:     PetscCall(VecAssemblyBegin(pcis->vec1_N));
8570:     PetscCall(VecAssemblyEnd(pcis->vec1_N));
8571:     PetscCall(VecSet(pcis->vec1_global, 0.0));
8572:     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8573:     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8574:     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8575:     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8576:     PetscCall(VecGetArray(pcis->vec1_N, &array));
8577:     PetscCall(VecGetArray(pcis->vec2_N, &array2));
8578:     for (i = 0; i < pcis->n; i++) {
8579:       if (array[i] != 0.0 && array[i] != array2[i]) {
8580:         PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi;
8581:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8582:         set_error      = PETSC_TRUE;
8583:         PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi));
8584:         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh));
8585:       }
8586:     }
8587:     PetscCall(VecRestoreArray(pcis->vec2_N, &array2));
8588:     PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8589:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8590:     for (i = 0; i < pcis->n; i++) {
8591:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]);
8592:     }
8593:     PetscCall(VecRestoreArray(pcis->vec1_N, &array));
8594:     PetscCall(VecSet(pcis->vec1_global, 0.0));
8595:     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8596:     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8597:     PetscCall(VecSum(pcis->vec1_global, &coarsesum));
8598:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum)));
8599:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8600:       PetscInt *gidxs;

8602:       PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs));
8603:       PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs));
8604:       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n"));
8605:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8606:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank));
8607:       for (i = 0; i < pcbddc->local_primal_size; i++) {
8608:         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i]));
8609:       }
8610:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8611:       PetscCall(PetscFree(gidxs));
8612:     }
8613:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8614:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8615:     PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed");
8616:   }

8618:   /* get back data */
8619:   *coarse_size_n          = coarse_size;
8620:   *local_primal_indices_n = local_primal_indices;
8621:   PetscFunctionReturn(PETSC_SUCCESS);
8622: }

8624: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
8625: {
8626:   IS           localis_t;
8627:   PetscInt     i, lsize, *idxs, n;
8628:   PetscScalar *vals;

8630:   PetscFunctionBegin;
8631:   /* get indices in local ordering exploiting local to global map */
8632:   PetscCall(ISGetLocalSize(globalis, &lsize));
8633:   PetscCall(PetscMalloc1(lsize, &vals));
8634:   for (i = 0; i < lsize; i++) vals[i] = 1.0;
8635:   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
8636:   PetscCall(VecSet(gwork, 0.0));
8637:   PetscCall(VecSet(lwork, 0.0));
8638:   if (idxs) { /* multilevel guard */
8639:     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
8640:     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
8641:   }
8642:   PetscCall(VecAssemblyBegin(gwork));
8643:   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
8644:   PetscCall(PetscFree(vals));
8645:   PetscCall(VecAssemblyEnd(gwork));
8646:   /* now compute set in local ordering */
8647:   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8648:   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8649:   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
8650:   PetscCall(VecGetSize(lwork, &n));
8651:   for (i = 0, lsize = 0; i < n; i++) {
8652:     if (PetscRealPart(vals[i]) > 0.5) lsize++;
8653:   }
8654:   PetscCall(PetscMalloc1(lsize, &idxs));
8655:   for (i = 0, lsize = 0; i < n; i++) {
8656:     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
8657:   }
8658:   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
8659:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
8660:   *localis = localis_t;
8661:   PetscFunctionReturn(PETSC_SUCCESS);
8662: }

8664: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
8665: {
8666:   PC_IS   *pcis   = (PC_IS *)pc->data;
8667:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8668:   PC_IS   *pcisf;
8669:   PC_BDDC *pcbddcf;
8670:   PC       pcf;

8672:   PetscFunctionBegin;
8673:   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
8674:   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
8675:   PetscCall(PCSetType(pcf, PCBDDC));

8677:   pcisf   = (PC_IS *)pcf->data;
8678:   pcbddcf = (PC_BDDC *)pcf->data;

8680:   pcisf->is_B_local = pcis->is_B_local;
8681:   pcisf->vec1_N     = pcis->vec1_N;
8682:   pcisf->BtoNmap    = pcis->BtoNmap;
8683:   pcisf->n          = pcis->n;
8684:   pcisf->n_B        = pcis->n_B;

8686:   PetscCall(PetscFree(pcbddcf->mat_graph));
8687:   PetscCall(PetscFree(pcbddcf->sub_schurs));
8688:   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
8689:   pcbddcf->sub_schurs            = schurs;
8690:   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
8691:   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
8692:   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
8693:   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
8694:   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
8695:   pcbddcf->use_faces             = PETSC_TRUE;
8696:   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
8697:   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
8698:   pcbddcf->use_qr_single         = (PetscBool)!constraints;
8699:   pcbddcf->fake_change           = PETSC_TRUE;
8700:   pcbddcf->dbg_flag              = pcbddc->dbg_flag;

8702:   PetscCall(PCBDDCAdaptiveSelection(pcf));
8703:   PetscCall(PCBDDCConstraintsSetUp(pcf));

8705:   *change = pcbddcf->ConstraintMatrix;
8706:   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
8707:   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
8708:   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;

8710:   if (schurs) pcbddcf->sub_schurs = NULL;
8711:   pcbddcf->ConstraintMatrix = NULL;
8712:   pcbddcf->mat_graph        = NULL;
8713:   pcisf->is_B_local         = NULL;
8714:   pcisf->vec1_N             = NULL;
8715:   pcisf->BtoNmap            = NULL;
8716:   PetscCall(PCDestroy(&pcf));
8717:   PetscFunctionReturn(PETSC_SUCCESS);
8718: }

8720: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8721: {
8722:   PC_IS          *pcis       = (PC_IS *)pc->data;
8723:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
8724:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
8725:   Mat             S_j;
8726:   PetscInt       *used_xadj, *used_adjncy;
8727:   PetscBool       free_used_adj;

8729:   PetscFunctionBegin;
8730:   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8731:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8732:   free_used_adj = PETSC_FALSE;
8733:   if (pcbddc->sub_schurs_layers == -1) {
8734:     used_xadj   = NULL;
8735:     used_adjncy = NULL;
8736:   } else {
8737:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8738:       used_xadj   = pcbddc->mat_graph->xadj;
8739:       used_adjncy = pcbddc->mat_graph->adjncy;
8740:     } else if (pcbddc->computed_rowadj) {
8741:       used_xadj   = pcbddc->mat_graph->xadj;
8742:       used_adjncy = pcbddc->mat_graph->adjncy;
8743:     } else {
8744:       PetscBool       flg_row = PETSC_FALSE;
8745:       const PetscInt *xadj, *adjncy;
8746:       PetscInt        nvtxs;

8748:       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8749:       if (flg_row) {
8750:         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
8751:         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
8752:         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
8753:         free_used_adj = PETSC_TRUE;
8754:       } else {
8755:         pcbddc->sub_schurs_layers = -1;
8756:         used_xadj                 = NULL;
8757:         used_adjncy               = NULL;
8758:       }
8759:       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8760:     }
8761:   }

8763:   /* setup sub_schurs data */
8764:   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
8765:   if (!sub_schurs->schur_explicit) {
8766:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8767:     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
8768:     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
8769:   } else {
8770:     Mat       change        = NULL;
8771:     Vec       scaling       = NULL;
8772:     IS        change_primal = NULL, iP;
8773:     PetscInt  benign_n;
8774:     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
8775:     PetscBool need_change       = PETSC_FALSE;
8776:     PetscBool discrete_harmonic = PETSC_FALSE;

8778:     if (!pcbddc->use_vertices && reuse_solvers) {
8779:       PetscInt n_vertices;

8781:       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
8782:       reuse_solvers = (PetscBool)!n_vertices;
8783:     }
8784:     if (!pcbddc->benign_change_explicit) {
8785:       benign_n = pcbddc->benign_n;
8786:     } else {
8787:       benign_n = 0;
8788:     }
8789:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8790:        We need a global reduction to avoid possible deadlocks.
8791:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8792:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8793:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8794:       PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8795:       need_change = (PetscBool)(!need_change);
8796:     }
8797:     /* If the user defines additional constraints, we import them here */
8798:     if (need_change) {
8799:       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
8800:       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
8801:     }
8802:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

8804:     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
8805:     if (iP) {
8806:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
8807:       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
8808:       PetscOptionsEnd();
8809:     }
8810:     if (discrete_harmonic) {
8811:       Mat A;
8812:       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
8813:       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
8814:       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
8815:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
8816:                                      pcbddc->benign_zerodiag_subs, change, change_primal));
8817:       PetscCall(MatDestroy(&A));
8818:     } else {
8819:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
8820:                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
8821:     }
8822:     PetscCall(MatDestroy(&change));
8823:     PetscCall(ISDestroy(&change_primal));
8824:   }
8825:   PetscCall(MatDestroy(&S_j));

8827:   /* free adjacency */
8828:   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
8829:   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8830:   PetscFunctionReturn(PETSC_SUCCESS);
8831: }

8833: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8834: {
8835:   PC_IS      *pcis   = (PC_IS *)pc->data;
8836:   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
8837:   PCBDDCGraph graph;

8839:   PetscFunctionBegin;
8840:   /* attach interface graph for determining subsets */
8841:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8842:     IS       verticesIS, verticescomm;
8843:     PetscInt vsize, *idxs;

8845:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8846:     PetscCall(ISGetSize(verticesIS, &vsize));
8847:     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
8848:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
8849:     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
8850:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8851:     PetscCall(PCBDDCGraphCreate(&graph));
8852:     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
8853:     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
8854:     PetscCall(ISDestroy(&verticescomm));
8855:     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
8856:   } else {
8857:     graph = pcbddc->mat_graph;
8858:   }
8859:   /* print some info */
8860:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8861:     IS       vertices;
8862:     PetscInt nv, nedges, nfaces;
8863:     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
8864:     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8865:     PetscCall(ISGetSize(vertices, &nv));
8866:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8867:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
8868:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
8869:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
8870:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
8871:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8872:     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
8873:     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8874:   }

8876:   /* sub_schurs init */
8877:   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
8878:   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));

8880:   /* free graph struct */
8881:   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
8882:   PetscFunctionReturn(PETSC_SUCCESS);
8883: }

8885: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8886: static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8887: {
8888:   Mat         At;
8889:   IS          rows;
8890:   PetscInt    rst, ren;
8891:   PetscLayout rmap;

8893:   PetscFunctionBegin;
8894:   rst = ren = 0;
8895:   if (ccomm != MPI_COMM_NULL) {
8896:     PetscCall(PetscLayoutCreate(ccomm, &rmap));
8897:     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
8898:     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
8899:     PetscCall(PetscLayoutSetUp(rmap));
8900:     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
8901:   }
8902:   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
8903:   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
8904:   PetscCall(ISDestroy(&rows));

8906:   if (ccomm != MPI_COMM_NULL) {
8907:     Mat_MPIAIJ *a, *b;
8908:     IS          from, to;
8909:     Vec         gvec;
8910:     PetscInt    lsize;

8912:     PetscCall(MatCreate(ccomm, B));
8913:     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
8914:     PetscCall(MatSetType(*B, MATAIJ));
8915:     PetscCall(PetscLayoutDestroy(&((*B)->rmap)));
8916:     PetscCall(PetscLayoutSetUp((*B)->cmap));
8917:     a = (Mat_MPIAIJ *)At->data;
8918:     b = (Mat_MPIAIJ *)(*B)->data;
8919:     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
8920:     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
8921:     PetscCall(PetscObjectReference((PetscObject)a->A));
8922:     PetscCall(PetscObjectReference((PetscObject)a->B));
8923:     b->A = a->A;
8924:     b->B = a->B;

8926:     b->donotstash   = a->donotstash;
8927:     b->roworiented  = a->roworiented;
8928:     b->rowindices   = NULL;
8929:     b->rowvalues    = NULL;
8930:     b->getrowactive = PETSC_FALSE;

8932:     (*B)->rmap         = rmap;
8933:     (*B)->factortype   = A->factortype;
8934:     (*B)->assembled    = PETSC_TRUE;
8935:     (*B)->insertmode   = NOT_SET_VALUES;
8936:     (*B)->preallocated = PETSC_TRUE;

8938:     if (a->colmap) {
8939: #if defined(PETSC_USE_CTABLE)
8940:       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
8941: #else
8942:       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
8943:       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
8944: #endif
8945:     } else b->colmap = NULL;
8946:     if (a->garray) {
8947:       PetscInt len;
8948:       len = a->B->cmap->n;
8949:       PetscCall(PetscMalloc1(len + 1, &b->garray));
8950:       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
8951:     } else b->garray = NULL;

8953:     PetscCall(PetscObjectReference((PetscObject)a->lvec));
8954:     b->lvec = a->lvec;

8956:     /* cannot use VecScatterCopy */
8957:     PetscCall(VecGetLocalSize(b->lvec, &lsize));
8958:     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
8959:     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
8960:     PetscCall(MatCreateVecs(*B, &gvec, NULL));
8961:     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
8962:     PetscCall(ISDestroy(&from));
8963:     PetscCall(ISDestroy(&to));
8964:     PetscCall(VecDestroy(&gvec));
8965:   }
8966:   PetscCall(MatDestroy(&At));
8967:   PetscFunctionReturn(PETSC_SUCCESS);
8968: }