Actual source code: redistribute.c


  2: /*
  3:   This file defines a preconditioner that load-balances (redistributes) the matrix rows across the processes, removes the rows that contain only a diagonal entry, and solves the reduced system with an inner KSP.
  4: */
  5: #include <petsc/private/pcimpl.h>
  6: #include <petscksp.h>

  8: typedef struct {
  9:   KSP         ksp;
 10:   Vec         x,b;
 11:   VecScatter  scatter;
 12:   IS          is;
 13:   PetscInt    dcnt,*drows;    /* these are the local rows that have only diagonal entry */
 14:   PetscScalar *diag;
 15:   Vec         work;
 16: } PC_Redistribute;

 18: static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
 19: {
 20:   PC_Redistribute *red = (PC_Redistribute*)pc->data;
 21:   PetscBool       iascii,isstring;
 22:   PetscInt        ncnt,N;

 24:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 25:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
 26:   if (iascii) {
 27:     MPIU_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
 28:     MatGetSize(pc->pmat,&N,NULL);
 29:     PetscViewerASCIIPrintf(viewer,"    Number of rows eliminated %D, percentage of rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));
 30:     PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner: \n");
 31:     KSPView(red->ksp,viewer);
 32:   } else if (isstring) {
 33:     PetscViewerStringSPrintf(viewer," Redistribute preconditioner");
 34:     KSPView(red->ksp,viewer);
 35:   }
 36:   return 0;
 37: }

 39: static PetscErrorCode PCSetUp_Redistribute(PC pc)
 40: {
 41:   PC_Redistribute          *red = (PC_Redistribute*)pc->data;
 42:   MPI_Comm                 comm;
 43:   PetscInt                 rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
 44:   PetscLayout              map,nmap;
 45:   PetscMPIInt              size,tag,n;
 46:   PETSC_UNUSED PetscMPIInt imdex;
 47:   PetscInt                 *source = NULL;
 48:   PetscMPIInt              *sizes = NULL,nrecvs;
 49:   PetscInt                 j,nsends;
 50:   PetscInt                 *owner = NULL,*starts = NULL,count,slen;
 51:   PetscInt                 *rvalues,*svalues,recvtotal;
 52:   PetscMPIInt              *onodes1,*olengths1;
 53:   MPI_Request              *send_waits = NULL,*recv_waits = NULL;
 54:   MPI_Status               recv_status,*send_status;
 55:   Vec                      tvec,diag;
 56:   Mat                      tmat;
 57:   const PetscScalar        *d,*values;
 58:   const PetscInt           *cols;

 60:   if (pc->setupcalled) {
 61:     KSPGetOperators(red->ksp,NULL,&tmat);
 62:     MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);
 63:     KSPSetOperators(red->ksp,tmat,tmat);
 64:   } else {
 65:     PetscInt NN;

 67:     PetscObjectGetComm((PetscObject)pc,&comm);
 68:     MPI_Comm_size(comm,&size);
 69:     PetscObjectGetNewTag((PetscObject)pc,&tag);

 71:     /* count non-diagonal rows on process */
 72:     MatGetOwnershipRange(pc->mat,&rstart,&rend);
 73:     cnt  = 0;
 74:     for (i=rstart; i<rend; i++) {
 75:       MatGetRow(pc->mat,i,&nz,&cols,&values);
 76:       for (PetscInt j=0; j<nz; j++) {
 77:         if (values[j] != 0 && cols[j] != i) {
 78:           cnt++;
 79:           break;
 80:         }
 81:       }
 82:       MatRestoreRow(pc->mat,i,&nz,&cols,&values);
 83:     }
 84:     PetscMalloc1(cnt,&rows);
 85:     PetscMalloc1(rend - rstart - cnt,&drows);

 87:     /* list non-diagonal rows on process */
 88:     cnt = 0; dcnt = 0;
 89:     for (i=rstart; i<rend; i++) {
 90:       PetscBool diagonly = PETSC_TRUE;
 91:       MatGetRow(pc->mat,i,&nz,&cols,&values);
 92:       for (PetscInt j=0; j<nz; j++) {
 93:         if (values[j] != 0 && cols[j] != i) {
 94:           diagonly = PETSC_FALSE;
 95:           break;
 96:         }
 97:       }
 98:       if (!diagonly) rows[cnt++] = i;
 99:       else drows[dcnt++] = i - rstart;
100:       MatRestoreRow(pc->mat,i,&nz,&cols,&values);
101:     }

103:     /* create PetscLayout for non-diagonal rows on each process */
104:     PetscLayoutCreate(comm,&map);
105:     PetscLayoutSetLocalSize(map,cnt);
106:     PetscLayoutSetBlockSize(map,1);
107:     PetscLayoutSetUp(map);
108:     rstart = map->rstart;
109:     rend   = map->rend;

111:     /* create PetscLayout for load-balanced non-diagonal rows on each process */
112:     PetscLayoutCreate(comm,&nmap);
113:     MPIU_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);
114:     PetscLayoutSetSize(nmap,ncnt);
115:     PetscLayoutSetBlockSize(nmap,1);
116:     PetscLayoutSetUp(nmap);

118:     MatGetSize(pc->pmat,&NN,NULL);
 119:     PetscInfo(pc,"Number of diagonal rows eliminated %D, percentage eliminated %g\n",NN-ncnt,100.0*((PetscReal)(NN-ncnt))/((PetscReal)(NN)));

121:     if (size > 1) {
 122:       /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-uni, hence the size 1 case must be handled separately */
123:       /*
124:        this code is taken from VecScatterCreate_PtoS()
125:        Determines what rows need to be moved where to
126:        load balance the non-diagonal rows
127:        */
128:       /*  count number of contributors to each processor */
129:       PetscMalloc2(size,&sizes,cnt,&owner);
130:       PetscArrayzero(sizes,size);
131:       j      = 0;
132:       nsends = 0;
133:       for (i=rstart; i<rend; i++) {
134:         if (i < nmap->range[j]) j = 0;
135:         for (; j<size; j++) {
136:           if (i < nmap->range[j+1]) {
137:             if (!sizes[j]++) nsends++;
138:             owner[i-rstart] = j;
139:             break;
140:           }
141:         }
142:       }
 143:       /* inform other processors of number of messages and max length */
144:       PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs);
145:       PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1);
146:       PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
147:       recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

 149:       /* post receives:  rvalues - rows I will own; count - running offset into rvalues */
150:       PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
151:       count = 0;
152:       for (i=0; i<nrecvs; i++) {
153:         MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
154:         count += olengths1[i];
155:       }

 157:       /* do sends:
 158:        starts[i] gives the starting index in svalues for the rows going to
 159:        the ith processor
 160:        */
161:       PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts);
162:       starts[0] = 0;
163:       for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
164:       for (i=0; i<cnt; i++)  svalues[starts[owner[i]]++] = rows[i];
165:       for (i=0; i<cnt; i++)  rows[i] = rows[i] - rstart;
166:       red->drows = drows;
167:       red->dcnt  = dcnt;
168:       PetscFree(rows);

170:       starts[0] = 0;
171:       for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
172:       count = 0;
173:       for (i=0; i<size; i++) {
174:         if (sizes[i]) {
175:           MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++);
176:         }
177:       }

179:       /*  wait on receives */
180:       count = nrecvs;
181:       slen  = 0;
182:       while (count) {
183:         MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
184:         /* unpack receives into our local space */
185:         MPI_Get_count(&recv_status,MPIU_INT,&n);
186:         slen += n;
187:         count--;
188:       }
190:       ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);

192:       /* free all work space */
193:       PetscFree(olengths1);
194:       PetscFree(onodes1);
195:       PetscFree3(rvalues,source,recv_waits);
196:       PetscFree2(sizes,owner);
197:       if (nsends) {   /* wait on sends */
198:         PetscMalloc1(nsends,&send_status);
199:         MPI_Waitall(nsends,send_waits,send_status);
200:         PetscFree(send_status);
201:       }
202:       PetscFree3(svalues,send_waits,starts);
203:     } else {
204:       ISCreateGeneral(comm,cnt,rows,PETSC_OWN_POINTER,&red->is);
205:       red->drows = drows;
206:       red->dcnt  = dcnt;
207:       slen = cnt;
208:     }
209:     PetscLayoutDestroy(&map);
210:     PetscLayoutDestroy(&nmap);

212:     VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);
213:     VecDuplicate(red->b,&red->x);
214:     MatCreateVecs(pc->pmat,&tvec,NULL);
215:     VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter);
216:     VecDestroy(&tvec);
217:     MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);
218:     KSPSetOperators(red->ksp,tmat,tmat);
219:     MatDestroy(&tmat);
220:   }

222:   /* get diagonal portion of matrix */
223:   PetscFree(red->diag);
224:   PetscMalloc1(red->dcnt,&red->diag);
225:   MatCreateVecs(pc->pmat,&diag,NULL);
226:   MatGetDiagonal(pc->pmat,diag);
227:   VecGetArrayRead(diag,&d);
228:   for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
229:   VecRestoreArrayRead(diag,&d);
230:   VecDestroy(&diag);
231:   KSPSetUp(red->ksp);
232:   return 0;
233: }

235: static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
236: {
237:   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
238:   PetscInt          dcnt   = red->dcnt,i;
239:   const PetscInt    *drows = red->drows;
240:   PetscScalar       *xwork;
241:   const PetscScalar *bwork,*diag = red->diag;

243:   if (!red->work) {
244:     VecDuplicate(b,&red->work);
245:   }
246:   /* compute the rows of solution that have diagonal entries only */
247:   VecSet(x,0.0);         /* x = diag(A)^{-1} b */
248:   VecGetArray(x,&xwork);
249:   VecGetArrayRead(b,&bwork);
250:   for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
251:   PetscLogFlops(dcnt);
 252:   VecRestoreArray(x,&xwork);
253:   VecRestoreArrayRead(b,&bwork);
254:   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
255:   MatMult(pc->pmat,x,red->work);
256:   VecAYPX(red->work,-1.0,b);   /* red->work = b - A x */

258:   VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
259:   VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
260:   KSPSolve(red->ksp,red->b,red->x);
261:   KSPCheckSolve(red->ksp,pc,red->x);
262:   VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
263:   VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
264:   return 0;
265: }
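/*
   An annotation added here for reference (not part of the upstream PETSc file): a sketch of
   the algebra implemented by PCApply_Redistribute() above. Let D be the set of rows that
   contain only a diagonal entry and R the remaining rows (those indexed by red->is). Since
   the rows in D have no off-diagonal entries, A_DR = 0, and the solve splits exactly into

      x_D = D^{-1} b_D                    (the loop over drows; red->diag[i] stores 1/A_ii)
      A_RR x_R = b_R - A_RD x_D           (the reduced system solved by red->ksp)

   where red->work = b - A x holds the right-hand side of the second equation, and the
   VecScatter moves its R entries into red->b and the reduced solution red->x back into x.
*/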

267: static PetscErrorCode PCDestroy_Redistribute(PC pc)
268: {
269:   PC_Redistribute *red = (PC_Redistribute*)pc->data;

271:   VecScatterDestroy(&red->scatter);
272:   ISDestroy(&red->is);
273:   VecDestroy(&red->b);
274:   VecDestroy(&red->x);
275:   KSPDestroy(&red->ksp);
276:   VecDestroy(&red->work);
277:   PetscFree(red->drows);
278:   PetscFree(red->diag);
279:   PetscFree(pc->data);
280:   return 0;
281: }

283: static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptionItems *PetscOptionsObject,PC pc)
284: {
285:   PC_Redistribute *red = (PC_Redistribute*)pc->data;

287:   KSPSetFromOptions(red->ksp);
288:   return 0;
289: }

291: /*@
 292:    PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE preconditioner

294:    Not Collective

296:    Input Parameter:
297: .  pc - the preconditioner context

299:    Output Parameter:
300: .  innerksp - the inner KSP

302:    Level: advanced

304: @*/
305: PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
306: {
307:   PC_Redistribute *red = (PC_Redistribute*)pc->data;

311:   *innerksp = red->ksp;
312:   return 0;
313: }
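/*
   A usage sketch added for illustration (not part of the upstream PETSc file): configuring
   the inner solver through PCRedistributeGetKSP() instead of the -redistribute_ prefixed
   options. The KSPCG type and the tolerance are illustrative choices, not defaults; pc is
   assumed to already be of type PCREDISTRIBUTE.

     KSP inner;
     PCRedistributeGetKSP(pc,&inner);
     KSPSetType(inner,KSPCG);
     KSPSetTolerances(inner,1.e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
*/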

315: /* -------------------------------------------------------------------------------------*/
316: /*MC
 317:      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removes the rows that have only a diagonal entry, and then applies a KSP to the reduced matrix

 319:      Options for the inner KSP and PC of the redistribute preconditioner can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>

321:      Notes:
 322:     Usually this is run with -ksp_type preonly so that the entire solve is performed by the inner, redistributed KSP

 324:      If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem, then you can use, for example, -ksp_type preonly
325:      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

327:      This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.

329:      Developer Notes:
330:     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

332:    Level: intermediate

334: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
335: M*/
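/*
   A usage sketch added for illustration (not part of the upstream PETSc file): a minimal
   programmatic setup mirroring the command-line usage described in the notes above. The
   parallel Mat A and Vecs b, x are assumed to exist already; the outer KSPPREONLY solve
   simply applies this preconditioner, which performs the actual solve on the redistributed
   system (its inner KSP and PC are then selected with -redistribute_ksp_type,
   -redistribute_pc_type, and so on).

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPSetType(ksp,KSPPREONLY);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCREDISTRIBUTE);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
     KSPDestroy(&ksp);
*/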

337: PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
338: {
339:   PC_Redistribute *red;
340:   const char      *prefix;

342:   PetscNewLog(pc,&red);
343:   pc->data = (void*)red;

345:   pc->ops->apply          = PCApply_Redistribute;
346:   pc->ops->applytranspose = NULL;
347:   pc->ops->setup          = PCSetUp_Redistribute;
348:   pc->ops->destroy        = PCDestroy_Redistribute;
349:   pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
350:   pc->ops->view           = PCView_Redistribute;

352:   KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp);
353:   KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure);
354:   PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);
355:   PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp);
356:   PCGetOptionsPrefix(pc,&prefix);
357:   KSPSetOptionsPrefix(red->ksp,prefix);
358:   KSPAppendOptionsPrefix(red->ksp,"redistribute_");
359:   return 0;
360: }