Actual source code: ex192.c

  1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n"
  2:                      "Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n"; /* usage text handed to PetscInitialize() */

  4: #include <petscmat.h>

  6: int main(int argc, char **args)
  7: { /* Sequential driver: load A, factor it in Schur complement mode, then check the vector, Schur-complement and multi-RHS solves against known random solutions */
  8:   Mat         A, RHS, C, F, X, S; /* A: system matrix, C: known solutions, RHS = A*C, F: factor, X: computed solutions, S: copy of the Schur complement */
  9:   Vec         u, x, b; /* u: saved true solution (later error/residual), x: solution, b: right-hand side */
 10:   Vec         xschur, bschur, uschur; /* same roles as x, b, u for the Schur complement system */
 11:   IS          is_schur; /* indices of the Schur complement unknowns */
 12:   PetscMPIInt size;
 13:   PetscInt    isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
 14:   PetscReal   norm, tol = PETSC_SQRT_MACHINE_EPSILON; /* errors above tol are reported; override with -tol */
 15:   PetscRandom rand;
 16:   PetscBool   data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
 17:   PetscReal   sratio = 5.1 / 12.; /* fraction of the rows placed in the Schur complement; override with -schur_ratio */
 18:   PetscViewer fd; /* binary viewer used to load A */
 19:   char        solver[256]; /* name of the solver package passed to MatGetFactor() */
 20:   char        file[PETSC_MAX_PATH_LEN]; /* input file name */

 22:   PetscFunctionBeginUser;
 23:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
 24:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
 25:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test");
 26:   /* Determine which type of solver we want to test for */
 27:   herm = PETSC_FALSE;
 28:   symm = PETSC_FALSE;
 29:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL));
 30:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL));
 31:   if (herm) symm = PETSC_TRUE; /* -hermitian_solve implies -symmetric_solve */
 32:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL));
 33:   PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));

 35:   /* Determine file from which we read the matrix A */
 36:   PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided));
 37:   if (!data_provided) { /* get matrices from PETSc distribution; the name encodes symmetry/scalar type, index width and real precision of this build */
 38:     PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file)));
 39:     if (symm) {
 40: #if defined(PETSC_USE_COMPLEX)
 41:       PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file)));
 42: #else
 43:       PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file)));
 44: #endif
 45:     } else {
 46: #if defined(PETSC_USE_COMPLEX)
 47:       PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file)));
 48: #else
 49:       PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file)));
 50: #endif
 51:     }
 52: #if defined(PETSC_USE_64BIT_INDICES)
 53:     PetscCall(PetscStrlcat(file, "int64-", sizeof(file)));
 54: #else
 55:     PetscCall(PetscStrlcat(file, "int32-", sizeof(file)));
 56: #endif
 57: #if defined(PETSC_USE_REAL_SINGLE)
 58:     PetscCall(PetscStrlcat(file, "float32", sizeof(file)));
 59: #else
 60:     PetscCall(PetscStrlcat(file, "float64", sizeof(file)));
 61: #endif
 62:   }
 63:   /* Load matrix A */
 64:   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd));
 65:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 66:   PetscCall(MatLoad(A, fd));
 67:   PetscCall(PetscViewerDestroy(&fd));
 68:   PetscCall(MatGetSize(A, &m, &n));
 69:   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n);

 71:   /* Create dense matrices C and X; C holds the true solutions (random entries, nrhs columns), X receives the computed ones */
 72:   nrhs = 2;
 73:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
 74:   PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
 75:   PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs));
 76:   PetscCall(MatSetType(C, MATDENSE));
 77:   PetscCall(MatSetFromOptions(C));
 78:   PetscCall(MatSetUp(C));

 80:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
 81:   PetscCall(PetscRandomSetFromOptions(rand));
 82:   PetscCall(MatSetRandom(C, rand));
 83:   PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X));

 85:   /* Create vectors */
 86:   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
 87:   PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
 88:   PetscCall(VecSetFromOptions(x));
 89:   PetscCall(VecDuplicate(x, &b));
 90:   PetscCall(VecDuplicate(x, &u)); /* save the true solution */

 92:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL)); /* 0: MUMPS, 1: MKL_PARDISO; any other value, or a package that is not configured, selects the PETSc solver */
 93:   switch (isolver) {
 94: #if defined(PETSC_HAVE_MUMPS)
 95:   case 0:
 96:     PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver)));
 97:     break;
 98: #endif
 99: #if defined(PETSC_HAVE_MKL_PARDISO)
100:   case 1:
101:     PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver)));
102:     break;
103: #endif
104:   default:
105:     PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
106:     break;
107:   }

109: #if defined(PETSC_USE_COMPLEX)
110:   if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
111:     PetscScalar im  = PetscSqrtScalar((PetscScalar)-1.);
112:     PetscScalar val = -1.0;
113:     val             = val + im; /* val = -1 + i, written into entry (1,0) of the bundled matrix */
114:     PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES));
115:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
116:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
117:   }
118: #endif

120:   PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL));
121:   PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio);
122:   size_schur = (PetscInt)(sratio * m); /* truncates toward zero */

124:   PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m));

126:   /* Test LU/Cholesky Factorization: Cholesky only for symmetric problems, with the exceptions below */
127:   use_lu = PETSC_FALSE;
128:   if (!symm) use_lu = PETSC_TRUE;
129: #if defined(PETSC_USE_COMPLEX)
130:   if (isolver == 1) use_lu = PETSC_TRUE; /* complex builds always use LU with -solver 1 */
131: #endif
132:   if (cuda && symm && !herm) use_lu = PETSC_TRUE; /* symmetric but not hermitian with -cuda_solve also falls back to LU */

134:   if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
135:     PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
136:     PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A));
137:   }

139:   if (use_lu) {
140:     PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F));
141:   } else {
142:     if (herm) {
143:       PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
144:     } else {
145:       PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
146:       PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE));
147:     }
148:     PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F));
149:   }
150:   PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur)); /* Schur unknowns are the trailing indices m - size_schur, ..., m - 1 */
151:   PetscCall(MatFactorSetSchurIS(F, is_schur));

153:   PetscCall(ISDestroy(&is_schur));
154:   if (use_lu) {
155:     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
156:   } else {
157:     PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
158:   }

160:   for (nfact = 0; nfact < 3; nfact++) { /* three numeric factorizations on top of the single symbolic factorization above */
161:     Mat AD; /* SEQAIJ copy of A used to build RHS */

163:     if (!nfact) { /* first pass only: add a random diagonal to A (entries made non-negative in the hermitian case) */
164:       PetscCall(VecSetRandom(x, rand));
165:       if (symm && herm) PetscCall(VecAbs(x));
166:       PetscCall(MatDiagonalSet(A, x, ADD_VALUES));
167:     }
168:     if (use_lu) {
169:       PetscCall(MatLUFactorNumeric(F, A, NULL));
170:     } else {
171:       PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
172:     }
173:     if (cuda) { /* borrow the Schur complement held by F, switch it to a CUDA dense type and create the work vectors from it */
174:       PetscCall(MatFactorGetSchurComplement(F, &S, NULL));
175:       PetscCall(MatSetType(S, MATSEQDENSECUDA));
176:       PetscCall(MatCreateVecs(S, &xschur, &bschur));
177:       PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED));
178:     }
179:     PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); /* S is destroyed at the end of every pass */
180:     if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur));
181:     PetscCall(VecDuplicate(xschur, &uschur));
182:     if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F)); /* second pass exercises the inverted Schur complement path */
183:     for (nsolve = 0; nsolve < 2; nsolve++) { /* nsolve == 0: transpose solves, nsolve == 1: regular solves */
184:       PetscCall(VecSetRandom(x, rand));
185:       PetscCall(VecCopy(x, u));

187:       if (nsolve) {
188:         PetscCall(MatMult(A, x, b));
189:         PetscCall(MatSolve(F, b, x));
190:       } else {
191:         PetscCall(MatMultTranspose(A, x, b));
192:         PetscCall(MatSolveTranspose(F, b, x));
193:       }
194:       /* Check the error */
195:       PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */
196:       PetscCall(VecNorm(u, NORM_2, &norm));
197:       if (norm > tol) {
198:         PetscReal resi;
199:         if (nsolve) {
200:           PetscCall(MatMult(A, x, u)); /* u = A*x */
201:         } else {
202:           PetscCall(MatMultTranspose(A, x, u)); /* u = A^T*x */
203:         }
204:         PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */
205:         PetscCall(VecNorm(u, NORM_2, &resi));
206:         if (nsolve) {
207:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
208:         } else {
209:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
210:         }
211:       }
212:       PetscCall(VecSetRandom(xschur, rand));
213:       PetscCall(VecCopy(xschur, uschur));
214:       if (nsolve) {
215:         PetscCall(MatMult(S, xschur, bschur));
216:         PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur));
217:       } else {
218:         PetscCall(MatMultTranspose(S, xschur, bschur));
219:         PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur));
220:       }
221:       /* Check the error */
222:       PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */
223:       PetscCall(VecNorm(uschur, NORM_2, &norm));
224:       if (norm > tol) {
225:         PetscReal resi;
226:         if (nsolve) {
227:           PetscCall(MatMult(S, xschur, uschur)); /* u = S*x */
228:         } else {
229:           PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = S^T*x */
230:         }
231:         PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */
232:         PetscCall(VecNorm(uschur, NORM_2, &resi));
233:         if (nsolve) {
234:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
235:         } else {
236:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
237:         }
238:       }
239:     }
240:     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD)); /* A may be SBAIJ at this point, so multiply with an AIJ copy */
241:     if (!nfact) {
242:       PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS)); /* RHS = A*C, created on the first pass */
243:     } else {
244:       PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS)); /* later passes refill the existing RHS */
245:     }
246:     PetscCall(MatDestroy(&AD));
247:     for (nsolve = 0; nsolve < 2; nsolve++) { /* the same dense multi-RHS solve is run twice */
248:       PetscCall(MatMatSolve(F, RHS, X));

250:       /* Check the error */
251:       PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); /* X <- X - C */
252:       PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
253:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
254: #if PetscDefined(HAVE_MUMPS)
255:       PetscCall(MatMumpsSetIcntl(F, 26, 1)); /* NOTE(review): ICNTL(26) = 1 then 2 presumably drives the MUMPS condensation and expansion steps by hand - confirm against the MUMPS manual */
256:       PetscCall(MatMatSolve(F, RHS, X));
257:       PetscCall(MatMumpsSetIcntl(F, 26, 2));
258:       PetscCall(MatMatSolve(F, RHS, X));
259:       PetscCall(MatMumpsSetIcntl(F, 26, -1)); /* set ICNTL(26) back to -1 before the next solve */

261:       /* Check the error */
262:       PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); /* X <- X - C */
263:       PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
264:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
265: #endif
266:     }
267:     if (isolver == 0) { /* sparse right-hand side variant, only run for -solver 0 */
268:       Mat spRHS, spRHST, RHST;

270:       PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST)); /* RHST = RHS^T (dense) */
271:       PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST)); /* spRHST = RHS^T stored as SEQAIJ */
272:       PetscCall(MatCreateTranspose(spRHST, &spRHS)); /* spRHS stands for spRHST^T, i.e. RHS again */
273:       for (nsolve = 0; nsolve < 2; nsolve++) {
274:         PetscCall(MatMatSolve(F, spRHS, X));

276:         /* Check the error */
277:         PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); /* X <- X - C */
278:         PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
279:         if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
280:       }
281:       PetscCall(MatDestroy(&spRHST));
282:       PetscCall(MatDestroy(&spRHS));
283:       PetscCall(MatDestroy(&RHST));
284:     }
285:     PetscCall(MatDestroy(&S)); /* per-pass objects are recreated after the next numeric factorization */
286:     PetscCall(VecDestroy(&xschur));
287:     PetscCall(VecDestroy(&bschur));
288:     PetscCall(VecDestroy(&uschur));
289:   }
290:   /* Free data structures */
291:   PetscCall(MatDestroy(&A));
292:   PetscCall(MatDestroy(&C));
293:   PetscCall(MatDestroy(&F));
294:   PetscCall(MatDestroy(&X));
295:   PetscCall(MatDestroy(&RHS));
296:   PetscCall(PetscRandomDestroy(&rand));
297:   PetscCall(VecDestroy(&x));
298:   PetscCall(VecDestroy(&b));
299:   PetscCall(VecDestroy(&u));
300:   PetscCall(PetscFinalize());
301:   return 0;
302: }

304: /*TEST

306:    testset:
307:      requires: mkl_pardiso double !complex
308:      args: -solver 1

310:      test:
311:        suffix: mkl_pardiso
312:      test:
313:        requires: cuda
314:        suffix: mkl_pardiso_cuda
315:        args: -cuda_solve
316:        output_file: output/ex192_mkl_pardiso.out
317:      test:
318:        suffix: mkl_pardiso_1
319:        args: -symmetric_solve
320:        output_file: output/ex192_mkl_pardiso_1.out
321:      test:
322:        requires: cuda
323:        suffix: mkl_pardiso_cuda_1
324:        args: -symmetric_solve -cuda_solve
325:        output_file: output/ex192_mkl_pardiso_1.out
326:      test:
327:        suffix: mkl_pardiso_3
328:        args: -symmetric_solve -hermitian_solve
329:        output_file: output/ex192_mkl_pardiso_3.out
330:      test:
331:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
332:        suffix: mkl_pardiso_cuda_3
333:        args: -symmetric_solve -hermitian_solve -cuda_solve
334:        output_file: output/ex192_mkl_pardiso_3.out

336:    testset:
337:      requires: mumps double !complex
338:      args: -solver 0

340:      test:
341:        suffix: mumps
342:      test:
343:        requires: cuda
344:        suffix: mumps_cuda
345:        args: -cuda_solve
346:        output_file: output/ex192_mumps.out
347:      test:
348:        suffix: mumps_2
349:        args: -symmetric_solve
350:        output_file: output/ex192_mumps_2.out
351:      test:
352:        requires: cuda
353:        suffix: mumps_cuda_2
354:        args: -symmetric_solve -cuda_solve
355:        output_file: output/ex192_mumps_2.out
356:      test:
357:        suffix: mumps_3
358:        args: -symmetric_solve -hermitian_solve
359:        output_file: output/ex192_mumps_3.out
360:      test:
361:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
362:        suffix: mumps_cuda_3
363:        args: -symmetric_solve -hermitian_solve -cuda_solve
364:        output_file: output/ex192_mumps_3.out

366: TEST*/