Actual source code: ex192.c
/* Usage/help text for this example; main() passes it to PetscInitialize(). */
1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";
4: #include <petscmat.h>
/*
  main - Uniprocessor driver for the Schur-complement solve tests.

  Loads a square matrix A, requests a factor F from the package selected with -solver
  (0: MUMPS, 1: MKL_PARDISO, when PETSc was configured with them; any other case falls
  back to MATSOLVERPETSC), declares the last size_schur = (PetscInt)(schur_ratio * m)
  indices as the Schur complement block, and then performs three numeric factorizations
  on top of a single symbolic factorization. After each factorization it checks

    - MatSolve() / MatSolveTranspose() against a random exact solution,
    - MatFactorSolveSchurComplement() / ...Transpose() against a random exact solution,
    - MatMatSolve() with the dense right-hand side RHS = A*C,
    - (solver 0 only) MatMatSolve() with a sparse right-hand side.

  A diagnostic line is printed only when an error norm exceeds -tol, so a passing run
  prints just the "Solving with ..." summary line.

  Options read: -symmetric_solve, -hermitian_solve (implies -symmetric_solve),
  -cuda_solve, -tol, -f, -nrhs, -solver, -schur_ratio.

  Returns 0 on success; PetscCall() returns the PETSc error code on the first failure.
*/
6: int main(int argc, char **args)
7: {
8: Mat A, RHS, C, F, X, S; /* A: system matrix, F: factor, S: Schur complement, C: exact solutions, RHS = A*C, X: computed solutions */
9: Vec u, x, b;
10: Vec xschur, bschur, uschur; /* work vectors for the Schur complement solves, recreated on every factorization pass */
11: IS is_schur;
12: PetscMPIInt size;
13: PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
14: PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
15: PetscRandom rand;
16: PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
17: PetscReal sratio = 5.1 / 12.; /* default fraction of the rows placed in the Schur complement */
18: PetscViewer fd; /* viewer */
19: char solver[256];
20: char file[PETSC_MAX_PATH_LEN]; /* input file name */
22: PetscFunctionBeginUser;
23: PetscCall(PetscInitialize(&argc, &args, NULL, help));
24: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
25: PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test");
26: /* Determine which type of solver we want to test for */
27: herm = PETSC_FALSE;
28: symm = PETSC_FALSE;
29: PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL));
30: PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL));
31: if (herm) symm = PETSC_TRUE; /* a hermitian solve is always treated as a symmetric one as well */
32: PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL));
33: PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));
35: /* Determine file from which we read the matrix A */
36: PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided));
37: if (!data_provided) { /* get matrices from PETSc distribution */
/* The default file name is assembled from the build configuration:
   <spd|hpd|ns|nh>-<real|complex>-<int32|int64>-<float32|float64> */
38: PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file)));
39: if (symm) {
40: #if defined(PETSC_USE_COMPLEX)
41: PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file)));
42: #else
43: PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file)));
44: #endif
45: } else {
46: #if defined(PETSC_USE_COMPLEX)
47: PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file)));
48: #else
49: PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file)));
50: #endif
51: }
52: #if defined(PETSC_USE_64BIT_INDICES)
53: PetscCall(PetscStrlcat(file, "int64-", sizeof(file)));
54: #else
55: PetscCall(PetscStrlcat(file, "int32-", sizeof(file)));
56: #endif
57: #if defined(PETSC_USE_REAL_SINGLE)
58: PetscCall(PetscStrlcat(file, "float32", sizeof(file)));
59: #else
60: PetscCall(PetscStrlcat(file, "float64", sizeof(file)));
61: #endif
62: }
63: /* Load matrix A */
64: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd));
65: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
66: PetscCall(MatLoad(A, fd));
67: PetscCall(PetscViewerDestroy(&fd));
68: PetscCall(MatGetSize(A, &m, &n));
69: PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n);
71: /* Create dense matrix C and X; C holds true solution with identical columns */
72: nrhs = 2;
73: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
74: PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
75: PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs));
76: PetscCall(MatSetType(C, MATDENSE));
77: PetscCall(MatSetFromOptions(C));
78: PetscCall(MatSetUp(C));
80: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
81: PetscCall(PetscRandomSetFromOptions(rand));
82: PetscCall(MatSetRandom(C, rand));
83: PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X)); /* X gets the layout of C but not its values */
85: /* Create vectors */
86: PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
87: PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
88: PetscCall(VecSetFromOptions(x));
89: PetscCall(VecDuplicate(x, &b));
90: PetscCall(VecDuplicate(x, &u)); /* save the true solution */
/* Map the integer -solver option to a solver package name; a case is only compiled in
   when PETSc was configured with that package, otherwise the default branch is taken */
92: PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL));
93: switch (isolver) {
94: #if defined(PETSC_HAVE_MUMPS)
95: case 0:
96: PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver)));
97: break;
98: #endif
99: #if defined(PETSC_HAVE_MKL_PARDISO)
100: case 1:
101: PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver)));
102: break;
103: #endif
104: default:
105: PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
106: break;
107: }
109: #if defined(PETSC_USE_COMPLEX)
110: if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
111: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
112: PetscScalar val = -1.0;
113: val = val + im; /* val = -1 + i, written into entry (1,0) of the default matrix */
114: PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES));
115: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
116: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
117: }
118: #endif
120: PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL));
121: PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio);
122: size_schur = (PetscInt)(sratio * m); /* the cast truncates toward zero */
124: PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m));
126: /* Test LU/Cholesky Factorization */
/* LU is used for nonsymmetric problems, for solver 1 in complex builds, and for
   symmetric-but-not-hermitian problems when -cuda_solve is set; Cholesky otherwise */
127: use_lu = PETSC_FALSE;
128: if (!symm) use_lu = PETSC_TRUE;
129: #if defined(PETSC_USE_COMPLEX)
130: if (isolver == 1) use_lu = PETSC_TRUE;
131: #endif
132: if (cuda && symm && !herm) use_lu = PETSC_TRUE;
134: if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
135: PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
136: PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A));
137: }
139: if (use_lu) {
140: PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F));
141: } else {
142: if (herm) {
143: PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
144: } else {
145: PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
146: PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE));
147: }
148: PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F));
149: }
/* The Schur complement block is the last size_schur indices: m - size_schur, ..., m - 1 */
150: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur));
151: PetscCall(MatFactorSetSchurIS(F, is_schur));
153: PetscCall(ISDestroy(&is_schur));
/* The symbolic factorization is done once; the loop below repeats only the numeric phase */
154: if (use_lu) {
155: PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
156: } else {
157: PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
158: }
160: for (nfact = 0; nfact < 3; nfact++) {
161: Mat AD;
163: if (!nfact) { /* first pass only: add a random diagonal to A (made nonnegative in the hermitian case) */
164: PetscCall(VecSetRandom(x, rand));
165: if (symm && herm) PetscCall(VecAbs(x));
166: PetscCall(MatDiagonalSet(A, x, ADD_VALUES));
167: }
168: if (use_lu) {
169: PetscCall(MatLUFactorNumeric(F, A, NULL));
170: } else {
171: PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
172: }
173: if (cuda) { /* switch the factor's own Schur complement to a CUDA dense type and take the work vectors from it */
174: PetscCall(MatFactorGetSchurComplement(F, &S, NULL));
175: PetscCall(MatSetType(S, MATSEQDENSECUDA));
176: PetscCall(MatCreateVecs(S, &xschur, &bschur));
177: PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED));
178: }
179: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); /* this S is used for the reference products below and destroyed at the end of the pass */
180: if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur));
181: PetscCall(VecDuplicate(xschur, &uschur));
182: if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F)); /* second pass: also exercise the explicitly inverted Schur complement */
183: for (nsolve = 0; nsolve < 2; nsolve++) { /* nsolve == 0: transpose solves, nsolve == 1: regular solves */
184: PetscCall(VecSetRandom(x, rand));
185: PetscCall(VecCopy(x, u));
187: if (nsolve) {
188: PetscCall(MatMult(A, x, b));
189: PetscCall(MatSolve(F, b, x));
190: } else {
191: PetscCall(MatMultTranspose(A, x, b));
192: PetscCall(MatSolveTranspose(F, b, x));
193: }
194: /* Check the error */
195: PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */
196: PetscCall(VecNorm(u, NORM_2, &norm));
197: if (norm > tol) {
198: PetscReal resi;
199: if (nsolve) {
200: PetscCall(MatMult(A, x, u)); /* u = A*x */
201: } else {
202: PetscCall(MatMultTranspose(A, x, u)); /* u = A^T*x */
203: }
204: PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */
205: PetscCall(VecNorm(u, NORM_2, &resi));
206: if (nsolve) {
207: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
208: } else {
209: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
210: }
211: }
/* Same check for the Schur complement: S provides the reference product, F performs the solve */
212: PetscCall(VecSetRandom(xschur, rand));
213: PetscCall(VecCopy(xschur, uschur));
214: if (nsolve) {
215: PetscCall(MatMult(S, xschur, bschur));
216: PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur));
217: } else {
218: PetscCall(MatMultTranspose(S, xschur, bschur));
219: PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur));
220: }
221: /* Check the error */
222: PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */
223: PetscCall(VecNorm(uschur, NORM_2, &norm));
224: if (norm > tol) {
225: PetscReal resi;
226: if (nsolve) {
227: PetscCall(MatMult(S, xschur, uschur)); /* u = S*x */
228: } else {
229: PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = S^T*x */
230: }
231: PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */
232: PetscCall(VecNorm(uschur, NORM_2, &resi));
233: if (nsolve) {
234: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
235: } else {
236: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
237: }
238: }
239: }
/* Dense right-hand sides RHS = A*C: RHS is created on the first pass and reused afterwards.
   A is first converted to SEQAIJ into the temporary AD, which is destroyed right after the product */
240: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD));
241: if (!nfact) {
242: PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS));
243: } else {
244: PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS));
245: }
246: PetscCall(MatDestroy(&AD));
247: for (nsolve = 0; nsolve < 2; nsolve++) {
248: PetscCall(MatMatSolve(F, RHS, X));
250: /* Check the error */
251: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); /* X <- X - C */
252: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
253: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
254: #if PetscDefined(HAVE_MUMPS)
/* NOTE(review): ICNTL(26) = 1 then 2 presumably drives MUMPS' Schur condensation and
   expansion steps by hand, and -1 hands control back to PETSc - confirm against the
   MUMPS manual. This section is compiled whenever MUMPS is available, whatever -solver is */
255: PetscCall(MatMumpsSetIcntl(F, 26, 1));
256: PetscCall(MatMatSolve(F, RHS, X));
257: PetscCall(MatMumpsSetIcntl(F, 26, 2));
258: PetscCall(MatMatSolve(F, RHS, X));
259: PetscCall(MatMumpsSetIcntl(F, 26, -1));
261: /* Check the error */
262: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
263: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
264: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
265: #endif
266: }
267: if (isolver == 0) { /* sparse right-hand side test, run only for solver 0 */
268: Mat spRHS, spRHST, RHST;
/* spRHS is the implicit transpose (MatCreateTranspose) of the SEQAIJ copy of RHS^T */
270: PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST));
271: PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST));
272: PetscCall(MatCreateTranspose(spRHST, &spRHS));
273: for (nsolve = 0; nsolve < 2; nsolve++) {
274: PetscCall(MatMatSolve(F, spRHS, X));
276: /* Check the error */
277: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
278: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
279: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
280: }
281: PetscCall(MatDestroy(&spRHST));
282: PetscCall(MatDestroy(&spRHS));
283: PetscCall(MatDestroy(&RHST));
284: }
/* Per-pass objects; they are created again on the next factorization pass */
285: PetscCall(MatDestroy(&S));
286: PetscCall(VecDestroy(&xschur));
287: PetscCall(VecDestroy(&bschur));
288: PetscCall(VecDestroy(&uschur));
289: }
290: /* Free data structures */
291: PetscCall(MatDestroy(&A));
292: PetscCall(MatDestroy(&C));
293: PetscCall(MatDestroy(&F));
294: PetscCall(MatDestroy(&X));
295: PetscCall(MatDestroy(&RHS));
296: PetscCall(PetscRandomDestroy(&rand));
297: PetscCall(VecDestroy(&x));
298: PetscCall(VecDestroy(&b));
299: PetscCall(VecDestroy(&u));
300: PetscCall(PetscFinalize());
301: return 0;
302: }
304: /*TEST
306: testset:
307: requires: mkl_pardiso double !complex
308: args: -solver 1
310: test:
311: suffix: mkl_pardiso
312: test:
313: requires: cuda
314: suffix: mkl_pardiso_cuda
315: args: -cuda_solve
316: output_file: output/ex192_mkl_pardiso.out
317: test:
318: suffix: mkl_pardiso_1
319: args: -symmetric_solve
320: output_file: output/ex192_mkl_pardiso_1.out
321: test:
322: requires: cuda
323: suffix: mkl_pardiso_cuda_1
324: args: -symmetric_solve -cuda_solve
325: output_file: output/ex192_mkl_pardiso_1.out
326: test:
327: suffix: mkl_pardiso_3
328: args: -symmetric_solve -hermitian_solve
329: output_file: output/ex192_mkl_pardiso_3.out
330: test:
331: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
332: suffix: mkl_pardiso_cuda_3
333: args: -symmetric_solve -hermitian_solve -cuda_solve
334: output_file: output/ex192_mkl_pardiso_3.out
336: testset:
337: requires: mumps double !complex
338: args: -solver 0
340: test:
341: suffix: mumps
342: test:
343: requires: cuda
344: suffix: mumps_cuda
345: args: -cuda_solve
346: output_file: output/ex192_mumps.out
347: test:
348: suffix: mumps_2
349: args: -symmetric_solve
350: output_file: output/ex192_mumps_2.out
351: test:
352: requires: cuda
353: suffix: mumps_cuda_2
354: args: -symmetric_solve -cuda_solve
355: output_file: output/ex192_mumps_2.out
356: test:
357: suffix: mumps_3
358: args: -symmetric_solve -hermitian_solve
359: output_file: output/ex192_mumps_3.out
360: test:
361: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
362: suffix: mumps_cuda_3
363: args: -symmetric_solve -hermitian_solve -cuda_solve
364: output_file: output/ex192_mumps_3.out
366: TEST*/