Actual source code: ex1.c

  1: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  2:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  3:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  5: #include <petscmat.h>

  7: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  8: {
  9:   PetscRandom rand;
 10:   Mat         mat, RHS, SOLU;
 11:   PetscInt    rstart, rend;
 12:   PetscInt    cstart, cend;
 13:   PetscScalar value = 1.0;
 14:   Vec         x, y, b;

 16:   PetscFunctionBegin;
 17:   /* create multiple vectors RHS and SOLU */
 18:   PetscCall(MatCreate(PETSC_COMM_WORLD, &RHS));
 19:   PetscCall(MatSetSizes(RHS, PETSC_DECIDE, PETSC_DECIDE, m, nrhs));
 20:   PetscCall(MatSetType(RHS, MATDENSE));
 21:   PetscCall(MatSetOptionsPrefix(RHS, "rhs_"));
 22:   PetscCall(MatSetFromOptions(RHS));
 23:   PetscCall(MatSeqDenseSetPreallocation(RHS, NULL));

 25:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
 26:   PetscCall(PetscRandomSetFromOptions(rand));
 27:   PetscCall(MatSetRandom(RHS, rand));

 29:   if (m == n) {
 30:     PetscCall(MatDuplicate(RHS, MAT_DO_NOT_COPY_VALUES, &SOLU));
 31:   } else {
 32:     PetscCall(MatCreate(PETSC_COMM_WORLD, &SOLU));
 33:     PetscCall(MatSetSizes(SOLU, PETSC_DECIDE, PETSC_DECIDE, n, nrhs));
 34:     PetscCall(MatSetType(SOLU, MATDENSE));
 35:     PetscCall(MatSeqDenseSetPreallocation(SOLU, NULL));
 36:   }
 37:   PetscCall(MatSetRandom(SOLU, rand));

 39:   /* create matrix */
 40:   PetscCall(MatCreate(PETSC_COMM_WORLD, &mat));
 41:   PetscCall(MatSetSizes(mat, PETSC_DECIDE, PETSC_DECIDE, m, n));
 42:   PetscCall(MatSetType(mat, MATDENSE));
 43:   PetscCall(MatSetFromOptions(mat));
 44:   PetscCall(MatSetUp(mat));
 45:   PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
 46:   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
 47:   if (!full) {
 48:     for (PetscInt i = rstart; i < rend; i++) {
 49:       if (m == n) {
 50:         value = (PetscReal)i + 1;
 51:         PetscCall(MatSetValues(mat, 1, &i, 1, &i, &value, INSERT_VALUES));
 52:       } else {
 53:         for (PetscInt j = cstart; j < cend; j++) {
 54:           value = ((PetscScalar)i + 1.) / (PetscSqr(i - j) + 1.);
 55:           PetscCall(MatSetValues(mat, 1, &i, 1, &j, &value, INSERT_VALUES));
 56:         }
 57:       }
 58:     }
 59:     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
 60:     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
 61:   } else {
 62:     PetscCall(MatSetRandom(mat, rand));
 63:     if (m == n) {
 64:       Mat T;

 66:       PetscCall(MatMatTransposeMult(mat, mat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &T));
 67:       PetscCall(MatDestroy(&mat));
 68:       mat = T;
 69:     }
 70:   }

 72:   /* create single vectors */
 73:   PetscCall(MatCreateVecs(mat, &x, &b));
 74:   PetscCall(VecDuplicate(x, &y));
 75:   PetscCall(VecSet(x, value));
 76:   PetscCall(PetscRandomDestroy(&rand));
 77:   *_mat  = mat;
 78:   *_RHS  = RHS;
 79:   *_SOLU = SOLU;
 80:   *_x    = x;
 81:   *_y    = y;
 82:   *_b    = b;
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }

 86: int main(int argc, char **argv)
 87: {
 88:   Mat         mat, F, RHS, SOLU;
 89:   MatInfo     info;
 90:   PetscInt    m = 15, n = 10, i, j, nrhs = 2;
 91:   Vec         x, y, b, ytmp;
 92:   IS          perm;
 93:   PetscReal   norm, tol = PETSC_SMALL;
 94:   PetscMPIInt size;
 95:   char        solver[64];
 96:   PetscBool   inplace, full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

 98:   PetscFunctionBeginUser;
 99:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
100:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
101:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only!");
102:   PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
103:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
104:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
105:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
106:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-ldl", &ldl, NULL));
107:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-qr", &qr, NULL));
108:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-full", &full, NULL));
109:   PetscCall(PetscOptionsGetString(NULL, NULL, "-solver_type", solver, sizeof(solver), NULL));

111:   PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
112:   PetscCall(VecDuplicate(y, &ytmp));

114:   /* Only SeqDense* support in-place factorizations and NULL permutations */
115:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQDENSE, &inplace));
116:   PetscCall(MatGetLocalSize(mat, &i, NULL));
117:   PetscCall(MatGetOwnershipRange(mat, &j, NULL));
118:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, i, j, 1, &perm));

120:   PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
121:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", (PetscInt)info.nz_used, (PetscInt)info.nz_allocated));
122:   PetscCall(MatMult(mat, x, b));

124:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
125:   /* in-place Cholesky */
126:   if (inplace) {
127:     Mat RHS2;

129:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
130:     if (!ldl) PetscCall(MatSetOption(F, MAT_SPD, PETSC_TRUE));
131:     PetscCall(MatCholeskyFactor(F, perm, 0));
132:     PetscCall(MatSolve(F, b, y));
133:     PetscCall(VecAXPY(y, -1.0, x));
134:     PetscCall(VecNorm(y, NORM_2, &norm));
135:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place Cholesky %g\n", (double)norm));

137:     PetscCall(MatMatSolve(F, RHS, SOLU));
138:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2));
139:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
140:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
141:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n", (double)norm));
142:     PetscCall(MatDestroy(&F));
143:     PetscCall(MatDestroy(&RHS2));
144:   }

146:   /* out-of-place Cholesky */
147:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_CHOLESKY, &F));
148:   if (!ldl) PetscCall(MatSetOption(F, MAT_SPD, PETSC_TRUE));
149:   PetscCall(MatCholeskyFactorSymbolic(F, mat, perm, 0));
150:   PetscCall(MatCholeskyFactorNumeric(F, mat, 0));
151:   PetscCall(MatSolve(F, b, y));
152:   PetscCall(VecAXPY(y, -1.0, x));
153:   PetscCall(VecNorm(y, NORM_2, &norm));
154:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place Cholesky %g\n", (double)norm));
155:   PetscCall(MatDestroy(&F));

157:   /* LU factorization - perms and factinfo are ignored by LAPACK */
158:   i = n - 1;
159:   PetscCall(MatZeroRows(mat, 1, &i, -1.0, NULL, NULL));
160:   PetscCall(MatMult(mat, x, b));

162:   /* in-place LU */
163:   if (inplace) {
164:     Mat RHS2;

166:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
167:     PetscCall(MatLUFactor(F, perm, perm, 0));
168:     PetscCall(MatSolve(F, b, y));
169:     PetscCall(VecAXPY(y, -1.0, x));
170:     PetscCall(VecNorm(y, NORM_2, &norm));
171:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place LU %g\n", (double)norm));
172:     PetscCall(MatMatSolve(F, RHS, SOLU));
173:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2));
174:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
175:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
176:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place LU (MatMatSolve) %g\n", (double)norm));
177:     PetscCall(MatDestroy(&F));
178:     PetscCall(MatDestroy(&RHS2));
179:   }

181:   /* out-of-place LU */
182:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_LU, &F));
183:   PetscCall(MatLUFactorSymbolic(F, mat, perm, perm, 0));
184:   PetscCall(MatLUFactorNumeric(F, mat, 0));
185:   PetscCall(MatSolve(F, b, y));
186:   PetscCall(VecAXPY(y, -1.0, x));
187:   PetscCall(VecNorm(y, NORM_2, &norm));
188:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place LU %g\n", (double)norm));

190:   /* free space */
191:   PetscCall(ISDestroy(&perm));
192:   PetscCall(MatDestroy(&F));
193:   PetscCall(MatDestroy(&mat));
194:   PetscCall(MatDestroy(&RHS));
195:   PetscCall(MatDestroy(&SOLU));
196:   PetscCall(VecDestroy(&x));
197:   PetscCall(VecDestroy(&b));
198:   PetscCall(VecDestroy(&y));
199:   PetscCall(VecDestroy(&ytmp));

201:   if (qr) {
202:     /* setup rectangular */
203:     PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
204:     PetscCall(VecDuplicate(y, &ytmp));

206:     /* QR factorization - perms and factinfo are ignored by LAPACK */
207:     PetscCall(MatMult(mat, x, b));

209:     /* in-place QR */
210:     if (inplace) {
211:       Mat SOLU2;

213:       PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
214:       PetscCall(MatQRFactor(F, NULL, 0));
215:       PetscCall(MatSolve(F, b, y));
216:       PetscCall(VecAXPY(y, -1.0, x));
217:       PetscCall(VecNorm(y, NORM_2, &norm));
218:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place QR %g\n", (double)norm));
219:       PetscCall(MatMatMult(mat, SOLU, MAT_REUSE_MATRIX, PETSC_DEFAULT, &RHS));
220:       PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2));
221:       PetscCall(MatMatSolve(F, RHS, SOLU2));
222:       PetscCall(MatAXPY(SOLU2, -1.0, SOLU, SAME_NONZERO_PATTERN));
223:       PetscCall(MatNorm(SOLU2, NORM_FROBENIUS, &norm));
224:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of error for in-place QR (MatMatSolve) %g\n", (double)norm));
225:       PetscCall(MatDestroy(&F));
226:       PetscCall(MatDestroy(&SOLU2));
227:     }

229:     /* out-of-place QR */
230:     PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_QR, &F));
231:     PetscCall(MatQRFactorSymbolic(F, mat, NULL, NULL));
232:     PetscCall(MatQRFactorNumeric(F, mat, NULL));
233:     PetscCall(MatSolve(F, b, y));
234:     PetscCall(VecAXPY(y, -1.0, x));
235:     PetscCall(VecNorm(y, NORM_2, &norm));
236:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));

238:     if (m == n) {
239:       /* out-of-place MatSolveTranspose */
240:       PetscCall(MatMultTranspose(mat, x, b));
241:       PetscCall(MatSolveTranspose(F, b, y));
242:       PetscCall(VecAXPY(y, -1.0, x));
243:       PetscCall(VecNorm(y, NORM_2, &norm));
244:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));
245:     }

247:     /* free space */
248:     PetscCall(MatDestroy(&F));
249:     PetscCall(MatDestroy(&mat));
250:     PetscCall(MatDestroy(&RHS));
251:     PetscCall(MatDestroy(&SOLU));
252:     PetscCall(VecDestroy(&x));
253:     PetscCall(VecDestroy(&b));
254:     PetscCall(VecDestroy(&y));
255:     PetscCall(VecDestroy(&ytmp));
256:   }
257:   PetscCall(PetscFinalize());
258:   return 0;
259: }

261: /*TEST

263:    test:

265:    test:
266:      requires: cuda
267:      suffix: seqdensecuda
268:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}}
269:      output_file: output/ex1_1.out

271:    test:
272:      requires: cuda
273:      suffix: seqdensecuda_2
274:      args: -ldl 0 -solver_type cuda
275:      output_file: output/ex1_1.out

277:    test:
278:      requires: cuda
279:      suffix: seqdensecuda_seqaijcusparse
280:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
281:      output_file: output/ex1_2.out

283:    test:
284:      requires: cuda viennacl
285:      suffix: seqdensecuda_seqaijviennacl
286:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
287:      output_file: output/ex1_2.out

289:    test:
290:      suffix: 4
291:      args: -m 10 -n 10
292:      output_file: output/ex1_1.out

294: TEST*/