Actual source code: ex60.c
static char help[] = "Working out corner cases of the ASM preconditioner.\n\n";

#include <petscksp.h>
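
/*
  Solves a linear system built from a five-point-stencil-style matrix with the
  additive Schwarz method (ASM) preconditioner, exercising corner cases such
  as processes that own no rows and user-defined subdomains.
*/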

int main(int argc, char **args)
{
  KSP         ksp;
  PC          pc;
  Mat         A;
  Vec         u, x, b;
  PetscReal   error;
  PetscMPIInt rank, size, sized;
  PetscInt    M = 8, N = 8, m, n, rstart, rend, r;
  PetscBool   userSubdomains = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-M", &M, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-N", &N, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-user_subdomains", &userSubdomains, NULL));
  /* Do the parallel decomposition: the processes form a sized x sized logical grid */
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  sized = (PetscMPIInt)PetscSqrtReal((PetscReal)size);
  PetscCheck(PetscSqr(sized) == size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "This test may only be run on a number of processes which is a perfect square, not %d", (int)size);
  PetscCheck((M % sized) == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of x-vertices %" PetscInt_FMT " is not divisible by the number of x-processes %d", M, (int)sized);
  PetscCheck((N % sized) == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of y-vertices %" PetscInt_FMT " is not divisible by the number of y-processes %d", N, (int)sized);
  /* Assemble the matrix for the five-point stencil, YET AGAIN.
     Every other process will be empty: odd ranks get no rows, and each even
     rank takes on a double share to compensate. */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  m = (sized > 1) ? ((rank % 2) ? 0 : 2 * M / sized) : M;
  n = N / sized;
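  /* Each process contributes m*n of the M*N global rows */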
  PetscCall(MatSetSizes(A, m * n, m * n, M * N, M * N));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
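  /* Global row r corresponds to vertex (i, j) of the M x N grid, with r = i*N + j */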
  for (r = rstart; r < rend; ++r) {
    const PetscScalar diag = 4.0, offdiag = -1.0;
    const PetscInt    i = r / N;
    const PetscInt    j = r - i * N;
    PetscInt          c;

    if (i > 0) { /* coupling in the negative x-direction */
      c = r - n;
      PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
    }
    if (i < M - 1) { /* coupling in the positive x-direction */
      c = r + n;
      PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
    }
    if (j > 0) { /* coupling in the negative y-direction */
      c = r - 1;
      PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
    }
    if (j < N - 1) { /* coupling in the positive y-direction */
      c = r + 1;
      PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
    }
    PetscCall(MatSetValues(A, 1, &r, 1, &r, &diag, INSERT_VALUES)); /* diagonal entry */
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* Set up the solve: manufacture an exact solution u = 1 and the matching right-hand side b = A*u */
  PetscCall(MatCreateVecs(A, &x, &b));
  PetscCall(VecDuplicate(x, &u));
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, A));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCASM));
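  /* By default PCASM creates one subdomain per process; -user_subdomains
     exercises the PCASMSetLocalSubdomains() path below instead */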
  /* Set up the ASM subdomains by hand */
  if (userSubdomains) {
    IS        is;
    PetscInt *rows;

    /* Use no overlap for now: each process defines a single subdomain
       consisting exactly of its locally owned rows */
    PetscCall(PetscMalloc1(rend - rstart, &rows));
    for (r = rstart; r < rend; ++r) rows[r - rstart] = r;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rend - rstart, rows, PETSC_OWN_POINTER, &is));
    PetscCall(PCASMSetLocalSubdomains(pc, 1, &is, &is));
    PetscCall(ISDestroy(&is));
  }
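  /* KSPSetFromOptions() lets command line options override the settings made above */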
  PetscCall(KSPSetFromOptions(ksp));
  /* Solve and compare against the exact solution */
  PetscCall(KSPSolve(ksp, b, x));
  PetscCall(VecAXPY(x, -1.0, u)); /* x now holds the error x - u */
  PetscCall(VecNorm(x, NORM_INFINITY, &error));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Infinity norm of the error: %g\n", (double)error));
  /* Cleanup */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(MatDestroy(&A));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(PetscFinalize());
  return 0;
}
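
/*
  Example usage, matching the "suffix: 2" tests below:

    mpiexec -n 4 ./ex60 -user_subdomains -ksp_view
*/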

/*TEST

  test:
    suffix: 0
    args: -ksp_view

  test:
    requires: kokkos_kernels
    suffix: 0_kokkos
    args: -ksp_view -mat_type aijkokkos

  test:
    requires: cuda
    suffix: 0_cuda
    args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

  test:
    suffix: 1
    nsize: 4
    args: -ksp_view

  test:
    requires: kokkos_kernels
    suffix: 1_kokkos
    nsize: 4
    args: -ksp_view -mat_type aijkokkos

  test:
    requires: cuda
    suffix: 1_cuda
    nsize: 4
    args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

  test:
    suffix: 2
    nsize: 4
    args: -user_subdomains -ksp_view

  test:
    requires: kokkos_kernels
    suffix: 2_kokkos
    nsize: 4
    args: -user_subdomains -ksp_view -mat_type aijkokkos

  test:
    requires: cuda
    suffix: 2_cuda
    nsize: 4
    args: -user_subdomains -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

TEST*/