Actual source code: ex13.c
static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
We solve the Poisson problem in a rectangular domain\n\
using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";
#include <petscdmplex.h>
#include <petscsnes.h>
#include <petscds.h>
#include <petscconvest.h>
#if defined(PETSC_HAVE_AMGX)
#include <amgx_c.h>
#endif
typedef struct {
  PetscInt  nit;    /* Number of benchmark iterations */
  PetscBool strong; /* Do not integrate the Laplacian by parts */
} AppCtx;
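/* Exact solution u(x) = sum_d sin(2 pi x_d), used for the default weak-form problem */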
static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  PetscInt d;
  *u = 0.0;
  for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
  return PETSC_SUCCESS;
}
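/* f0 residual term: the forcing -4 pi^2 sum_d sin(2 pi x_d) that makes trig_u the exact solution */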
static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
}
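/* f1 residual term: grad u, which carries the Laplacian integrated by parts */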
static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f1[d] = u_x[d];
}
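/* g3 Jacobian term: the identity tensor pairing grad(trial) with grad(test) */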
static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
}
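/* Exact solution u(x) = x_0^2 + x_1^2, used for the strong-form (-strong) variant */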
static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  *u = PetscSqr(x[0]) + PetscSqr(x[1]);
  return PETSC_SUCCESS;
}
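/* Strong-form residual -Laplacian(u) + 4, reading second derivatives from the jet (the "strong" test below enables them with -dm_ds_jet_degree 2) */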
static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
  f0[0] += 4.0;
}
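/* Read the benchmark options (-benchmark_it, -strong) from the command line */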
static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
{
  PetscFunctionBeginUser;
  options->nit    = 10;
  options->strong = PETSC_FALSE;
  PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
  PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
  PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
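/* Build the DMPLEX mesh; its shape, size, and refinement come entirely from -dm_plex_* and -dm_refine options */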
static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
{
  PetscFunctionBeginUser;
  PetscCall(DMCreate(comm, dm));
  PetscCall(DMSetType(*dm, DMPLEX));
  PetscCall(DMSetFromOptions(*dm));
  PetscCall(DMSetApplicationContext(*dm, user));
  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
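/* Attach the pointwise residual/Jacobian functions, the exact solution, and a Dirichlet condition on the "wall" boundary (label "marker") */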
static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
{
  PetscDS        ds;
  DMLabel        label;
  const PetscInt id = 1;

  PetscFunctionBeginUser;
  PetscCall(DMGetDS(dm, &ds));
  PetscCall(DMGetLabel(dm, "marker", &label));
  if (user->strong) {
    PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
    PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
  } else {
    PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
    PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
    PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
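/* Create the finite element from options (e.g. -potential_petscspace_degree), set it as field 0, and copy the discretization to any coarse DMs */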
static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
{
  DM             cdm = dm;
  PetscFE        fe;
  DMPolytopeType ct;
  PetscBool      simplex;
  PetscInt       dim, cStart;
  char           prefix[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
  PetscCall(DMPlexGetCellType(dm, cStart, &ct));
  /* A cell is a simplex when it has exactly dim+1 vertices */
  simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE;
  /* Create finite element */
  PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
  PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
  PetscCall(PetscObjectSetName((PetscObject)fe, name));
  /* Set discretization and boundary conditions for each mesh */
  PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
  PetscCall(DMCreateDS(dm));
  PetscCall((*setup)(dm, user));
  while (cdm) {
    PetscCall(DMCopyDisc(dm, cdm));
    /* TODO: Check whether the boundary of coarse meshes is marked */
    PetscCall(DMGetCoarseDM(cdm, &cdm));
  }
  PetscCall(PetscFEDestroy(&fe));
  PetscFunctionReturn(PETSC_SUCCESS);
}
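/* Assemble and solve the Poisson problem once (timed), then re-solve it -benchmark_it times inside the "Solve only" log stage */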
int main(int argc, char **argv)
{
  DM             dm;   /* Problem specification */
  SNES           snes; /* Nonlinear solver */
  Vec            u;    /* Solution */
  AppCtx         user; /* User-defined work context */
  PetscLogDouble time;
  Mat            Amat;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
  /* Create the system: solver, mesh, discretization, and solution vector */
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
  PetscCall(SNESSetDM(snes, dm));
  PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
  PetscCall(DMCreateGlobalVector(dm, &u));
  {
    PetscInt N;

    PetscCall(VecGetSize(u, &N));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N));
  }
  PetscCall(SNESSetFromOptions(snes));
  PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
  PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user));
  PetscCall(DMSNESCheckFromOptions(snes, u));
  /* Time the first setup and solve */
  PetscCall(PetscTime(&time));
  PetscCall(SNESSetUp(snes));
#if defined(PETSC_HAVE_AMGX)
  /* When the AmgX preconditioner is selected, its resource handle can be retrieved and shared here */
  KSP                   ksp;
  PC                    pc;
  PetscBool             flg;
  AMGX_resources_handle rsc;

  PetscCall(SNESGetKSP(snes, &ksp));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
  if (flg) {
    PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
    /* do ... with resource */
  }
#endif
  /* Mark the Jacobian as symmetric positive definite so the solvers can exploit it */
  PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
  PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
  PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
  PetscCall(SNESSolve(snes, NULL, u));
  PetscCall(PetscTimeSubtract(&time));
  /* Benchmark system: repeat the solve nit times in its own log stage */
  if (user.nit) {
    Vec           b;
    PetscInt      i;
    PetscLogStage kspstage;

    PetscCall(PetscLogStageRegister("Solve only", &kspstage));
    PetscCall(PetscLogStagePush(kspstage));
    PetscCall(SNESGetSolution(snes, &u));
    PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
    for (i = 0; i < user.nit; i++) {
      PetscCall(VecZeroEntries(u));
      PetscCall(SNESSolve(snes, NULL, u));
    }
    PetscCall(PetscLogStagePop());
  }
  PetscCall(SNESGetSolution(snes, &u));
  PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
  /* Cleanup */
  PetscCall(VecDestroy(&u));
  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&dm));
  PetscCall(PetscFinalize());
  return 0;
}
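/* The PETSc test harness specifications below also serve as example command lines for running the benchmark */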
/*TEST

  test:
    suffix: strong
    requires: triangle
    args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong -pc_type jacobi

  test:
    suffix: bench
    nsize: 4
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \
          -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \
          -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \
          -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \
          -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \
          -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \
          -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \
          -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true

  testset:
    nsize: 4
    output_file: output/ex13_comparison.out
    args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \
          -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \
          -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \
          -ksp_norm_type unpreconditioned -ksp_converged_reason
    test:
      suffix: comparison
    test:
      suffix: cuda
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: kokkos
      requires: sycl kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: aijmkl_comp
      requires: mkl_sparse
      args: -dm_mat_type aijmkl

  test:
    suffix: aijmkl_seq
    nsize: 1
    requires: mkl_sparse
    TODO: broken (INDEFINITE PC)
    args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \
          -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \
          -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \
          -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard

  testset:
    requires: cuda amgx
    filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
    output_file: output/ex13_amgx.out
    args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
          -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
    nsize: 4
    test:
      suffix: amgx
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: amgx_cpu
      args: -dm_mat_type aij

TEST*/