Actual source code: ex183.c
static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n"
                     "This test can only be run in parallel.\n"
                     "\n";

#include <petscmat.h>

int main(int argc, char **args)
{
  Mat          A, *submats;
  MPI_Comm     subcomm;
  PetscMPIInt  rank, size, subrank, subsize, color;
  PetscInt     m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1;
  PetscInt     nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs;
  PetscInt    *rowindices, *colindices, idx, rep;
  PetscScalar *vals;
  IS           rowis[1], colis[1];
  PetscViewer  viewer;
  PetscBool    permute_indices, flg;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat");
  m = 5;
  PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg));
  total_subdomains = size - 1;
29: PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n < comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg));
  permute_indices = PETSC_FALSE;
  PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg));
  hash = 7;
  PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg));
  rep = 2;
  PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg));
  PetscOptionsEnd();

  PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size);
  PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains);
  PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep);

  viewer = PETSC_VIEWER_STDOUT_WORLD;
  /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatGetLocalSize(A, NULL, &n));
  PetscCall(MatGetBlockSize(A, &bs));
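  /*
     Preallocate for several matrix types, since the actual type of A is chosen at runtime via
     -mat_type. This relies on the (assumed here) standard PETSc convention that a type-specific
     preallocation routine is a no-op when A is not of that type, so only the matching call
     below takes effect.
  */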
  PetscCall(MatSeqAIJSetPreallocation(A, n, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL));
  PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));
  PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));

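  /*
     Fill the matrix densely: entry (i,j) is set to 10000*i + j, so every stored value encodes
     its global row and column. Any submatrix extracted below can therefore be verified by
     inspecting the viewed output.
  */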
  PetscCall(PetscMalloc2(N, &cols, N, &vals));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (j = 0; j < N; ++j) cols[j] = j;
  for (i = rstart; i < rend; i++) {
    for (j = 0; j < N; ++j) vals[j] = i * 10000 + j;
    PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES));
  }
  PetscCall(PetscFree2(cols, vals));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n"));
  PetscCall(MatView(A, viewer));

  /*
     Create subcomms and ISs so that each rank participates in one IS.
     The IS either coalesces adjacent rank indices (contiguous),
     or selects indices by scrambling them using a hash.
  */
  k     = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */
  color = rank / k;
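  /*
     For example, with size = 6 and total_subdomains = 2 (the last test case below): k = 3, so
     ranks 0-2 receive color 0 and ranks 3-5 receive color 1, giving two subcommunicators of
     three ranks each after the split below.
  */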
  PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm));
  PetscCallMPI(MPI_Comm_size(subcomm, &subsize));
  PetscCallMPI(MPI_Comm_rank(subcomm, &subrank));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  nis = 1;
  PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices));

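  /*
     Each rank contributes its locally owned rows to its subdomain's ISs. With -permute_indices
     the row index becomes (j * hash) mod N, which is a permutation of 0..N-1 as long as hash is
     relatively prime to N (hence the requirement on -hash above); the column index is additionally
     shifted by m. Without permutation the subdomains simply coalesce contiguous ownership ranges.
  */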
  for (j = rstart; j < rend; ++j) {
    if (permute_indices) {
      idx = (j * hash);
    } else {
      idx = j;
    }
    rowindices[j - rstart] = idx % N;
    colindices[j - rstart] = (idx + m) % N;
  }
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0]));
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0]));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  PetscCall(PetscFree2(rowindices, colindices));
  /*
     Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms,
     we need to obtain the global numbers of our local objects and wait for the corresponding global
     number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains"));
  if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash));
  PetscCall(PetscViewerASCIIPrintf(viewer, ":\n"));
  PetscCall(PetscViewerFlush(viewer));

  nsubdomains = 1;
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
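  /*
     Walk the global subdomain numbers in order; whenever the current global number gs matches one
     of this rank's local subdomains, view that subdomain through a subviewer on its subcommunicator.
     The barrier keeps all ranks in step so that output appears in global order.
  */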
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer));
        PetscCall(PetscViewerASCIIPrintf(subviewer, "Row IS %" PetscInt_FMT "\n", gs));
        PetscCall(ISView(rowis[ss], subviewer));
        PetscCall(PetscViewerFlush(subviewer));
        PetscCall(PetscViewerASCIIPrintf(subviewer, "Col IS %" PetscInt_FMT "\n", gs));
        PetscCall(ISView(colis[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  nsubdomains = 1;
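  /*
     Extract one MPI submatrix per local IS pair. With MAT_INITIAL_MATRIX, MatCreateSubMatricesMPI
     allocates the submats array and creates each submats[i] on the subcommunicator of the
     corresponding index sets, so the extracted submatrix is itself distributed over that subcomm.
  */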
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats));
  /*
     Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms,
     we need to obtain the global numbers of our local objects and wait for the corresponding global
     number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
  if (rep == 1) goto cleanup;
  nsubdomains = 1;
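  /*
     Repeat the extraction with MAT_REUSE_MATRIX: the previously created submatrices are filled in
     place rather than recreated, exercising the reuse code path of MatCreateSubMatricesMPI.
  */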
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats));
  /*
     Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms,
     we need to obtain the global numbers of our local objects and wait for the corresponding global
     number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
cleanup:
  for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k));
  PetscCall(PetscFree(submats));
  for (k = 0; k < nis; ++k) {
    PetscCall(ISDestroy(rowis + k));
    PetscCall(ISDestroy(colis + k));
  }
  PetscCall(MatDestroy(&A));
  PetscCallMPI(MPI_Comm_free(&subcomm));
  PetscCall(PetscFinalize());
  return 0;
}
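
/*
   Example usage (a sketch; the exact build command depends on the local PETSc installation,
   but examples are typically built with "make ex183" in this directory with PETSC_DIR and
   PETSC_ARCH set):

     mpiexec -n 3 ./ex183 -total_subdomains 2 -permute_indices -hash 7

   This splits 3 ranks into 2 subdomains and extracts one MPI submatrix per subdomain, viewing
   the index sets and submatrices in a globally consistent order.
*/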

/*TEST

   test:
      nsize: 2
      args: -total_subdomains 1
      output_file: output/ex183_2_1.out

   test:
      suffix: 2
      nsize: 3
      args: -total_subdomains 2
      output_file: output/ex183_3_2.out

   test:
      suffix: 3
      nsize: 4
      args: -total_subdomains 2
      output_file: output/ex183_4_2.out

   test:
      suffix: 4
      nsize: 6
      args: -total_subdomains 2
      output_file: output/ex183_6_2.out

TEST*/