Actual source code: ex5.c

  1: static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\
  2: Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n";

  4: #include <petscmat.h>

  6: int main(int argc, char **args)
  7: {
  8:   Mat         C;
  9:   Vec         s, u, w, x, y, z;
 10:   PetscInt    i, j, m = 8, n, rstart, rend, vstart, vend;
 11:   PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1;
 12:   PetscReal   norm, tol = PETSC_SQRT_MACHINE_EPSILON;
 13:   PetscBool   flg;

 15:   PetscFunctionBeginUser;
 16:   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
 17:   PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON));
 18:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 19:   n = m;
 20:   PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg));
 21:   if (flg) n += 2;
 22:   PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg));
 23:   if (flg) n -= 2;

 25:   /* ---------- Assemble matrix and vectors ----------- */

 27:   PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
 28:   PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n));
 29:   PetscCall(MatSetFromOptions(C));
 30:   PetscCall(MatSetUp(C));
 31:   PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
 32:   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
 33:   PetscCall(VecSetSizes(x, PETSC_DECIDE, m));
 34:   PetscCall(VecSetFromOptions(x));
 35:   PetscCall(VecDuplicate(x, &z));
 36:   PetscCall(VecDuplicate(x, &w));
 37:   PetscCall(VecCreate(PETSC_COMM_WORLD, &y));
 38:   PetscCall(VecSetSizes(y, PETSC_DECIDE, n));
 39:   PetscCall(VecSetFromOptions(y));
 40:   PetscCall(VecDuplicate(y, &u));
 41:   PetscCall(VecDuplicate(y, &s));
 42:   PetscCall(VecGetOwnershipRange(y, &vstart, &vend));

 44:   /* Assembly */
 45:   for (i = rstart; i < rend; i++) {
 46:     v = 100 * (i + 1);
 47:     PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES));
 48:     for (j = 0; j < n; j++) {
 49:       v = 10 * (i + 1) + j + 1;
 50:       PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
 51:     }
 52:   }

 54:   /* Flush off proc Vec values and do more assembly */
 55:   PetscCall(VecAssemblyBegin(z));
 56:   for (i = vstart; i < vend; i++) {
 57:     v = one * ((PetscReal)i);
 58:     PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
 59:     v = 100.0 * i;
 60:     PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES));
 61:   }

 63:   /* Flush off proc Mat values and do more assembly */
 64:   PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY));
 65:   for (i = rstart; i < rend; i++) {
 66:     for (j = 0; j < n; j++) {
 67:       v = 10 * (i + 1) + j + 1;
 68:       PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
 69:     }
 70:   }
 71:   /* Try overlap Coomunication with the next stage XXXSetValues */
 72:   PetscCall(VecAssemblyEnd(z));

 74:   PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY));
 75:   CHKMEMQ;
 76:   /* The Assembly for the second Stage */
 77:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
 78:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
 79:   PetscCall(VecAssemblyBegin(y));
 80:   PetscCall(VecAssemblyEnd(y));
 81:   PetscCall(MatScale(C, alpha));
 82:   PetscCall(VecAssemblyBegin(u));
 83:   PetscCall(VecAssemblyEnd(u));
 84:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n"));
 85:   CHKMEMQ;
 86:   PetscCall(MatMult(C, y, x));
 87:   CHKMEMQ;
 88:   PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
 89:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n"));
 90:   PetscCall(MatMultAdd(C, y, z, w));
 91:   PetscCall(VecAXPY(x, one, z));
 92:   PetscCall(VecAXPY(x, negone, w));
 93:   PetscCall(VecNorm(x, NORM_2, &norm));
 94:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));

 96:   /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */

 98:   for (i = rstart; i < rend; i++) {
 99:     v = one * ((PetscReal)i);
100:     PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES));
101:   }
102:   PetscCall(VecAssemblyBegin(x));
103:   PetscCall(VecAssemblyEnd(x));
104:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n"));
105:   PetscCall(MatMultTranspose(C, x, y));
106:   PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));

108:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n"));
109:   PetscCall(MatMultTransposeAdd(C, x, u, s));
110:   PetscCall(VecAXPY(y, one, u));
111:   PetscCall(VecAXPY(y, negone, s));
112:   PetscCall(VecNorm(y, NORM_2, &norm));
113:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));

115:   /* -------------------- Test MatGetDiagonal() ------------------ */

117:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n"));
118:   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
119:   PetscCall(VecSet(x, one));
120:   PetscCall(MatGetDiagonal(C, x));
121:   PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
122:   for (i = vstart; i < vend; i++) {
123:     v = one * ((PetscReal)(i + 1));
124:     PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
125:   }

127:   /* -------------------- Test () MatDiagonalScale ------------------ */
128:   PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg));
129:   if (flg) {
130:     PetscCall(MatDiagonalScale(C, x, y));
131:     PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
132:   }
133:   /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */
134:   PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg));
135:   if (flg) {
136:     Mat D;
137:     PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D));
138:     PetscCall(MatZeroEntries(D));
139:     PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD));
140:     PetscCall(MatDestroy(&D));
141:   }
142:   /* Free data structures */
143:   PetscCall(VecDestroy(&u));
144:   PetscCall(VecDestroy(&s));
145:   PetscCall(VecDestroy(&w));
146:   PetscCall(VecDestroy(&x));
147:   PetscCall(VecDestroy(&y));
148:   PetscCall(VecDestroy(&z));
149:   PetscCall(MatDestroy(&C));

151:   PetscCall(PetscFinalize());
152:   return 0;
153: }

155: /*TEST

157:    test:
158:       suffix: 11_A
159:       args: -mat_type seqaij -rectA
160:       filter: grep -v type

162:    test:
163:       args: -mat_type seqdense -rectA
164:       suffix: 12_A

166:    test:
167:       args: -mat_type seqaij -rectB
168:       suffix: 11_B
169:       filter: grep -v type

171:    test:
172:       args: -mat_type seqdense -rectB
173:       suffix: 12_B

175:    test:
176:       suffix: 21
177:       args: -mat_type mpiaij
178:       filter: grep -v type

180:    test:
181:       suffix: 22
182:       args: -mat_type mpidense

184:    test:
185:       suffix: 23
186:       nsize: 3
187:       args: -mat_type mpiaij
188:       filter: grep -v type

190:    test:
191:       suffix: 24
192:       nsize: 3
193:       args: -mat_type mpidense

195:    test:
196:       suffix: 2_aijcusparse_1
197:       args: -mat_type mpiaijcusparse -vec_type cuda
198:       filter: grep -v type
199:       output_file: output/ex5_21.out
200:       requires: cuda

202:    test:
203:       nsize: 3
204:       suffix: 2_aijcusparse_2
205:       filter: grep -v type
206:       args: -mat_type mpiaijcusparse -vec_type cuda
207:       args: -sf_type {{basic neighbor}}
208:       output_file: output/ex5_23.out
209:       requires: cuda

211:    test:
212:       nsize: 3
213:       suffix: 2_aijcusparse_3
214:       filter: grep -v type
215:       args: -mat_type mpiaijcusparse -vec_type cuda
216:       args: -sf_type {{basic neighbor}}
217:       output_file: output/ex5_23.out
218:       requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)

220:    test:
221:       suffix: 31
222:       args: -mat_type mpiaij -test_diagonalscale
223:       filter: grep -v type

225:    test:
226:       suffix: 32
227:       args: -mat_type mpibaij -test_diagonalscale

229:    test:
230:       suffix: 33
231:       nsize: 3
232:       args: -mat_type mpiaij -test_diagonalscale
233:       filter: grep -v type

235:    test:
236:       suffix: 34
237:       nsize: 3
238:       args: -mat_type mpibaij -test_diagonalscale

240:    test:
241:       suffix: 3_aijcusparse_1
242:       args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
243:       filter: grep -v type
244:       output_file: output/ex5_31.out
245:       requires: cuda

247:    test:
248:       suffix: 3_aijcusparse_2
249:       nsize: 3
250:       args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
251:       filter: grep -v type
252:       output_file: output/ex5_33.out
253:       requires: cuda

255:    test:
256:       suffix: 3_kokkos
257:       nsize: 3
258:       args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale
259:       filter: grep -v type
260:       output_file: output/ex5_33.out
261:       requires: kokkos_kernels

263:    test:
264:       suffix: aijcusparse_1
265:       args: -mat_type seqaijcusparse -vec_type cuda -rectA
266:       filter: grep -v type
267:       output_file: output/ex5_11_A.out
268:       requires: cuda

270:    test:
271:       suffix: aijcusparse_2
272:       args: -mat_type seqaijcusparse -vec_type cuda -rectB
273:       filter: grep -v type
274:       output_file: output/ex5_11_B.out
275:       requires: cuda

277:    test:
278:       suffix: sell_1
279:       args: -mat_type sell -mat_sell_slice_height 8
280:       output_file: output/ex5_41.out

282:    test:
283:       suffix: sell_2
284:       nsize: 3
285:       args: -mat_type sell -mat_sell_slice_height 8
286:       output_file: output/ex5_43.out

288:    test:
289:       suffix: sell_3
290:       args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
291:       output_file: output/ex5_51.out

293:    test:
294:       suffix: sell_4
295:       nsize: 3
296:       args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
297:       output_file: output/ex5_53.out

299:    test:
300:       suffix: sell_5
301:       nsize: 3
302:       args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries
303:       output_file: output/ex5_55.out
304:       requires: cuda !complex

306:    test:
307:       suffix: sell_6
308:       nsize: 3
309:       args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}}
310:       output_file: output/ex5_56.out
311:       requires: cuda !complex

313:    test:
314:       suffix: sell_7
315:       args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}}
316:       output_file: output/ex5_57.out
317:       requires: cuda !complex !single
318: TEST*/