Actual source code: ex158.c
1: static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";
3: /*
4: Usage:
5: mpiexec -n <np> ./ex158 -use_FFTW_interface NO
6: mpiexec -n <np> ./ex158 -use_FFTW_interface YES
7: */
9: #include <petscmat.h>
10: #include <fftw3-mpi.h>
12: int main(int argc, char **args)
13: {
14: PetscMPIInt rank, size;
15: PetscInt N0 = 50, N1 = 20, N = N0 * N1;
16: PetscRandom rdm;
17: PetscScalar a;
18: PetscReal enorm;
19: Vec x, y, z;
20: PetscBool view = PETSC_FALSE, use_interface = PETSC_TRUE;
22: PetscFunctionBeginUser;
23: PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
24: #if defined(PETSC_USE_COMPLEX)
25: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
26: #endif
28: PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");
29: PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL));
30: PetscOptionsEnd();
32: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
33: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
35: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
36: PetscCall(PetscRandomSetFromOptions(rdm));
38: if (!use_interface) {
39: /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
40: /*---------------------------------------------------------*/
41: fftw_plan fplan, bplan;
42: fftw_complex *data_in, *data_out, *data_out2;
43: ptrdiff_t alloc_local, local_n0, local_0_start;
45: if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
46: fftw_mpi_init();
47: N = N0 * N1;
48: alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start);
50: data_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
51: data_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
52: data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
54: PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x));
55: PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector"));
56: PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y));
57: PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
58: PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z));
59: PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
61: fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE);
62: bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);
64: PetscCall(VecSetRandom(x, rdm));
65: if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
67: fftw_execute(fplan);
68: if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
70: fftw_execute(bplan);
72: /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
73: a = 1.0 / (PetscReal)N;
74: PetscCall(VecScale(z, a));
75: if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
76: PetscCall(VecAXPY(z, -1.0, x));
77: PetscCall(VecNorm(z, NORM_1, &enorm));
78: if (enorm > 1.e-11) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %g\n", (double)enorm));
80: /* Free spaces */
81: fftw_destroy_plan(fplan);
82: fftw_destroy_plan(bplan);
83: fftw_free(data_in);
84: PetscCall(VecDestroy(&x));
85: fftw_free(data_out);
86: PetscCall(VecDestroy(&y));
87: fftw_free(data_out2);
88: PetscCall(VecDestroy(&z));
90: } else {
91: /* Use PETSc-FFTW interface */
92: /*-------------------------------------------*/
93: PetscInt i, *dim, k, DIM;
94: Mat A;
95: Vec input, output;
97: N = 30;
98: for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
99: DIM = i;
100: PetscCall(PetscMalloc1(i, &dim));
101: for (k = 0; k < i; k++) dim[k] = 30;
102: N *= dim[i - 1];
104: /* Create FFTW object */
105: if (rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N));
106: PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A));
108: /* Create FFTW vectors that are compatible with parallel layout of A */
109: PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
110: PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector"));
111: PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
112: PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));
114: /* Create and set PETSc vector */
115: PetscCall(VecCreate(PETSC_COMM_WORLD, &input));
116: PetscCall(VecSetSizes(input, PETSC_DECIDE, N));
117: PetscCall(VecSetFromOptions(input));
118: PetscCall(VecSetRandom(input, rdm));
119: PetscCall(VecDuplicate(input, &output));
120: if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD));
122: /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
123: can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
124: data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
125: layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
126: at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */
127: PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */
129: /* Apply FFTW_FORWARD and FFTW_BACKWARD */
130: PetscCall(MatMult(A, x, y));
131: if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
132: PetscCall(MatMultTranspose(A, y, z));
134: /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
135: performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
136: the extra spaces that were artificially padded to perform real parallel transform. */
137: PetscCall(VecScatterFFTWToPetsc(A, z, output));
139: /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
140: a = 1.0 / (PetscReal)N;
141: PetscCall(VecScale(output, a));
142: if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD));
143: PetscCall(VecAXPY(output, -1.0, input));
144: PetscCall(VecNorm(output, NORM_1, &enorm));
145: if (enorm > 1.e-09 && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %e\n", enorm));
147: /* Free spaces */
148: PetscCall(PetscFree(dim));
149: PetscCall(VecDestroy(&input));
150: PetscCall(VecDestroy(&output));
151: PetscCall(VecDestroy(&x));
152: PetscCall(VecDestroy(&y));
153: PetscCall(VecDestroy(&z));
154: PetscCall(MatDestroy(&A));
155: }
156: }
157: PetscCall(PetscRandomDestroy(&rdm));
158: PetscCall(PetscFinalize());
159: return 0;
160: }
162: /*TEST
164: build:
165: requires: !mpiuni fftw !complex
167: test:
168: output_file: output/ex158.out
170: test:
171: suffix: 2
172: nsize: 3
174: TEST*/