Actual source code: ex302k.kokkos.cxx
1: static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";
3: #include <petscvec_kokkos.hpp>
4: #include <petscdevice.h>
5: #include <petscmat.h>
6: #include <petscmat_kokkos.hpp>
7: #include <Kokkos_Core.hpp>
8: #include <Kokkos_DualView.hpp>
10: using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;
12: int main(int argc, char **argv)
13: {
14: Mat A, B;
15: PetscInt i, j, column, M, N, m, n, m_ab, n_ab;
16: PetscInt *di, *dj, *oi, *oj, nd;
17: const PetscInt *garray;
18: PetscInt *garray_h;
19: PetscScalar *oa, *da;
20: PetscScalar value;
21: PetscRandom rctx;
22: PetscBool equal, done;
23: Mat AA, AB;
24: PetscMPIInt size, rank;
26: // ~~~~~~~~~~~~~~~~~~~~~
27: // This test shows the routines needed to build a kokkos matrix without preallocation
28: // on the host
29: // ~~~~~~~~~~~~~~~~~~~~~
31: PetscFunctionBeginUser;
32: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
33: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
34: PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
35: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
37: /* Create a mpiaij matrix for checking */
38: PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
39: PetscCall(MatSetFromOptions(A));
40: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
41: PetscCall(MatSetUp(A));
42: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
43: PetscCall(PetscRandomSetFromOptions(rctx));
45: for (i = 5 * rank; i < 5 * rank + 5; i++) {
46: for (j = 0; j < 5 * size; j++) {
47: PetscCall(PetscRandomGetValue(rctx, &value));
48: column = (PetscInt)(5 * size * PetscRealPart(value));
49: PetscCall(PetscRandomGetValue(rctx, &value));
50: PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
51: }
52: }
53: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
54: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
56: PetscCall(MatGetSize(A, &M, &N));
57: PetscCall(MatGetLocalSize(A, &m, &n));
58: PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
59: PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
60: PetscCall(MatSeqAIJGetArray(AA, &da));
61: PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
62: PetscCall(MatSeqAIJGetArray(AB, &oa));
63: PetscCall(MatGetSize(AB, &m_ab, &n_ab));
65: Mat output_mat_local, output_mat_nonlocal;
66: // Be careful about scope given the kokkos memory reference counts
67: {
68: // Local
69: Kokkos::View<PetscScalar *> a_local_d;
70: Kokkos::View<PetscInt *> i_local_d;
71: Kokkos::View<PetscInt *> j_local_d;
73: // Nonlocal
74: Kokkos::View<PetscScalar *> a_nonlocal_d;
75: Kokkos::View<PetscInt *> i_nonlocal_d;
76: Kokkos::View<PetscInt *> j_nonlocal_d;
78: // Create device memory
79: PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
80: PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
81: PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));
83: // Create non-local device memory
84: PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
85: PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
86: PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));
88: // ~~~~~~~~~~~~~~~~~~~~~
89: // Could fill the aij on the device - we're just going to test
90: // by copying in the existing host values
91: // ~~~~~~~~~~~~~~~~~~~~~
92: Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
93: Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_local_h;
94: Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_local_h;
95: Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
96: Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_nonlocal_h;
97: Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_nonlocal_h;
99: PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
100: PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
101: PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
102: PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
103: PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
104: PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));
106: // Haven't specified an exec space so these will all be synchronous
107: // and finish without a need to call fence after
108: PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
109: PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
110: PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
111: PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
112: PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
113: PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));
115: // The garray passed in has to be on the host, but it can be created
116: // on device and copied to the host
117: // We're just going to copy the existing host values here
118: PetscCall(PetscMalloc1(n_ab, &garray_h));
119: for (int i = 0; i < n_ab; i++) { garray_h[i] = garray[i]; }
121: // ~~~~~~~~~~~~~~~~~~~~~
123: // ~~~~~~~~~~~~~~~~~
124: // Test MatCreateSeqAIJKokkosWithKokkosViews
125: // ~~~~~~~~~~~~~~~~~
127: // We can create our local diagonal block matrix directly on the device
128: PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));
130: // We can create our nonlocal diagonal block matrix directly on the device
131: PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));
133: // Build our MPI matrix
134: // If we provide garray and output_mat_nonlocal with local indices and the compactified size
135: // almost nothing happens on the host
136: PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));
138: PetscCall(MatEqual(A, B, &equal));
139: PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
140: PetscCall(MatSeqAIJRestoreArray(AA, &da));
141: PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
142: PetscCall(MatSeqAIJRestoreArray(AB, &oa));
144: PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
145: PetscCall(MatDestroy(&B));
147: // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
149: /* Free spaces */
150: PetscCall(PetscRandomDestroy(&rctx));
151: PetscCall(MatDestroy(&A));
152: }
153: PetscCall(PetscFinalize());
154: return 0;
155: }
157: /*TEST
158: build:
159: requires: kokkos_kernels
161: test:
162: nsize: 2
163: args: -mat_type aijkokkos
164: requires: kokkos_kernels
165: output_file: output/empty.out
167: TEST*/