Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
11: {
12: PetscSF rankssf;
13: const PetscSFNode *iremote;
14: PetscSFNode *viremote, *rremotes;
15: const PetscInt *ilocal;
16: PetscInt *vilocal = NULL, *ldrs;
17: const PetscMPIInt *ranks;
18: PetscMPIInt *sranks;
19: PetscInt nranks, nr, nl, vnr, vnl, i, v, j, maxl;
20: MPI_Comm comm;
22: PetscFunctionBegin;
25: PetscAssertPointer(vsf, 5);
26: if (nv == 1) {
27: PetscCall(PetscObjectReference((PetscObject)sf));
28: *vsf = sf;
29: PetscFunctionReturn(PETSC_SUCCESS);
30: }
31: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
32: PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
33: PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
34: maxl += 1;
35: if (ldl == PETSC_DECIDE) ldl = maxl;
36: if (ldr == PETSC_DECIDE) ldr = nr;
37: PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldr, nr);
38: PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldl, maxl);
39: vnr = nr * nv;
40: vnl = nl * nv;
41: PetscCall(PetscMalloc1(vnl, &viremote));
42: if (ilocal) PetscCall(PetscMalloc1(vnl, &vilocal));
44: /* TODO: Should this special SF be available, e.g.
45: PetscSFGetRanksSF or similar? */
46: PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
47: PetscCall(PetscMalloc1(nranks, &sranks));
48: PetscCall(PetscArraycpy(sranks, ranks, nranks));
49: PetscCall(PetscSortMPIInt(nranks, sranks));
50: PetscCall(PetscMalloc1(nranks, &rremotes));
51: for (i = 0; i < nranks; i++) {
52: rremotes[i].rank = sranks[i];
53: rremotes[i].index = 0;
54: }
55: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf));
56: PetscCall(PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
57: PetscCall(PetscMalloc1(nranks, &ldrs));
58: PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
59: PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
60: PetscCall(PetscSFDestroy(&rankssf));
62: j = -1;
63: for (i = 0; i < nl; i++) {
64: const PetscInt r = iremote[i].rank;
65: const PetscInt ii = iremote[i].index;
67: if (j < 0 || sranks[j] != r) PetscCall(PetscFindMPIInt(r, nranks, sranks, &j));
68: PetscCheck(j >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
69: for (v = 0; v < nv; v++) {
70: viremote[v * nl + i].rank = r;
71: viremote[v * nl + i].index = v * ldrs[j] + ii;
72: if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i];
73: }
74: }
75: PetscCall(PetscFree(sranks));
76: PetscCall(PetscFree(ldrs));
77: PetscCall(PetscSFCreate(comm, vsf));
78: PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
79: PetscFunctionReturn(PETSC_SUCCESS);
80: }
82: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf)
83: {
84: PetscSF asf;
86: PetscFunctionBegin;
89: PetscAssertPointer(osf, 3);
90: PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf));
91: if (!asf) {
92: PetscInt lda;
94: PetscCall(MatDenseGetLDA(A, &lda));
95: PetscCall(PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
96: PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf));
97: PetscCall(PetscObjectDereference((PetscObject)asf));
98: }
99: *osf = asf;
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: #if defined(PETSC_HAVE_CUDA)
104: struct SignVector_Functor {
105: const PetscScalar *v;
106: PetscScalar *s;
107: SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
109: __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
110: };
111: #endif
113: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
114: {
115: const PetscScalar *av;
116: PetscScalar *as;
117: PetscInt i, n;
118: #if defined(PETSC_HAVE_CUDA)
119: PetscBool viscuda, siscuda;
120: #endif
122: PetscFunctionBegin;
125: PetscCall(VecGetLocalSize(s, &n));
126: PetscCall(VecGetLocalSize(v, &i));
127: PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
128: #if defined(PETSC_HAVE_CUDA)
129: PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
130: PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
131: viscuda = (PetscBool)(viscuda && !v->boundtocpu);
132: siscuda = (PetscBool)(siscuda && !s->boundtocpu);
133: if (viscuda && siscuda) {
134: PetscCall(VecCUDAGetArrayRead(v, &av));
135: PetscCall(VecCUDAGetArrayWrite(s, &as));
136: SignVector_Functor sign_vector(av, as);
137: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
138: PetscCall(VecCUDARestoreArrayWrite(s, &as));
139: PetscCall(VecCUDARestoreArrayRead(v, &av));
140: } else
141: #endif
142: {
143: PetscCall(VecGetArrayRead(v, &av));
144: PetscCall(VecGetArrayWrite(s, &as));
145: for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
146: PetscCall(VecRestoreArrayWrite(s, &as));
147: PetscCall(VecRestoreArrayRead(v, &av));
148: }
149: PetscFunctionReturn(PETSC_SUCCESS);
150: }
152: #if defined(PETSC_HAVE_CUDA)
153: struct StandardBasis_Functor {
154: PetscScalar *v;
155: PetscInt j;
157: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
158: __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
159: };
160: #endif
162: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
163: {
164: #if defined(PETSC_HAVE_CUDA)
165: PetscBool iscuda;
166: #endif
167: PetscInt st, en;
169: PetscFunctionBegin;
170: PetscCall(VecGetOwnershipRange(x, &st, &en));
171: #if defined(PETSC_HAVE_CUDA)
172: PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
173: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
174: if (iscuda) {
175: PetscScalar *ax;
176: PetscCall(VecCUDAGetArrayWrite(x, &ax));
177: StandardBasis_Functor delta(ax, i - st);
178: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
179: PetscCall(VecCUDARestoreArrayWrite(x, &ax));
180: } else
181: #endif
182: {
183: PetscCall(VecSet(x, 0.));
184: if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
185: PetscCall(VecAssemblyBegin(x));
186: PetscCall(VecAssemblyEnd(x));
187: }
188: PetscFunctionReturn(PETSC_SUCCESS);
189: }
191: /* these are approximate norms */
192: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
193: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
194: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
195: {
196: Vec x, y, w, z;
197: PetscReal normz, adot;
198: PetscScalar dot;
199: PetscInt i, j, N, jold = -1;
200: PetscBool boundtocpu = PETSC_TRUE;
202: PetscFunctionBegin;
203: #if defined(PETSC_HAVE_DEVICE)
204: boundtocpu = A->boundtocpu;
205: #endif
206: switch (normtype) {
207: case NORM_INFINITY:
208: case NORM_1:
209: if (normsamples < 0) normsamples = 10; /* pure guess */
210: if (normtype == NORM_INFINITY) {
211: Mat B;
212: PetscCall(MatCreateTranspose(A, &B));
213: A = B;
214: } else {
215: PetscCall(PetscObjectReference((PetscObject)A));
216: }
217: PetscCall(MatCreateVecs(A, &x, &y));
218: PetscCall(MatCreateVecs(A, &z, &w));
219: PetscCall(VecBindToCPU(x, boundtocpu));
220: PetscCall(VecBindToCPU(y, boundtocpu));
221: PetscCall(VecBindToCPU(z, boundtocpu));
222: PetscCall(VecBindToCPU(w, boundtocpu));
223: PetscCall(VecGetSize(x, &N));
224: PetscCall(VecSet(x, 1. / N));
225: *n = 0.0;
226: for (i = 0; i < normsamples; i++) {
227: PetscCall(MatMult(A, x, y));
228: PetscCall(VecSign(y, w));
229: PetscCall(MatMultTranspose(A, w, z));
230: PetscCall(VecNorm(z, NORM_INFINITY, &normz));
231: PetscCall(VecDot(x, z, &dot));
232: adot = PetscAbsScalar(dot);
233: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
234: if (normz <= adot && i > 0) {
235: PetscCall(VecNorm(y, NORM_1, n));
236: break;
237: }
238: PetscCall(VecMax(z, &j, &normz));
239: if (j == jold) {
240: PetscCall(VecNorm(y, NORM_1, n));
241: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
242: break;
243: }
244: jold = j;
245: PetscCall(VecSetDelta(x, j));
246: }
247: PetscCall(MatDestroy(&A));
248: PetscCall(VecDestroy(&x));
249: PetscCall(VecDestroy(&w));
250: PetscCall(VecDestroy(&y));
251: PetscCall(VecDestroy(&z));
252: break;
253: case NORM_2:
254: if (normsamples < 0) normsamples = 20; /* pure guess */
255: PetscCall(MatCreateVecs(A, &x, &y));
256: PetscCall(MatCreateVecs(A, &z, NULL));
257: PetscCall(VecBindToCPU(x, boundtocpu));
258: PetscCall(VecBindToCPU(y, boundtocpu));
259: PetscCall(VecBindToCPU(z, boundtocpu));
260: PetscCall(VecSetRandom(x, NULL));
261: PetscCall(VecNormalize(x, NULL));
262: *n = 0.0;
263: for (i = 0; i < normsamples; i++) {
264: PetscCall(MatMult(A, x, y));
265: PetscCall(VecNormalize(y, n));
266: PetscCall(MatMultTranspose(A, y, z));
267: PetscCall(VecNorm(z, NORM_2, &normz));
268: PetscCall(VecDot(x, z, &dot));
269: adot = PetscAbsScalar(dot);
270: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
271: if (normz <= adot) break;
272: if (i < normsamples - 1) {
273: Vec t;
275: PetscCall(VecNormalize(z, NULL));
276: t = x;
277: x = z;
278: z = t;
279: }
280: }
281: PetscCall(VecDestroy(&x));
282: PetscCall(VecDestroy(&y));
283: PetscCall(VecDestroy(&z));
284: break;
285: default:
286: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
287: }
288: PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
289: PetscFunctionReturn(PETSC_SUCCESS);
290: }