Actual source code: baijsolvtran1.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
5: {
6: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
7: IS iscol = a->col,isrow = a->row;
8: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
9: PetscInt i,n = a->mbs,j;
10: PetscInt nz;
11: PetscScalar *x,*tmp,s1;
12: const MatScalar *aa = a->a,*v;
13: const PetscScalar *b;
15: VecGetArrayRead(bb,&b);
16: VecGetArray(xx,&x);
17: tmp = a->solve_work;
19: ISGetIndices(isrow,&rout); r = rout;
20: ISGetIndices(iscol,&cout); c = cout;
22: /* copy the b into temp work space according to permutation */
23: for (i=0; i<n; i++) tmp[i] = b[c[i]];
25: /* forward solve the U^T */
26: for (i=0; i<n; i++) {
27: v = aa + adiag[i+1] + 1;
28: vi = aj + adiag[i+1] + 1;
29: nz = adiag[i] - adiag[i+1] - 1;
30: s1 = tmp[i];
31: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
32: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
33: tmp[i] = s1;
34: }
36: /* backward solve the L^T */
37: for (i=n-1; i>=0; i--) {
38: v = aa + ai[i];
39: vi = aj + ai[i];
40: nz = ai[i+1] - ai[i];
41: s1 = tmp[i];
42: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
43: }
45: /* copy tmp into x according to permutation */
46: for (i=0; i<n; i++) x[r[i]] = tmp[i];
48: ISRestoreIndices(isrow,&rout);
49: ISRestoreIndices(iscol,&cout);
50: VecRestoreArrayRead(bb,&b);
51: VecRestoreArray(xx,&x);
53: PetscLogFlops(2.0*a->nz-A->cmap->n);
54: return 0;
55: }
57: PetscErrorCode MatSolveTranspose_SeqBAIJ_1_inplace(Mat A,Vec bb,Vec xx)
58: {
59: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data;
60: IS iscol=a->col,isrow=a->row;
61: const PetscInt *r,*c,*rout,*cout;
62: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
63: PetscInt i,nz;
64: const MatScalar *aa=a->a,*v;
65: PetscScalar s1,*x,*t;
66: const PetscScalar *b;
68: VecGetArrayRead(bb,&b);
69: VecGetArray(xx,&x);
70: t = a->solve_work;
72: ISGetIndices(isrow,&rout); r = rout;
73: ISGetIndices(iscol,&cout); c = cout;
75: /* copy the b into temp work space according to permutation */
76: for (i=0; i<n; i++) t[i] = b[c[i]];
78: /* forward solve the U^T */
79: for (i=0; i<n; i++) {
81: v = aa + diag[i];
82: /* multiply by the inverse of the block diagonal */
83: s1 = (*v++)*t[i];
84: vi = aj + diag[i] + 1;
85: nz = ai[i+1] - diag[i] - 1;
86: while (nz--) {
87: t[*vi++] -= (*v++)*s1;
88: }
89: t[i] = s1;
90: }
91: /* backward solve the L^T */
92: for (i=n-1; i>=0; i--) {
93: v = aa + diag[i] - 1;
94: vi = aj + diag[i] - 1;
95: nz = diag[i] - ai[i];
96: s1 = t[i];
97: while (nz--) {
98: t[*vi--] -= (*v--)*s1;
99: }
100: }
102: /* copy t into x according to permutation */
103: for (i=0; i<n; i++) x[r[i]] = t[i];
105: ISRestoreIndices(isrow,&rout);
106: ISRestoreIndices(iscol,&cout);
107: VecRestoreArrayRead(bb,&b);
108: VecRestoreArray(xx,&x);
109: PetscLogFlops(2.0*(a->nz) - A->cmap->n);
110: return 0;
111: }