ergo
template_lapack_lar1v.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LAR1V_HEADER
38#define TEMPLATE_LAPACK_LAR1V_HEADER
39
40template<class Treal>
42 *lambda, Treal *d__, Treal *l, Treal *ld, Treal *
43 lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical
44 *wantnc, integer *negcnt, Treal *ztz, Treal *mingma,
45 integer *r__, integer *isuppz, Treal *nrminv, Treal *resid,
46 Treal *rqcorr, Treal *work)
47{
48 /* System generated locals */
49 integer i__1;
50 Treal d__1, d__2, d__3;
51
52
53 /* Local variables */
54 integer i__;
55 Treal s;
56 integer r1, r2;
57 Treal eps, tmp;
58 integer neg1, neg2, indp, inds;
59 Treal dplus;
60 integer indlpl, indumn;
61 Treal dminus;
62 logical sawnan1, sawnan2;
63
64
65/* -- LAPACK auxiliary routine (version 3.2) -- */
66/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
67/* November 2006 */
68
69/* .. Scalar Arguments .. */
70/* .. */
71/* .. Array Arguments .. */
72/* .. */
73
74/* Purpose */
75/* ======= */
76
77/* DLAR1V computes the (scaled) r-th column of the inverse of */
78/* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
79/* L D L^T - sigma I. When sigma is close to an eigenvalue, the */
80/* computed vector is an accurate eigenvector. Usually, r corresponds */
81/* to the index where the eigenvector is largest in magnitude. */
82/* The following steps accomplish this computation : */
83/* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */
84/* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
85/* (c) Computation of the diagonal elements of the inverse of */
86/* L D L^T - sigma I by combining the above transforms, and choosing */
87/* r as the index where the diagonal of the inverse is (one of the) */
88/* largest in magnitude. */
89/* (d) Computation of the (scaled) r-th column of the inverse using the */
90/* twisted factorization obtained by combining the top part of the */
91/* the stationary and the bottom part of the progressive transform. */
92
93/* Arguments */
94/* ========= */
95
96/* N (input) INTEGER */
97/* The order of the matrix L D L^T. */
98
99/* B1 (input) INTEGER */
100/* First index of the submatrix of L D L^T. */
101
102/* BN (input) INTEGER */
103/* Last index of the submatrix of L D L^T. */
104
105/* LAMBDA (input) DOUBLE PRECISION */
106/* The shift. In order to compute an accurate eigenvector, */
107/* LAMBDA should be a good approximation to an eigenvalue */
108/* of L D L^T. */
109
110/* L (input) DOUBLE PRECISION array, dimension (N-1) */
111/* The (n-1) subdiagonal elements of the unit bidiagonal matrix */
112/* L, in elements 1 to N-1. */
113
114/* D (input) DOUBLE PRECISION array, dimension (N) */
115/* The n diagonal elements of the diagonal matrix D. */
116
117/* LD (input) DOUBLE PRECISION array, dimension (N-1) */
118/* The n-1 elements L(i)*D(i). */
119
120/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
121/* The n-1 elements L(i)*L(i)*D(i). */
122
123/* PIVMIN (input) DOUBLE PRECISION */
124/* The minimum pivot in the Sturm sequence. */
125
126/* GAPTOL (input) DOUBLE PRECISION */
127/* Tolerance that indicates when eigenvector entries are negligible */
128/* w.r.t. their contribution to the residual. */
129
130/* Z (input/output) DOUBLE PRECISION array, dimension (N) */
131/* On input, all entries of Z must be set to 0. */
132/* On output, Z contains the (scaled) r-th column of the */
133/* inverse. The scaling is such that Z(R) equals 1. */
134
135/* WANTNC (input) LOGICAL */
136/* Specifies whether NEGCNT has to be computed. */
137
138/* NEGCNT (output) INTEGER */
139/* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
140/* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
141
142/* ZTZ (output) DOUBLE PRECISION */
143/* The square of the 2-norm of Z. */
144
145/* MINGMA (output) DOUBLE PRECISION */
146/* The reciprocal of the largest (in magnitude) diagonal */
147/* element of the inverse of L D L^T - sigma I. */
148
149/* R (input/output) INTEGER */
150/* The twist index for the twisted factorization used to */
151/* compute Z. */
152/* On input, 0 <= R <= N. If R is input as 0, R is set to */
153/* the index where (L D L^T - sigma I)^{-1} is largest */
154/* in magnitude. If 1 <= R <= N, R is unchanged. */
155/* On output, R contains the twist index used to compute Z. */
156/* Ideally, R designates the position of the maximum entry in the */
157/* eigenvector. */
158
159/* ISUPPZ (output) INTEGER array, dimension (2) */
160/* The support of the vector in Z, i.e., the vector Z is */
161/* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
162
163/* NRMINV (output) DOUBLE PRECISION */
164/* NRMINV = 1/SQRT( ZTZ ) */
165
166/* RESID (output) DOUBLE PRECISION */
167/* The residual of the FP vector. */
168/* RESID = ABS( MINGMA )/SQRT( ZTZ ) */
169
170/* RQCORR (output) DOUBLE PRECISION */
171/* The Rayleigh Quotient correction to LAMBDA. */
172/* RQCORR = MINGMA*TMP */
173
174/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
175
176/* Further Details */
177/* =============== */
178
179/* Based on contributions by */
180/* Beresford Parlett, University of California, Berkeley, USA */
181/* Jim Demmel, University of California, Berkeley, USA */
182/* Inderjit Dhillon, University of Texas, Austin, USA */
183/* Osni Marques, LBNL/NERSC, USA */
184/* Christof Voemel, University of California, Berkeley, USA */
185
186/* ===================================================================== */
187
188/* .. Parameters .. */
189/* .. */
190/* .. Local Scalars .. */
191/* .. */
192/* .. External Functions .. */
193/* .. */
194/* .. Intrinsic Functions .. */
195/* .. */
196/* .. Executable Statements .. */
197
198 /* Parameter adjustments */
199 --work;
200 --isuppz;
201 --z__;
202 --lld;
203 --ld;
204 --l;
205 --d__;
206
207 /* Function Body */
208 eps = template_lapack_lamch("Precision", (Treal)0);
209 if (*r__ == 0) {
210 r1 = *b1;
211 r2 = *bn;
212 } else {
213 r1 = *r__;
214 r2 = *r__;
215 }
216/* Storage for LPLUS */
217 indlpl = 0;
218/* Storage for UMINUS */
219 indumn = *n;
220 inds = (*n << 1) + 1;
221 indp = *n * 3 + 1;
222 if (*b1 == 1) {
223 work[inds] = 0.;
224 } else {
225 work[inds + *b1 - 1] = lld[*b1 - 1];
226 }
227
228/* Compute the stationary transform (using the differential form) */
229/* until the index R2. */
230
231 sawnan1 = FALSE_;
232 neg1 = 0;
233 s = work[inds + *b1 - 1] - *lambda;
234 i__1 = r1 - 1;
235 for (i__ = *b1; i__ <= i__1; ++i__) {
236 dplus = d__[i__] + s;
237 work[indlpl + i__] = ld[i__] / dplus;
238 if (dplus < 0.) {
239 ++neg1;
240 }
241 work[inds + i__] = s * work[indlpl + i__] * l[i__];
242 s = work[inds + i__] - *lambda;
243/* L50: */
244 }
245 sawnan1 = template_lapack_isnan(&s);
246 if (sawnan1) {
247 goto L60;
248 }
249 i__1 = r2 - 1;
250 for (i__ = r1; i__ <= i__1; ++i__) {
251 dplus = d__[i__] + s;
252 work[indlpl + i__] = ld[i__] / dplus;
253 work[inds + i__] = s * work[indlpl + i__] * l[i__];
254 s = work[inds + i__] - *lambda;
255/* L51: */
256 }
257 sawnan1 = template_lapack_isnan(&s);
258
259L60:
260 if (sawnan1) {
261/* Runs a slower version of the above loop if a NaN is detected */
262 neg1 = 0;
263 s = work[inds + *b1 - 1] - *lambda;
264 i__1 = r1 - 1;
265 for (i__ = *b1; i__ <= i__1; ++i__) {
266 dplus = d__[i__] + s;
267 if (absMACRO(dplus) < *pivmin) {
268 dplus = -(*pivmin);
269 }
270 work[indlpl + i__] = ld[i__] / dplus;
271 if (dplus < 0.) {
272 ++neg1;
273 }
274 work[inds + i__] = s * work[indlpl + i__] * l[i__];
275 if (work[indlpl + i__] == 0.) {
276 work[inds + i__] = lld[i__];
277 }
278 s = work[inds + i__] - *lambda;
279/* L70: */
280 }
281 i__1 = r2 - 1;
282 for (i__ = r1; i__ <= i__1; ++i__) {
283 dplus = d__[i__] + s;
284 if (absMACRO(dplus) < *pivmin) {
285 dplus = -(*pivmin);
286 }
287 work[indlpl + i__] = ld[i__] / dplus;
288 work[inds + i__] = s * work[indlpl + i__] * l[i__];
289 if (work[indlpl + i__] == 0.) {
290 work[inds + i__] = lld[i__];
291 }
292 s = work[inds + i__] - *lambda;
293/* L71: */
294 }
295 }
296
297/* Compute the progressive transform (using the differential form) */
298/* until the index R1 */
299
300 sawnan2 = FALSE_;
301 neg2 = 0;
302 work[indp + *bn - 1] = d__[*bn] - *lambda;
303 i__1 = r1;
304 for (i__ = *bn - 1; i__ >= i__1; --i__) {
305 dminus = lld[i__] + work[indp + i__];
306 tmp = d__[i__] / dminus;
307 if (dminus < 0.) {
308 ++neg2;
309 }
310 work[indumn + i__] = l[i__] * tmp;
311 work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
312/* L80: */
313 }
314 tmp = work[indp + r1 - 1];
315 sawnan2 = template_lapack_isnan(&tmp);
316 if (sawnan2) {
317/* Runs a slower version of the above loop if a NaN is detected */
318 neg2 = 0;
319 i__1 = r1;
320 for (i__ = *bn - 1; i__ >= i__1; --i__) {
321 dminus = lld[i__] + work[indp + i__];
322 if (absMACRO(dminus) < *pivmin) {
323 dminus = -(*pivmin);
324 }
325 tmp = d__[i__] / dminus;
326 if (dminus < 0.) {
327 ++neg2;
328 }
329 work[indumn + i__] = l[i__] * tmp;
330 work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
331 if (tmp == 0.) {
332 work[indp + i__ - 1] = d__[i__] - *lambda;
333 }
334/* L100: */
335 }
336 }
337
338/* Find the index (from R1 to R2) of the largest (in magnitude) */
339/* diagonal element of the inverse */
340
341 *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
342 if (*mingma < 0.) {
343 ++neg1;
344 }
345 if (*wantnc) {
346 *negcnt = neg1 + neg2;
347 } else {
348 *negcnt = -1;
349 }
350 if (absMACRO(*mingma) == 0.) {
351 *mingma = eps * work[inds + r1 - 1];
352 }
353 *r__ = r1;
354 i__1 = r2 - 1;
355 for (i__ = r1; i__ <= i__1; ++i__) {
356 tmp = work[inds + i__] + work[indp + i__];
357 if (tmp == 0.) {
358 tmp = eps * work[inds + i__];
359 }
360 if (absMACRO(tmp) <= absMACRO(*mingma)) {
361 *mingma = tmp;
362 *r__ = i__ + 1;
363 }
364/* L110: */
365 }
366
367/* Compute the FP vector: solve N^T v = e_r */
368
369 isuppz[1] = *b1;
370 isuppz[2] = *bn;
371 z__[*r__] = 1.;
372 *ztz = 1.;
373
374/* Compute the FP vector upwards from R */
375
376 if (! sawnan1 && ! sawnan2) {
377 i__1 = *b1;
378 for (i__ = *r__ - 1; i__ >= i__1; --i__) {
379 z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
380 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
381 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
382 z__[i__] = 0.;
383 isuppz[1] = i__ + 1;
384 goto L220;
385 }
386 *ztz += z__[i__] * z__[i__];
387/* L210: */
388 }
389L220:
390 ;
391 } else {
392/* Run slower loop if NaN occurred. */
393 i__1 = *b1;
394 for (i__ = *r__ - 1; i__ >= i__1; --i__) {
395 if (z__[i__ + 1] == 0.) {
396 z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
397 } else {
398 z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
399 }
400 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
401 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
402 z__[i__] = 0.;
403 isuppz[1] = i__ + 1;
404 goto L240;
405 }
406 *ztz += z__[i__] * z__[i__];
407/* L230: */
408 }
409L240:
410 ;
411 }
412/* Compute the FP vector downwards from R in blocks of size BLKSIZ */
413 if (! sawnan1 && ! sawnan2) {
414 i__1 = *bn - 1;
415 for (i__ = *r__; i__ <= i__1; ++i__) {
416 z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
417 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
418 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
419 z__[i__ + 1] = 0.;
420 isuppz[2] = i__;
421 goto L260;
422 }
423 *ztz += z__[i__ + 1] * z__[i__ + 1];
424/* L250: */
425 }
426L260:
427 ;
428 } else {
429/* Run slower loop if NaN occurred. */
430 i__1 = *bn - 1;
431 for (i__ = *r__; i__ <= i__1; ++i__) {
432 if (z__[i__] == 0.) {
433 z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
434 } else {
435 z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
436 }
437 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
438 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
439 z__[i__ + 1] = 0.;
440 isuppz[2] = i__;
441 goto L280;
442 }
443 *ztz += z__[i__ + 1] * z__[i__ + 1];
444/* L270: */
445 }
446L280:
447 ;
448 }
449
450/* Compute quantities for convergence test */
451
452 tmp = 1. / *ztz;
453 *nrminv = template_blas_sqrt(tmp);
454 *resid = absMACRO(*mingma) * *nrminv;
455 *rqcorr = *mingma * tmp;
456
457
458 return 0;
459
460/* End of DLAR1V */
461
462} /* dlar1v_ */
463
464#endif
Treal template_blas_sqrt(Treal x)
int integer
Definition template_blas_common.h:40
#define absMACRO(x)
Definition template_blas_common.h:47
bool logical
Definition template_blas_common.h:41
#define FALSE_
Definition template_lapack_common.h:43
logical template_lapack_isnan(Treal *din)
Definition template_lapack_isnan.h:45
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition template_lapack_lamch.h:202
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition template_lapack_lar1v.h:41