1: /*
2: BDC - Block-divide and conquer (see description in README file).
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/dsimpl.h>
25: #include <slepcblaslapack.h>
27: PetscErrorCode BDC_dsbtdc_(const char *jobz,const char *jobacc,PetscBLASInt n, 28: PetscBLASInt nblks,PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d, 29: PetscBLASInt l2d,PetscReal *e,PetscBLASInt l1e,PetscBLASInt l2e,PetscReal tol, 30: PetscReal tau1,PetscReal tau2,PetscReal *ev,PetscReal *z,PetscBLASInt ldz, 31: PetscReal *work,PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork, 32: PetscReal *mingap,PetscBLASInt *mingapi,PetscBLASInt *info, 33: PetscBLASInt jobz_len,PetscBLASInt jobacc_len) 34: {
35: /* -- Routine written in LAPACK Version 3.0 style -- */
36: /* *************************************************** */
37: /* Written by */
38: /* Michael Moldaschl and Wilfried Gansterer */
39: /* University of Vienna */
40: /* last modification: March 28, 2014 */
42: /* Small adaptations of original code written by */
43: /* Wilfried Gansterer and Bob Ward, */
44: /* Department of Computer Science, University of Tennessee */
45: /* see http://dx.doi.org/10.1137/S1064827501399432 */
46: /* *************************************************** */
48: /* Purpose */
49: /* ======= */
51: /* DSBTDC computes approximations to all eigenvalues and eigenvectors */
52: /* of a symmetric block tridiagonal matrix using the divide and */
53: /* conquer method with lower rank approximations to the subdiagonal blocks. */
55: /* This code makes very mild assumptions about floating point */
56: /* arithmetic. It will work on machines with a guard digit in */
57: /* add/subtract, or on those binary machines without guard digits */
58: /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
59: /* It could conceivably fail on hexadecimal or decimal machines */
60: /* without guard digits, but we know of none. See DLAED3M for details. */
62: /* Arguments */
63: /* ========= */
65: /* JOBZ (input) CHARACTER*1 */
66: /* = 'N': Compute eigenvalues only (not implemented); */
67: /* = 'D': Compute eigenvalues and eigenvectors. Eigenvectors */
68: /* are accumulated in the divide-and-conquer process. */
70: /* JOBACC (input) CHARACTER*1 */
71: /* = 'A' ("automatic"): The accuracy parameters TAU1 and TAU2 */
72: /* are determined automatically from the */
73: /* parameter TOL according to the analytical */
74: /* bounds. In that case the input values of */
75: /* TAU1 and TAU2 are irrelevant (ignored). */
76: /* = 'M' ("manual"): The input values of the accuracy parameters */
77: /* TAU1 and TAU2 are used. In that case the input */
78: /* value of the parameter TOL is irrelevant */
79: /* (ignored). */
81: /* N (input) INTEGER */
82: /* The dimension of the symmetric block tridiagonal matrix. */
83: /* N >= 1. */
85: /* NBLKS (input) INTEGER, 1 <= NBLKS <= N */
86: /* The number of diagonal blocks in the matrix. */
88: /* KSIZES (input) INTEGER array, dimension (NBLKS) */
89: /* The dimensions of the square diagonal blocks from top left */
90: /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */
91: /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
93: /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
94: /* The lower triangular elements of the symmetric diagonal */
95: /* blocks of the block tridiagonal matrix. The elements of the top */
96: /* left diagonal block, which is of dimension KSIZES(1), have to */
97: /* be placed in D(*,*,1); the elements of the next diagonal */
98: /* block, which is of dimension KSIZES(2), have to be placed in */
99: /* D(*,*,2); etc. */
101: /* L1D (input) INTEGER */
102: /* The leading dimension of the array D. L1D >= max(3,KMAX), */
103: /* where KMAX is the dimension of the largest diagonal block, */
104: /* i.e., KMAX = max_I ( KSIZES(I) ). */
106: /* L2D (input) INTEGER */
107: /* The second dimension of the array D. L2D >= max(3,KMAX), */
108: /* where KMAX is as stated in L1D above. */
110: /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
111: /* The elements of the subdiagonal blocks of the */
112: /* block tridiagonal matrix. The elements of the top left */
113: /* subdiagonal block, which is KSIZES(2) x KSIZES(1), have to be */
114: /* placed in E(*,*,1); the elements of the next subdiagonal block, */
115: /* which is KSIZES(3) x KSIZES(2), have to be placed in E(*,*,2); etc. */
116: /* During runtime, the original contents of E(*,*,K) is */
117: /* overwritten by the singular vectors and singular values of */
118: /* the lower rank representation. */
120: /* L1E (input) INTEGER */
121: /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */
122: /* where KMAX is as stated in L1D above. The size of L1E enables */
123: /* the storage of ALL singular vectors and singular values for */
124: /* the corresponding off-diagonal block in E(*,*,K) and therefore */
125: /* there are no restrictions on the rank of the approximation */
126: /* (only the "natural" restriction */
127: /* RANK( K ) .LE. MIN( KSIZES( K ),KSIZES( K+1 ) )). */
129: /* L2E (input) INTEGER */
130: /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */
131: /* where KMAX is as stated in L1D above. The size of L2E enables */
132: /* the storage of ALL singular vectors and singular values for */
133: /* the corresponding off-diagonal block in E(*,*,K) and therefore */
134: /* there are no restrictions on the rank of the approximation */
135: /* (only the "natural" restriction */
136: /* RANK( K ) .LE. MIN( KSIZES( K ),KSIZES( K+1 ) )). */
138: /* TOL (input) DOUBLE PRECISION, TOL.LE.TOLMAX */
139: /* User specified tolerance for the residuals of the computed */
140: /* eigenpairs. If ( JOBACC.EQ.'A' ) then it is used to determine */
141: /* TAU1 and TAU2; ignored otherwise. */
142: /* If ( TOL.LT.40*EPS .AND. JOBACC.EQ.'A' ) then TAU1 is set to machine */
143: /* epsilon and TAU2 is set to the standard deflation tolerance from */
144: /* LAPACK. */
146: /* TAU1 (input) DOUBLE PRECISION, TAU1.LE.TOLMAX/2 */
147: /* User specified tolerance for determining the rank of the */
148: /* lower rank approximations to the off-diagonal blocks. */
149: /* The rank for each off-diagonal block is determined such that */
150: /* the resulting absolute eigenvalue error is less than or equal */
151: /* to TAU1. */
152: /* If ( JOBACC.EQ.'A' ) then TAU1 is determined automatically from */
153: /* TOL and the input value is ignored. */
154: /* If ( JOBACC.EQ.'M' .AND. TAU1.LT.20*EPS ) then TAU1 is set to */
155: /* machine epsilon. */
157: /* TAU2 (input) DOUBLE PRECISION, TAU2.LE.TOLMAX/2 */
158: /* User specified deflation tolerance for the routine DIBTDC. */
159: /* If ( 1.0D-1.GT.TAU2.GT.20*EPS ) then TAU2 is used as */
160: /* the deflation tolerance in DSRTDF (EPS is the machine epsilon). */
161: /* If ( TAU2.LE.20*EPS ) then the standard deflation tolerance from */
162: /* LAPACK is used as the deflation tolerance in DSRTDF. */
163: /* If ( JOBACC.EQ.'A' ) then TAU2 is determined automatically from */
164: /* TOL and the input value is ignored. */
165: /* If ( JOBACC.EQ.'M' .AND. TAU2.LT.20*EPS ) then TAU2 is set to */
166: /* the standard deflation tolerance from LAPACK. */
168: /* EV (output) DOUBLE PRECISION array, dimension (N) */
169: /* If INFO = 0, then EV contains the computed eigenvalues of the */
170: /* symmetric block tridiagonal matrix in ascending order. */
172: /* Z (output) DOUBLE PRECISION array, dimension (LDZ,N) */
173: /* If ( JOBZ.EQ.'D' .AND. INFO = 0 ) */
174: /* then Z contains the orthonormal eigenvectors of the symmetric */
175: /* block tridiagonal matrix computed by the routine DIBTDC */
176: /* (accumulated in the divide-and-conquer process). */
177: /* If ( -199 < INFO < -99 ) then Z contains the orthonormal */
178: /* eigenvectors of the symmetric block tridiagonal matrix, */
179: /* computed without divide-and-conquer (quick returns). */
180: /* Otherwise not referenced. */
182: /* LDZ (input) INTEGER */
183: /* The leading dimension of the array Z. LDZ >= max(1,N). */
185: /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
187: /* LWORK (input) INTEGER */
188: /* The dimension of the array WORK. */
189: /* If NBLKS.EQ.1, then LWORK has to be at least 2N^2+6N+1 */
190: /* (for the call of DSYEVD). */
191: /* If NBLKS.GE.2 and ( JOBZ.EQ.'D' ) then the absolute minimum */
192: /* required for DIBTDC is ( N**2 + 3*N ). This will not always */
193: /* suffice, though, the routine will return a corresponding */
194: /* error code and report how much work space was missing (see */
195: /* INFO). */
196: /* In order to guarantee correct results in all cases where */
197: /* NBLKS.GE.2, LWORK must be at least ( 2*N**2 + 3*N ). */
199: /* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */
201: /* LIWORK (input) INTEGER */
202: /* The dimension of the array IWORK. */
203: /* LIWORK must be at least ( 5*N + 5*NBLKS - 1 ) (for DIBTDC) */
204: /* Note that this should also suffice for the call of DSYEVD on a */
205: /* diagonal block which requires ( 5*KMAX + 3 ). */
208: /* MINGAP (output) DOUBLE PRECISION */
209: /* The minimum "gap" between the approximate eigenvalues */
210: /* computed, i.e., MIN( ABS( EV( I+1 )-EV( I ) ) for I=1,2,..., N-1 */
211: /* IF ( MINGAP.LE.TOL/10 ) THEN a warning flag is returned in INFO, */
212: /* because the computed eigenvectors may be unreliable individually */
213: /* (only the subspaces spanned are approximated reliably). */
215: /* MINGAPI (output) INTEGER */
216: /* Index I where the minimum gap in the spectrum occurred. */
218: /* INFO (output) INTEGER */
219: /* = 0: successful exit, no special cases occurred. */
220: /* < -200: not enough workspace. Space for ABS(INFO + 200) */
221: /* numbers is required in addition to the workspace provided, */
222: /* otherwise some of the computed eigenvectors will be incorrect. */
223: /* < -99, > -199: successful exit, but quick returns. */
224: /* if INFO = -100, successful exit, but the input matrix */
225: /* was the zero matrix and no */
226: /* divide-and-conquer was performed */
227: /* if INFO = -101, successful exit, but N was 1 and no */
228: /* divide-and-conquer was performed */
229: /* if INFO = -102, successful exit, but only a single */
230: /* dense block. Standard dense solver */
231: /* was called, no divide-and-conquer was */
232: /* performed */
233: /* if INFO = -103, successful exit, but warning that */
234: /* MINGAP.LE.TOL/10 and therefore the */
235: /* eigenvectors corresponding to close */
236: /* approximate eigenvalues may individually */
237: /* be unreliable (although taken together they */
238: /* do approximate the corresponding subspace to */
239: /* the desired accuracy) */
240: /* = -99: error in the preprocessing in DIBTDC (when determining */
241: /* the merging order). */
242: /* < 0, > -99: illegal arguments. */
243: /* if INFO = -i, the i-th argument had an illegal value. */
244: /* > 0: The algorithm failed to compute an eigenvalue while */
245: /* working on the submatrix lying in rows and columns */
246: /* INFO/(N+1) through mod(INFO,N+1). */
248: /* Further Details */
249: /* =============== */
251: /* Small modifications of code written by */
252: /* Wilfried Gansterer and Bob Ward, */
253: /* Department of Computer Science, University of Tennessee */
254: /* see http://dx.doi.org/10.1137/S1064827501399432 */
256: /* Based on the design of the LAPACK code sstedc.f written by Jeff */
257: /* Rutter, Computer Science Division, University of California at */
258: /* Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
260: /* ===================================================================== */
262: /* .. Parameters .. */
264: #define TOLMAX 0.1266: /* TOLMAX .... upper bound for tolerances TOL, TAU1, TAU2 */
267: /* NOTE: in the routine DIBTDC, the value */
268: /* 1.D-1 is hardcoded for TOLMAX ! */
270: #if defined(SLEPC_MISSING_LAPACK_SYEVD) || defined(SLEPC_MISSING_LAPACK_GESVD) || defined(SLEPC_MISSING_LAPACK_LASET) || defined(SLEPC_MISSING_LAPACK_LASCL)
272: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEVD/GESVD/LASET/LASCL - Lapack routine is unavailable");
273: #else
274: PetscBLASInt i, j, k, i1, iwspc, lwmin, start;
275: PetscBLASInt ii, ip, jp, nk, rk, np, iu, rp1, ldu;
276: PetscBLASInt ksk, ivt, iend, kchk, kmax, one=1, zero=0;
277: PetscBLASInt ldvt, ksum, kskp1, spneed, nrblks, liwmin, isvals;
278: PetscReal p, d2, eps, dmax, emax, done = 1.0, dzero = 0.0;
279: PetscReal dnrm, tiny, anorm, exdnrm=0, dropsv, absdiff;
283: /* Determine machine epsilon. */
284: eps = LAPACKlamch_("Epsilon");
286: *info = 0;
288: if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') {
289: *info = -1;
290: } else if (*(unsigned char *)jobacc != 'A' && *(unsigned char *)jobacc != 'M') {
291: *info = -2;
292: } else if (n < 1) {
293: *info = -3;
294: } else if (nblks < 1 || nblks > n) {
295: *info = -4;
296: }
297: if (*info == 0) {
298: ksum = 0;
299: kmax = 0;
300: kchk = 0;
301: for (k = 0; k < nblks; ++k) {
302: ksk = ksizes[k];
303: ksum += ksk;
304: if (ksk > kmax) kmax = ksk;
305: if (ksk < 1) kchk = 1;
306: }
307: if (nblks == 1) lwmin = 2*n*n + n*6 + 1;
308: else lwmin = n*n + n*3;
309: liwmin = n * 5 + nblks * 5 - 4;
310: if (ksum != n || kchk == 1) {
311: *info = -5;
312: } else if (l1d < PetscMax(3,kmax)) {
313: *info = -7;
314: } else if (l2d < PetscMax(3,kmax)) {
315: *info = -8;
316: } else if (l1e < PetscMax(3,2*kmax+1)) {
317: *info = -10;
318: } else if (l2e < PetscMax(3,2*kmax+1)) {
319: *info = -11;
320: } else if (*(unsigned char *)jobacc == 'A' && tol > TOLMAX) {
321: *info = -12;
322: } else if (*(unsigned char *)jobacc == 'M' && tau1 > TOLMAX/2) {
323: *info = -13;
324: } else if (*(unsigned char *)jobacc == 'M' && tau2 > TOLMAX/2) {
325: *info = -14;
326: } else if (ldz < PetscMax(1,n)) {
327: *info = -17;
328: } else if (lwork < lwmin) {
329: *info = -19;
330: } else if (liwork < liwmin) {
331: *info = -21;
332: }
333: }
335: if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DSBTDC",-(*info));
337: /* Quick return if possible */
339: if (n == 1) {
340: ev[0] = d[0];
341: z[0] = 1.;
342: *info = -101;
343: return(0);
344: }
346: /* If NBLKS is equal to 1, then solve the problem with standard */
347: /* dense solver (in this case KSIZES(1) = N). */
349: if (nblks == 1) {
350: PetscPrintf(PETSC_COMM_WORLD," dsbtdc: This branch still needs to be checked!\n");
351: for (i = 0; i < n; ++i) {
352: for (j = 0; j <= i; ++j) {
353: z[i + j*ldz] = d[i + j*l1d];
354: }
355: }
356: PetscStackCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &n, z, &ldz, ev, work, &lwork, iwork, &liwork, info));
357: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DSYEVD, info = %d",*info);
358: *info = -102;
359: return(0);
360: }
362: /* determine the accuracy parameters (if requested) */
364: if (*(unsigned char *)jobacc == 'A') {
365: tau1 = tol / 2;
366: if (tau1 < eps * 20) tau1 = eps;
367: tau2 = tol / 2;
368: }
370: /* Initialize Z as the identity matrix */
372: if (*(unsigned char *)jobz == 'D') {
373: PetscStackCallBLAS("LAPACKlaset",LAPACKlaset_("Full", &n, &n, &dzero, &done, z, &ldz));
374: }
376: /* Determine the off-diagonal ranks, form and store the lower rank */
377: /* approximations based on the tolerance parameters, the */
378: /* RANK( K ) largest singular values and the associated singular */
379: /* vectors of each subdiagonal block. Also find the maximum norm of */
380: /* the subdiagonal blocks (in EMAX). */
381: 382: /* Compute SVDs of the subdiagonal blocks.... */
384: /* EMAX .... maximum norm of the off-diagonal blocks */
386: emax = 0.;
387: for (k = 0; k < nblks-1; ++k) {
388: ksk = ksizes[k];
389: kskp1 = ksizes[k+1];
390: isvals = 0;
392: /* Note that min(KSKP1,KSK).LE.N/2 (equality possible for */
393: /* NBLKS=2), and therefore storing the singular values requires */
394: /* at most N/2 entries of the * array WORK. */
396: iu = isvals + n / 2;
397: ivt = isvals + n / 2;
399: /* Call of DGESVD: The space for U is not referenced, since */
400: /* JOBU='O' and therefore this portion of the array WORK */
401: /* is not referenced for U. */
403: ldu = kskp1;
404: ldvt = PetscMin(kskp1,ksk);
405: iwspc = ivt + n * n / 2;
407: /* Note that the minimum workspace required for this call */
408: /* of DGESVD is: N/2 for storing the singular values + N**2/2 for */
409: /* storing V^T + 5*N/2 workspace = N**2/2 + 3*N. */
411: i1 = lwork - iwspc;
412: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O", "S", &kskp1, &ksk,
413: &e[k*l1e*l2e], &l1e, &work[isvals],
414: &work[iu], &ldu, &work[ivt], &ldvt, &work[iwspc], &i1, info));
415: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DGESVD, info = %d",*info);
417: /* Note that after the return from DGESVD U is stored in */
418: /* E(*,*,K), and V^{\top} is stored in WORK( IVT, IVT+1, .... ) */
420: /* determine the ranks RANK() for the approximations */
422: rk = PetscMin(ksk,kskp1);
423: L8:424: dropsv = work[isvals - 1 + rk];
426: if (dropsv * 2. <= tau1) {
428: /* the error caused by dropping singular value RK is */
429: /* small enough, try to reduce the rank by one more */
431: --rk;
432: if (rk > 0) goto L8;
433: else iwork[k] = 0;
434: } else {
436: /* the error caused by dropping singular value RK is */
437: /* too large already, RK is the rank required to achieve the */
438: /* desired accuracy */
440: iwork[k] = rk;
441: }
443: /* ************************************************************************** */
445: /* Store the first RANK( K ) terms of the SVD of the current */
446: /* off-diagonal block. */
447: /* NOTE that here it is required that L1E, L2E >= 2*KMAX+1 in order */
448: /* to have enough space for storing singular vectors and values up */
449: /* to the full SVD of an off-diagonal block !!!! */
450: 451: /* u1-u_RANK(K) is already contained in E(:,1:RANK(K),K) (as a */
452: /* result of the call of DGESVD !), the sigma1-sigmaK are to be */
453: /* stored in E(1:RANK(K),RANK(K)+1,K), and v1-v_RANK(K) are to be */
454: /* stored in E(:,RANK(K)+2:2*RANK(K)+1,K) */
456: rp1 = iwork[k];
457: for (j = 0; j < iwork[k]; ++j) {
459: /* store sigma_J in E( J,RANK( K )+1,K ) */
461: e[j + (rp1 + k*l2e)* l1e] = work[isvals + j];
463: /* update maximum norm of subdiagonal blocks */
465: if (e[j + (rp1 + k*l2e)*l1e] > emax) {
466: emax = e[j + (rp1 + k*l2e)*l1e];
467: }
469: /* store v_J in E( :,RANK( K )+1+J,K ) */
470: /* (note that WORK contains V^{\top} and therefore */
471: /* we need to read rowwise !) */
473: for (i = 1; i <= ksk; ++i) {
474: e[i-1 + (rp1+j+1 + k*l2e)*l1e] = work[ivt+j + (i-1)*ldvt];
475: }
476: }
478: }
480: /* Compute the maximum norm of diagonal blocks and store the norm */
481: /* of each diagonal block in E(RP1,RP1,K) (after the singular values); */
482: /* store the norm of the last diagonal block in EXDNRM. */
483: 484: /* DMAX .... maximum one-norm of the diagonal blocks */
486: dmax = 0.;
487: for (k = 0; k < nblks; ++k) {
488: rp1 = iwork[k];
490: /* compute the one-norm of diagonal block K */
492: dnrm = LAPACKlansy_("1", "L", &ksizes[k], &d[k*l1d*l2d], &l1d, work);
493: if (k+1 == nblks) exdnrm = dnrm;
494: else e[rp1 + (rp1 + k*l2e)*l1e] = dnrm;
495: if (dnrm > dmax) dmax = dnrm;
496: }
498: /* Check for zero matrix. */
500: if (emax == 0. && dmax == 0.) {
501: for (i = 0; i < n; ++i) ev[i] = 0.;
502: *info = -100;
503: return(0);
504: }
506: /* **************************************************************** */
508: /* ....Identify irreducible parts of the block tridiagonal matrix */
509: /* [while ( START <= NBLKS )].... */
511: start = 0;
512: np = 0;
513: L10:514: if (start < nblks) {
516: /* Let IEND be the number of the next subdiagonal block such that */
517: /* its RANK is 0 or IEND = NBLKS if no such subdiagonal exists. */
518: /* The matrix identified by the elements between the diagonal block START */
519: /* and the diagonal block IEND constitutes an independent (irreducible) */
520: /* sub-problem. */
522: iend = start;
524: L20:525: if (iend < nblks) {
526: rk = iwork[iend];
528: /* NOTE: if RANK( IEND ).EQ.0 then decoupling happens due to */
529: /* reduced accuracy requirements ! (because in this case */
530: /* we would not merge the corresponding two diagonal blocks) */
531: 532: /* NOTE: seems like any combination may potentially happen: */
533: /* (i) RANK = 0 but no decoupling due to small norm of */
534: /* off-diagonal block (corresponding diagonal blocks */
535: /* also have small norm) as well as */
536: /* (ii) RANK > 0 but decoupling due to small norm of */
537: /* off-diagonal block (corresponding diagonal blocks */
538: /* have very large norm) */
539: /* case (i) is ruled out by checking for RANK = 0 above */
540: /* (we decide to decouple all the time when the rank */
541: /* of an off-diagonal block is zero, independently of */
542: /* the norms of the corresponding diagonal blocks. */
544: if (rk > 0) {
546: /* check for decoupling due to small norm of off-diagonal block */
547: /* (relative to the norms of the corresponding diagonal blocks) */
549: if (iend == nblks-2) {
550: d2 = PetscSqrtReal(exdnrm);
551: } else {
552: d2 = PetscSqrtReal(e[iwork[iend+1] + (iwork[iend+1] + (iend+1)*l2e)*l1e]);
553: }
555: /* this definition of TINY is analogous to the definition */
556: /* in the tridiagonal divide&conquer (dstedc) */
558: tiny = eps * PetscSqrtReal(e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e])*d2;
559: if (e[(iwork[iend] + iend*l2e)*l1e] > tiny) {
561: /* no decoupling due to small norm of off-diagonal block */
563: ++iend;
564: goto L20;
565: }
566: }
567: }
569: /* ....(Sub) Problem determined: between diagonal blocks */
570: /* START and IEND. Compute its size and solve it.... */
572: nrblks = iend - start + 1;
573: if (nrblks == 1) {
575: /* Isolated problem is a single diagonal block */
577: nk = ksizes[start];
579: /* copy this isolated block into Z */
581: for (i = 0; i < nk; ++i) {
582: ip = np + i + 1;
583: for (j = 0; j <= i; ++j) {
584: jp = np + j + 1;
585: z[ip + jp*ldz] = d[i + (j + start*l2d)*l1d];
586: }
587: }
589: /* check whether there is enough workspace */
591: spneed = 2*nk*nk + nk * 6 + 1;
592: if (spneed > lwork) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: not enough workspace for DSYEVD, info = %d",lwork - 200 - spneed);
594: PetscStackCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &nk,
595: &z[np + np*ldz], &ldz, &ev[np],
596: work, &lwork, &iwork[nblks-1], &liwork, info));
597: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DSYEVD, info = %d",*info);
598: start = iend + 1;
599: np += nk;
601: /* go to the next irreducible subproblem */
603: goto L10;
604: }
606: /* ....Isolated problem consists of more than one diagonal block. */
607: /* Start the divide and conquer algorithm.... */
608: 609: /* Scale: Divide by the maximum of all norms of diagonal blocks */
610: /* and singular values of the subdiagonal blocks */
611: 612: /* ....determine maximum of the norms of all diagonal and subdiagonal */
613: /* blocks.... */
615: if (iend == nblks-1) anorm = exdnrm;
616: else anorm = e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e];
617: for (k = start; k < iend; ++k) {
618: rp1 = iwork[k];
620: /* norm of diagonal block */
621: anorm = PetscMax(anorm,e[rp1 + (rp1 + k*l2e)*l1e]);
623: /* singular value of subdiagonal block */
624: anorm = PetscMax(anorm,e[(rp1 + k*l2e)*l1e]);
625: }
627: nk = 0;
628: for (k = start; k < iend+1; ++k) {
629: ksk = ksizes[k];
630: nk += ksk;
632: /* scale the diagonal block */
633: PetscStackCallBLAS("LAPACKlascl",LAPACKlascl_("L", &zero, &zero,
634: &anorm, &done, &ksk, &ksk, &d[k*l2d*l1d], &l1d, info));
635: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DLASCL, info = %d",*info);
637: /* scale the (approximated) off-diagonal block by dividing its */
638: /* singular values */
640: if (k != iend) {
642: /* the last subdiagonal block has index IEND-1 !!!! */
643: for (i = 0; i < iwork[k]; ++i) {
644: e[i + (iwork[k] + k*l2e)*l1e] /= anorm;
645: }
646: }
647: }
649: /* call the block-tridiagonal divide-and-conquer on the */
650: /* irreducible subproblem which has been identified */
652: BDC_dibtdc_(jobz, nk, nrblks, &ksizes[start], &d[start*l1d*l2d], l1d, l2d,
653: &e[start*l2e*l1e], &iwork[start], l1e, l2e, tau2, &ev[np],
654: &z[np + np*ldz], ldz, work, lwork, &iwork[nblks-1], liwork, info, 1);
655: 656: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DIBTDC, info = %d",*info);
658: /* ************************************************************************** */
660: /* Scale back the computed eigenvalues. */
662: PetscStackCallBLAS("LAPACKlascl",LAPACKlascl_("G", &zero, &zero, &done,
663: &anorm, &nk, &one, &ev[np], &nk, info));
664: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DLASCL, info = %d",*info);
666: start = iend + 1;
667: np += nk;
669: /* Go to the next irreducible subproblem. */
671: goto L10;
672: }
674: /* ....If the problem split any number of times, then the eigenvalues */
675: /* will not be properly ordered. Here we permute the eigenvalues */
676: /* (and the associated eigenvectors) across the irreducible parts */
677: /* into ascending order.... */
678: 679: /* IF( NRBLKS.LT.NBLKS )THEN */
680: 681: /* Use Selection Sort to minimize swaps of eigenvectors */
683: for (ii = 1; ii < n; ++ii) {
684: i = ii;
685: k = i;
686: p = ev[i];
687: for (j = ii; j < n; ++j) {
688: if (ev[j] < p) {
689: k = j;
690: p = ev[j];
691: }
692: }
693: if (k != i) {
694: ev[k] = ev[i];
695: ev[i] = p;
696: PetscStackCallBLAS("BLASswap",BLASswap_(&n, &z[i*ldz], &one, &z[k*ldz], &one));
697: }
698: }
700: /* ...Compute MINGAP (minimum difference between neighboring eigenvalue */
701: /* approximations).............................................. */
703: *mingap = ev[1] - ev[0];
704: if (*mingap < 0.) SETERRQ2(PETSC_COMM_SELF,1,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %d is larger than approximation %d.",1,2);
705: *mingapi = 1;
706: for (i = 2; i < n; ++i) {
707: absdiff = ev[i] - ev[i-1];
708: if (absdiff < 0.) {
709: SETERRQ2(PETSC_COMM_SELF,1,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %d is larger than approximation %d.",i,i+1);
710: } else if (absdiff < *mingap) {
711: *mingap = absdiff;
712: *mingapi = i;
713: }
714: }
716: /* check whether the minimum gap between eigenvalue approximations */
717: /* may indicate severe inaccuracies in the eigenvector approximations */
719: if (*mingap <= tol / 10) *info = -103;
720: return(0);
721: #endif
722: }