Actual source code: dqds.c
slepc-3.6.1 2015-09-03
1: /*
2: DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from Carla Ferreira.
5: References:
7: [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
8: eigenvalue problem", preprint, 2012.
10: [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
11: Thesis, University of Minho, 2007.
13: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14: SLEPc - Scalable Library for Eigenvalue Problem Computations
15: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
17: This file is part of SLEPc.
19: SLEPc is free software: you can redistribute it and/or modify it under the
20: terms of version 3 of the GNU Lesser General Public License as published by
21: the Free Software Foundation.
23: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
24: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26: more details.
28: You should have received a copy of the GNU Lesser General Public License
29: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
30: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
31: */
32: #include <slepc/private/dsimpl.h>
33: #include <slepcblaslapack.h>
37: /*
38: INPUT:
39: a ---- diagonal of J
40: b ---- subdiagonal of J;
41: the superdiagonal of J is all 1's
43: OUTPUT:
44: For an eigenvalue lambda of J we have:
45: gl=<real(lambda)<=gr
46: -sigma=<imag(lambda)<=sigma
47: */
48: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
49: {
50: PetscInt i;
51: PetscReal b0,b1,rad;
54: /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
55: C_bal is the balanced form of C */
56: /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
57: *sigma = 0.0;
58: b0 = 0.0;
59: for (i=0;i<n-1;i++) {
60: if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
61: else b1 = 0.0;
62: *sigma = PetscMax(*sigma,b1+b0);
63: b0 = b1;
64: }
65: *sigma = PetscMax(*sigma,b0);
66: /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
67: rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
68: *gr = a[0]+rad;
69: *gl = a[0]-rad;
70: b0 = rad;
71: for (i=1;i<n-1;i++) {
72: b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
73: rad = b0+b1;
74: *gr = PetscMax(*gr,a[i]+rad);
75: *gl = PetscMin(*gl,a[i]-rad);
76: b0 = b1;
77: }
78: rad = b0;
79: *gr = PetscMax(*gr,a[n-1]+rad);
80: *gl = PetscMin(*gl,a[n-1]-rad);
81: return(0);
82: }
86: /*
87: INPUT:
88: a - vector with the diagonal elements
89: b - vector with the subdiagonal elements
90: gl - Gersgorin left bound (real axis)
91: gr - Gersgorin right bound (real axis)
92: OUTPUT:
93: eigvalue - multiple eigenvalue (if there is an eigenvalue)
94: m - its multiplicity (m=0 if there isn't a multiple eigenvalue)
95: X - matrix of generalized eigenvectors
96: shift
97: */
98: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *work,PetscReal nw)
99: {
102: PetscReal mu,tol,*a1,*y,*yp,*x,*xp;
103: PetscInt i,k,nwall=0;
106: *m = 0;
107: mu = 0.0;
108: for (i=0;i<n;i++) mu += a[i];
109: mu /= n;
110: tol = n*PETSC_MACHINE_EPSILON*(gr-gl);
111: nwall = 5*n+4;
112: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
113: a1 = work; /* size n */
114: y = work+n; /* size n+1 */
115: yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
116: x = yp+n+1; /* size n+1 */
117: xp = x+n+1; /* size n+1 */
118: for (i=0;i<n;i++) a1[i] = mu-a[i];
119: x[0] = 1;
120: xp[0] = 0;
121: x[1] = a1[0];
122: xp[1] = 1;
123: for (i=1;i<n;i++) {
124: x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
125: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
126: }
127: *shift = mu;
128: if (PetscAbsReal(x[n])<tol) {
129: /* mu is an eigenvalue */
130: *m = *m+1;
131: if (PetscAbsReal(xp[n])<tol) {
132: /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
133: k = 0;
134: while (PetscAbsReal(xp[n])<tol && k<n-1) {
135: PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
136: PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
137: x[k] = 0.0;
138: k++;
139: x[k] = 1.0;
140: xp[k] = 0.0;
141: x[k+1] = a1[k] + y[k];
142: xp[k+1] = 1+yp[k];
143: for (i=k+1;i<n;i++) {
144: x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
145: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
146: }
147: *m = *m+1;
148: }
149: }
150: }
151: /*
152: When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu
154: Need to check for overflow!
156: After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
157: If m!=0, the only output to be used is the shift returned.
158: */
159: return(0);
160: }
164: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *work,PetscInt nw)
165: {
166: PetscInt nwall,i;
167: PetscReal *a1;
170: nwall = n;
171: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
172: a1 = work;
173: for (i=0;i<n;i++) a1[i] = a[i]-shift;
174: *fail = 0;
175: for (i=0;i<n-1;i++) {
176: U[i] = a1[i];
177: L[i] = b[i]/U[i];
178: a1[i+1] = a1[i+1]-L[i];
179: }
180: U[n-1] = a1[n-1];
182: /* Check if there are NaN values */
183: for (i=0;i<n-1 && !*fail;i++) {
184: if (PetscIsInfOrNanReal(L[i])) *fail=1;
185: if (PetscIsInfOrNanReal(U[i])) *fail=1;
186: }
187: if (!*fail && PetscIsInfOrNanReal(U[n-1])) *fail=1;
189: for (i=0;i<n-1 && !*fail;i++) {
190: if (PetscAbsReal(L[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
191: if (PetscAbsReal(U[i])>tol*norm) *fail = 1;
192: }
193: if (!*fail && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
194: return(0);
195: }
199: static PetscErrorCode RealDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
200: {
201: PetscReal d;
202: PetscInt i;
205: *fail = 0;
206: d = U[0]-shift;
207: for (i=0;i<n-1;i++) {
208: U1[i] = d+L[i];
209: L1[i] = L[i]*(U[i+1]/U1[i]);
210: d = d*(U[i+1]/U1[i])-shift;
211: }
212: U1[n-1]=d;
214: /* The following demands IEEE arithmetic */
215: for (i=0;i<n-1 && !*fail;i++) {
216: if (PetscIsInfOrNanReal(L1[i])) *fail=1;
217: if (PetscIsInfOrNanReal(U1[i])) *fail=1;
218: }
219: if (!*fail && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
220: for (i=0;i<n-1 && !*fail;i++) {
221: if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
222: if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
223: }
224: if (!*fail && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
225: return(0);
226: }
230: static PetscErrorCode TridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
231: {
232: PetscReal xl,yl,xr,yr,zr;
233: PetscInt i;
236: *fail = 0;
237: xr = 1.0;
238: yr = e[0];
239: zr = 0.0;
240: /* Step 1 */
241: /* the efect of Z1 */
242: xr = xr*q[0]+yr;
243: /* the inverse of L1 */
244: xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
245: yl = -(q[2]*e[1]*q[1]*e[0])/xl;
246: xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
247: /* the efect of L1 */
248: q[0] = xr-xl;
249: xr = yr-xl;
250: yr = zr-yl-xl*e[1];
251: /*the inverse of Y1 */
252: xr = xr/q[0];
253: yr = yr/q[0];
254: /*the effect of Y1 inverse */
255: e[0] = xl+yr+xr*q[1];
256: xl = yl+zr+yr*q[2]; /* zr=0 when n=3 */
257: /*the effect of Y1 */
258: xr = 1.0-xr;
259: yr = e[1]-yr;
261: /* STEP n-1 */
263: if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
264: /* the efect of Zn-1 */
265: xr = xr*q[n-2]+yr;
266: /* the inverse of Ln-1 */
267: xl = -xl/e[n-3];
268: /* the efect of Ln-1 */
269: q[n-2] = xr-xl;
270: xr = yr-xl;
271: /*the inverse of Yn-1 */
272: xr = xr/q[n-2];
273: /*the effect of the inverse of Yn-1 */
274: e[n-2] = xl+xr*q[n-1];
275: /*the effects of Yn-1 */
276: xr = 1.0-xr;
277: /* STEP n */
278: /*the effect of Zn */
279: xr = xr*q[n-1];
280: /*the inverse of Ln=I */
281: /*the effect of Ln */
282: q[n-1] = xr;
283: /* the inverse of Yn-1=I */
285: } else { /* Free deflation */
286: e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5; /* Sum=trace/2 */
287: q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl; /* det */
288: q[n-1] = e[n-2]*e[n-2]-q[n-2];
289: *fail = 2;
290: }
292: /* The following demands IEEE arithmetic */
293: for (i=0;i<n-1 && !*fail;i++) {
294: if (PetscIsInfOrNanReal(e[i])) *fail=1;
295: if (PetscIsInfOrNanReal(q[i])) *fail=1;
296: }
297: if (!*fail && PetscIsInfOrNanReal(q[n-1])) *fail=1;
298: for (i=0;i<n-1 && !*fail;i++) {
299: if (PetscAbsReal(e[i])>tol*norm) *fail=1;
300: if (PetscAbsReal(q[i])>tol*norm) *fail=1;
301: }
302: if (!*fail && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
303: return(0);
304: }
308: static PetscErrorCode TridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail)
309: {
311: PetscInt i;
312: PetscReal xl,yl,xr,yr,zr;
315: for (i=0;i<n-1;i++) {
316: e1[i] = e[i];
317: q1[i] = q[i];
318: }
319: q1[n-1] = q[n-1];
320: *fail = 0;
321: if (n>3) { /* For n>3 */
322: *fail = 0;
323: xr = 1;
324: yr = e1[0];
325: zr = 0;
326: /* step 1 */
327: /* the efect of Z1 */
328: xr = xr*q1[0]+yr;
329: /* the inverse of L1 */
330: xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
331: yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
332: xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
333: /* the efect of L1 */
334: q1[0] = xr-xl;
335: xr = yr-xl;
336: yr = zr-yl-xl*e1[1];
337: zr = -yl*e1[2];
338: /* the inverse of Y1 */
339: xr = xr/q1[0];
340: yr = yr/q1[0];
341: zr = zr/q1[0];
342: /* the effect of Y1 inverse */
343: e1[0] = xl+yr+xr*q1[1];
344: xl = yl+zr+yr*q1[2];
345: yl = zr*q1[3];
346: /* the effect of Y1 */
347: xr = 1-xr;
348: yr = e1[1]-yr;
349: zr = -zr;
350: /* step i=2,...,n-3 */
351: for (i=1;i<n-3;i++) {
352: /* the efect of Zi */
353: xr = xr*q1[i]+yr;
354: /* the inverse of Li */
355: xl = -xl/e1[i-1];
356: yl = -yl/e1[i-1];
357: /* the efect of Li */
358: q1[i] = xr-xl;
359: xr = yr-xl;
360: yr = zr-yl-xl*e1[i+1];
361: zr = -yl*e1[i+2];
362: /* the inverse of Yi */
363: xr = xr/q1[i];
364: yr = yr/q1[i];
365: zr = zr/q1[i];
366: /* the effect of the inverse of Yi */
367: e1[i] = xl+yr+xr*q1[i+1];
368: xl = yl+zr+yr*q1[i+2];
369: yl = zr*q1[i+3];
370: /* the effects of Yi */
371: xr = 1.0-xr;
372: yr = e1[i+1]-yr;
373: zr = -zr;
374: }
376: /* STEP n-2 zr is no longer needed */
378: /* the efect of Zn-2 */
379: xr = xr*q1[n-3]+yr;
380: /* the inverse of Ln-2 */
381: xl = -xl/e1[n-4];
382: yl = -yl/e1[n-4];
383: /* the efect of Ln-2 */
384: q1[n-3] = xr-xl;
385: xr = yr-xl;
386: yr = zr-yl-xl*e1[n-2];
387: /* the inverse of Yn-2 */
388: xr = xr/q1[n-3];
389: yr = yr/q1[n-3];
390: /* the effect of the inverse of Yn-2 */
391: e1[n-3] = xl+yr+xr*q1[n-2];
392: xl = yl+yr*q1[n-1];
393: /* the effect of Yn-2 */
394: xr = 1.0-xr;
395: yr = e1[n-2]-yr;
397: /* STEP n-1 yl and yr are no longer needed */
398: /* Testing for EARLY DEFLATION */
400: if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
401: /* the efect of Zn-1 */
402: xr = xr*q1[n-2]+yr;
403: /* the inverse of Ln-1 */
404: xl = -xl/e1[n-3];
405: /* the efect of Ln-1 */
406: q1[n-2] = xr-xl;
407: xr = yr-xl;
408: /*the inverse of Yn-1 */
409: xr = xr/q1[n-2];
410: /*the effect of the inverse of Yn-1 */
411: e1[n-2] = xl+xr*q1[n-1];
412: /*the effects of Yn-1 */
413: xr = 1.0-xr;
415: /* STEP n; xl no longer needed */
416: /* the effect of Zn */
417: xr = xr*q1[n-1];
418: /* the inverse of Ln = I */
419: /* the effect of Ln */
420: q1[n-1] = xr;
421: /* the inverse of Yn-1=I */
423: } else { /* FREE DEFLATION */
424: e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5; /* sum=trace/2 */
425: q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl; /* det */
426: q1[n-1] = e1[n-2]*e1[n-2]-q1[n-2];
427: *fail = 2;
428: }
430: for (i=0;i<n-1 && !*fail;i++) {
431: if (PetscIsInfOrNanReal(e1[i])) *fail=1;
432: if (PetscIsInfOrNanReal(q1[i])) *fail=1;
433: }
434: if (!*fail && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
435: for (i=0;i<n-1 && !*fail;i++) {
436: if (PetscAbsReal(e1[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
437: if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
438: }
439: if (!*fail && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
441: } else { /* The case n=3 */
442: TridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
443: }
444: return(0);
445: }
449: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c,PetscScalar *wr,PetscScalar *wi,PetscReal *work,PetscInt nw)
450: {
451: PetscInt totalIt=0; /* Total Number of Iterations */
452: PetscInt totalFail=0; /* Total number of failures */
453: PetscInt nFail=0; /* Number of failures per transformation */
454: PetscReal tolZero=1.0/16; /* Tolerance for zero shifts */
455: PetscInt maxIt=10*n; /* Maximum number of iterations */
456: PetscInt maxFail=10*n; /* Maximum number of failures allowed per each transformation */
457: PetscReal tolDef=PETSC_MACHINE_EPSILON; /* Tolerance for deflation eps, 10*eps, 100*eps */
458: PetscReal tolGrowth=100000;
460: PetscInt i,k,nwu=0,nwall,begin,ind,flag,dim,m,*split,lastSplit;
461: PetscReal norm,gr,gl,sigma,delta,meanEig,*U,*L,*U1,*L1;
462: PetscReal acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
463: PetscBool test1,test2;
466: dim = n;
467: /* Test if the matrix is unreduced */
468: for (i=0;i<n-1;i++) {
469: if (PetscAbsReal(b[i])==0.0 || PetscAbsReal(c[i])==0.0) SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
470: }
471: nwall = 9*n+4;
472: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
473: U = work;
474: L = work+n;
475: U1 = work+2*n;
476: L1 = work+3*n;
477: nwu = 4*n;
478: if (wi) {
479: PetscMemzero(wi,n*sizeof(PetscScalar));
480: }
481: /* Normalization - the J form of C */
482: for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */
484: /* Scan matrix J ---- Finding a box of inclusion for the eigenvalues */
485: norm = 0.0;
486: for (i=0;i<n-1;i++) {
487: norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
488: }
489: norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
490: ScanJ(n,a,b,&gl,&gr,&sigma);
491: delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
492: meanEig = 0.0;
493: for (i=0;i<n;i++) meanEig += a[i];
494: meanEig /= n; /* shift = initial shift = mean of eigenvalues */
495: Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
496: if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
497: for (i=0;i<dim;i++) {
498: wr[i] = shift;
499: if (wi) wi[i] = 0.0;
500: }
501: return(0);
502: }
503: /* Initial LU Factorization */
504: if (delta==0.0) shift=0.0; /* The case when all eigenvalues are pure imaginary */
505: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
506: while (flag==1 && nFail<maxFail) {
507: shift=shift+delta;
508: if (shift>gr || shift<gl) { /* Successive failures */
509: shift=meanEig;
510: delta=-delta;
511: }
512: nFail=nFail+1;
513: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
514: }
515: if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
516: /* Successful Initial transformation */
517: totalFail = totalFail+nFail;
518: nFail = 0;
519: acShift = 0;
520: initialShift = shift;
521: shift = 0;
522: begin = 0;
523: lastSplit = 0;
524: PetscMalloc1(n,&split);
525: split[lastSplit] = begin;
526: while (begin!=-1) {
527: while (n-begin>2 && totalIt<maxIt) {
528: /* Check for deflation before performing a transformation */
529: test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
530: && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
531: && PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])
532: && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
533: if (flag==2) { /* Early 2x2 deflation */
534: test2 = PETSC_TRUE;
535: } else {
536: if (n-begin>4) {
537: test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
538: && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
539: } else { /* n-begin+1=3 */
540: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
541: }
542: }
543: while (test2 || test1) {
544: /* 2x2 deflation */
545: if (test2) {
546: if (flag==2) { /* Early deflation */
547: sum = L[n-2];
548: det = U[n-2];
549: disc = U[n-1];
550: flag = 0;
551: } else {
552: sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
553: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
554: det = U[n-2]*U[n-1];
555: }
556: if (disc<=0) {
557: #if !defined(PETSC_USE_COMPLEX)
558: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
559: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
560: #else
561: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
562: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
563: #endif
564: } else {
565: if (sum==0.0) {
566: x1 = PetscSqrtReal(disc);
567: x2 = -x1;
568: } else {
569: x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
570: x2 = det/x1;
571: }
572: wr[--n] = x1+acShift;
573: wr[--n] = x2+acShift;
574: }
575: } else { /* test1 -- 1x1 deflation */
576: x1 = U[n-1]+acShift;
577: wr[--n] = x1;
578: }
580: if (n<=begin+2) {
581: break;
582: } else {
583: test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
584: && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
585: && PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])
586: && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
587: if (n-begin>4) {
588: test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
589: && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
590: } else { /* n-begin+1=3 */
591: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
592: }
593: }
594: } /* end "WHILE deflations" */
595: /* After deflation */
596: if (n>begin+3) {
597: ind = begin;
598: for (k=n-4;k>=begin+1;k--) {
599: if (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k])
600: && PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) {
601: ind=k;
602: break;
603: }
604: }
605: if (ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin])) {
606: lastSplit = lastSplit+1;
607: split[lastSplit] = begin;
608: L[ind] = acShift; /* Use of L[ind] to save acShift */
609: begin = ind+1;
610: }
611: }
613: if (n>begin+2) {
614: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
615: if ((PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
616: shift = 0;
617: sum = 0; /* Needed in case of failure */
618: prod = 0;
619: RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
620: if (flag) { /* Failure */
621: TridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
622: shift = 0.0;
623: while (flag==1 && nFail<maxFail) { /* Successive failures */
624: shift = shift+delta;
625: if (shift>gr-acShift || shift<gl-acShift) {
626: shift = meanEig-acShift;
627: delta = -delta;
628: }
629: nFail++;
630: RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
631: }
632: }
633: } else { /* L's are small */
634: if (disc<0) { /* disc <0 Complex case; Francis shift; 3dqds */
635: sum = U[n-2]+L[n-2]+U[n-1];
636: prod = U[n-2]*U[n-1];
637: TridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
638: shift = 0.0; /* Restoring transformation */
639: while (flag==1 && nFail<maxFail) { /* In case of failure */
640: shift = shift+U[n-1]; /* first time shift=0 */
641: RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
642: nFail++;
643: }
644: } else { /* disc >0 Real case; real Wilkinson shift; dqds */
645: sum = (L[n-2]+U[n-2]+U[n-1])/2;
646: if (sum==0.0) {
647: x1 = PetscSqrtReal(disc);
648: x2 = -x1;
649: } else {
650: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
651: x2 = U[n-2]*U[n-1]/x1;
652: }
653: /* Take the eigenvalue closest to UL(n,n) */
654: if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
655: shift = x1;
656: } else {
657: shift = x2;
658: }
659: RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
660: /* In case of failure */
661: while (flag==1 && nFail<maxFail) {
662: sum = 2*shift;
663: prod = shift*shift;
664: TridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
665: /* In case of successive failures */
666: if (shift==0.0) {
667: shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
668: } else {
669: shift=shift+delta;
670: }
671: if (shift>gr-acShift || shift<gl-acShift) {
672: shift = meanEig-acShift;
673: delta = -delta;
674: }
675: if (!flag) { /* We changed from real dqds to 3dqds */
676: shift=0;
677: }
678: nFail++;
679: }
680: }
681: } /* end "if tolZero" */
682: if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
683: /* Successful Transformation; flag==0 */
684: totalIt++;
685: acShift = shift+acShift;
686: for (i=begin;i<n-1;i++) {
687: L[i] = L1[i];
688: U[i] = U1[i];
689: }
690: U[n-1] = U1[n-1];
691: totalFail = totalFail+nFail;
692: nFail = 0;
693: } /* end "if n>begin+1" */
694: } /* end WHILE 1 */
695: if (totalIt>=maxIt) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
696: /* END: n=2 or n=1 % n=begin+1 or n=begin */
697: if (n==begin+2) {
698: sum = (L[n-2]+U[n-2]+U[n-1])/2;
699: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
700: if (disc<=0) { /* Complex case */
701: /* Deflation 2 */
702: #if !defined(PETSC_USE_COMPLEX)
703: wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
704: wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
705: #else
706: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
707: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
708: #endif
709: } else { /* Real case */
710: if (sum==0.0) {
711: x1 = PetscSqrtReal(disc);
712: x2 = -x1;
713: } else {
714: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
715: x2 = U[n-2]*U[n-1]/x1;
716: }
717: /* Deflation 2 */
718: wr[--n] = x2+acShift;
719: wr[--n] = x1+acShift;
720: }
721: } else { /* n=1 n=begin */
722: /* deflation 1 */
723: x1 = U[n-1]+acShift;
724: wr[--n] = x1;
725: }
726: switch (n) {
727: case 0:
728: begin = -1;
729: break;
730: case 1:
731: acShift = L[begin-1];
732: begin = split[lastSplit];
733: lastSplit--;
734: break;
735: default : /* n>=2 */
736: acShift = L[begin-1];
737: begin = split[lastSplit];
738: lastSplit--;
739: }
740: }/* While begin~=-1 */
741: for (i=0;i<dim;i++) {
742: wr[i] = wr[i]+initialShift;
743: }
744: PetscFree(split);
745: return(0);
746: }
750: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
751: {
753: PetscInt i,off,ld,nwall,nwu;
754: PetscScalar *A,*B,*Q,*vi;
755: PetscReal *d,*e,*s,*a,*b,*c;
758: #if !defined(PETSC_USE_COMPLEX)
760: #endif
761: ld = ds->ld;
762: off = ds->l + ds->l*ld;
763: A = ds->mat[DS_MAT_A];
764: B = ds->mat[DS_MAT_B];
765: Q = ds->mat[DS_MAT_Q];
766: d = ds->rmat[DS_MAT_T];
767: e = ds->rmat[DS_MAT_T] + ld;
768: c = ds->rmat[DS_MAT_T] + 2*ld;
769: s = ds->rmat[DS_MAT_D];
770: /* Quick return if possible */
771: if (ds->n-ds->l == 1) {
772: *(Q+off) = 1;
773: if (!ds->compact) {
774: d[ds->l] = PetscRealPart(A[off]);
775: s[ds->l] = PetscRealPart(B[off]);
776: }
777: wr[ds->l] = d[ds->l]/s[ds->l];
778: if (wi) wi[ds->l] = 0.0;
779: return(0);
780: }
781: nwall = 12*ld+4;
782: DSAllocateWork_Private(ds,0,nwall,0);
783: /* Reduce to pseudotriadiagonal form */
784: DSIntermediate_GHIEP(ds);
786: /* Compute Eigenvalues (DQDS)*/
787: /* Form pseudosymmetric tridiagonal */
788: a = ds->rwork;
789: b = a+ld;
790: c = b+ld;
791: nwu = 3*ld;
792: if (ds->compact) {
793: for (i=ds->l;i<ds->n-1;i++) {
794: a[i] = d[i]*s[i];
795: b[i] = e[i]*s[i+1];
796: c[i] = e[i]*s[i];
797: }
798: a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
799: } else {
800: for (i=ds->l;i<ds->n-1;i++) {
801: a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
802: b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
803: c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
804: }
805: a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
806: }
807: vi = (wi)?wi+ds->l:NULL;
808: DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu,nwall-nwu);
810: /* Compute Eigenvectors with Inverse Iteration */
811: DSGHIEPInverseIteration(ds,wr,wi);
813: /* Recover eigenvalues from diagonal */
814: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
815: #if defined(PETSC_USE_COMPLEX)
816: if (wi) {
817: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
818: }
819: #endif
820: return(0);
821: }