Actual source code: dqds.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*
  2:    DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
  3:    Based on Matlab code from Carla Ferreira.

  5:    References:

  7:        [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
  8:            eigenvalue problem", preprint, 2012.

 10:        [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
 11:            Thesis, University of Minho, 2007.

 13:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 14:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 15:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

 17:    This file is part of SLEPc.

 19:    SLEPc is free software: you can redistribute it and/or modify it under  the
 20:    terms of version 3 of the GNU Lesser General Public License as published by
 21:    the Free Software Foundation.

 23:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 24:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 25:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 26:    more details.

 28:    You  should have received a copy of the GNU Lesser General  Public  License
 29:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 30:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 31: */
 32: #include <slepc/private/dsimpl.h>
 33: #include <slepcblaslapack.h>

 37: /*
 38:   INPUT:
 39:     a ---- diagonal of J
 40:     b ---- subdiagonal of J;
 41:     the superdiagonal of J is all 1's

 43:   OUTPUT:
 44:     For an eigenvalue lambda of J we have:
 45:       gl=<real(lambda)<=gr
 46:       -sigma=<imag(lambda)<=sigma
 47: */
 48: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
 49: {
 50:   PetscInt  i;
 51:   PetscReal b0,b1,rad;

 54:   /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
 55:    C_bal is the balanced form of C */
 56:   /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
 57:   *sigma = 0.0;
 58:   b0 = 0.0;
 59:   for (i=0;i<n-1;i++) {
 60:     if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
 61:     else b1 = 0.0;
 62:     *sigma = PetscMax(*sigma,b1+b0);
 63:     b0 = b1;
 64:   }
 65:   *sigma = PetscMax(*sigma,b0);
 66:   /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
 67:   rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
 68:   *gr = a[0]+rad;
 69:   *gl = a[0]-rad;
 70:   b0 = rad;
 71:   for (i=1;i<n-1;i++) {
 72:     b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
 73:     rad = b0+b1;
 74:     *gr = PetscMax(*gr,a[i]+rad);
 75:     *gl = PetscMin(*gl,a[i]-rad);
 76:     b0 = b1;
 77:   }
 78:   rad = b0;
 79:   *gr = PetscMax(*gr,a[n-1]+rad);
 80:   *gl = PetscMin(*gl,a[n-1]-rad);
 81:   return(0);
 82: }

 86: /*
 87:   INPUT:
 88:     a  - vector with the diagonal elements
 89:     b  - vector with the subdiagonal elements
 90:     gl - Gersgorin left bound (real axis)
 91:     gr - Gersgorin right bound (real axis)
 92:   OUTPUT:
 93:     eigvalue - multiple eigenvalue (if there is an eigenvalue)
 94:     m        - its multiplicity    (m=0 if there isn't a multiple eigenvalue)
 95:     X        - matrix of generalized eigenvectors
 96:     shift
 97: */
 98: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *work,PetscReal nw)
 99: {

102:   PetscReal      mu,tol,*a1,*y,*yp,*x,*xp;
103:   PetscInt       i,k,nwall=0;

106:   *m = 0;
107:   mu = 0.0;
108:   for (i=0;i<n;i++) mu += a[i];
109:   mu /= n;
110:   tol = n*PETSC_MACHINE_EPSILON*(gr-gl);
111:   nwall = 5*n+4;
112:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
113:   a1 = work; /* size n */
114:   y = work+n; /* size n+1 */
115:   yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
116:   x = yp+n+1; /* size n+1 */
117:   xp = x+n+1; /* size n+1 */
118:   for (i=0;i<n;i++) a1[i] = mu-a[i];
119:   x[0] = 1;
120:   xp[0] = 0;
121:   x[1] = a1[0];
122:   xp[1] = 1;
123:   for (i=1;i<n;i++) {
124:     x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
125:     xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
126:   }
127:   *shift = mu;
128:   if (PetscAbsReal(x[n])<tol) {
129:     /* mu is an eigenvalue */
130:     *m = *m+1;
131:     if (PetscAbsReal(xp[n])<tol) {
132:       /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
133:       k = 0;
134:       while (PetscAbsReal(xp[n])<tol && k<n-1) {
135:         PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
136:         PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
137:         x[k] = 0.0;
138:         k++;
139:         x[k] = 1.0;
140:         xp[k] = 0.0;
141:         x[k+1] = a1[k] + y[k];
142:         xp[k+1] = 1+yp[k];
143:         for (i=k+1;i<n;i++) {
144:           x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
145:           xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
146:         }
147:         *m = *m+1;
148:       }
149:     }
150:   }
151: /*
152:   When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu

154:   Need to check for overflow!

156:   After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
157:   If m!=0, the only output to be used is the shift returned.
158: */
159:   return(0);
160: }

164: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *work,PetscInt nw)
165: {
166:   PetscInt       nwall,i;
167:   PetscReal      *a1;

170:   nwall = n;
171:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
172:   a1 = work;
173:   for (i=0;i<n;i++) a1[i] = a[i]-shift;
174:   *fail = 0;
175:   for (i=0;i<n-1;i++) {
176:     U[i] = a1[i];
177:     L[i] = b[i]/U[i];
178:     a1[i+1] = a1[i+1]-L[i];
179:   }
180:   U[n-1] = a1[n-1];

182:   /* Check if there are NaN values */
183:   for (i=0;i<n-1 && !*fail;i++) {
184:     if (PetscIsInfOrNanReal(L[i])) *fail=1;
185:     if (PetscIsInfOrNanReal(U[i])) *fail=1;
186:   }
187:   if (!*fail && PetscIsInfOrNanReal(U[n-1])) *fail=1;

189:   for (i=0;i<n-1 && !*fail;i++) {
190:     if (PetscAbsReal(L[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
191:     if (PetscAbsReal(U[i])>tol*norm) *fail = 1;
192:   }
193:   if (!*fail && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
194:   return(0);
195: }

199: static PetscErrorCode RealDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
200: {
201:   PetscReal d;
202:   PetscInt  i;

205:   *fail = 0;
206:   d = U[0]-shift;
207:   for (i=0;i<n-1;i++) {
208:     U1[i] = d+L[i];
209:     L1[i] = L[i]*(U[i+1]/U1[i]);
210:     d = d*(U[i+1]/U1[i])-shift;
211:   }
212:   U1[n-1]=d;

214:   /* The following demands IEEE arithmetic */
215:   for (i=0;i<n-1 && !*fail;i++) {
216:     if (PetscIsInfOrNanReal(L1[i])) *fail=1;
217:     if (PetscIsInfOrNanReal(U1[i])) *fail=1;
218:   }
219:   if (!*fail && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
220:   for (i=0;i<n-1 && !*fail;i++) {
221:     if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
222:     if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
223:   }
224:   if (!*fail && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
225:   return(0);
226: }

230: static PetscErrorCode TridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
231: {
232:   PetscReal xl,yl,xr,yr,zr;
233:   PetscInt  i;

236:   *fail = 0;
237:   xr = 1.0;
238:   yr = e[0];
239:   zr = 0.0;
240:   /* Step 1 */
241:   /* the efect of Z1 */
242:   xr = xr*q[0]+yr;
243:   /* the inverse of L1 */
244:   xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
245:   yl = -(q[2]*e[1]*q[1]*e[0])/xl;
246:   xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
247:   /* the efect of L1 */
248:   q[0] = xr-xl;
249:   xr = yr-xl;
250:   yr = zr-yl-xl*e[1];
251:   /*the inverse of Y1 */
252:   xr = xr/q[0];
253:   yr = yr/q[0];
254:   /*the effect of Y1 inverse */
255:   e[0] = xl+yr+xr*q[1];
256:   xl = yl+zr+yr*q[2];      /* zr=0  when n=3 */
257:   /*the effect of Y1 */
258:   xr = 1.0-xr;
259:   yr = e[1]-yr;

261:   /* STEP n-1 */

263:   if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
264:     /* the efect of Zn-1 */
265:     xr = xr*q[n-2]+yr;
266:     /* the inverse of Ln-1 */
267:     xl = -xl/e[n-3];
268:     /* the efect of Ln-1 */
269:     q[n-2] = xr-xl;
270:     xr = yr-xl;
271:     /*the inverse of Yn-1 */
272:     xr = xr/q[n-2];
273:     /*the effect of the inverse of Yn-1 */
274:     e[n-2] = xl+xr*q[n-1];
275:     /*the effects of Yn-1 */
276:     xr = 1.0-xr;
277:     /* STEP n */
278:     /*the effect of Zn */
279:     xr = xr*q[n-1];
280:     /*the inverse of Ln=I */
281:     /*the effect of Ln */
282:     q[n-1] = xr;
283:     /* the inverse of  Yn-1=I */

285:   } else { /* Free deflation */
286:     e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5;       /* Sum=trace/2 */
287:     q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl;             /* det */
288:     q[n-1] = e[n-2]*e[n-2]-q[n-2];
289:     *fail = 2;
290:   }

292:   /* The following demands IEEE arithmetic */
293:   for (i=0;i<n-1 && !*fail;i++) {
294:     if (PetscIsInfOrNanReal(e[i])) *fail=1;
295:     if (PetscIsInfOrNanReal(q[i])) *fail=1;
296:   }
297:   if (!*fail && PetscIsInfOrNanReal(q[n-1])) *fail=1;
298:   for (i=0;i<n-1 && !*fail;i++) {
299:     if (PetscAbsReal(e[i])>tol*norm) *fail=1;
300:     if (PetscAbsReal(q[i])>tol*norm) *fail=1;
301:   }
302:   if (!*fail && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
303:   return(0);
304: }

308: static PetscErrorCode TridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail)
309: {
311:   PetscInt       i;
312:   PetscReal      xl,yl,xr,yr,zr;

315:   for (i=0;i<n-1;i++) {
316:     e1[i] = e[i];
317:     q1[i] = q[i];
318:   }
319:   q1[n-1] = q[n-1];
320:   *fail = 0;
321:   if (n>3) {   /* For n>3 */
322:     *fail = 0;
323:     xr = 1;
324:     yr = e1[0];
325:     zr = 0;
326:     /* step 1 */
327:     /* the efect of Z1 */
328:     xr = xr*q1[0]+yr;
329:     /* the inverse of L1 */
330:     xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
331:     yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
332:     xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
333:     /* the efect of L1 */
334:     q1[0] = xr-xl;
335:     xr = yr-xl;
336:     yr = zr-yl-xl*e1[1];
337:     zr = -yl*e1[2];
338:     /* the inverse of Y1 */
339:     xr = xr/q1[0];
340:     yr = yr/q1[0];
341:     zr = zr/q1[0];
342:     /* the effect of Y1 inverse */
343:     e1[0] = xl+yr+xr*q1[1];
344:     xl = yl+zr+yr*q1[2];
345:     yl = zr*q1[3];
346:     /* the effect of Y1 */
347:     xr = 1-xr;
348:     yr = e1[1]-yr;
349:     zr = -zr;
350:     /* step i=2,...,n-3 */
351:     for (i=1;i<n-3;i++) {
352:       /* the efect of Zi */
353:       xr = xr*q1[i]+yr;
354:       /* the inverse of Li */
355:       xl = -xl/e1[i-1];
356:       yl = -yl/e1[i-1];
357:       /* the efect of Li */
358:       q1[i] = xr-xl;
359:       xr = yr-xl;
360:       yr = zr-yl-xl*e1[i+1];
361:       zr = -yl*e1[i+2];
362:       /* the inverse of Yi */
363:       xr = xr/q1[i];
364:       yr = yr/q1[i];
365:       zr = zr/q1[i];
366:       /* the effect of the inverse of Yi */
367:       e1[i] = xl+yr+xr*q1[i+1];
368:       xl = yl+zr+yr*q1[i+2];
369:       yl = zr*q1[i+3];
370:       /* the effects of Yi */
371:       xr = 1.0-xr;
372:       yr = e1[i+1]-yr;
373:       zr = -zr;
374:     }

376:     /* STEP n-2            zr is no longer needed */

378:     /* the efect of Zn-2 */
379:     xr = xr*q1[n-3]+yr;
380:     /* the inverse of Ln-2 */
381:     xl = -xl/e1[n-4];
382:     yl = -yl/e1[n-4];
383:     /* the efect of Ln-2 */
384:     q1[n-3] = xr-xl;
385:     xr = yr-xl;
386:     yr = zr-yl-xl*e1[n-2];
387:     /* the inverse of Yn-2 */
388:     xr = xr/q1[n-3];
389:     yr = yr/q1[n-3];
390:     /* the effect of the inverse of Yn-2 */
391:     e1[n-3] = xl+yr+xr*q1[n-2];
392:     xl = yl+yr*q1[n-1];
393:     /* the effect of Yn-2 */
394:     xr = 1.0-xr;
395:     yr = e1[n-2]-yr;

397:     /* STEP n-1           yl and yr are no longer needed */
398:     /* Testing for EARLY DEFLATION */

400:     if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
401:       /* the efect of Zn-1 */
402:       xr = xr*q1[n-2]+yr;
403:       /* the inverse of Ln-1 */
404:       xl = -xl/e1[n-3];
405:       /* the efect of Ln-1 */
406:       q1[n-2] = xr-xl;
407:       xr = yr-xl;
408:       /*the inverse of Yn-1 */
409:       xr = xr/q1[n-2];
410:       /*the effect of the inverse of Yn-1 */
411:       e1[n-2] = xl+xr*q1[n-1];
412:       /*the effects of Yn-1 */
413:       xr = 1.0-xr;

415:       /* STEP n;     xl no longer needed */
416:       /* the effect of Zn */
417:       xr = xr*q1[n-1];
418:       /* the inverse of Ln = I */
419:       /* the effect of Ln */
420:       q1[n-1] = xr;
421:       /* the inverse of  Yn-1=I */

423:     } else {  /* FREE DEFLATION */
424:       e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5;     /* sum=trace/2 */
425:       q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl;         /* det */
426:       q1[n-1] = e1[n-2]*e1[n-2]-q1[n-2];
427:       *fail = 2;
428:     }

430:     for (i=0;i<n-1 && !*fail;i++) {
431:       if (PetscIsInfOrNanReal(e1[i])) *fail=1;
432:       if (PetscIsInfOrNanReal(q1[i])) *fail=1;
433:     }
434:     if (!*fail && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
435:     for (i=0;i<n-1 && !*fail;i++) {
436:       if (PetscAbsReal(e1[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
437:       if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
438:     }
439:     if (!*fail && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;

441:   } else {  /* The case n=3 */
442:     TridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
443:   }
444:   return(0);
445: }

449: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c,PetscScalar *wr,PetscScalar *wi,PetscReal *work,PetscInt nw)
450: {
451:   PetscInt       totalIt=0;       /* Total Number of Iterations  */
452:   PetscInt       totalFail=0;     /* Total number of failures */
453:   PetscInt       nFail=0;         /* Number of failures per transformation */
454:   PetscReal      tolZero=1.0/16;  /* Tolerance for zero shifts */
455:   PetscInt       maxIt=10*n;      /* Maximum number of iterations */
456:   PetscInt       maxFail=10*n;    /* Maximum number of failures allowed per each transformation */
457:   PetscReal      tolDef=PETSC_MACHINE_EPSILON;  /* Tolerance for deflation eps, 10*eps, 100*eps */
458:   PetscReal      tolGrowth=100000;
460:   PetscInt       i,k,nwu=0,nwall,begin,ind,flag,dim,m,*split,lastSplit;
461:   PetscReal      norm,gr,gl,sigma,delta,meanEig,*U,*L,*U1,*L1;
462:   PetscReal      acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
463:   PetscBool      test1,test2;

466:   dim = n;
467:   /* Test if the matrix is unreduced */
468:   for (i=0;i<n-1;i++) {
469:     if (PetscAbsReal(b[i])==0.0 || PetscAbsReal(c[i])==0.0) SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
470:   }
471:   nwall = 9*n+4;
472:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
473:   U = work;
474:   L = work+n;
475:   U1 = work+2*n;
476:   L1 = work+3*n;
477:   nwu = 4*n;
478:   if (wi) {
479:     PetscMemzero(wi,n*sizeof(PetscScalar));
480:   }
481:   /* Normalization - the J form of C */
482:   for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */

484:   /* Scan matrix J  ---- Finding a box of inclusion for the eigenvalues */
485:   norm = 0.0;
486:   for (i=0;i<n-1;i++) {
487:     norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
488:   }
489:   norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
490:   ScanJ(n,a,b,&gl,&gr,&sigma);
491:   delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
492:   meanEig = 0.0;
493:   for (i=0;i<n;i++) meanEig += a[i];
494:   meanEig /= n; /* shift = initial shift = mean of eigenvalues */
495:   Prologue(n,a,b,gl,gr,&m,&shift,work+nwu,nwall-nwu);
496:   if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
497:     for (i=0;i<dim;i++) {
498:       wr[i] = shift;
499:       if (wi) wi[i] = 0.0;
500:     }
501:     return(0);
502:   }
503:   /* Initial LU Factorization */
504:   if (delta==0.0) shift=0.0;  /* The case when all eigenvalues are pure imaginary */
505:   LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
506:   while (flag==1 && nFail<maxFail) {
507:     shift=shift+delta;
508:     if (shift>gr || shift<gl) { /* Successive failures */
509:       shift=meanEig;
510:       delta=-delta;
511:     }
512:     nFail=nFail+1;
513:     LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu,nwall-nwu); /* flag=1 failure; flag=0 successful transformation*/
514:   }
515:   if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
516:   /* Successful Initial transformation */
517:   totalFail = totalFail+nFail;
518:   nFail = 0;
519:   acShift = 0;
520:   initialShift = shift;
521:   shift = 0;
522:   begin = 0;
523:   lastSplit = 0;
524:   PetscMalloc1(n,&split);
525:   split[lastSplit] = begin;
526:   while (begin!=-1) {
527:     while (n-begin>2 && totalIt<maxIt) {
528:       /* Check for deflation before performing a transformation */
529:       test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
530:             && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
531:             && PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])
532:             && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
533:       if (flag==2) {  /* Early 2x2 deflation */
534:         test2 = PETSC_TRUE;
535:       } else {
536:         if (n-begin>4) {
537:           test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
538:                && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
539:         } else { /* n-begin+1=3 */
540:           test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
541:         }
542:       }
543:       while (test2 || test1) {
544:         /* 2x2 deflation */
545:         if (test2) {
546:           if (flag==2) { /* Early deflation */
547:             sum = L[n-2];
548:             det = U[n-2];
549:             disc = U[n-1];
550:             flag = 0;
551:           } else {
552:             sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
553:             disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
554:             det = U[n-2]*U[n-1];
555:           }
556:           if (disc<=0) {
557: #if !defined(PETSC_USE_COMPLEX)
558:             wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
559:             wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
560: #else
561:             wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
562:             wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
563: #endif
564:           } else {
565:             if (sum==0.0) {
566:               x1 = PetscSqrtReal(disc);
567:               x2 = -x1;
568:             } else {
569:               x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
570:               x2 = det/x1;
571:             }
572:             wr[--n] = x1+acShift;
573:             wr[--n] = x2+acShift;
574:           }
575:         } else { /* test1 -- 1x1 deflation */
576:           x1 = U[n-1]+acShift;
577:           wr[--n] = x1;
578:         }

580:         if (n<=begin+2) {
581:           break;
582:         } else {
583:           test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
584:                 && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
585:                 && PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])
586:                 && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
587:           if (n-begin>4) {
588:             test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
589:                   && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
590:           } else { /* n-begin+1=3 */
591:             test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
592:           }
593:         }
594:       } /* end "WHILE deflations" */
595:       /* After deflation */
596:       if (n>begin+3) {
597:         ind = begin;
598:         for (k=n-4;k>=begin+1;k--) {
599:           if (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k])
600:            && PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) {
601:             ind=k;
602:             break;
603:           }
604:         }
605:         if (ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin])) {
606:           lastSplit = lastSplit+1;
607:           split[lastSplit] = begin;
608:           L[ind] = acShift; /* Use of L[ind] to save acShift */
609:           begin = ind+1;
610:         }
611:       }

613:       if (n>begin+2) {
614:         disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
615:         if ((PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
616:           shift = 0;
617:           sum = 0; /* Needed in case of failure */
618:           prod = 0;
619:           RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
620:           if (flag) {  /* Failure */
621:             TridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
622:             shift = 0.0;
623:             while (flag==1 && nFail<maxFail) {  /* Successive failures */
624:               shift = shift+delta;
625:               if (shift>gr-acShift || shift<gl-acShift) {
626:                 shift = meanEig-acShift;
627:                 delta = -delta;
628:               }
629:               nFail++;
630:               RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
631:             }
632:           }
633:         } else { /* L's are small */
634:           if (disc<0) {  /* disc <0   Complex case; Francis shift; 3dqds */
635:             sum = U[n-2]+L[n-2]+U[n-1];
636:             prod = U[n-2]*U[n-1];
637:             TridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
638:             shift = 0.0; /* Restoring transformation */
639:             while (flag==1 && nFail<maxFail) { /* In case of failure */
640:               shift = shift+U[n-1];  /* first time shift=0 */
641:               RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
642:               nFail++;
643:             }
644:           } else  { /* disc >0  Real case; real Wilkinson shift; dqds */
645:             sum = (L[n-2]+U[n-2]+U[n-1])/2;
646:             if (sum==0.0) {
647:               x1 = PetscSqrtReal(disc);
648:               x2 = -x1;
649:             } else {
650:               x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
651:               x2 = U[n-2]*U[n-1]/x1;
652:             }
653:             /* Take the eigenvalue closest to UL(n,n) */
654:             if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
655:               shift = x1;
656:             } else {
657:               shift = x2;
658:             }
659:             RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
660:             /* In case of failure */
661:             while (flag==1 && nFail<maxFail) {
662:               sum = 2*shift;
663:               prod = shift*shift;
664:               TridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
665:               /* In case of successive failures */
666:               if (shift==0.0) {
667:                 shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
668:               } else {
669:                 shift=shift+delta;
670:               }
671:               if (shift>gr-acShift || shift<gl-acShift) {
672:                 shift = meanEig-acShift;
673:                 delta = -delta;
674:               }
675:               if (!flag) { /* We changed from real dqds to 3dqds */
676:                 shift=0;
677:               }
678:               nFail++;
679:             }
680:           }
681:         } /* end "if tolZero" */
682:         if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
683:         /* Successful Transformation; flag==0 */
684:         totalIt++;
685:         acShift = shift+acShift;
686:         for (i=begin;i<n-1;i++) {
687:           L[i] = L1[i];
688:           U[i] = U1[i];
689:         }
690:         U[n-1] = U1[n-1];
691:         totalFail = totalFail+nFail;
692:         nFail = 0;
693:       }  /* end "if n>begin+1" */
694:     }  /* end WHILE 1 */
695:     if (totalIt>=maxIt) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
696:     /* END: n=2 or n=1  % n=begin+1 or n=begin */
697:     if (n==begin+2) {
698:       sum = (L[n-2]+U[n-2]+U[n-1])/2;
699:       disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
700:         if (disc<=0)  {  /* Complex case */
701:         /* Deflation 2 */
702: #if !defined(PETSC_USE_COMPLEX)
703:         wr[--n] = sum+acShift; wi[n] = PetscSqrtReal(-disc);
704:         wr[--n] = sum+acShift; wi[n] = -PetscSqrtReal(-disc);
705: #else
706:         wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
707:         wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
708: #endif
709:       } else  { /* Real case */
710:         if (sum==0.0) {
711:           x1 = PetscSqrtReal(disc);
712:           x2 = -x1;
713:         } else {
714:           x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
715:           x2 = U[n-2]*U[n-1]/x1;
716:         }
717:         /* Deflation 2 */
718:         wr[--n] = x2+acShift;
719:         wr[--n] = x1+acShift;
720:       }
721:     } else { /* n=1   n=begin */
722:       /* deflation 1 */
723:       x1 = U[n-1]+acShift;
724:       wr[--n] = x1;
725:     }
726:     switch (n) {
727:       case 0:
728:         begin = -1;
729:         break;
730:       case 1:
731:         acShift = L[begin-1];
732:         begin = split[lastSplit];
733:         lastSplit--;
734:         break;
735:       default : /* n>=2 */
736:         acShift = L[begin-1];
737:         begin = split[lastSplit];
738:         lastSplit--;
739:     }
740:   }/* While begin~=-1 */
741:   for (i=0;i<dim;i++) {
742:     wr[i] = wr[i]+initialShift;
743:   }
744:   PetscFree(split);
745:   return(0);
746: }

750: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
751: {
753:   PetscInt       i,off,ld,nwall,nwu;
754:   PetscScalar    *A,*B,*Q,*vi;
755:   PetscReal      *d,*e,*s,*a,*b,*c;

758: #if !defined(PETSC_USE_COMPLEX)
760: #endif
761:   ld = ds->ld;
762:   off = ds->l + ds->l*ld;
763:   A = ds->mat[DS_MAT_A];
764:   B = ds->mat[DS_MAT_B];
765:   Q = ds->mat[DS_MAT_Q];
766:   d = ds->rmat[DS_MAT_T];
767:   e = ds->rmat[DS_MAT_T] + ld;
768:   c = ds->rmat[DS_MAT_T] + 2*ld;
769:   s = ds->rmat[DS_MAT_D];
770:   /* Quick return if possible */
771:   if (ds->n-ds->l == 1) {
772:     *(Q+off) = 1;
773:     if (!ds->compact) {
774:       d[ds->l] = PetscRealPart(A[off]);
775:       s[ds->l] = PetscRealPart(B[off]);
776:     }
777:     wr[ds->l] = d[ds->l]/s[ds->l];
778:     if (wi) wi[ds->l] = 0.0;
779:     return(0);
780:   }
781:   nwall = 12*ld+4;
782:   DSAllocateWork_Private(ds,0,nwall,0);
783:   /* Reduce to pseudotriadiagonal form */
784:   DSIntermediate_GHIEP(ds);

786:   /* Compute Eigenvalues (DQDS)*/
787:   /* Form pseudosymmetric tridiagonal */
788:   a = ds->rwork;
789:   b = a+ld;
790:   c = b+ld;
791:   nwu = 3*ld;
792:   if (ds->compact) {
793:     for (i=ds->l;i<ds->n-1;i++) {
794:       a[i] = d[i]*s[i];
795:       b[i] = e[i]*s[i+1];
796:       c[i] = e[i]*s[i];
797:     }
798:     a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
799:   } else {
800:     for (i=ds->l;i<ds->n-1;i++) {
801:       a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
802:       b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
803:       c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
804:     }
805:     a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
806:   }
807:   vi = (wi)?wi+ds->l:NULL;
808:   DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu,nwall-nwu);

810:   /* Compute Eigenvectors with Inverse Iteration */
811:   DSGHIEPInverseIteration(ds,wr,wi);

813:   /* Recover eigenvalues from diagonal */
814:   DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
815: #if defined(PETSC_USE_COMPLEX)
816:   if (wi) {
817:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
818:   }
819: #endif
820:   return(0);
821: }