Actual source code: invit.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*

  3:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  4:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  5:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

  7:    This file is part of SLEPc.

  9:    SLEPc is free software: you can redistribute it and/or modify it under  the
 10:    terms of version 3 of the GNU Lesser General Public License as published by
 11:    the Free Software Foundation.

 13:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 14:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 15:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 16:    more details.

 18:    You  should have received a copy of the GNU Lesser General  Public  License
 19:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 20:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 21: */
 22: #include <slepc/private/dsimpl.h>
 23: #include <slepcblaslapack.h>

 25: struct HRtr
 26: {
 27:   PetscScalar *data;
 28:   PetscInt    m;
 29:   PetscInt    idx[2];
 30:   PetscInt    n[2];
 31:   PetscScalar tau[2];
 32:   PetscReal   alpha;
 33:   PetscReal   cs;
 34:   PetscReal   sn;
 35:   PetscInt    type;
 36: };

 40: /*
 41:   Generates a hyperbolic rotation
 42:     if x1*x1 - x2*x2 != 0
 43:       r = sqrt(|x1*x1 - x2*x2|)
 44:       c = x1/r  s = x2/r

 46:       | c -s||x1|   |d*r|
 47:       |-s  c||x2| = | 0 |
 48:       where d = 1 for type==1 and -1 for type==2
 49:   Returns the condition number of the reduction
 50: */
 51: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
 52: {
 53:   PetscReal t,n2,xa,xb;
 54:   PetscInt  type_;

 57:   if (x2==0.0) {
 58:     *r = PetscAbsReal(x1);
 59:     *c = (x1>=0)?1.0:-1.0;
 60:     *s = 0.0;
 61:     if (type) *type = 1;
 62:     return(0);
 63:   }
 64:   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
 65:     /* hyperbolic rotation doesn't exist */
 66:     *c = 0.0;
 67:     *s = 0.0;
 68:     *r = 0.0;
 69:     if (type) *type = 0;
 70:     *cond = PETSC_MAX_REAL;
 71:     return(0);
 72:   }

 74:   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
 75:     xa = x1; xb = x2; type_ = 1;
 76:   } else {
 77:     xa = x2; xb = x1; type_ = 2;
 78:   }
 79:   t = xb/xa;
 80:   n2 = PetscAbsReal(1 - t*t);
 81:   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
 82:   *c = x1/(*r);
 83:   *s = x2/(*r);
 84:   if (type_ == 2) *r *= -1;
 85:   if (type) *type = type_;
 86:   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
 87:   return(0);
 88: }

 92: /*
 93:                                 |c  s|
 94:   Applies an hyperbolic rotator |s  c|
 95:            |c  s|
 96:     [x1 x2]|s  c|
 97: */
 98: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
 99: {
100:   PetscInt    i;
101:   PetscReal   t;
102:   PetscScalar tmp;

105:   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
106:     t = s/c;
107:     for (i=0;i<n;i++) {
108:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
109:       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
110:     }
111:   } else { /* Type II */
112:     t = c/s;
113:     for (i=0;i<n;i++) {
114:       tmp = x1[i*inc1];
115:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
116:       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
117:     }
118:   }
119:   return(0);
120: }

124: /*
125:   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).

127:   Input:
128:     A symmetric (only lower triangular part is referred)
129:     s vector +1 and -1 (signature matrix)
130:   Output:
131:     d,e
132:     s
133:     Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
134: */
135: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nwr,PetscBLASInt *iwork,PetscInt nwi)
136: {
137: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
139:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
140: #else
142:   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
143:   PetscInt       nwall,nwu=0,nwallr,nwur=0,nwalli,nwui=0;
144:   PetscReal      *ss,cond=1.0,cs,sn,r;
145:   PetscScalar    tau,t,*AA;
146:   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
147:   PetscBool      breakdown = PETSC_TRUE;

150:   if (n<3) {
151:     if (n==1) Q[0]=1;
152:     if (n==2) {
153:       Q[0] = Q[1+ldq] = 1;
154:       Q[1] = Q[ldq] = 0;
155:     }
156:     return(0);
157:   }
158:   PetscBLASIntCast(lda,&lda_);
159:   PetscBLASIntCast(n,&n_);
160:   PetscBLASIntCast(ldq,&ldq_);
161:   nwall = n*n+n;
162:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
163:   nwallr = n;
164:   if (!rwork || nwr<nwallr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",14);
165:   ss = rwork;
166:   nwur += n;
167:   nwalli = n;
168:   if (!iwork || nwi<nwalli) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",16);
169:   perm = iwork;
170:   nwui += n;
171:   AA = work;
172:   for (i=0;i<n;i++) {
173:     PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
174:   }
175:   nwu += n*n;
176:   k=0;
177:   while (breakdown && k<n) {
178:     breakdown = PETSC_FALSE;
179:     /* Classify (and flip) A and s according to sign */
180:     if (flip) {
181:       for (i=0;i<n;i++) {
182:         perm[i] = n-1-perm_[i];
183:         if (perm[i]==0) i0 = i;
184:         if (perm[i]==k) ik = i;
185:       }
186:     } else {
187:       for (i=0;i<n;i++) {
188:         perm[i] = perm_[i];
189:         if (perm[i]==0) i0 = i;
190:         if (perm[i]==k) ik = i;
191:       }
192:     }
193:     perm[ik] = 0;
194:     perm[i0] = k;
195:     i=1;
196:     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
197:       if (s[perm[i]]!=s[perm[0]]) {
198:         j=i+1;
199:         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
200:         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
201:       }
202:       i++;
203:     }
204:     for (i=0;i<n;i++) {
205:       ss[i] = s[perm[i]];
206:     }
207:     if (flip) {
208:       ii = &j;
209:       jj = &i;
210:     } else {
211:       ii = &i;
212:       jj = &j;
213:     }
214:     for (i=0;i<n;i++)
215:       for (j=0;j<n;j++)
216:         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
217:     /* Initialize Q */
218:     for (i=0;i<n;i++) {
219:       PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
220:       Q[perm[i]+i*ldq] = 1.0;
221:     }
222:     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
223:     n0 = ni-1;
224:     n1 = n_-ni;
225:     for (j=0;j<n-2;j++) {
226:       PetscBLASIntCast(n-j-1,&m);
227:       /* Forming and applying reflectors */
228:       if (n0 > 1) {
229:         PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
230:         /* Apply reflector */
231:         if (PetscAbsScalar(tau) != 0.0) {
232:           t=*(A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
233:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
234:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
235:           /* Update Q */
236:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
237:           *(A+ni-n0+j*lda) = t;
238:           for (i=1;i<n0;i++) {
239:             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
240:           }
241:           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
242:         }
243:       }
244:       if (n1 > 1) {
245:         PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
246:         /* Apply reflector */
247:         if (PetscAbsScalar(tau) != 0.0) {
248:           t=*(A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
249:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
250:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
251:           /* Update Q */
252:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
253:           *(A+n-n1+j*lda) = t;
254:           for (i=1;i<n1;i++) {
255:             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
256:           }
257:           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
258:         }
259:       }
260:       /* Hyperbolic rotation */
261:       if (n0 > 0 && n1 > 0) {
262:         HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
263:         /* Check condition number */
264:         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
265:           breakdown = PETSC_TRUE;
266:           k++;
267:           if (k==n || flip)
268:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
269:           break;
270:         }
271:         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
272:         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
273:         /* Apply to A */
274:         HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
275:         HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);

277:         /* Update Q */
278:         HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
279:         if (type==2) {
280:           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
281:           n0++;ni++;n1--;
282:         }
283:       }
284:       if (n0>0) n0--;
285:       else n1--;
286:     }
287:   }

289:   /* flip matrices */
290:   if (flip) {
291:     for (i=0;i<n-1;i++) {
292:       d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
293:       e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
294:       s[i] = ss[n-i-1];
295:     }
296:     s[n-1] = ss[0];
297:     d[n-1] = PetscRealPart(A[0]);
298:     for (i=0;i<n;i++) {
299:       ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
300:     }
301:     for (i=0;i<n;i++)
302:       for (j=0;j<n;j++)
303:         Q[i+j*ldq] = work[i+(n-j-1)*n];
304:   } else {
305:     for (i=0;i<n-1;i++) {
306:       d[i] = PetscRealPart(A[i+i*lda]);
307:       e[i] = PetscRealPart(A[i+1+i*lda]);
308:       s[i] = ss[i];
309:     }
310:     s[n-1] = ss[n-1];
311:     d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
312:   }
313:   return(0);
314: #endif
315: }

319: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work,PetscInt lw)
320: {
322:   PetscScalar    *x,*y;
323:   PetscReal       ncond2;
324:   PetscBLASInt   n0_,n1_,inc=1;

327:   if (lw<n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
328:   /* Hyperbolic transformation to make zeros in x */
329:   x = tr1->data;
330:   tr1->n[0] = n0;
331:   tr1->n[1] = n1;
332:   tr1->idx[0] = idx0;
333:   tr1->idx[1] = idx1;
334:   PetscBLASIntCast(tr1->n[0],&n0_);
335:   PetscBLASIntCast(tr1->n[1],&n1_);
336:   if (tr1->n[0] > 1) {
337:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
338:   }
339:   if (tr1->n[1]> 1) {
340:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
341:   }
342:   if (tr1->idx[0]<tr1->idx[1]) {
343:     HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);  
344:   } else {
345:     tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
346:     *ncond = 1.0;
347:   }
348:   if (sz==2) {
349:     y = tr2->data;
350:     /* Apply first transformation to second column */
351:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
352:       x[tr1->idx[0]] = 1.0;
353:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
354:     }
355:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
356:       x[tr1->idx[1]] = 1.0;
357:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
358:     }
359:     if (tr1->idx[0]<tr1->idx[1]) {
360:       HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
361:     }
362:     tr2->n[0] = tr1->n[0];
363:     tr2->n[1] = tr1->n[1];
364:     tr2->idx[0] = tr1->idx[0];
365:     tr2->idx[1] = tr1->idx[1];
366:     if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
367:       tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
368:     }
369:     if (tr2->n[0]>0) {
370:       tr2->n[0]--; tr2->idx[0]++;
371:       if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
372:     } else {
373:       tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
374:     }
375:     /* Hyperbolic transformation to make zeros in y */
376:     PetscBLASIntCast(tr2->n[0],&n0_);
377:     PetscBLASIntCast(tr2->n[1],&n1_);
378:     if (tr2->n[0] > 1) {
379:       PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
380:     }
381:     if (tr2->n[1]> 1) {
382:       PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
383:     }
384:     if (tr2->idx[0]<tr2->idx[1]) {
385:       HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);  
386:     } else {
387:     tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
388:     ncond2 = 1.0;
389:     }
390:     if (ncond2>*ncond) *ncond = ncond2;
391:   }
392:   return(0);
393: }

397: /*
398:   Auxiliary function to try perform one iteration of hr routine,
399:   checking condition number. If it is < tolD, apply the
400:   transformation to H and R, if not, ok=false and it do nothing
401:   tolE, tolerance to exchange complex pairs to improve conditioning
402: */
403: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work,PetscInt nw)
404: {
406:   struct HRtr    *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
407:   PetscScalar    *x,*y;
408:   PetscReal      ncond,ncond_e;
409:   PetscInt       nwu=0,nwall,i,d=1;
410:   PetscBLASInt   n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
411:   PetscReal      tolD = 1e+5;

414:   if (cond) *cond = 1.0;
415:   PetscBLASIntCast(n,&n_);
416:   PetscBLASIntCast(ldr,&ldr_);
417:   PetscBLASIntCast(ldh,&ldh_);
418:   nwall = 5*n;
419:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",16);
420:   x = work+nwu;
421:   nwu += n;
422:   PetscMemcpy(x,R+j*ldr,n*sizeof(PetscScalar));
423:   *exg = PETSC_FALSE;
424:   *ok = PETSC_TRUE;
425:   tr1_t.data = x;
426:   if (sz==1) {
427:     /* Hyperbolic transformation to make zeros in x */
428:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu,nwall-nwu);
429:     /* Check condition number to single column*/
430:     if (ncond>tolD) {
431:       *ok = PETSC_FALSE;
432:     }
433:     tr1 = &tr1_t;
434:     tr2 = &tr2_t;    
435:   } else {
436:     y = work+nwu;
437:     nwu += n;
438:     PetscMemcpy(y,R+(j+1)*ldr,n*sizeof(PetscScalar));
439:     tr2_t.data = y;
440:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu,nwall-nwu);
441:     /* Computing hyperbolic transformations also for exchanged vectors */
442:     tr1_te.data = work+nwu;
443:     nwu += n;
444:     PetscMemcpy(tr1_te.data,R+(j+1)*ldr,n*sizeof(PetscScalar));
445:     tr2_te.data = work+nwu;
446:     nwu += n;
447:     PetscMemcpy(tr2_te.data,R+j*ldr,n*sizeof(PetscScalar));
448:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu,nwall-nwu);
449:     if (ncond > d*ncond_e) {
450:       *exg = PETSC_TRUE;
451:       tr1 = &tr1_te;
452:       tr2 = &tr2_te;
453:       ncond = ncond_e;
454:     } else {
455:       tr1 = &tr1_t;
456:       tr2 = &tr2_t;
457:     }
458:     if (ncond>tolD) *ok = PETSC_FALSE;
459:   }
460:   if (*ok) {
461:     /* Everything is OK, apply transformations to R and H */
462:     /* First column */
463:     if (cond && *cond<ncond) *cond = ncond;
464:     x = tr1->data;
465:     PetscBLASIntCast(tr1->n[0],&n0_);
466:     PetscBLASIntCast(tr1->n[1],&n1_); 
467:     PetscBLASIntCast(n-j-sz,&mr);
468:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
469:       x[tr1->idx[0]] = 1.0;
470:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
471:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
472:     }
473:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
474:       x[tr1->idx[1]] = 1.0;
475:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
476:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
477:     }
478:     if (tr1->idx[0]<tr1->idx[1]) {
479:       HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
480:       if (tr1->type==1) {
481:         HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
482:       } else {
483:         HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
484:         s[tr1->idx[0]] = -s[tr1->idx[0]];
485:         s[tr1->idx[1]] = -s[tr1->idx[1]];
486:       }
487:     }
488:     for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];  
489:     for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
490:     *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
491:     if (sz==2) {
492:       y = tr2->data;
493:       /* Second column */
494:       PetscBLASIntCast(tr2->n[0],&n0_);
495:       PetscBLASIntCast(tr2->n[1],&n1_); 
496:       PetscBLASIntCast(n-j-sz,&mr);
497:       PetscBLASIntCast(n-tr2->idx[0],&mh);
498:       if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
499:         y[tr2->idx[0]] = 1.0;
500:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
501:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
502:       }
503:       if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
504:         y[tr2->idx[1]] = 1.0;
505:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
506:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
507:       }
508:       if (tr2->idx[0]<tr2->idx[1]) {
509:         HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
510:         if (tr2->type==1) {
511:           HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
512:         } else {
513:           HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
514:           s[tr2->idx[0]] = -s[tr2->idx[0]];
515:           s[tr2->idx[1]] = -s[tr2->idx[1]];
516:         }
517:       }
518:       for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
519:       *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
520:       for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
521:       *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
522:       *n0 = tr2->n[0];
523:       *n1 = tr2->n[1];
524:       *idx0 = tr2->idx[0];
525:       *idx1 = tr2->idx[1];
526:       if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
527:         (*idx1)++; (*n1)--; (*n0)++;
528:       }
529:     } else {
530:       *n0 = tr1->n[0];
531:       *n1 = tr1->n[1];
532:       *idx0 = tr1->idx[0];
533:       *idx1 = tr1->idx[1];
534:       if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
535:         (*idx1)++; (*n1)--; (*n0)++;
536:       }      
537:     }
538:     if (*n0>0) {
539:       (*n0)--; (*idx0)++;
540:       if (*n1==0) *idx1 = *idx0;
541:     } else {
542:       (*n1)--; (*idx1)++; *idx0 = *idx1;
543:     }
544:   }
545:   return(0);
546: }

550: /*
551:   compute V = HR whit H s-orthogonal and R upper triangular
552: */
553: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work,PetscInt nw)
554: {
556:   PetscInt       i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwall,nwu=0;
557:   PetscScalar    *col1,*col2;
558:   PetscBool      exg=PETSC_FALSE,ok=PETSC_FALSE;

561:   n = *nv;
562:   nwall = 7*n;
563:   if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
564:   col1 = work+nwu;
565:   nwu += n;
566:   col2 = work+nwu;
567:   nwu += n;
568:   /* Sort R and s according to sing(s) */
569:   np = 0;
570:   for (i=0;i<n;i++) if (s[i]>0) np++;
571:   if (s[0]>0) n1 = np;
572:   else n1 = n-np;
573:   n0 = 0;
574:   for (i=0;i<n;i++) {
575:     if (s[i]==s[0]) {
576:       s[n0] = s[0];
577:       perm[n0++] = i;
578:     } else perm[n1++] = i;
579:   }
580:   for (i=n0;i<n;i++) s[i] = -s[0];
581:   n1 -= n0;
582:   idx0 = 0;
583:   idx1 = n0;
584:   if (idx1==n) idx1=idx0;
585:   for (i=0;i<n;i++) {
586:     for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
587:   }
588:   /* Initialize H */
589:   for (i=0;i<n;i++) {
590:     PetscMemzero(V+i*ldv,n*sizeof(PetscScalar));
591:     V[perm[i]+i*ldv] = 1.0;
592:   }
593:   for (i=0;i<n;i++) perm[i] = i;
594:   j = 0;
595:   while (j<n-k) {
596:     if (cmplxEig) {
597:       if (cmplxEig[j]==0) sz=1;
598:       else sz=2;
599:     }
600:     TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu,nw-nwu);
601:     if (ok) {
602:       if (exg) cmplxEig[j] = -cmplxEig[j];
603:       j = j+sz;
604:     } else { /* to be discarded */
605:       k = k+1;
606:       if (cmplxEig[j]==0) {
607:         if (j<n) {
608:           t1 = perm[j];
609:           for (i=j;i<n-1;i++) perm[i] = perm[i+1];
610:           perm[n-1] = t1;
611:           t1 = cmplxEig[j];
612:           for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
613:           cmplxEig[n-1] = t1;
614:           PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
615:           for (i=j;i<n-1;i++) { 
616:             PetscMemcpy(R+i*ldr,R+(i+1)*ldr,n*sizeof(PetscScalar));
617:           }
618:           PetscMemcpy(R+(n-1)*ldr,col1,n*sizeof(PetscScalar));
619:         }
620:       } else {
621:         k = k+1;
622:         if (j<n-1) {
623:           t1 = perm[j];
624:           t2 = perm[j+1];
625:           for (i=j;i<n-2;i++) perm[i] = perm[i+2];
626:           perm[n-2] = t1;
627:           perm[n-1] = t2;
628:           t1 = cmplxEig[j];
629:           t2 = cmplxEig[j+1];
630:           for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
631:           cmplxEig[n-2] = t1;
632:           cmplxEig[n-1] = t2;
633:           PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
634:           PetscMemcpy(col2,R+(j+1)*ldr,n*sizeof(PetscScalar));
635:           for (i=j;i<n-2;i++) {
636:             PetscMemcpy(R+i*ldr,R+(i+2)*ldr,n*sizeof(PetscScalar));
637:           }
638:           PetscMemcpy(R+(n-2)*ldr,col1,n*sizeof(PetscScalar));
639:           PetscMemcpy(R+(n-1)*ldr,col2,n*sizeof(PetscScalar));
640:         }
641:       }
642:     }
643:   }
644:   if (k!=0) {
645:     if (breakdown) *breakdown = PETSC_TRUE;
646:     *nv = n-k;
647:   }
648:   return(0);
649: }

653: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
654: {
656:   PetscInt       lws,nwus=0,nwui=0,lwi;
657:   PetscInt       off,n,nv,ld,i,ldr,l;
658:   PetscScalar    *W,*X,*R,*ts,zeroS=0.0,oneS=1.0;
659:   PetscReal      *s,vi,vr,tr,*d,*e;
660:   PetscBLASInt   ld_,n_,nv_,*perm,*cmplxEig;

663:   l = ds->l;
664:   n = ds->n-l;
665:   PetscBLASIntCast(n,&n_);
666:   ld = ds->ld;
667:   PetscBLASIntCast(ld,&ld_);
668:   off = l*ld+l;
669:   s = ds->rmat[DS_MAT_D];
670:   if (!ds->compact) {
671:     for (i=l;i<ds->n;i++) s[i] = PetscRealPart(*(ds->mat[DS_MAT_B]+i*ld+i));
672:   }
673:   lws = n*n+7*n;
674:   lwi = 2*n;
675:   DSAllocateWork_Private(ds,lws,0,lwi);
676:   R = ds->work+nwus;
677:   nwus += n*n;
678:   ldr = n;
679:   perm = ds->iwork + nwui;
680:   nwui += n;
681:   cmplxEig = ds->iwork+nwui;
682:   nwui += n;
683:   X = ds->mat[mat];
684:   for (i=0;i<n;i++) {
685: #if defined(PETSC_USE_COMPLEX)
686:     vi = PetscImaginaryPart(wr[l+i]);
687: #else
688:     vi = PetscRealPart(wi[l+i]);
689: #endif
690:     if (vi!=0) {
691:       cmplxEig[i] = 1;
692:       cmplxEig[i+1] = 2;
693:       i++;
694:     } else cmplxEig[i] = 0;
695:   }
696:   nv = n;
697:   
698:   /* Perform HR decomposition */
699:   /* Hyperbolic rotators */
700:   PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus,lws-nwus);
701:   /* Sort wr,wi perm */ 
702:   ts = ds->work+nwus;
703:   nwus += n;
704:   PetscMemcpy(ts,wr+l,n*sizeof(PetscScalar));
705:   for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
706: #if !defined(PETSC_USE_COMPLEX)
707:   PetscMemcpy(ts,wi+l,n*sizeof(PetscScalar));
708:   for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
709: #endif
710:   /* Projected Matrix */
711:   PetscMemzero(ds->rmat[DS_MAT_T]+2*ld,ld*sizeof(PetscReal));
712:   d = ds->rmat[DS_MAT_T];
713:   e = d+ld;
714:   for (i=0;i<nv;i++) {
715:     if (cmplxEig[i]==0) { /* Real */
716:       d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
717:       e[l+i] = 0.0;
718:     } else {
719:       vr = PetscRealPart(wr[l+i]);
720: #if defined(PETSC_USE_COMPLEX)
721:       vi = PetscImaginaryPart(wr[l+i]);
722: #else
723:       vi = PetscRealPart(wi[l+i]);
724: #endif
725:       if (cmplxEig[i]==-1) vi = -vi;
726:       tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
727:       d[l+i] = (vr-tr)*s[l+i];
728:       d[l+i+1] = (vr+tr)*s[l+i+1];
729:       e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
730:       e[l+i+1] = 0.0;
731:       i++;
732:     }
733:   }
734:   /* accumulate previous Q */
735:   if (accum) {
736:     PetscBLASIntCast(nv,&nv_);
737:     DSAllocateMat_Private(ds,DS_MAT_W);
738:     W = ds->mat[DS_MAT_W];
739:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
740:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&oneS,W+off,&ld_,X+off,&ld_,&zeroS,ds->mat[DS_MAT_Q]+off,&ld_));
741:   } else {
742:     PetscMemzero(ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
743:     for (i=0;i<ds->l;i++) *(ds->mat[DS_MAT_Q]+i+i*ld) = 1.0;
744:     for (i=0;i<n;i++) { PetscMemcpy(ds->mat[DS_MAT_Q]+off+i*ld,X+off+i*ld,n*sizeof(PetscScalar)); }
745:   }
746:   ds->t = nv+l;
747:   if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }  
748:   return(0);
749: }

753: /*
754:    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
755: */
756: PetscErrorCode DSIntermediate_GHIEP(DS ds)
757: {
759:   PetscInt       i,ld,off;
760:   PetscInt       nwall,nwallr,nwalli,nwu=0,nwur=0,nwui=0;
761:   PetscScalar    *A,*B,*Q;
762:   PetscReal      *d,*e,*s;

765:   ld = ds->ld;
766:   A = ds->mat[DS_MAT_A];
767:   B = ds->mat[DS_MAT_B];
768:   Q = ds->mat[DS_MAT_Q];
769:   d = ds->rmat[DS_MAT_T];
770:   e = ds->rmat[DS_MAT_T]+ld;
771:   s = ds->rmat[DS_MAT_D];
772:   off = ds->l+ds->l*ld;
773:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
774:   nwall = ld*ld+ld;
775:   nwallr = ld;
776:   nwalli = ld;
777:   DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
778:   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
779:   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
780:   if (ds->compact) {
781:     if (ds->state < DS_STATE_INTERMEDIATE) {
782:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
783:       TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work+nwu,nwall-nwu,ds->rwork+nwur,nwallr-nwur,ds->iwork+nwui,nwalli-nwui);
784:       ds->k = ds->l;
785:       PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
786:     }
787:   } else {
788:     if (ds->state < DS_STATE_INTERMEDIATE) {
789:       for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
790:       TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work+nwu,nwall-nwu,ds->rwork+nwur,nwallr-nwur,ds->iwork+nwui,nwalli-nwui);
791:       PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
792:       ds->k = ds->l;
793:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
794:     } else {
795:       DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
796:     }
797:   }
798:   return(0);
799: }