Actual source code: invit.c
slepc-3.6.1 2015-09-03
1: /*
3: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4: SLEPc - Scalable Library for Eigenvalue Problem Computations
5: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
7: This file is part of SLEPc.
9: SLEPc is free software: you can redistribute it and/or modify it under the
10: terms of version 3 of the GNU Lesser General Public License as published by
11: the Free Software Foundation.
13: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
14: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
16: more details.
18: You should have received a copy of the GNU Lesser General Public License
19: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
20: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21: */
22: #include <slepc/private/dsimpl.h>
23: #include <slepcblaslapack.h>
25: struct HRtr
26: {
27: PetscScalar *data;
28: PetscInt m;
29: PetscInt idx[2];
30: PetscInt n[2];
31: PetscScalar tau[2];
32: PetscReal alpha;
33: PetscReal cs;
34: PetscReal sn;
35: PetscInt type;
36: };
40: /*
41: Generates a hyperbolic rotation
42: if x1*x1 - x2*x2 != 0
43: r = sqrt(|x1*x1 - x2*x2|)
44: c = x1/r s = x2/r
46: | c -s||x1| |d*r|
47: |-s c||x2| = | 0 |
48: where d = 1 for type==1 and -1 for type==2
49: Returns the condition number of the reduction
50: */
51: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
52: {
53: PetscReal t,n2,xa,xb;
54: PetscInt type_;
57: if (x2==0.0) {
58: *r = PetscAbsReal(x1);
59: *c = (x1>=0)?1.0:-1.0;
60: *s = 0.0;
61: if (type) *type = 1;
62: return(0);
63: }
64: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
65: /* hyperbolic rotation doesn't exist */
66: *c = 0.0;
67: *s = 0.0;
68: *r = 0.0;
69: if (type) *type = 0;
70: *cond = PETSC_MAX_REAL;
71: return(0);
72: }
74: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
75: xa = x1; xb = x2; type_ = 1;
76: } else {
77: xa = x2; xb = x1; type_ = 2;
78: }
79: t = xb/xa;
80: n2 = PetscAbsReal(1 - t*t);
81: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
82: *c = x1/(*r);
83: *s = x2/(*r);
84: if (type_ == 2) *r *= -1;
85: if (type) *type = type_;
86: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
87: return(0);
88: }
92: /*
93: |c s|
94: Applies an hyperbolic rotator |s c|
95: |c s|
96: [x1 x2]|s c|
97: */
98: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
99: {
100: PetscInt i;
101: PetscReal t;
102: PetscScalar tmp;
105: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
106: t = s/c;
107: for (i=0;i<n;i++) {
108: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
109: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
110: }
111: } else { /* Type II */
112: t = c/s;
113: for (i=0;i<n;i++) {
114: tmp = x1[i*inc1];
115: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
116: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
117: }
118: }
119: return(0);
120: }
124: /*
125: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
127: Input:
128: A symmetric (only lower triangular part is referred)
129: s vector +1 and -1 (signature matrix)
130: Output:
131: d,e
132: s
133: Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
134: */
135: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nwr,PetscBLASInt *iwork,PetscInt nwi)
136: {
137: #if defined(PETSC_MISSING_LAPACK_LARFG) || defined(PETSC_MISSING_LAPACK_LARF)
139: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
140: #else
142: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
143: PetscInt nwall,nwu=0,nwallr,nwur=0,nwalli,nwui=0;
144: PetscReal *ss,cond=1.0,cs,sn,r;
145: PetscScalar tau,t,*AA;
146: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
147: PetscBool breakdown = PETSC_TRUE;
150: if (n<3) {
151: if (n==1) Q[0]=1;
152: if (n==2) {
153: Q[0] = Q[1+ldq] = 1;
154: Q[1] = Q[ldq] = 0;
155: }
156: return(0);
157: }
158: PetscBLASIntCast(lda,&lda_);
159: PetscBLASIntCast(n,&n_);
160: PetscBLASIntCast(ldq,&ldq_);
161: nwall = n*n+n;
162: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
163: nwallr = n;
164: if (!rwork || nwr<nwallr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",14);
165: ss = rwork;
166: nwur += n;
167: nwalli = n;
168: if (!iwork || nwi<nwalli) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",16);
169: perm = iwork;
170: nwui += n;
171: AA = work;
172: for (i=0;i<n;i++) {
173: PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
174: }
175: nwu += n*n;
176: k=0;
177: while (breakdown && k<n) {
178: breakdown = PETSC_FALSE;
179: /* Classify (and flip) A and s according to sign */
180: if (flip) {
181: for (i=0;i<n;i++) {
182: perm[i] = n-1-perm_[i];
183: if (perm[i]==0) i0 = i;
184: if (perm[i]==k) ik = i;
185: }
186: } else {
187: for (i=0;i<n;i++) {
188: perm[i] = perm_[i];
189: if (perm[i]==0) i0 = i;
190: if (perm[i]==k) ik = i;
191: }
192: }
193: perm[ik] = 0;
194: perm[i0] = k;
195: i=1;
196: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
197: if (s[perm[i]]!=s[perm[0]]) {
198: j=i+1;
199: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
200: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
201: }
202: i++;
203: }
204: for (i=0;i<n;i++) {
205: ss[i] = s[perm[i]];
206: }
207: if (flip) {
208: ii = &j;
209: jj = &i;
210: } else {
211: ii = &i;
212: jj = &j;
213: }
214: for (i=0;i<n;i++)
215: for (j=0;j<n;j++)
216: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
217: /* Initialize Q */
218: for (i=0;i<n;i++) {
219: PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
220: Q[perm[i]+i*ldq] = 1.0;
221: }
222: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
223: n0 = ni-1;
224: n1 = n_-ni;
225: for (j=0;j<n-2;j++) {
226: PetscBLASIntCast(n-j-1,&m);
227: /* Forming and applying reflectors */
228: if (n0 > 1) {
229: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
230: /* Apply reflector */
231: if (PetscAbsScalar(tau) != 0.0) {
232: t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
233: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
234: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
235: /* Update Q */
236: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
237: *(A+ni-n0+j*lda) = t;
238: for (i=1;i<n0;i++) {
239: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
240: }
241: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
242: }
243: }
244: if (n1 > 1) {
245: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
246: /* Apply reflector */
247: if (PetscAbsScalar(tau) != 0.0) {
248: t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
249: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
250: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
251: /* Update Q */
252: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
253: *(A+n-n1+j*lda) = t;
254: for (i=1;i<n1;i++) {
255: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
256: }
257: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
258: }
259: }
260: /* Hyperbolic rotation */
261: if (n0 > 0 && n1 > 0) {
262: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
263: /* Check condition number */
264: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
265: breakdown = PETSC_TRUE;
266: k++;
267: if (k==n || flip)
268: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
269: break;
270: }
271: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
272: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
273: /* Apply to A */
274: HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
275: HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);
277: /* Update Q */
278: HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
279: if (type==2) {
280: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
281: n0++;ni++;n1--;
282: }
283: }
284: if (n0>0) n0--;
285: else n1--;
286: }
287: }
289: /* flip matrices */
290: if (flip) {
291: for (i=0;i<n-1;i++) {
292: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
293: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
294: s[i] = ss[n-i-1];
295: }
296: s[n-1] = ss[0];
297: d[n-1] = PetscRealPart(A[0]);
298: for (i=0;i<n;i++) {
299: ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
300: }
301: for (i=0;i<n;i++)
302: for (j=0;j<n;j++)
303: Q[i+j*ldq] = work[i+(n-j-1)*n];
304: } else {
305: for (i=0;i<n-1;i++) {
306: d[i] = PetscRealPart(A[i+i*lda]);
307: e[i] = PetscRealPart(A[i+1+i*lda]);
308: s[i] = ss[i];
309: }
310: s[n-1] = ss[n-1];
311: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
312: }
313: return(0);
314: #endif
315: }
319: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work,PetscInt lw)
320: {
322: PetscScalar *x,*y;
323: PetscReal ncond2;
324: PetscBLASInt n0_,n1_,inc=1;
327: if (lw<n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
328: /* Hyperbolic transformation to make zeros in x */
329: x = tr1->data;
330: tr1->n[0] = n0;
331: tr1->n[1] = n1;
332: tr1->idx[0] = idx0;
333: tr1->idx[1] = idx1;
334: PetscBLASIntCast(tr1->n[0],&n0_);
335: PetscBLASIntCast(tr1->n[1],&n1_);
336: if (tr1->n[0] > 1) {
337: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
338: }
339: if (tr1->n[1]> 1) {
340: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
341: }
342: if (tr1->idx[0]<tr1->idx[1]) {
343: HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
344: } else {
345: tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
346: *ncond = 1.0;
347: }
348: if (sz==2) {
349: y = tr2->data;
350: /* Apply first transformation to second column */
351: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
352: x[tr1->idx[0]] = 1.0;
353: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
354: }
355: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
356: x[tr1->idx[1]] = 1.0;
357: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
358: }
359: if (tr1->idx[0]<tr1->idx[1]) {
360: HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
361: }
362: tr2->n[0] = tr1->n[0];
363: tr2->n[1] = tr1->n[1];
364: tr2->idx[0] = tr1->idx[0];
365: tr2->idx[1] = tr1->idx[1];
366: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
367: tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
368: }
369: if (tr2->n[0]>0) {
370: tr2->n[0]--; tr2->idx[0]++;
371: if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
372: } else {
373: tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
374: }
375: /* Hyperbolic transformation to make zeros in y */
376: PetscBLASIntCast(tr2->n[0],&n0_);
377: PetscBLASIntCast(tr2->n[1],&n1_);
378: if (tr2->n[0] > 1) {
379: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
380: }
381: if (tr2->n[1]> 1) {
382: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
383: }
384: if (tr2->idx[0]<tr2->idx[1]) {
385: HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
386: } else {
387: tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
388: ncond2 = 1.0;
389: }
390: if (ncond2>*ncond) *ncond = ncond2;
391: }
392: return(0);
393: }
397: /*
398: Auxiliary function to try perform one iteration of hr routine,
399: checking condition number. If it is < tolD, apply the
400: transformation to H and R, if not, ok=false and it do nothing
401: tolE, tolerance to exchange complex pairs to improve conditioning
402: */
403: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work,PetscInt nw)
404: {
406: struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
407: PetscScalar *x,*y;
408: PetscReal ncond,ncond_e;
409: PetscInt nwu=0,nwall,i,d=1;
410: PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
411: PetscReal tolD = 1e+5;
414: if (cond) *cond = 1.0;
415: PetscBLASIntCast(n,&n_);
416: PetscBLASIntCast(ldr,&ldr_);
417: PetscBLASIntCast(ldh,&ldh_);
418: nwall = 5*n;
419: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",16);
420: x = work+nwu;
421: nwu += n;
422: PetscMemcpy(x,R+j*ldr,n*sizeof(PetscScalar));
423: *exg = PETSC_FALSE;
424: *ok = PETSC_TRUE;
425: tr1_t.data = x;
426: if (sz==1) {
427: /* Hyperbolic transformation to make zeros in x */
428: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu,nwall-nwu);
429: /* Check condition number to single column*/
430: if (ncond>tolD) {
431: *ok = PETSC_FALSE;
432: }
433: tr1 = &tr1_t;
434: tr2 = &tr2_t;
435: } else {
436: y = work+nwu;
437: nwu += n;
438: PetscMemcpy(y,R+(j+1)*ldr,n*sizeof(PetscScalar));
439: tr2_t.data = y;
440: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu,nwall-nwu);
441: /* Computing hyperbolic transformations also for exchanged vectors */
442: tr1_te.data = work+nwu;
443: nwu += n;
444: PetscMemcpy(tr1_te.data,R+(j+1)*ldr,n*sizeof(PetscScalar));
445: tr2_te.data = work+nwu;
446: nwu += n;
447: PetscMemcpy(tr2_te.data,R+j*ldr,n*sizeof(PetscScalar));
448: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu,nwall-nwu);
449: if (ncond > d*ncond_e) {
450: *exg = PETSC_TRUE;
451: tr1 = &tr1_te;
452: tr2 = &tr2_te;
453: ncond = ncond_e;
454: } else {
455: tr1 = &tr1_t;
456: tr2 = &tr2_t;
457: }
458: if (ncond>tolD) *ok = PETSC_FALSE;
459: }
460: if (*ok) {
461: /* Everything is OK, apply transformations to R and H */
462: /* First column */
463: if (cond && *cond<ncond) *cond = ncond;
464: x = tr1->data;
465: PetscBLASIntCast(tr1->n[0],&n0_);
466: PetscBLASIntCast(tr1->n[1],&n1_);
467: PetscBLASIntCast(n-j-sz,&mr);
468: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
469: x[tr1->idx[0]] = 1.0;
470: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
471: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
472: }
473: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
474: x[tr1->idx[1]] = 1.0;
475: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
476: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
477: }
478: if (tr1->idx[0]<tr1->idx[1]) {
479: HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
480: if (tr1->type==1) {
481: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
482: } else {
483: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
484: s[tr1->idx[0]] = -s[tr1->idx[0]];
485: s[tr1->idx[1]] = -s[tr1->idx[1]];
486: }
487: }
488: for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
489: for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
490: *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
491: if (sz==2) {
492: y = tr2->data;
493: /* Second column */
494: PetscBLASIntCast(tr2->n[0],&n0_);
495: PetscBLASIntCast(tr2->n[1],&n1_);
496: PetscBLASIntCast(n-j-sz,&mr);
497: PetscBLASIntCast(n-tr2->idx[0],&mh);
498: if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
499: y[tr2->idx[0]] = 1.0;
500: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
501: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
502: }
503: if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
504: y[tr2->idx[1]] = 1.0;
505: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
506: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
507: }
508: if (tr2->idx[0]<tr2->idx[1]) {
509: HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
510: if (tr2->type==1) {
511: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
512: } else {
513: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
514: s[tr2->idx[0]] = -s[tr2->idx[0]];
515: s[tr2->idx[1]] = -s[tr2->idx[1]];
516: }
517: }
518: for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
519: *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
520: for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
521: *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
522: *n0 = tr2->n[0];
523: *n1 = tr2->n[1];
524: *idx0 = tr2->idx[0];
525: *idx1 = tr2->idx[1];
526: if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
527: (*idx1)++; (*n1)--; (*n0)++;
528: }
529: } else {
530: *n0 = tr1->n[0];
531: *n1 = tr1->n[1];
532: *idx0 = tr1->idx[0];
533: *idx1 = tr1->idx[1];
534: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
535: (*idx1)++; (*n1)--; (*n0)++;
536: }
537: }
538: if (*n0>0) {
539: (*n0)--; (*idx0)++;
540: if (*n1==0) *idx1 = *idx0;
541: } else {
542: (*n1)--; (*idx1)++; *idx0 = *idx1;
543: }
544: }
545: return(0);
546: }
550: /*
551: compute V = HR whit H s-orthogonal and R upper triangular
552: */
553: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work,PetscInt nw)
554: {
556: PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwall,nwu=0;
557: PetscScalar *col1,*col2;
558: PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
561: n = *nv;
562: nwall = 7*n;
563: if (!work || nw<nwall) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",11);
564: col1 = work+nwu;
565: nwu += n;
566: col2 = work+nwu;
567: nwu += n;
568: /* Sort R and s according to sing(s) */
569: np = 0;
570: for (i=0;i<n;i++) if (s[i]>0) np++;
571: if (s[0]>0) n1 = np;
572: else n1 = n-np;
573: n0 = 0;
574: for (i=0;i<n;i++) {
575: if (s[i]==s[0]) {
576: s[n0] = s[0];
577: perm[n0++] = i;
578: } else perm[n1++] = i;
579: }
580: for (i=n0;i<n;i++) s[i] = -s[0];
581: n1 -= n0;
582: idx0 = 0;
583: idx1 = n0;
584: if (idx1==n) idx1=idx0;
585: for (i=0;i<n;i++) {
586: for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
587: }
588: /* Initialize H */
589: for (i=0;i<n;i++) {
590: PetscMemzero(V+i*ldv,n*sizeof(PetscScalar));
591: V[perm[i]+i*ldv] = 1.0;
592: }
593: for (i=0;i<n;i++) perm[i] = i;
594: j = 0;
595: while (j<n-k) {
596: if (cmplxEig) {
597: if (cmplxEig[j]==0) sz=1;
598: else sz=2;
599: }
600: TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu,nw-nwu);
601: if (ok) {
602: if (exg) cmplxEig[j] = -cmplxEig[j];
603: j = j+sz;
604: } else { /* to be discarded */
605: k = k+1;
606: if (cmplxEig[j]==0) {
607: if (j<n) {
608: t1 = perm[j];
609: for (i=j;i<n-1;i++) perm[i] = perm[i+1];
610: perm[n-1] = t1;
611: t1 = cmplxEig[j];
612: for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
613: cmplxEig[n-1] = t1;
614: PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
615: for (i=j;i<n-1;i++) {
616: PetscMemcpy(R+i*ldr,R+(i+1)*ldr,n*sizeof(PetscScalar));
617: }
618: PetscMemcpy(R+(n-1)*ldr,col1,n*sizeof(PetscScalar));
619: }
620: } else {
621: k = k+1;
622: if (j<n-1) {
623: t1 = perm[j];
624: t2 = perm[j+1];
625: for (i=j;i<n-2;i++) perm[i] = perm[i+2];
626: perm[n-2] = t1;
627: perm[n-1] = t2;
628: t1 = cmplxEig[j];
629: t2 = cmplxEig[j+1];
630: for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
631: cmplxEig[n-2] = t1;
632: cmplxEig[n-1] = t2;
633: PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
634: PetscMemcpy(col2,R+(j+1)*ldr,n*sizeof(PetscScalar));
635: for (i=j;i<n-2;i++) {
636: PetscMemcpy(R+i*ldr,R+(i+2)*ldr,n*sizeof(PetscScalar));
637: }
638: PetscMemcpy(R+(n-2)*ldr,col1,n*sizeof(PetscScalar));
639: PetscMemcpy(R+(n-1)*ldr,col2,n*sizeof(PetscScalar));
640: }
641: }
642: }
643: }
644: if (k!=0) {
645: if (breakdown) *breakdown = PETSC_TRUE;
646: *nv = n-k;
647: }
648: return(0);
649: }
653: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
654: {
656: PetscInt lws,nwus=0,nwui=0,lwi;
657: PetscInt off,n,nv,ld,i,ldr,l;
658: PetscScalar *W,*X,*R,*ts,zeroS=0.0,oneS=1.0;
659: PetscReal *s,vi,vr,tr,*d,*e;
660: PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
663: l = ds->l;
664: n = ds->n-l;
665: PetscBLASIntCast(n,&n_);
666: ld = ds->ld;
667: PetscBLASIntCast(ld,&ld_);
668: off = l*ld+l;
669: s = ds->rmat[DS_MAT_D];
670: if (!ds->compact) {
671: for (i=l;i<ds->n;i++) s[i] = PetscRealPart(*(ds->mat[DS_MAT_B]+i*ld+i));
672: }
673: lws = n*n+7*n;
674: lwi = 2*n;
675: DSAllocateWork_Private(ds,lws,0,lwi);
676: R = ds->work+nwus;
677: nwus += n*n;
678: ldr = n;
679: perm = ds->iwork + nwui;
680: nwui += n;
681: cmplxEig = ds->iwork+nwui;
682: nwui += n;
683: X = ds->mat[mat];
684: for (i=0;i<n;i++) {
685: #if defined(PETSC_USE_COMPLEX)
686: vi = PetscImaginaryPart(wr[l+i]);
687: #else
688: vi = PetscRealPart(wi[l+i]);
689: #endif
690: if (vi!=0) {
691: cmplxEig[i] = 1;
692: cmplxEig[i+1] = 2;
693: i++;
694: } else cmplxEig[i] = 0;
695: }
696: nv = n;
697:
698: /* Perform HR decomposition */
699: /* Hyperbolic rotators */
700: PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus,lws-nwus);
701: /* Sort wr,wi perm */
702: ts = ds->work+nwus;
703: nwus += n;
704: PetscMemcpy(ts,wr+l,n*sizeof(PetscScalar));
705: for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
706: #if !defined(PETSC_USE_COMPLEX)
707: PetscMemcpy(ts,wi+l,n*sizeof(PetscScalar));
708: for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
709: #endif
710: /* Projected Matrix */
711: PetscMemzero(ds->rmat[DS_MAT_T]+2*ld,ld*sizeof(PetscReal));
712: d = ds->rmat[DS_MAT_T];
713: e = d+ld;
714: for (i=0;i<nv;i++) {
715: if (cmplxEig[i]==0) { /* Real */
716: d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
717: e[l+i] = 0.0;
718: } else {
719: vr = PetscRealPart(wr[l+i]);
720: #if defined(PETSC_USE_COMPLEX)
721: vi = PetscImaginaryPart(wr[l+i]);
722: #else
723: vi = PetscRealPart(wi[l+i]);
724: #endif
725: if (cmplxEig[i]==-1) vi = -vi;
726: tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
727: d[l+i] = (vr-tr)*s[l+i];
728: d[l+i+1] = (vr+tr)*s[l+i+1];
729: e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
730: e[l+i+1] = 0.0;
731: i++;
732: }
733: }
734: /* accumulate previous Q */
735: if (accum) {
736: PetscBLASIntCast(nv,&nv_);
737: DSAllocateMat_Private(ds,DS_MAT_W);
738: W = ds->mat[DS_MAT_W];
739: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
740: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&oneS,W+off,&ld_,X+off,&ld_,&zeroS,ds->mat[DS_MAT_Q]+off,&ld_));
741: } else {
742: PetscMemzero(ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
743: for (i=0;i<ds->l;i++) *(ds->mat[DS_MAT_Q]+i+i*ld) = 1.0;
744: for (i=0;i<n;i++) { PetscMemcpy(ds->mat[DS_MAT_Q]+off+i*ld,X+off+i*ld,n*sizeof(PetscScalar)); }
745: }
746: ds->t = nv+l;
747: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }
748: return(0);
749: }
753: /*
754: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
755: */
756: PetscErrorCode DSIntermediate_GHIEP(DS ds)
757: {
759: PetscInt i,ld,off;
760: PetscInt nwall,nwallr,nwalli,nwu=0,nwur=0,nwui=0;
761: PetscScalar *A,*B,*Q;
762: PetscReal *d,*e,*s;
765: ld = ds->ld;
766: A = ds->mat[DS_MAT_A];
767: B = ds->mat[DS_MAT_B];
768: Q = ds->mat[DS_MAT_Q];
769: d = ds->rmat[DS_MAT_T];
770: e = ds->rmat[DS_MAT_T]+ld;
771: s = ds->rmat[DS_MAT_D];
772: off = ds->l+ds->l*ld;
773: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
774: nwall = ld*ld+ld;
775: nwallr = ld;
776: nwalli = ld;
777: DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
778: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
779: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
780: if (ds->compact) {
781: if (ds->state < DS_STATE_INTERMEDIATE) {
782: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
783: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work+nwu,nwall-nwu,ds->rwork+nwur,nwallr-nwur,ds->iwork+nwui,nwalli-nwui);
784: ds->k = ds->l;
785: PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
786: }
787: } else {
788: if (ds->state < DS_STATE_INTERMEDIATE) {
789: for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
790: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work+nwu,nwall-nwu,ds->rwork+nwur,nwallr-nwur,ds->iwork+nwui,nwalli-nwui);
791: PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
792: ds->k = ds->l;
793: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
794: } else {
795: DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
796: }
797: }
798: return(0);
799: }