Actual source code: dsnhep.c
slepc-3.6.1 2015-09-03
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc/private/dsimpl.h>
23: #include <slepcblaslapack.h>
27: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
28: {
32: DSAllocateMat_Private(ds,DS_MAT_A);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: PetscFree(ds->perm);
35: PetscMalloc1(ld,&ds->perm);
36: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
37: return(0);
38: }
42: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
43: {
47: DSViewMat(ds,viewer,DS_MAT_A);
48: if (ds->state>DS_STATE_INTERMEDIATE) {
49: DSViewMat(ds,viewer,DS_MAT_Q);
50: }
51: if (ds->mat[DS_MAT_X]) {
52: DSViewMat(ds,viewer,DS_MAT_X);
53: }
54: if (ds->mat[DS_MAT_Y]) {
55: DSViewMat(ds,viewer,DS_MAT_Y);
56: }
57: return(0);
58: }
62: PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
63: {
64: #if defined(SLEPC_MISSING_LAPACK_GESVD)
66: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
67: #else
69: PetscInt i,j;
70: PetscBLASInt info,ld,n,n1,lwork,inc=1;
71: PetscScalar sdummy,done=1.0,zero=0.0;
72: PetscReal *sigma;
73: PetscBool iscomplex = PETSC_FALSE;
74: PetscScalar *A = ds->mat[DS_MAT_A];
75: PetscScalar *Q = ds->mat[DS_MAT_Q];
76: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
77: PetscScalar *W;
80: if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors");
81: PetscBLASIntCast(ds->n,&n);
82: PetscBLASIntCast(ds->ld,&ld);
83: n1 = n+1;
84: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
85: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
86: DSAllocateWork_Private(ds,5*ld,6*ld,0);
87: DSAllocateMat_Private(ds,DS_MAT_W);
88: W = ds->mat[DS_MAT_W];
89: lwork = 5*ld;
90: sigma = ds->rwork+5*ld;
92: /* build A-w*I in W */
93: for (j=0;j<n;j++)
94: for (i=0;i<=n;i++)
95: W[i+j*ld] = A[i+j*ld];
96: for (i=0;i<n;i++)
97: W[i+i*ld] -= A[(*k)+(*k)*ld];
99: /* compute SVD of W */
100: #if !defined(PETSC_USE_COMPLEX)
101: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
102: #else
103: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
104: #endif
105: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
107: /* the smallest singular value is the new error estimate */
108: if (rnorm) *rnorm = sigma[n-1];
110: /* update vector with right singular vector associated to smallest singular value,
111: accumulating the transformation matrix Q */
112: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
113: return(0);
114: #endif
115: }
119: PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
120: {
122: PetscInt i;
125: for (i=0;i<ds->n;i++) {
126: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
127: }
128: return(0);
129: }
133: PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
134: {
135: #if defined(SLEPC_MISSING_LAPACK_TREVC)
137: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
138: #else
140: PetscInt i;
141: PetscBLASInt mm=1,mout,info,ld,n,inc = 1;
142: PetscScalar tmp,done=1.0,zero=0.0;
143: PetscReal norm;
144: PetscBool iscomplex = PETSC_FALSE;
145: PetscBLASInt *select;
146: PetscScalar *A = ds->mat[DS_MAT_A];
147: PetscScalar *Q = ds->mat[DS_MAT_Q];
148: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
149: PetscScalar *Y;
152: PetscBLASIntCast(ds->n,&n);
153: PetscBLASIntCast(ds->ld,&ld);
154: DSAllocateWork_Private(ds,0,0,ld);
155: select = ds->iwork;
156: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
158: /* Compute k-th eigenvector Y of A */
159: Y = X+(*k)*ld;
160: select[*k] = (PetscBLASInt)PETSC_TRUE;
161: #if !defined(PETSC_USE_COMPLEX)
162: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
163: mm = iscomplex? 2: 1;
164: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
165: DSAllocateWork_Private(ds,3*ld,0,0);
166: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
167: #else
168: DSAllocateWork_Private(ds,2*ld,ld,0);
169: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
170: #endif
171: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
172: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
174: /* accumulate and normalize eigenvectors */
175: if (ds->state>=DS_STATE_CONDENSED) {
176: PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
177: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
178: #if !defined(PETSC_USE_COMPLEX)
179: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
180: #endif
181: norm = BLASnrm2_(&n,Y,&inc);
182: #if !defined(PETSC_USE_COMPLEX)
183: if (iscomplex) {
184: tmp = BLASnrm2_(&n,Y+ld,&inc);
185: norm = SlepcAbsEigenvalue(norm,tmp);
186: }
187: #endif
188: tmp = 1.0 / norm;
189: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
190: #if !defined(PETSC_USE_COMPLEX)
191: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
192: #endif
193: }
195: /* set output arguments */
196: if (iscomplex) (*k)++;
197: if (rnorm) {
198: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
199: else *rnorm = PetscAbsScalar(Y[n-1]);
200: }
201: return(0);
202: #endif
203: }
207: PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
208: {
209: #if defined(SLEPC_MISSING_LAPACK_TREVC)
211: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
212: #else
214: PetscInt i;
215: PetscBLASInt n,ld,mout,info,inc = 1;
216: PetscBool iscomplex = PETSC_FALSE;
217: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
218: PetscReal norm;
219: const char *side,*back;
222: PetscBLASIntCast(ds->n,&n);
223: PetscBLASIntCast(ds->ld,&ld);
224: if (left) {
225: X = NULL;
226: Y = ds->mat[DS_MAT_Y];
227: side = "L";
228: } else {
229: X = ds->mat[DS_MAT_X];
230: Y = NULL;
231: side = "R";
232: }
233: Z = left? Y: X;
234: if (ds->state>=DS_STATE_CONDENSED) {
235: /* DSSolve() has been called, backtransform with matrix Q */
236: back = "B";
237: PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
238: } else back = "A";
239: #if !defined(PETSC_USE_COMPLEX)
240: DSAllocateWork_Private(ds,3*ld,0,0);
241: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
242: #else
243: DSAllocateWork_Private(ds,2*ld,ld,0);
244: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
245: #endif
246: if (info) SETERRQ1(PetscObjectComm((PetscObject)ds),PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
248: /* normalize eigenvectors */
249: for (i=0;i<n;i++) {
250: if (i<n-1 && A[i+1+i*ld]!=0.0) iscomplex = PETSC_TRUE;
251: norm = BLASnrm2_(&n,Z+i*ld,&inc);
252: #if !defined(PETSC_USE_COMPLEX)
253: if (iscomplex) {
254: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
255: norm = SlepcAbsEigenvalue(norm,tmp);
256: }
257: #endif
258: tmp = 1.0 / norm;
259: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
260: #if !defined(PETSC_USE_COMPLEX)
261: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
262: #endif
263: if (iscomplex) i++;
264: }
265: return(0);
266: #endif
267: }
271: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
272: {
276: switch (mat) {
277: case DS_MAT_X:
278: if (ds->refined) {
279: if (!ds->extrarow) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Refined vectors require activating the extra row");
280: if (j) {
281: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
282: } else {
283: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
284: }
285: } else {
286: if (j) {
287: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
288: } else {
289: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
290: }
291: }
292: break;
293: case DS_MAT_Y:
294: if (ds->refined) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
295: if (j) {
296: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
297: } else {
298: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
299: }
300: break;
301: case DS_MAT_U:
302: case DS_MAT_VT:
303: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
304: break;
305: default:
306: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
307: }
308: if (ds->state < DS_STATE_CONDENSED) {
309: DSSetState(ds,DS_STATE_CONDENSED);
310: }
311: return(0);
312: }
316: PetscErrorCode DSNormalize_NHEP(DS ds,DSMatType mat,PetscInt col)
317: {
319: PetscInt i,i0,i1;
320: PetscBLASInt ld,n,one = 1;
321: PetscScalar *A = ds->mat[DS_MAT_A],norm,*x;
322: #if !defined(PETSC_USE_COMPLEX)
323: PetscScalar norm0;
324: #endif
327: switch (mat) {
328: case DS_MAT_X:
329: case DS_MAT_Y:
330: case DS_MAT_Q:
331: /* Supported matrices */
332: break;
333: case DS_MAT_U:
334: case DS_MAT_VT:
335: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
336: break;
337: default:
338: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
339: }
341: PetscBLASIntCast(ds->n,&n);
342: PetscBLASIntCast(ds->ld,&ld);
343: DSGetArray(ds,mat,&x);
344: if (col < 0) {
345: i0 = 0; i1 = ds->n;
346: } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
347: i0 = col-1; i1 = col+1;
348: } else {
349: i0 = col; i1 = col+1;
350: }
351: for (i=i0;i<i1;i++) {
352: #if !defined(PETSC_USE_COMPLEX)
353: if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
354: norm = BLASnrm2_(&n,&x[ld*i],&one);
355: norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
356: norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
357: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
358: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*(i+1)],&one));
359: i++;
360: } else
361: #endif
362: {
363: norm = BLASnrm2_(&n,&x[ld*i],&one);
364: norm = 1.0/norm;
365: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
366: }
367: }
368: return(0);
369: }
373: PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
374: {
375: #if defined(SLEPC_MISSING_LAPACK_TRSEN)
377: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
378: #else
380: PetscInt i;
381: PetscBLASInt info,n,ld,mout,lwork,*selection;
382: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
383: #if !defined(PETSC_USE_COMPLEX)
384: PetscBLASInt *iwork,liwork;
385: #endif
388: if (!k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must supply argument k");
389: PetscBLASIntCast(ds->n,&n);
390: PetscBLASIntCast(ds->ld,&ld);
391: #if !defined(PETSC_USE_COMPLEX)
392: lwork = n;
393: liwork = 1;
394: DSAllocateWork_Private(ds,lwork,0,liwork+n);
395: work = ds->work;
396: lwork = ds->lwork;
397: selection = ds->iwork;
398: iwork = ds->iwork + n;
399: liwork = ds->liwork - n;
400: #else
401: lwork = 1;
402: DSAllocateWork_Private(ds,lwork,0,n);
403: work = ds->work;
404: selection = ds->iwork;
405: #endif
406: /* Compute the selected eigenvalue to be in the leading position */
407: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
408: PetscMemzero(selection,n*sizeof(PetscBLASInt));
409: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
410: #if !defined(PETSC_USE_COMPLEX)
411: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
412: #else
413: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
414: #endif
415: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSEN %d",info);
416: *k = mout;
417: return(0);
418: #endif
419: }
423: PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
424: {
425: #if defined(SLEPC_MISSING_LAPACK_TREXC)
427: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
428: #else
430: PetscScalar re;
431: PetscInt i,j,pos,result;
432: PetscBLASInt ifst,ilst,info,n,ld;
433: PetscScalar *T = ds->mat[DS_MAT_A];
434: PetscScalar *Q = ds->mat[DS_MAT_Q];
435: #if !defined(PETSC_USE_COMPLEX)
436: PetscScalar *work,im;
437: #endif
440: PetscBLASIntCast(ds->n,&n);
441: PetscBLASIntCast(ds->ld,&ld);
442: #if !defined(PETSC_USE_COMPLEX)
443: DSAllocateWork_Private(ds,ld,0,0);
444: work = ds->work;
445: #endif
446: /* selection sort */
447: for (i=ds->l;i<n-1;i++) {
448: re = wr[i];
449: #if !defined(PETSC_USE_COMPLEX)
450: im = wi[i];
451: #endif
452: pos = 0;
453: j=i+1; /* j points to the next eigenvalue */
454: #if !defined(PETSC_USE_COMPLEX)
455: if (im != 0) j=i+2;
456: #endif
457: /* find minimum eigenvalue */
458: for (;j<n;j++) {
459: #if !defined(PETSC_USE_COMPLEX)
460: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
461: #else
462: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
463: #endif
464: if (result > 0) {
465: re = wr[j];
466: #if !defined(PETSC_USE_COMPLEX)
467: im = wi[j];
468: #endif
469: pos = j;
470: }
471: #if !defined(PETSC_USE_COMPLEX)
472: if (wi[j] != 0) j++;
473: #endif
474: }
475: if (pos) {
476: /* interchange blocks */
477: PetscBLASIntCast(pos+1,&ifst);
478: PetscBLASIntCast(i+1,&ilst);
479: #if !defined(PETSC_USE_COMPLEX)
480: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
481: #else
482: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
483: #endif
484: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
485: /* recover original eigenvalues from T matrix */
486: for (j=i;j<n;j++) {
487: wr[j] = T[j+j*ld];
488: #if !defined(PETSC_USE_COMPLEX)
489: if (j<n-1 && T[j+1+j*ld] != 0.0) {
490: /* complex conjugate eigenvalue */
491: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
492: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
493: wr[j+1] = wr[j];
494: wi[j+1] = -wi[j];
495: j++;
496: } else {
497: wi[j] = 0.0;
498: }
499: #endif
500: }
501: }
502: #if !defined(PETSC_USE_COMPLEX)
503: if (wi[i] != 0) i++;
504: #endif
505: }
506: return(0);
507: #endif
508: }
512: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
513: {
517: if (!rr || wr == rr) {
518: DSSort_NHEP_Total(ds,wr,wi);
519: } else {
520: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
521: }
522: return(0);
523: }
527: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
528: {
530: PetscInt i;
531: PetscBLASInt n,ld,incx=1;
532: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
535: PetscBLASIntCast(ds->n,&n);
536: PetscBLASIntCast(ds->ld,&ld);
537: A = ds->mat[DS_MAT_A];
538: Q = ds->mat[DS_MAT_Q];
539: DSAllocateWork_Private(ds,2*ld,0,0);
540: x = ds->work;
541: y = ds->work+ld;
542: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
543: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
544: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
545: ds->k = n;
546: return(0);
547: }
551: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
552: {
553: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
555: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
556: #else
558: PetscScalar *work,*tau;
559: PetscInt i,j;
560: PetscBLASInt ilo,lwork,info,n,ld;
561: PetscScalar *A = ds->mat[DS_MAT_A];
562: PetscScalar *Q = ds->mat[DS_MAT_Q];
565: #if !defined(PETSC_USE_COMPLEX)
567: #endif
568: PetscBLASIntCast(ds->n,&n);
569: PetscBLASIntCast(ds->ld,&ld);
570: PetscBLASIntCast(ds->l+1,&ilo);
571: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
572: tau = ds->work;
573: work = ds->work+ld;
574: lwork = ld*ld;
576: /* initialize orthogonal matrix */
577: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
578: for (i=0;i<n;i++)
579: Q[i+i*ld] = 1.0;
580: if (n==1) { /* quick return */
581: wr[0] = A[0];
582: wi[0] = 0.0;
583: return(0);
584: }
586: /* reduce to upper Hessenberg form */
587: if (ds->state<DS_STATE_INTERMEDIATE) {
588: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
589: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
590: for (j=0;j<n-1;j++) {
591: for (i=j+2;i<n;i++) {
592: Q[i+j*ld] = A[i+j*ld];
593: A[i+j*ld] = 0.0;
594: }
595: }
596: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
597: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
598: }
600: /* compute the (real) Schur form */
601: #if !defined(PETSC_USE_COMPLEX)
602: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
603: for (j=0;j<ds->l;j++) {
604: if (j==n-1 || A[j+1+j*ld] == 0.0) {
605: /* real eigenvalue */
606: wr[j] = A[j+j*ld];
607: wi[j] = 0.0;
608: } else {
609: /* complex eigenvalue */
610: wr[j] = A[j+j*ld];
611: wr[j+1] = A[j+j*ld];
612: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
613: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
614: wi[j+1] = -wi[j];
615: j++;
616: }
617: }
618: #else
619: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
620: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
621: #endif
622: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
623: return(0);
624: #endif
625: }
629: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
630: {
631: PetscInt i,newn,ld=ds->ld,l=ds->l;
632: PetscScalar *A;
635: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
636: A = ds->mat[DS_MAT_A];
637: /* be careful not to break a diagonal 2x2 block */
638: if (A[n+(n-1)*ld]==0.0) newn = n;
639: else {
640: if (n<ds->n-1) newn = n+1;
641: else newn = n-1;
642: }
643: if (ds->extrarow && ds->k==ds->n) {
644: /* copy entries of extra row to the new position, then clean last row */
645: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
646: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
647: }
648: ds->k = 0;
649: ds->n = newn;
650: return(0);
651: }
655: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
656: {
657: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
659: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
660: #else
662: PetscScalar *work;
663: PetscReal *rwork;
664: PetscBLASInt *ipiv;
665: PetscBLASInt lwork,info,n,ld;
666: PetscReal hn,hin;
667: PetscScalar *A;
670: PetscBLASIntCast(ds->n,&n);
671: PetscBLASIntCast(ds->ld,&ld);
672: lwork = 8*ld;
673: DSAllocateWork_Private(ds,lwork,ld,ld);
674: work = ds->work;
675: rwork = ds->rwork;
676: ipiv = ds->iwork;
678: /* use workspace matrix W to avoid overwriting A */
679: DSAllocateMat_Private(ds,DS_MAT_W);
680: A = ds->mat[DS_MAT_W];
681: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
683: /* norm of A */
684: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
685: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
687: /* norm of inv(A) */
688: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
689: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
690: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
691: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
692: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
694: *cond = hn*hin;
695: return(0);
696: #endif
697: }
701: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
702: {
703: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
705: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
706: #else
708: PetscInt i,j;
709: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
710: PetscScalar *A,*B,*Q,*g=gin,*ghat;
711: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
712: PetscReal gnorm;
715: PetscBLASIntCast(ds->n,&n);
716: PetscBLASIntCast(ds->ld,&ld);
717: A = ds->mat[DS_MAT_A];
719: if (!recover) {
721: DSAllocateWork_Private(ds,0,0,ld);
722: ipiv = ds->iwork;
723: if (!g) {
724: DSAllocateWork_Private(ds,ld,0,0);
725: g = ds->work;
726: }
727: /* use workspace matrix W to factor A-tau*eye(n) */
728: DSAllocateMat_Private(ds,DS_MAT_W);
729: B = ds->mat[DS_MAT_W];
730: PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);
732: /* Vector g initialy stores b = beta*e_n^T */
733: PetscMemzero(g,n*sizeof(PetscScalar));
734: g[n-1] = beta;
736: /* g = (A-tau*eye(n))'\b */
737: for (i=0;i<n;i++)
738: B[i+i*ld] -= tau;
739: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
740: if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
741: if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
742: PetscLogFlops(2.0*n*n*n/3.0);
743: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
744: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
745: PetscLogFlops(2.0*n*n-n);
747: /* A = A + g*b' */
748: for (i=0;i<n;i++)
749: A[i+(n-1)*ld] += g[i]*beta;
751: } else { /* recover */
754: DSAllocateWork_Private(ds,ld,0,0);
755: ghat = ds->work;
756: Q = ds->mat[DS_MAT_Q];
758: /* g^ = -Q(:,idx)'*g */
759: PetscBLASIntCast(ds->l+ds->k,&ncol);
760: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
762: /* A = A + g^*b' */
763: for (i=0;i<ds->l+ds->k;i++)
764: for (j=ds->l;j<ds->l+ds->k;j++)
765: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
767: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
768: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
769: }
771: /* Compute gamma factor */
772: if (gamma) {
773: gnorm = 0.0;
774: for (i=0;i<n;i++)
775: gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
776: *gamma = PetscSqrtReal(1.0+gnorm);
777: }
778: return(0);
779: #endif
780: }
784: PETSC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
785: {
787: ds->ops->allocate = DSAllocate_NHEP;
788: ds->ops->view = DSView_NHEP;
789: ds->ops->vectors = DSVectors_NHEP;
790: ds->ops->solve[0] = DSSolve_NHEP;
791: ds->ops->sort = DSSort_NHEP;
792: ds->ops->truncate = DSTruncate_NHEP;
793: ds->ops->update = DSUpdateExtraRow_NHEP;
794: ds->ops->cond = DSCond_NHEP;
795: ds->ops->transharm = DSTranslateHarmonic_NHEP;
796: ds->ops->normalize = DSNormalize_NHEP;
797: return(0);
798: }