Actual source code: stoar.c
slepc-3.6.1 2015-09-03
1: /*
3: SLEPc polynomial eigensolver: "stoar"
5: Method: S-TOAR
7: Algorithm:
9: Symmetric Two-Level Orthogonal Arnoldi.
11: References:
13: [1] C. Campos and J.E. Roman, " Restarted Q-Arnoldi-type methods
14: exploiting symmetry in quadratic eigenvalue problems",
15: submitted, 2015.
17: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18: SLEPc - Scalable Library for Eigenvalue Problem Computations
19: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
21: This file is part of SLEPc.
23: SLEPc is free software: you can redistribute it and/or modify it under the
24: terms of version 3 of the GNU Lesser General Public License as published by
25: the Free Software Foundation.
27: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
28: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
30: more details.
32: You should have received a copy of the GNU Lesser General Public License
33: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
34: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35: */
37: #include <slepc/private/pepimpl.h> /*I "slepcpep.h" I*/
38: #include ../src/pep/impls/krylov/pepkrylov.h
39: #include <slepcblaslapack.h>
43: /*
44: Compute B-norm of v=[v1;v2] whith B=diag(-pep->T[0],pep->T[2])
45: */
46: static PetscErrorCode PEPSTOARNorm(PEP pep,PetscInt j,PetscReal *norm)
47: {
49: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
50: PetscBLASInt n_,one=1,ld_;
51: PetscScalar sone=1.0,szero=0.0,*sp,*sq,*w1,*w2,*qK,*qM;
52: PetscInt n,i,lds=ctx->d*ctx->ld;
55: qK = ctx->qB;
56: qM = ctx->qB+ctx->ld*ctx->ld;
57: n = j+2;
58: PetscMalloc2(n,&w1,n,&w2);
59: sp = ctx->S+lds*j;
60: sq = sp+ctx->ld;
61: PetscBLASIntCast(n,&n_);
62: PetscBLASIntCast(ctx->ld,&ld_);
63: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,sp,&one,&szero,w1,&one));
64: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,sq,&one,&szero,w2,&one));
65: *norm = 0.0;
66: for (i=0;i<n;i++) *norm += PetscRealPart(w1[i]*PetscConj(sp[i])+w2[i]*PetscConj(sq[i]));
67: *norm = (*norm>0.0)?PetscSqrtReal(*norm):-PetscSqrtReal(-*norm);
68: PetscFree2(w1,w2);
69: return(0);
70: }
74: static PetscErrorCode PEPSTOARqKqMupdates(PEP pep,PetscInt j,Vec *wv,PetscInt nwv)
75: {
77: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
78: PetscInt i,ld=ctx->ld;
79: PetscScalar *qK,*qM;
80: Vec vj,v1,v2;
83: qK = ctx->qB;
84: qM = ctx->qB+ctx->ld*ctx->ld;
85: if (!wv||nwv<2) {
86: if (!wv) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",3);
87: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",4);
88: }
89: v1 = wv[0];
90: v2 = wv[1];
91: BVGetColumn(pep->V,j,&vj);
92: STMatMult(pep->st,0,vj,v1);
93: STMatMult(pep->st,2,vj,v2);
94: BVRestoreColumn(pep->V,j,&vj);
95: for (i=0;i<=j;i++) {
96: BVGetColumn(pep->V,i,&vj);
97: VecDot(v1,vj,qK+j*ld+i);
98: VecDot(v2,vj,qM+j*ld+i);
99: *(qM+j*ld+i) *= pep->sfactor*pep->sfactor;
100: BVRestoreColumn(pep->V,i,&vj);
101: }
102: for (i=0;i<j;i++) {
103: qK[i+j*ld] = -qK[i+ld*j];
104: qK[j+i*ld] = PetscConj(qK[i+j*ld]);
105: qM[j+i*ld] = PetscConj(qM[i+j*ld]);
106: }
107: qK[j+j*ld] = -PetscRealPart(qK[j+ld*j]);
108: qM[j+j*ld] = PetscRealPart(qM[j+ld*j]);
109: return(0);
110: }
114: PetscErrorCode PEPSetUp_STOAR(PEP pep)
115: {
117: PetscBool sinv,flg,lindep;
118: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
119: PetscInt ld,i;
120: PetscReal norm,*omega;
123: PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
124: if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
125: if (!pep->max_it) pep->max_it = PetscMax(100,2*pep->n/pep->ncv);
126: if (!pep->which) {
127: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
128: if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
129: else pep->which = PEP_LARGEST_MAGNITUDE;
130: }
131: if (pep->problem_type!=PEP_HERMITIAN) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Requested method is only available for Hermitian problems");
133: if (pep->nmat!=3) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver only available for quadratic problems");
134: if (pep->basis!=PEP_BASIS_MONOMIAL) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver not implemented for non-monomial bases");
135: STGetTransform(pep->st,&flg);
136: if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver requires the ST transformation flag set, see STSetTransform()");
138: PEPAllocateSolution(pep,2);
139: PEPSetWorkVecs(pep,4);
140: ld = pep->ncv+2;
141: DSSetType(pep->ds,DSGHIEP);
142: DSSetCompact(pep->ds,PETSC_TRUE);
143: DSAllocate(pep->ds,ld);
144: STGetNumMatrices(pep->st,&ctx->d);
145: ctx->d--;
146: ctx->ld = ld;
147: PetscCalloc1(ctx->d*ld*ld,&ctx->S);
148: PetscCalloc1(2*ld*ld,&ctx->qB);
150: /* process starting vector */
151: if (pep->nini>-2) {
152: BVSetRandomColumn(pep->V,0,pep->rand);
153: BVSetRandomColumn(pep->V,1,pep->rand);
154: } else {
155: BVInsertVec(pep->V,0,pep->IS[0]);
156: BVInsertVec(pep->V,1,pep->IS[1]);
157: }
158: BVOrthogonalizeColumn(pep->V,0,NULL,&norm,&lindep);
159: if (!lindep) {
160: BVScaleColumn(pep->V,0,1.0/norm);
161: ctx->S[0] = norm;
162: PEPSTOARqKqMupdates(pep,0,pep->work,2);
163: } else {
164: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Problem with initial vector");
165: }
166: BVOrthogonalizeColumn(pep->V,1,ctx->S+ld,&norm,&lindep);
167: if (!lindep) {
168: BVScaleColumn(pep->V,1,1.0/norm);
169: ctx->S[1] = norm;
170: PEPSTOARqKqMupdates(pep,1,pep->work,2);
171: } else {
172: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Problem with initial vector");
173: }
175: PEPSTOARNorm(pep,0,&norm);
176: for (i=0;i<2;i++) { ctx->S[i+ld] /= norm; ctx->S[i] /= norm; }
177: DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
178: omega[0] = (norm>0)?1.0:-1.0;
179: DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
180: if (pep->nini<0) {
181: SlepcBasisDestroy_Private(&pep->nini,&pep->IS);
182: }
183: return(0);
184: }
188: /*
189: Computes GS orthogonalization x = [z;x] - [Sp;Sq]*y,
190: where y = Omega\([Sp;Sq]'*[qK zeros(size(qK,1)) ;zeros(size(qK,1)) qM]*[z;x]).
191: n: Column from S to be orthogonalized against previous columns.
192: */
193: static PetscErrorCode PEPSTOAROrth2(PEP pep,PetscInt k,PetscReal *Omega,PetscScalar *y)
194: {
196: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
197: PetscBLASInt n_,lds_,k_,one=1,ld_;
198: PetscScalar *S=ctx->S,sonem=-1.0,sone=1.0,szero=0.0,*tp,*tq,*xp,*xq,*c,*qK,*qM;
199: PetscInt i,lds=ctx->d*ctx->ld,n,j;
200:
202: qK = ctx->qB;
203: qM = ctx->qB+ctx->ld*ctx->ld;
204: n = k+2;
205: PetscMalloc3(n,&tp,n,&tq,k,&c);
206: PetscBLASIntCast(n,&n_); /* Size of qK and qM */
207: PetscBLASIntCast(ctx->ld,&ld_);
208: PetscBLASIntCast(lds,&lds_);
209: PetscBLASIntCast(k,&k_); /* Number of vectors to orthogonalize against */
210: xp = S+k*lds;
211: xq = S+ctx->ld+k*lds;
212: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
213: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
214: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,y,&one));
215: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,y,&one));
216: for (i=0;i<k;i++) y[i] /= Omega[i];
217: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,y,&one,&sone,xp,&one));
218: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,y,&one,&sone,xq,&one));
219: /* three times */
220: for (j=0;j<2;j++) {
221: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
222: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
223: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,c,&one));
224: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,c,&one));
225: for (i=0;i<k;i++) c[i] /= Omega[i];
226: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,c,&one,&sone,xp,&one));
227: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,c,&one,&sone,xq,&one));
228: for (i=0;i<k;i++) y[i] += c[i];
229: }
230: PetscFree3(tp,tq,c);
231: return(0);
232: }
236: /*
237: Compute a run of Lanczos iterations
238: */
239: static PetscErrorCode PEPSTOARrun(PEP pep,PetscReal *a,PetscReal *b,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscScalar *work,PetscInt nw,Vec *t_,PetscInt nwv)
240: {
242: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
243: PetscInt i,j,m=*M,nwu=0,lwa,l;
244: PetscInt lds=ctx->d*ctx->ld,offq=ctx->ld;
245: Vec v=t_[0],t=t_[1],q=t_[2];
246: PetscReal norm,sym=0.0,fro=0.0,*f;
247: PetscScalar *y,*S=ctx->S;
248: PetscBLASInt j_,one=1;
249: PetscBool lindep;
252: *breakdown = PETSC_FALSE; /* ----- */
253: if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
254: lwa = (ctx->ld)*4;
255: if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
256: DSGetDimensions(pep->ds,NULL,NULL,&l,NULL,NULL);
257: y = work;
258: nwu += ctx->ld;
259: for (j=k;j<m;j++) {
260: /* apply operator */
261: BVSetActiveColumns(pep->V,0,j+2);
262: BVMultVec(pep->V,1.0,0.0,v,S+j*lds);
263: STMatMult(pep->st,0,v,t);
264: BVMultVec(pep->V,1.0,0.0,v,S+offq+j*lds);
265: STMatMult(pep->st,1,v,q);
266: VecAXPY(t,pep->sfactor,q);
267: STMatSolve(pep->st,t,q);
268: VecScale(q,-1.0/(pep->sfactor*pep->sfactor));
270: /* orthogonalize */
271: BVOrthogonalizeVec(pep->V,q,S+offq+(j+1)*lds,&norm,&lindep);
272: if (lindep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STOAR does not support detection of linearly dependent TOAR vectors");
273: *(S+offq+(j+1)*lds+j+2) = norm;
274: VecScale(q,1.0/norm);
275: BVInsertVec(pep->V,j+2,q);
276: for (i=0;i<=j+1;i++) *(S+(j+1)*lds+i) = *(S+offq+j*lds+i);
277:
278: /* update qK and qM */
279: PEPSTOARqKqMupdates(pep,j+2,t_,2);
281: /* level-2 orthogonalization */
282: PEPSTOAROrth2(pep,j+1,omega,y);
283: a[j] = PetscRealPart(y[j])/omega[j];
284: PEPSTOARNorm(pep,j+1,&norm);
285: omega[j+1] = (norm > 0)?1.0:-1.0;
286: for (i=0;i<=j+2;i++) {
287: S[i+(j+1)*lds] /= norm;
288: S[i+offq+(j+1)*lds] /= norm;
289: }
290: b[j] = PetscAbsReal(norm);
292: /* check symmetry */
293: DSGetArrayReal(pep->ds,DS_MAT_T,&f);
294: if (j==k) {
295: for (i=l;i<j-1;i++) y[i] = PetscAbsScalar(y[i])-PetscAbsReal(f[2*ctx->ld+i]);
296: for (i=0;i<l;i++) y[i] = 0.0;
297: }
298: DSRestoreArrayReal(pep->ds,DS_MAT_T,&f);
299: if (j>0) y[j-1] = PetscAbsScalar(y[j-1])-PetscAbsReal(b[j-1]);
300: PetscBLASIntCast(j,&j_);
301: sym = SlepcAbs(BLASnrm2_(&j_,y,&one),sym);
302: fro = SlepcAbs(fro,SlepcAbs(a[j],b[j]));
303: if (j>0) fro = SlepcAbs(fro,b[j-1]);
304: if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*pep->tol)) {
305: *symmlost = PETSC_TRUE;
306: *M=j+1;
307: break;
308: }
309: }
310: return(0);
311: }
315: static PetscErrorCode PEPSTOARTrunc(PEP pep,PetscInt rs1,PetscInt cs1,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nrw)
316: {
318: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
319: Mat G;
320: PetscInt lwa,nwu=0,lrwa,nrwu=0;
321: PetscInt i,n,lds=2*ctx->ld;
322: PetscScalar *M,*V,*U,*S=ctx->S,sone=1.0,zero=0.0,t,*qK,*qM;
323: PetscReal *sg;
324: PetscBLASInt cs1_,rs1_,cs1t2,cs1p1,n_,info,lw_,lds_,ld_;
327: qK = ctx->qB;
328: qM = ctx->qB+ctx->ld*ctx->ld;
329: n = (rs1>2*cs1)?2*cs1:rs1;
330: lwa = cs1*rs1*4+n*(rs1+2*cs1)+(cs1+1)*(cs1+2);
331: lrwa = n+cs1+1+5*n;
332: if (!work||nw<lwa) {
333: if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
334: if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",5);
335: }
336: if (!rwork||nrw<lrwa) {
337: if (nrw<lrwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
338: if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",7);
339: }
340: M = work+nwu;
341: nwu += rs1*cs1*2;
342: U = work+nwu;
343: nwu += rs1*n;
344: V = work+nwu;
345: nwu += 2*cs1*n;
346: sg = rwork+nrwu;
347: nrwu += n;
348: for (i=0;i<cs1;i++) {
349: PetscMemcpy(M+i*rs1,S+i*lds,rs1*sizeof(PetscScalar));
350: PetscMemcpy(M+(i+cs1)*rs1,S+i*lds+ctx->ld,rs1*sizeof(PetscScalar));
351: }
352: PetscBLASIntCast(n,&n_);
353: PetscBLASIntCast(cs1,&cs1_);
354: PetscBLASIntCast(rs1,&rs1_);
355: PetscBLASIntCast(cs1*2,&cs1t2);
356: PetscBLASIntCast(cs1+1,&cs1p1);
357: PetscBLASIntCast(lds,&lds_);
358: PetscBLASIntCast(ctx->ld,&ld_);
359: PetscBLASIntCast(lwa-nwu,&lw_);
360: #if !defined(PETSC_USE_COMPLEX)
361: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,&info));
362: #else
363: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
364: #endif
365: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
367: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
368: MatCreateSeqDense(PETSC_COMM_SELF,rs1,2*cs1,U,&G);
369: BVSetActiveColumns(pep->V,0,rs1);
370: BVMultInPlace(pep->V,G,0,cs1+1);
371: MatDestroy(&G);
372:
373: /* Update S */
374: PetscMemzero(S,lds*ctx->ld*sizeof(PetscScalar));
376: for (i=0;i<cs1+1;i++) {
377: t = sg[i];
378: PetscStackCallBLAS("BLASscal",BLASscal_(&cs1t2,&t,V+i,&n_));
379: }
380: for (i=0;i<cs1;i++) {
381: PetscMemcpy(S+i*lds,V+i*n,(cs1+1)*sizeof(PetscScalar));
382: PetscMemcpy(S+ctx->ld+i*lds,V+(cs1+i)*n,(cs1+1)*sizeof(PetscScalar));
383: }
385: /* Update qM and qK */
386: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qK,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
387: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qK,&ld_));
388: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qM,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
389: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qM,&ld_));
390: return(0);
391: }
395: /*
396: S <- S*Q
397: columns s-s+ncu of S
398: rows 0-sr of S
399: size(Q) qr x ncu
400: */
401: static PetscErrorCode PEPSTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work,PetscInt nw)
402: {
404: PetscScalar a=1.0,b=0.0;
405: PetscBLASInt sr_,ncu_,ldq_,lds_,qr_;
406: PetscInt lwa,j,lds=2*ld;
409: lwa = sr*ncu;
410: if (!work||nw<lwa) {
411: if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
412: if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
413: }
414: PetscBLASIntCast(sr,&sr_);
415: PetscBLASIntCast(qr,&qr_);
416: PetscBLASIntCast(ncu,&ncu_);
417: PetscBLASIntCast(lds,&lds_);
418: PetscBLASIntCast(ldq,&ldq_);
419: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S,&lds_,Q,&ldq_,&b,work,&sr_));
420: for (j=0;j<ncu;j++) {
421: PetscMemcpy(S+lds*(s+j),work+j*sr,sr*sizeof(PetscScalar));
422: }
423: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+ld,&lds_,Q,&ldq_,&b,work,&sr_));
424: for (j=0;j<ncu;j++) {
425: PetscMemcpy(S+lds*(s+j)+ld,work+j*sr,sr*sizeof(PetscScalar));
426: }
427: return(0);
428: }
430: #if 0
433: static PetscErrorCode PEPSTOARpreKConvergence(PEP pep,PetscInt nv,PetscReal *norm,Vec *w)
434: {
436: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
437: PetscBLASInt n_,one=1;
438: PetscInt lds=2*ctx->ld;
439: PetscReal t1,t2;
440: PetscScalar *S=ctx->S;
443: PetscBLASIntCast(nv+2,&n_);
444: t1 = BLASnrm2_(&n_,S+nv*2*ctx->ld,&one);
445: t2 = BLASnrm2_(&n_,S+(nv*2+1)*ctx->ld,&one);
446: *norm = SlepcAbs(t1,t2);
447: BVSetActiveColumns(pep->V,0,nv+2);
448: BVMultVec(pep->V,1.0,0.0,w[1],S+nv*lds);
449: STMatMult(pep->st,0,w[1],w[2]);
450: VecNorm(w[2],NORM_2,&t1);
451: BVMultVec(pep->V,1.0,0.0,w[1],S+ctx->ld+nv*lds);
452: STMatMult(pep->st,2,w[1],w[2]);
453: VecNorm(w[2],NORM_2,&t2);
454: t2 *= pep->sfactor*pep->sfactor;
455: *norm = PetscMax(*norm,SlepcAbs(t1,t2));
456: return(0);
457: }
458: #endif
462: PetscErrorCode PEPSolve_STOAR(PEP pep)
463: {
465: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
466: PetscInt j,k,l,nv=0,ld=ctx->ld,lds=ctx->d*ctx->ld,off,ldds,t;
467: PetscInt lwa,lrwa,nwu=0,nrwu=0,nconv=0;
468: PetscScalar *S=ctx->S,*Q,*work;
469: PetscReal beta,norm=1.0,*omega,*a,*b,*r,*rwork;
470: PetscBool breakdown,symmlost=PETSC_FALSE,sinv;
473: BVSetMatrix(pep->V,NULL,PETSC_FALSE);
474: lwa = 9*ld*ld+5*ld;
475: lrwa = 8*ld;
476: PetscMalloc2(lwa,&work,lrwa,&rwork); /* REVIEW */
477: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
478: RGSetScale(pep->rg,sinv?1.0/pep->sfactor:pep->sfactor);
479: STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
481: /* Restart loop */
482: l = 0;
483: DSGetLeadingDimension(pep->ds,&ldds);
484: while (pep->reason == PEP_CONVERGED_ITERATING) {
485: pep->its++;
486: DSGetArrayReal(pep->ds,DS_MAT_T,&a);
487: b = a+ldds;
488: r = b+ldds;
489: DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
490:
491: /* Compute an nv-step Lanczos factorization */
492: nv = PetscMin(pep->nconv+pep->mpd,pep->ncv);
493: PEPSTOARrun(pep,a,b,omega,pep->nconv+l,&nv,&breakdown,&symmlost,work+nwu,lwa-nwu,pep->work,3);
494: beta = b[nv-1];
495: if (symmlost) {
496: pep->reason = PEP_DIVERGED_SYMMETRY_LOST;
497: if (nv==pep->nconv+l+1) { pep->nconv = nconv; break; }
498: }
499: DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
500: DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
501: DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
502: if (l==0) {
503: DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
504: } else {
505: DSSetState(pep->ds,DS_STATE_RAW);
506: }
508: /* Solve projected problem */
509: DSSolve(pep->ds,pep->eigr,pep->eigi);
510: DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
512: /* Check convergence */
513: /* PEPSTOARpreKConvergence(pep,nv,&norm,pep->work);*/
514: norm = 1.0;
515: DSGetDimensions(pep->ds,NULL,NULL,NULL,NULL,&t);
516: PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,t-pep->nconv,PetscAbsReal(beta)*norm,&k);
517: nconv = k;
518: if (pep->its >= pep->max_it) pep->reason = PEP_DIVERGED_ITS;
519: if (k >= pep->nev) pep->reason = PEP_CONVERGED_TOL;
521: /* Update l */
522: if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
523: else {
524: l = PetscMax(1,(PetscInt)((nv-k)/2));
525: l = PetscMin(l,t);
526: DSGetArrayReal(pep->ds,DS_MAT_T,&a);
527: if (*(a+ldds+k+l-1)!=0) {
528: if (k+l<nv-1) l = l+1;
529: else l = l-1;
530: }
531: DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
532: }
533: if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
535: /* Update S */
536: off = pep->nconv*ldds;
537: DSGetArray(pep->ds,DS_MAT_Q,&Q);
538: PEPSTOARSupdate(S,ld,nv+2,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu,lwa-nwu);
539: DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
541: /* Copy last column of S */
542: PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));
544: if (pep->reason == PEP_CONVERGED_ITERATING) {
545: if (breakdown) {
546: /* Stop if breakdown */
547: PetscInfo2(pep,"Breakdown STOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
548: pep->reason = PEP_DIVERGED_BREAKDOWN;
549: } else {
550: /* Prepare the Rayleigh quotient for restart */
551: DSGetArray(pep->ds,DS_MAT_Q,&Q);
552: DSGetArrayReal(pep->ds,DS_MAT_T,&a);
553: DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
554: r = a + 2*ldds;
555: for (j=k;j<k+l;j++) {
556: r[j] = PetscRealPart(Q[nv-1+j*ldds]*beta);
557: }
558: b = a+ldds;
559: b[k+l-1] = r[k+l-1];
560: omega[k+l] = omega[nv];
561: DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
562: DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
563: DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
564: /* Truncate S */
565: DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
566: PEPSTOARTrunc(pep,nv+2,k+l+1,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
567: DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
568: }
569: }
572: pep->nconv = k;
573: PEPMonitor(pep,pep->its,pep->nconv,pep->eigr,pep->eigi,pep->errest,nv);
574: }
576: if (pep->nconv>0) {
577: /* Truncate S */
578: DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
579: PEPSTOARTrunc(pep,nv+2,pep->nconv,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
580: DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
581:
582: /* Extraction */
583: DSSetDimensions(pep->ds,pep->nconv,0,0,0);
584: DSSetState(pep->ds,DS_STATE_RAW);
585:
586: for (j=0;j<pep->nconv;j++) {
587: pep->eigr[j] *= pep->sfactor;
588: pep->eigi[j] *= pep->sfactor;
589: }
590: }
591: STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
592: RGSetScale(pep->rg,1.0);
594: /* truncate Schur decomposition and change the state to raw so that
595: DSVectors() computes eigenvectors from scratch */
596: DSSetDimensions(pep->ds,pep->nconv,0,0,0);
597: DSSetState(pep->ds,DS_STATE_RAW);
598: PetscFree2(work,rwork);
599: return(0);
600: }
604: PetscErrorCode PEPSetFromOptions_STOAR(PetscOptions *PetscOptionsObject,PEP pep)
605: {
607: PetscBool flg,lock;
610: PetscOptionsHead(PetscOptionsObject,"PEP STOAR Options");
611: PetscOptionsBool("-pep_stoar_locking","Choose between locking and non-locking variants","PEPSTOARSetLocking",PETSC_FALSE,&lock,&flg);
612: if (flg) {
613: PEPSTOARSetLocking(pep,lock);
614: }
615: PetscOptionsTail();
616: return(0);
617: }
621: static PetscErrorCode PEPSTOARSetLocking_STOAR(PEP pep,PetscBool lock)
622: {
623: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
626: ctx->lock = lock;
627: return(0);
628: }
632: /*@
633: PEPSTOARSetLocking - Choose between locking and non-locking variants of
634: the STOAR method.
636: Logically Collective on PEP
638: Input Parameters:
639: + pep - the eigenproblem solver context
640: - lock - true if the locking variant must be selected
642: Options Database Key:
643: . -pep_stoar_locking - Sets the locking flag
645: Notes:
646: The default is to lock converged eigenpairs when the method restarts.
647: This behaviour can be changed so that all directions are kept in the
648: working subspace even if already converged to working accuracy (the
649: non-locking variant).
651: Level: advanced
653: .seealso: PEPSTOARGetLocking()
654: @*/
655: PetscErrorCode PEPSTOARSetLocking(PEP pep,PetscBool lock)
656: {
662: PetscTryMethod(pep,"PEPSTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
663: return(0);
664: }
668: static PetscErrorCode PEPSTOARGetLocking_STOAR(PEP pep,PetscBool *lock)
669: {
670: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
673: *lock = ctx->lock;
674: return(0);
675: }
679: /*@
680: PEPSTOARGetLocking - Gets the locking flag used in the STOAR method.
682: Not Collective
684: Input Parameter:
685: . pep - the eigenproblem solver context
687: Output Parameter:
688: . lock - the locking flag
690: Level: advanced
692: .seealso: PEPSTOARSetLocking()
693: @*/
694: PetscErrorCode PEPSTOARGetLocking(PEP pep,PetscBool *lock)
695: {
701: PetscTryMethod(pep,"PEPSTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
702: return(0);
703: }
707: PetscErrorCode PEPView_STOAR(PEP pep,PetscViewer viewer)
708: {
710: PEP_TOAR *ctx = (PEP_TOAR*)pep->data;
711: PetscBool isascii;
714: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
715: if (isascii) {
716: PetscViewerASCIIPrintf(viewer," STOAR: using the %slocking variant\n",ctx->lock?"":"non-");
717: }
718: return(0);
719: }
723: PetscErrorCode PEPDestroy_STOAR(PEP pep)
724: {
728: PetscFree(pep->data);
729: PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",NULL);
730: PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",NULL);
731: return(0);
732: }
736: PETSC_EXTERN PetscErrorCode PEPCreate_STOAR(PEP pep)
737: {
739: PEP_TOAR *ctx;
742: PetscNewLog(pep,&ctx);
743: pep->data = (void*)ctx;
744: ctx->lock = PETSC_TRUE;
746: pep->ops->solve = PEPSolve_STOAR;
747: pep->ops->setup = PEPSetUp_STOAR;
748: pep->ops->setfromoptions = PEPSetFromOptions_STOAR;
749: pep->ops->view = PEPView_STOAR;
750: pep->ops->destroy = PEPDestroy_STOAR;
751: pep->ops->backtransform = PEPBackTransform_Default;
752: pep->ops->computevectors = PEPComputeVectors_Default;
753: pep->ops->extractvectors = PEPExtractVectors_TOAR;
754: pep->ops->reset = PEPReset_TOAR;
755: PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",PEPSTOARSetLocking_STOAR);
756: PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",PEPSTOARGetLocking_STOAR);
757: return(0);
758: }