Actual source code: stoar.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*

  3:    SLEPc polynomial eigensolver: "stoar"

  5:    Method: S-TOAR

  7:    Algorithm:

  9:        Symmetric Two-Level Orthogonal Arnoldi.

 11:    References:

 13:        [1] C. Campos and J.E. Roman, " Restarted Q-Arnoldi-type methods
 14:            exploiting symmetry in quadratic eigenvalue problems",
 15:            submitted, 2015.

 17:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 18:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 19:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

 21:    This file is part of SLEPc.

 23:    SLEPc is free software: you can redistribute it and/or modify it under  the
 24:    terms of version 3 of the GNU Lesser General Public License as published by
 25:    the Free Software Foundation.

 27:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 28:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 29:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 30:    more details.

 32:    You  should have received a copy of the GNU Lesser General  Public  License
 33:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 34:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 35: */

 37: #include <slepc/private/pepimpl.h>         /*I "slepcpep.h" I*/
 38:  #include ../src/pep/impls/krylov/pepkrylov.h
 39: #include <slepcblaslapack.h>

 43: /*
 44:   Compute B-norm of v=[v1;v2] whith  B=diag(-pep->T[0],pep->T[2]) 
 45: */
 46: static PetscErrorCode PEPSTOARNorm(PEP pep,PetscInt j,PetscReal *norm)
 47: {
 49:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
 50:   PetscBLASInt   n_,one=1,ld_;
 51:   PetscScalar    sone=1.0,szero=0.0,*sp,*sq,*w1,*w2,*qK,*qM;
 52:   PetscInt       n,i,lds=ctx->d*ctx->ld;

 55:   qK = ctx->qB;
 56:   qM = ctx->qB+ctx->ld*ctx->ld;
 57:   n = j+2;
 58:   PetscMalloc2(n,&w1,n,&w2);
 59:   sp = ctx->S+lds*j;
 60:   sq = sp+ctx->ld;
 61:   PetscBLASIntCast(n,&n_);
 62:   PetscBLASIntCast(ctx->ld,&ld_);
 63:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,sp,&one,&szero,w1,&one));
 64:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,sq,&one,&szero,w2,&one));
 65:   *norm = 0.0;
 66:   for (i=0;i<n;i++) *norm += PetscRealPart(w1[i]*PetscConj(sp[i])+w2[i]*PetscConj(sq[i]));
 67:   *norm = (*norm>0.0)?PetscSqrtReal(*norm):-PetscSqrtReal(-*norm);
 68:   PetscFree2(w1,w2);
 69:   return(0);
 70: }

 74: static PetscErrorCode PEPSTOARqKqMupdates(PEP pep,PetscInt j,Vec *wv,PetscInt nwv)
 75: {
 77:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
 78:   PetscInt       i,ld=ctx->ld;
 79:   PetscScalar    *qK,*qM;
 80:   Vec            vj,v1,v2;

 83:   qK = ctx->qB;
 84:   qM = ctx->qB+ctx->ld*ctx->ld;
 85:   if (!wv||nwv<2) {
 86:     if (!wv) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",3);
 87:     else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",4);
 88:   }
 89:   v1 = wv[0];
 90:   v2 = wv[1];
 91:   BVGetColumn(pep->V,j,&vj);
 92:   STMatMult(pep->st,0,vj,v1);
 93:   STMatMult(pep->st,2,vj,v2);
 94:   BVRestoreColumn(pep->V,j,&vj);
 95:   for (i=0;i<=j;i++) {
 96:     BVGetColumn(pep->V,i,&vj);
 97:     VecDot(v1,vj,qK+j*ld+i);
 98:     VecDot(v2,vj,qM+j*ld+i);
 99:     *(qM+j*ld+i) *= pep->sfactor*pep->sfactor;
100:     BVRestoreColumn(pep->V,i,&vj);
101:   }
102:   for (i=0;i<j;i++) {
103:     qK[i+j*ld] = -qK[i+ld*j];
104:     qK[j+i*ld] = PetscConj(qK[i+j*ld]);
105:     qM[j+i*ld] = PetscConj(qM[i+j*ld]);
106:   }
107:   qK[j+j*ld] = -PetscRealPart(qK[j+ld*j]);
108:   qM[j+j*ld] = PetscRealPart(qM[j+ld*j]);
109:   return(0);
110: }

114: PetscErrorCode PEPSetUp_STOAR(PEP pep)
115: {
117:   PetscBool      sinv,flg,lindep;
118:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
119:   PetscInt       ld,i;
120:   PetscReal      norm,*omega;

123:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
124:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
125:   if (!pep->max_it) pep->max_it = PetscMax(100,2*pep->n/pep->ncv);
126:   if (!pep->which) {
127:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
128:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
129:     else pep->which = PEP_LARGEST_MAGNITUDE;
130:   }
131:   if (pep->problem_type!=PEP_HERMITIAN) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Requested method is only available for Hermitian problems");

133:   if (pep->nmat!=3) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver only available for quadratic problems");
134:   if (pep->basis!=PEP_BASIS_MONOMIAL) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver not implemented for non-monomial bases");
135:   STGetTransform(pep->st,&flg);
136:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver requires the ST transformation flag set, see STSetTransform()");

138:   PEPAllocateSolution(pep,2);
139:   PEPSetWorkVecs(pep,4);
140:   ld = pep->ncv+2;
141:   DSSetType(pep->ds,DSGHIEP);
142:   DSSetCompact(pep->ds,PETSC_TRUE);
143:   DSAllocate(pep->ds,ld);
144:   STGetNumMatrices(pep->st,&ctx->d);
145:   ctx->d--;
146:   ctx->ld = ld;
147:   PetscCalloc1(ctx->d*ld*ld,&ctx->S);
148:   PetscCalloc1(2*ld*ld,&ctx->qB);

150:   /* process starting vector */
151:   if (pep->nini>-2) {  
152:     BVSetRandomColumn(pep->V,0,pep->rand);
153:     BVSetRandomColumn(pep->V,1,pep->rand);
154:   } else {
155:     BVInsertVec(pep->V,0,pep->IS[0]);
156:     BVInsertVec(pep->V,1,pep->IS[1]);
157:   }
158:   BVOrthogonalizeColumn(pep->V,0,NULL,&norm,&lindep);
159:   if (!lindep) {
160:     BVScaleColumn(pep->V,0,1.0/norm);
161:     ctx->S[0] = norm;
162:     PEPSTOARqKqMupdates(pep,0,pep->work,2);
163:   } else {
164:     SETERRQ(PetscObjectComm((PetscObject)pep),1,"Problem with initial vector");
165:   }
166:   BVOrthogonalizeColumn(pep->V,1,ctx->S+ld,&norm,&lindep);
167:   if (!lindep) {
168:     BVScaleColumn(pep->V,1,1.0/norm);
169:     ctx->S[1] = norm;
170:     PEPSTOARqKqMupdates(pep,1,pep->work,2);
171:   } else {
172:     SETERRQ(PetscObjectComm((PetscObject)pep),1,"Problem with initial vector");
173:   }

175:   PEPSTOARNorm(pep,0,&norm);
176:   for (i=0;i<2;i++) { ctx->S[i+ld] /= norm; ctx->S[i] /= norm; }
177:   DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
178:   omega[0] = (norm>0)?1.0:-1.0;
179:   DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
180:   if (pep->nini<0) {
181:     SlepcBasisDestroy_Private(&pep->nini,&pep->IS);
182:   }
183:   return(0);
184: }

188: /*
189:   Computes GS orthogonalization  x = [z;x] - [Sp;Sq]*y,
190:   where y = Omega\([Sp;Sq]'*[qK zeros(size(qK,1)) ;zeros(size(qK,1)) qM]*[z;x]).
191:   n: Column from S to be orthogonalized against previous columns.
192: */
193: static PetscErrorCode PEPSTOAROrth2(PEP pep,PetscInt k,PetscReal *Omega,PetscScalar *y)
194: {
196:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
197:   PetscBLASInt   n_,lds_,k_,one=1,ld_;
198:   PetscScalar    *S=ctx->S,sonem=-1.0,sone=1.0,szero=0.0,*tp,*tq,*xp,*xq,*c,*qK,*qM;
199:   PetscInt       i,lds=ctx->d*ctx->ld,n,j;
200:   
202:   qK = ctx->qB;
203:   qM = ctx->qB+ctx->ld*ctx->ld;
204:   n = k+2;
205:   PetscMalloc3(n,&tp,n,&tq,k,&c);
206:   PetscBLASIntCast(n,&n_); /* Size of qK and qM */
207:   PetscBLASIntCast(ctx->ld,&ld_);
208:   PetscBLASIntCast(lds,&lds_);
209:   PetscBLASIntCast(k,&k_); /* Number of vectors to orthogonalize against */
210:   xp = S+k*lds;
211:   xq = S+ctx->ld+k*lds;
212:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
213:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
214:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,y,&one));
215:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,y,&one));
216:   for (i=0;i<k;i++) y[i] /= Omega[i];
217:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,y,&one,&sone,xp,&one));
218:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,y,&one,&sone,xq,&one));
219:   /* three times */
220:   for (j=0;j<2;j++) {
221:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
222:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
223:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,c,&one));
224:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,c,&one));
225:     for (i=0;i<k;i++) c[i] /= Omega[i];
226:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,c,&one,&sone,xp,&one));
227:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,c,&one,&sone,xq,&one));
228:     for (i=0;i<k;i++) y[i] += c[i];
229:   }
230:   PetscFree3(tp,tq,c);
231:   return(0);
232: }

236: /*
237:   Compute a run of Lanczos iterations
238: */
239: static PetscErrorCode PEPSTOARrun(PEP pep,PetscReal *a,PetscReal *b,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscScalar *work,PetscInt nw,Vec *t_,PetscInt nwv)
240: {
242:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
243:   PetscInt       i,j,m=*M,nwu=0,lwa,l;
244:   PetscInt       lds=ctx->d*ctx->ld,offq=ctx->ld;
245:   Vec            v=t_[0],t=t_[1],q=t_[2];
246:   PetscReal      norm,sym=0.0,fro=0.0,*f;
247:   PetscScalar    *y,*S=ctx->S;
248:   PetscBLASInt   j_,one=1;
249:   PetscBool      lindep;

252:   *breakdown = PETSC_FALSE; /* ----- */
253:   if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
254:   lwa = (ctx->ld)*4;
255:   if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
256:   DSGetDimensions(pep->ds,NULL,NULL,&l,NULL,NULL);
257:   y = work;
258:   nwu += ctx->ld;
259:   for (j=k;j<m;j++) {
260:     /* apply operator */
261:     BVSetActiveColumns(pep->V,0,j+2);
262:     BVMultVec(pep->V,1.0,0.0,v,S+j*lds);
263:     STMatMult(pep->st,0,v,t);
264:     BVMultVec(pep->V,1.0,0.0,v,S+offq+j*lds);
265:     STMatMult(pep->st,1,v,q);
266:     VecAXPY(t,pep->sfactor,q);
267:     STMatSolve(pep->st,t,q);
268:     VecScale(q,-1.0/(pep->sfactor*pep->sfactor));

270:     /* orthogonalize */
271:     BVOrthogonalizeVec(pep->V,q,S+offq+(j+1)*lds,&norm,&lindep);
272:     if (lindep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STOAR does not support detection of linearly dependent TOAR vectors");
273:     *(S+offq+(j+1)*lds+j+2) = norm;
274:     VecScale(q,1.0/norm);
275:     BVInsertVec(pep->V,j+2,q);
276:     for (i=0;i<=j+1;i++) *(S+(j+1)*lds+i) = *(S+offq+j*lds+i);
277:    
278:     /* update qK and qM */
279:     PEPSTOARqKqMupdates(pep,j+2,t_,2);

281:     /* level-2 orthogonalization */
282:     PEPSTOAROrth2(pep,j+1,omega,y);
283:     a[j] = PetscRealPart(y[j])/omega[j];
284:     PEPSTOARNorm(pep,j+1,&norm);
285:     omega[j+1] = (norm > 0)?1.0:-1.0;
286:     for (i=0;i<=j+2;i++) {
287:       S[i+(j+1)*lds] /= norm;
288:       S[i+offq+(j+1)*lds] /= norm;
289:     }
290:     b[j] = PetscAbsReal(norm);

292:     /* check symmetry */
293:     DSGetArrayReal(pep->ds,DS_MAT_T,&f);
294:     if (j==k) {
295:       for (i=l;i<j-1;i++) y[i] = PetscAbsScalar(y[i])-PetscAbsReal(f[2*ctx->ld+i]);
296:       for (i=0;i<l;i++) y[i] = 0.0;
297:     }
298:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&f);
299:     if (j>0) y[j-1] = PetscAbsScalar(y[j-1])-PetscAbsReal(b[j-1]);
300:     PetscBLASIntCast(j,&j_);
301:     sym = SlepcAbs(BLASnrm2_(&j_,y,&one),sym);
302:     fro = SlepcAbs(fro,SlepcAbs(a[j],b[j]));
303:     if (j>0) fro = SlepcAbs(fro,b[j-1]);
304:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*pep->tol)) {
305:       *symmlost = PETSC_TRUE;
306:       *M=j+1;
307:       break;
308:     }
309:   }
310:   return(0);
311: }

315: static PetscErrorCode PEPSTOARTrunc(PEP pep,PetscInt rs1,PetscInt cs1,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nrw)
316: {
318:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
319:   Mat            G;
320:   PetscInt       lwa,nwu=0,lrwa,nrwu=0;
321:   PetscInt       i,n,lds=2*ctx->ld;
322:   PetscScalar    *M,*V,*U,*S=ctx->S,sone=1.0,zero=0.0,t,*qK,*qM;
323:   PetscReal      *sg;
324:   PetscBLASInt   cs1_,rs1_,cs1t2,cs1p1,n_,info,lw_,lds_,ld_;

327:   qK = ctx->qB;
328:   qM = ctx->qB+ctx->ld*ctx->ld;
329:   n = (rs1>2*cs1)?2*cs1:rs1;
330:   lwa = cs1*rs1*4+n*(rs1+2*cs1)+(cs1+1)*(cs1+2);
331:   lrwa = n+cs1+1+5*n;
332:   if (!work||nw<lwa) {
333:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
334:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",5);
335:   }
336:   if (!rwork||nrw<lrwa) {
337:     if (nrw<lrwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
338:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",7);
339:   }
340:   M = work+nwu;
341:   nwu += rs1*cs1*2;
342:   U = work+nwu;
343:   nwu += rs1*n;
344:   V = work+nwu;
345:   nwu += 2*cs1*n;
346:   sg = rwork+nrwu;
347:   nrwu += n;
348:   for (i=0;i<cs1;i++) {
349:     PetscMemcpy(M+i*rs1,S+i*lds,rs1*sizeof(PetscScalar));  
350:     PetscMemcpy(M+(i+cs1)*rs1,S+i*lds+ctx->ld,rs1*sizeof(PetscScalar));  
351:   }
352:   PetscBLASIntCast(n,&n_);
353:   PetscBLASIntCast(cs1,&cs1_);
354:   PetscBLASIntCast(rs1,&rs1_);
355:   PetscBLASIntCast(cs1*2,&cs1t2);
356:   PetscBLASIntCast(cs1+1,&cs1p1);
357:   PetscBLASIntCast(lds,&lds_);
358:   PetscBLASIntCast(ctx->ld,&ld_);
359:   PetscBLASIntCast(lwa-nwu,&lw_);
360: #if !defined(PETSC_USE_COMPLEX)
361:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,&info));
362: #else
363:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));  
364: #endif
365:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);

367:   /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
368:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,2*cs1,U,&G);
369:   BVSetActiveColumns(pep->V,0,rs1);
370:   BVMultInPlace(pep->V,G,0,cs1+1);
371:   MatDestroy(&G);
372:   
373:   /* Update S */
374:   PetscMemzero(S,lds*ctx->ld*sizeof(PetscScalar));

376:   for (i=0;i<cs1+1;i++) {
377:     t = sg[i];
378:     PetscStackCallBLAS("BLASscal",BLASscal_(&cs1t2,&t,V+i,&n_));
379:   }
380:   for (i=0;i<cs1;i++) {
381:     PetscMemcpy(S+i*lds,V+i*n,(cs1+1)*sizeof(PetscScalar));
382:     PetscMemcpy(S+ctx->ld+i*lds,V+(cs1+i)*n,(cs1+1)*sizeof(PetscScalar));
383:   }

385:   /* Update qM and qK */
386:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qK,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
387:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qK,&ld_));
388:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qM,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
389:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qM,&ld_));
390:   return(0);
391: }

395: /*
396:   S <- S*Q 
397:   columns s-s+ncu of S
398:   rows 0-sr of S
399:   size(Q) qr x ncu
400: */
401: static PetscErrorCode PEPSTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work,PetscInt nw)
402: {
404:   PetscScalar    a=1.0,b=0.0;
405:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
406:   PetscInt       lwa,j,lds=2*ld;

409:   lwa = sr*ncu;
410:   if (!work||nw<lwa) {
411:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
412:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
413:   }
414:   PetscBLASIntCast(sr,&sr_);
415:   PetscBLASIntCast(qr,&qr_);
416:   PetscBLASIntCast(ncu,&ncu_);
417:   PetscBLASIntCast(lds,&lds_);
418:   PetscBLASIntCast(ldq,&ldq_);
419:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S,&lds_,Q,&ldq_,&b,work,&sr_));
420:   for (j=0;j<ncu;j++) {
421:     PetscMemcpy(S+lds*(s+j),work+j*sr,sr*sizeof(PetscScalar));
422:   }
423:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+ld,&lds_,Q,&ldq_,&b,work,&sr_));
424:   for (j=0;j<ncu;j++) {
425:     PetscMemcpy(S+lds*(s+j)+ld,work+j*sr,sr*sizeof(PetscScalar));
426:   }
427:   return(0);
428: }

430: #if 0
433: static PetscErrorCode PEPSTOARpreKConvergence(PEP pep,PetscInt nv,PetscReal *norm,Vec *w)
434: {
436:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
437:   PetscBLASInt   n_,one=1;
438:   PetscInt       lds=2*ctx->ld;
439:   PetscReal      t1,t2;
440:   PetscScalar    *S=ctx->S;

443:   PetscBLASIntCast(nv+2,&n_);
444:   t1 = BLASnrm2_(&n_,S+nv*2*ctx->ld,&one);
445:   t2 = BLASnrm2_(&n_,S+(nv*2+1)*ctx->ld,&one);
446:   *norm = SlepcAbs(t1,t2);
447:   BVSetActiveColumns(pep->V,0,nv+2);
448:   BVMultVec(pep->V,1.0,0.0,w[1],S+nv*lds);
449:   STMatMult(pep->st,0,w[1],w[2]);
450:   VecNorm(w[2],NORM_2,&t1);
451:   BVMultVec(pep->V,1.0,0.0,w[1],S+ctx->ld+nv*lds);
452:   STMatMult(pep->st,2,w[1],w[2]);
453:   VecNorm(w[2],NORM_2,&t2);
454:   t2 *= pep->sfactor*pep->sfactor; 
455:   *norm = PetscMax(*norm,SlepcAbs(t1,t2));
456:   return(0);
457: }
458: #endif

462: PetscErrorCode PEPSolve_STOAR(PEP pep)
463: {
465:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
466:   PetscInt       j,k,l,nv=0,ld=ctx->ld,lds=ctx->d*ctx->ld,off,ldds,t;
467:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nconv=0;
468:   PetscScalar    *S=ctx->S,*Q,*work;
469:   PetscReal      beta,norm=1.0,*omega,*a,*b,*r,*rwork;
470:   PetscBool      breakdown,symmlost=PETSC_FALSE,sinv;

473:   BVSetMatrix(pep->V,NULL,PETSC_FALSE);
474:   lwa = 9*ld*ld+5*ld;
475:   lrwa = 8*ld;
476:   PetscMalloc2(lwa,&work,lrwa,&rwork); /* REVIEW */
477:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
478:   RGSetScale(pep->rg,sinv?1.0/pep->sfactor:pep->sfactor);
479:   STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);

481:   /* Restart loop */
482:   l = 0;
483:   DSGetLeadingDimension(pep->ds,&ldds);
484:   while (pep->reason == PEP_CONVERGED_ITERATING) {
485:     pep->its++;
486:     DSGetArrayReal(pep->ds,DS_MAT_T,&a);
487:     b = a+ldds;
488:     r = b+ldds;
489:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
490:     
491:     /* Compute an nv-step Lanczos factorization */
492:     nv = PetscMin(pep->nconv+pep->mpd,pep->ncv);
493:     PEPSTOARrun(pep,a,b,omega,pep->nconv+l,&nv,&breakdown,&symmlost,work+nwu,lwa-nwu,pep->work,3);
494:     beta = b[nv-1];
495:     if (symmlost) {
496:       pep->reason = PEP_DIVERGED_SYMMETRY_LOST;
497:       if (nv==pep->nconv+l+1) { pep->nconv = nconv; break; }
498:     }
499:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
500:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
501:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
502:     if (l==0) {
503:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
504:     } else {
505:       DSSetState(pep->ds,DS_STATE_RAW);
506:     }

508:     /* Solve projected problem */
509:     DSSolve(pep->ds,pep->eigr,pep->eigi);
510:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);

512:     /* Check convergence */
513:     /* PEPSTOARpreKConvergence(pep,nv,&norm,pep->work);*/
514:     norm = 1.0;
515:     DSGetDimensions(pep->ds,NULL,NULL,NULL,NULL,&t);    
516:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,t-pep->nconv,PetscAbsReal(beta)*norm,&k);
517:     nconv = k;
518:     if (pep->its >= pep->max_it) pep->reason = PEP_DIVERGED_ITS;
519:     if (k >= pep->nev) pep->reason = PEP_CONVERGED_TOL;

521:     /* Update l */
522:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
523:     else { 
524:       l = PetscMax(1,(PetscInt)((nv-k)/2));
525:       l = PetscMin(l,t);
526:       DSGetArrayReal(pep->ds,DS_MAT_T,&a);
527:       if (*(a+ldds+k+l-1)!=0) {
528:         if (k+l<nv-1) l = l+1;
529:         else l = l-1;
530:       }
531:       DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
532:     }
533:     if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

535:     /* Update S */
536:     off = pep->nconv*ldds;
537:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
538:     PEPSTOARSupdate(S,ld,nv+2,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu,lwa-nwu);
539:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

541:     /* Copy last column of S */
542:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

544:     if (pep->reason == PEP_CONVERGED_ITERATING) {
545:       if (breakdown) {
546:         /* Stop if breakdown */
547:         PetscInfo2(pep,"Breakdown STOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
548:         pep->reason = PEP_DIVERGED_BREAKDOWN;
549:       } else {
550:         /* Prepare the Rayleigh quotient for restart */
551:         DSGetArray(pep->ds,DS_MAT_Q,&Q);
552:         DSGetArrayReal(pep->ds,DS_MAT_T,&a);
553:         DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
554:         r = a + 2*ldds;
555:         for (j=k;j<k+l;j++) {
556:           r[j] = PetscRealPart(Q[nv-1+j*ldds]*beta);
557:         }
558:         b = a+ldds;
559:         b[k+l-1] = r[k+l-1];
560:         omega[k+l] = omega[nv];
561:         DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
562:         DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
563:         DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
564:         /* Truncate S */
565:         DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
566:         PEPSTOARTrunc(pep,nv+2,k+l+1,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
567:         DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
568:       }
569:     }


572:     pep->nconv = k;
573:     PEPMonitor(pep,pep->its,pep->nconv,pep->eigr,pep->eigi,pep->errest,nv);
574:   }

576:   if (pep->nconv>0) {
577:     /* Truncate S */
578:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
579:     PEPSTOARTrunc(pep,nv+2,pep->nconv,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
580:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
581:   
582:     /* Extraction */
583:     DSSetDimensions(pep->ds,pep->nconv,0,0,0);
584:     DSSetState(pep->ds,DS_STATE_RAW);
585:   
586:     for (j=0;j<pep->nconv;j++) {
587:       pep->eigr[j] *= pep->sfactor;
588:       pep->eigi[j] *= pep->sfactor;
589:     }
590:   }
591:   STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
592:   RGSetScale(pep->rg,1.0);

594:   /* truncate Schur decomposition and change the state to raw so that
595:      DSVectors() computes eigenvectors from scratch */
596:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
597:   DSSetState(pep->ds,DS_STATE_RAW);
598:   PetscFree2(work,rwork);
599:   return(0);
600: }

604: PetscErrorCode PEPSetFromOptions_STOAR(PetscOptions *PetscOptionsObject,PEP pep)
605: {
607:   PetscBool      flg,lock;

610:   PetscOptionsHead(PetscOptionsObject,"PEP STOAR Options");
611:   PetscOptionsBool("-pep_stoar_locking","Choose between locking and non-locking variants","PEPSTOARSetLocking",PETSC_FALSE,&lock,&flg);
612:   if (flg) {
613:     PEPSTOARSetLocking(pep,lock);
614:   }
615:   PetscOptionsTail();
616:   return(0);
617: }

621: static PetscErrorCode PEPSTOARSetLocking_STOAR(PEP pep,PetscBool lock)
622: {
623:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

626:   ctx->lock = lock;
627:   return(0);
628: }

632: /*@
633:    PEPSTOARSetLocking - Choose between locking and non-locking variants of
634:    the STOAR method.

636:    Logically Collective on PEP

638:    Input Parameters:
639: +  pep  - the eigenproblem solver context
640: -  lock - true if the locking variant must be selected

642:    Options Database Key:
643: .  -pep_stoar_locking - Sets the locking flag

645:    Notes:
646:    The default is to lock converged eigenpairs when the method restarts.
647:    This behaviour can be changed so that all directions are kept in the
648:    working subspace even if already converged to working accuracy (the
649:    non-locking variant).

651:    Level: advanced

653: .seealso: PEPSTOARGetLocking()
654: @*/
655: PetscErrorCode PEPSTOARSetLocking(PEP pep,PetscBool lock)
656: {

662:   PetscTryMethod(pep,"PEPSTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
663:   return(0);
664: }

668: static PetscErrorCode PEPSTOARGetLocking_STOAR(PEP pep,PetscBool *lock)
669: {
670:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

673:   *lock = ctx->lock;
674:   return(0);
675: }

679: /*@
680:    PEPSTOARGetLocking - Gets the locking flag used in the STOAR method.

682:    Not Collective

684:    Input Parameter:
685: .  pep - the eigenproblem solver context

687:    Output Parameter:
688: .  lock - the locking flag

690:    Level: advanced

692: .seealso: PEPSTOARSetLocking()
693: @*/
694: PetscErrorCode PEPSTOARGetLocking(PEP pep,PetscBool *lock)
695: {

701:   PetscTryMethod(pep,"PEPSTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
702:   return(0);
703: }

707: PetscErrorCode PEPView_STOAR(PEP pep,PetscViewer viewer)
708: {
710:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
711:   PetscBool      isascii;

714:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
715:   if (isascii) {
716:     PetscViewerASCIIPrintf(viewer,"  STOAR: using the %slocking variant\n",ctx->lock?"":"non-");
717:   }
718:   return(0);
719: }

723: PetscErrorCode PEPDestroy_STOAR(PEP pep)
724: {

728:   PetscFree(pep->data);
729:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",NULL);
730:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",NULL);
731:   return(0);
732: }

736: PETSC_EXTERN PetscErrorCode PEPCreate_STOAR(PEP pep)
737: {
739:   PEP_TOAR      *ctx;

742:   PetscNewLog(pep,&ctx);
743:   pep->data = (void*)ctx;
744:   ctx->lock = PETSC_TRUE;

746:   pep->ops->solve          = PEPSolve_STOAR;
747:   pep->ops->setup          = PEPSetUp_STOAR;
748:   pep->ops->setfromoptions = PEPSetFromOptions_STOAR;
749:   pep->ops->view           = PEPView_STOAR;
750:   pep->ops->destroy        = PEPDestroy_STOAR;
751:   pep->ops->backtransform  = PEPBackTransform_Default;
752:   pep->ops->computevectors = PEPComputeVectors_Default;
753:   pep->ops->extractvectors = PEPExtractVectors_TOAR;
754:   pep->ops->reset          = PEPReset_TOAR;
755:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",PEPSTOARSetLocking_STOAR);
756:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",PEPSTOARGetLocking_STOAR);
757:   return(0);
758: }