Actual source code: ptoar.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*

  3:    SLEPc polynomial eigensolver: "toar"

  5:    Method: TOAR

  7:    Algorithm:

  9:        Two-Level Orthogonal Arnoldi.

 11:    References:

 13:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 14:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 16:        [2] C. Campos and J.E. Roman, "Parallel Krylov solvers for the
 17:            polynomial eigenvalue problem in SLEPc", submitted, 2015.

 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 21:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

 23:    This file is part of SLEPc.

 25:    SLEPc is free software: you can redistribute it and/or modify it under  the
 26:    terms of version 3 of the GNU Lesser General Public License as published by
 27:    the Free Software Foundation.

 29:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 30:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 31:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 32:    more details.

 34:    You  should have received a copy of the GNU Lesser General  Public  License
 35:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 36:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 37: */

 39: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 40:  #include ../src/pep/impls/krylov/pepkrylov.h
 41: #include <slepcblaslapack.h>

 45: /*
 46:   Norm of [sp;sq] 
 47: */
 48: static PetscErrorCode PEPTOARSNorm2(PetscInt n,PetscScalar *S,PetscReal *norm)
 49: {
 51:   PetscBLASInt   n_,one=1;

 54:   PetscBLASIntCast(n,&n_);
 55:   *norm = BLASnrm2_(&n_,S,&one);
 56:   return(0);
 57: }

 61: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 62: {
 64:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 65:   PetscBool      sinv,flg,lindep;
 66:   PetscInt       i,lds,deg=pep->nmat-1,j;
 67:   PetscReal      norm;

 70:   pep->lineariz = PETSC_TRUE;
 71:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 72:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
 73:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv); 
 74:   if (!pep->which) {
 75:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 76:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 77:     else pep->which = PEP_LARGEST_MAGNITUDE;
 78:   }
 79:   if (pep->problem_type!=PEP_GENERAL) {
 80:     PetscInfo(pep,"Problem type ignored, performing a non-symmetric linearization\n");
 81:   }

 83:   if (!ctx->keep) ctx->keep = 0.5;

 85:   PEPAllocateSolution(pep,pep->nmat-1);
 86:   PEPSetWorkVecs(pep,3);
 87:   DSSetType(pep->ds,DSNHEP);
 88:   DSSetExtraRow(pep->ds,PETSC_TRUE);
 89:   DSAllocate(pep->ds,pep->ncv+1);

 91:   PEPBasisCoefficients(pep,pep->pbc);
 92:   STGetTransform(pep->st,&flg);
 93:   if (!flg) {
 94:     PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
 95:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 96:     if (sinv) {
 97:       PEPEvaluateBasis(pep,pep->target,0,pep->solvematcoeffs,NULL);
 98:     } else {
 99:       for (i=0;i<pep->nmat-1;i++) pep->solvematcoeffs[i] = 0.0;
100:       pep->solvematcoeffs[pep->nmat-1] = 1.0;
101:     }
102:   }
103:   ctx->ld = pep->ncv+(pep->nmat-1);   /* number of rows of each fragment of S */
104:   lds = (pep->nmat-1)*ctx->ld;
105:   PetscCalloc1(lds*ctx->ld,&ctx->S);

107:   /* process starting vector */
108:   ctx->nq = 0;
109:   for (i=0;i<deg;i++) {
110:     if (pep->nini>-deg) {  
111:       BVSetRandomColumn(pep->V,ctx->nq,pep->rand);
112:     } else {
113:       BVInsertVec(pep->V,ctx->nq,pep->IS[i]);
114:     }
115:     BVOrthogonalizeColumn(pep->V,ctx->nq,ctx->S+i*ctx->ld,&norm,&lindep);
116:     if (!lindep) {
117:       BVScaleColumn(pep->V,ctx->nq,1.0/norm);
118:       ctx->S[ctx->nq+i*ctx->ld] = norm;
119:       ctx->nq++;
120:     }
121:   }
122:   if (ctx->nq==0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
123:   PEPTOARSNorm2(lds,ctx->S,&norm);
124:   for (j=0;j<deg;j++) {
125:     for (i=0;i<=j;i++) ctx->S[i+j*ctx->ld] /= norm;
126:   }
127:   if (pep->nini<0) {
128:     SlepcBasisDestroy_Private(&pep->nini,&pep->IS);
129:   }
130:   return(0);
131: }

135: /*
136:  Computes GS orthogonalization   [z;x] - [Sp;Sq]*y,
137:  where y = ([Sp;Sq]'*[z;x]).
138:    k: Column from S to be orthogonalized against previous columns.
139:    Sq = Sp+ld
140: */
141: static PetscErrorCode PEPTOAROrth2(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt k,PetscScalar *y,PetscReal *norm,PetscBool *lindep,PetscScalar *work,PetscInt nw)
142: {
144:   PetscBLASInt   n_,lds_,k_,one=1;
145:   PetscScalar    sonem=-1.0,sone=1.0,szero=0.0,*x0,*x,*c;
146:   PetscInt       lwa,nwu=0,i,lds=deg*ld,n;
147:   PetscReal      eta,onorm;
148:   
150:   BVGetOrthogonalization(pep->V,NULL,NULL,&eta,NULL);
151:   n = k+deg-1;
152:   PetscBLASIntCast(n,&n_);
153:   PetscBLASIntCast(deg*ld,&lds_);
154:   PetscBLASIntCast(k,&k_); /* number of vectors to orthogonalize against them */
155:   lwa = k;
156:   if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
157:   c = work+nwu;
158:   nwu += k;
159:   x0 = S+k*lds;
160:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,y,&one));
161:   for (i=1;i<deg;i++) {
162:     x = S+i*ld+k*lds;
163:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,y,&one));
164:   }
165:   for (i=0;i<deg;i++) {
166:     x= S+i*ld+k*lds;
167:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,y,&one,&sone,x,&one));
168:   }
169:   PEPTOARSNorm2(lds,S+k*lds,&onorm);
170:   /* twice */
171:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,c,&one));
172:   for (i=1;i<deg;i++) {
173:     x = S+i*ld+k*lds;
174:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,c,&one));
175:   }
176:   for (i=0;i<deg;i++) {
177:     x= S+i*ld+k*lds;
178:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,c,&one,&sone,x,&one));
179:   }
180:   for (i=0;i<k;i++) y[i] += c[i];
181:   if (norm) {
182:     PEPTOARSNorm2(lds,S+k*lds,norm);
183:   }
184:   if (lindep) {
185:     *lindep = (*norm < eta * onorm)?PETSC_TRUE:PETSC_FALSE;
186:   }
187:   return(0);
188: }

192: /*
193:   Extend the TOAR basis by applying the the matrix operator
194:   over a vector which is decomposed in the TOAR way
195:   Input:
196:     - pbc: array containing the polynomial basis coefficients
197:     - S,V: define the latest Arnoldi vector (nv vectors in V)
198:   Output:
199:     - t: new vector extending the TOAR basis
200:     - r: temporary coefficients to compute the TOAR coefficients
201:          for the new Arnoldi vector
202:   Workspace: t_ (two vectors)
203: */
204: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_,PetscInt nwv)
205: {
207:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
208:   Vec            v=t_[0],ve=t_[1],q=t_[2];
209:   PetscScalar    alpha=1.0,*ss,a;
210:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
211:   PetscBool      flg;

214:   if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
215:   BVSetActiveColumns(pep->V,0,nv);
216:   STGetTransform(pep->st,&flg);
217:   if (sinvert) {
218:     for (j=0;j<nv;j++) {
219:       if (deg>1) r[lr+j] = S[j]/ca[0];
220:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
221:     }
222:     for (k=2;k<deg-1;k++) {
223:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
224:     }
225:     k = deg-1;
226:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
227:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
228:   } else {
229:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
230:   }
231:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
232:   if (pep->Dr) { /* balancing */
233:     VecPointwiseMult(v,v,pep->Dr);
234:   }
235:   STMatMult(pep->st,off,v,q);
236:   VecScale(q,a);
237:   for (j=1+off;j<deg+off-1;j++) {
238:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
239:     if (pep->Dr) {
240:       VecPointwiseMult(v,v,pep->Dr);
241:     }
242:     STMatMult(pep->st,j,v,t);
243:     a *= pep->sfactor;
244:     VecAXPY(q,a,t);
245:   }
246:   if (sinvert) {
247:     BVMultVec(V,1.0,0.0,v,ss);
248:     if (pep->Dr) {
249:       VecPointwiseMult(v,v,pep->Dr);
250:     }
251:     STMatMult(pep->st,deg,v,t);
252:     a *= pep->sfactor;
253:     VecAXPY(q,a,t);
254:   } else {
255:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
256:     if (pep->Dr) {
257:       VecPointwiseMult(ve,ve,pep->Dr);
258:     }
259:     a *= pep->sfactor;
260:     STMatMult(pep->st,deg-1,ve,t);
261:     VecAXPY(q,a,t);    
262:     a *= pep->sfactor;
263:   }
264:   if (flg || !sinvert) alpha /= a;
265:   STMatSolve(pep->st,q,t);
266:   VecScale(t,alpha);
267:   if (!sinvert) {
268:     if (cg[deg-1]!=0) {VecAXPY(t,cg[deg-1],v);}    
269:     if (cb[deg-1]!=0) {VecAXPY(t,cb[deg-1],ve);}    
270:   }
271:   if (pep->Dr) {
272:     VecPointwiseDivide(t,t,pep->Dr);
273:   }
274:   return(0);
275: }

279: /*
280:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
281: */
282: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
283: {
284:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
285:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
286:   PetscScalar t=1.0,tp=0.0,tt;

289:   if (sinvert) {
290:     for (k=1;k<d;k++) {
291:       tt = t;
292:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
293:       tp = tt;
294:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
295:     }
296:   } else {
297:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
298:     for (k=1;k<d-1;k++) {
299:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
300:     }
301:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
302:   }
303:   return(0);
304: }

308: /*
309:   Compute a run of Arnoldi iterations
310: */
311: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscInt *nq,PetscScalar *S,PetscInt ld,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscScalar *work,PetscInt nw,Vec *t_,PetscInt nwv)
312: {
314:   PetscInt       i,j,p,m=*M,nwu=0,lwa,deg=pep->nmat-1;
315:   PetscInt       lds=ld*deg,nqt=*nq;
316:   Vec            t;
317:   PetscReal      norm;
318:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
319:   PetscScalar    *x;

322:   if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
323:   lwa = ld;
324:   if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
325:   STGetTransform(pep->st,&flg);
326:   if (!flg) {
327:     /* spectral transformation handled by the solver */
328:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
329:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"STtype not supported fr TOAR without transforming matrices");
330:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
331:   }
332:   for (j=k;j<m;j++) {
333:     /* apply operator */
334:     BVGetColumn(pep->V,nqt,&t);
335:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_,3);
336:     BVRestoreColumn(pep->V,nqt,&t);

338:     /* orthogonalize */
339:     if (sinvert) x = S+(j+1)*lds;
340:     else x = S+(deg-1)*ld+(j+1)*lds;
341:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
342:     if (!lindep) {
343:       x[nqt] = norm;
344:       BVScaleColumn(pep->V,nqt,1.0/norm);
345:       nqt++;
346:     }

348:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);
349:     /* level-2 orthogonalization */
350:     PEPTOAROrth2(pep,S,ld,deg,j+1,H+j*ldh,&norm,breakdown,work+nwu,lwa-nwu);
351:     if (!*breakdown) {
352:       for (p=0;p<deg;p++) {
353:         for (i=0;i<=j+deg;i++) {
354:           S[i+p*ld+(j+1)*lds] /= norm;
355:         }
356:       }
357:       H[j+1+ldh*j] = norm;
358:     } else {
359:       H[j+1+ldh*j] = norm;
360:       *M = j+1;
361:       *nq = nqt;
362:       return(0);
363:     }
364:     *nq = nqt;
365:   }
366:   return(0);
367: }

371: static PetscErrorCode PEPTOARTrunc(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt *rs1a,PetscInt cs1,PetscInt lock,PetscInt newc,PetscBool final,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nrw)
372: {
374:   PetscInt       lwa,nwu=0,lrwa,nrwu=0,nnc,nrow;
375:   PetscInt       j,i,k,n,lds=deg*ld,rs1=*rs1a,rk=0,offu;
376:   PetscScalar    *M,*V,*pU,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau;
377:   PetscReal      *sg,tol;
378:   PetscBLASInt   cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
379:   Mat            U;

382:   if (cs1==0) return(0);
383:   n = (rs1>deg*cs1)?deg*cs1:rs1;
384:   lwa = 6*ld*lds+2*cs1;
385:   lrwa = 6*n;
386:   if (!work||nw<lwa) {
387:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
388:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",5);
389:   }
390:   if (!rwork||nrw<lrwa) {
391:     if (nrw<lrwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
392:     if (!rwork) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",7);
393:   }
394:   nnc = cs1-lock-newc;
395:   nrow = rs1-lock;
396:   PetscMalloc4(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pU,deg*rs1,&tau);
397:   offu = lock*(rs1+1);
398:   M = work+nwu;
399:   nwu += rs1*cs1*deg;
400:   sg = rwork+nrwu;
401:   nrwu += n;
402:   PetscMemzero(pU,rs1*n*sizeof(PetscScalar));
403:   V = work+nwu;
404:   nwu += deg*cs1*n;
405:   PetscBLASIntCast(n,&n_);
406:   PetscBLASIntCast(nnc,&nnc_);
407:   PetscBLASIntCast(cs1,&cs1_);
408:   PetscBLASIntCast(rs1,&rs1_);
409:   PetscBLASIntCast(newc,&newc_);
410:   PetscBLASIntCast(newc*deg,&newctdeg);
411:   PetscBLASIntCast(nnc*deg,&nnctdeg);
412:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
413:   PetscBLASIntCast(lwa-nwu,&lw_);
414:   PetscBLASIntCast(nrow,&nrow_);
415:   PetscBLASIntCast(lds,&lds_);
416:   if (newc>0) {
417:   /* truncate columns associated with new converged eigenpairs */
418:     for (j=0;j<deg;j++) {
419:       for (i=lock;i<lock+newc;i++) {
420:         PetscMemcpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
421:       } 
422:     }
423: #if !defined (PETSC_USE_COMPLEX)
424:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,&info));
425: #else
426:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));  
427: #endif
428:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
429:     /* SVD has rank min(newc,nrow) */
430:     rk = PetscMin(newc,nrow);
431:     for (i=0;i<rk;i++) {
432:       t = sg[i];
433:       PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,V+i,&n_));
434:     }
435:     for (i=0;i<deg;i++) {
436:       for (j=lock;j<lock+newc;j++) {
437:         PetscMemcpy(S+j*lds+i*ld+lock,V+(newc*i+j-lock)*n,rk*sizeof(PetscScalar));
438:         PetscMemzero(S+j*lds+i*ld+lock+rk,(ld-lock-rk)*sizeof(PetscScalar));
439:       }
440:     }
441:     /*
442:       update columns associated with non-converged vectors, orthogonalize
443:        against pU so that next M has rank nnc+d-1 insted of nrow+d-1
444:     */
445:     for (i=0;i<deg;i++) {
446:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
447:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
448:       /* repeat orthogonalization step */
449:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS2,&newc_));
450:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
451:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
452:     }
453:   }
454:   /* truncate columns associated with non-converged eigenpairs */
455:   for (j=0;j<deg;j++) {
456:     for (i=lock+newc;i<cs1;i++) {
457:       PetscMemcpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
458:     } 
459:   }
460: #if !defined (PETSC_USE_COMPLEX)
461:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,&info));
462: #else
463:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));  
464: #endif
465:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
466:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
467:   for (i=0;i<PetscMin(n_,nnctdeg);i++) if (sg[i]>tol) rk++;
468:   rk = PetscMin(nnc+deg-1,rk);
469:   /* the SVD has rank (atmost) nnc+deg-1 */
470:   for (i=0;i<rk;i++) {
471:     t = sg[i];
472:     PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,V+i,&n_));
473:   }
474:   /* update S */
475:   PetscMemzero(S+cs1*lds,(ld-cs1)*lds*sizeof(PetscScalar));
476:   k = ld-lock-newc-rk;
477:   for (i=0;i<deg;i++) {
478:     for (j=lock+newc;j<cs1;j++) {
479:       PetscMemcpy(S+j*lds+i*ld+lock+newc,V+(nnc*i+j-lock-newc)*n,rk*sizeof(PetscScalar));
480:       PetscMemzero(S+j*lds+i*ld+lock+newc+rk,k*sizeof(PetscScalar));
481:     }
482:   }
483:   if (newc>0) {
484:     for (i=0;i<deg;i++) {
485:       p = SS+nnc*newc*i;
486:       for (j=lock+newc;j<cs1;j++) {
487:         for (k=0;k<newc;k++) S[j*lds+i*ld+lock+k] = *(p++);
488:       }
489:     }
490:   }

492:   /* orthogonalize pU */
493:   rk = rk+newc;
494:   PetscBLASIntCast(rk,&rk_);
495:   PetscBLASIntCast(cs1-lock,&nnc_);
496:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));
497:   for (i=0;i<deg;i++) {
498:     PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pU+offu,&rs1_,S+lock*lds+lock+i*ld,&lds_));
499:   }  
500:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));

502:   /* update vectors V(:,idx) = V*Q(:,idx) */
503:   rk = rk+lock;
504:   for (i=0;i<lock;i++) pU[(i+1)*rs1] = 1.0;
505:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pU,&U);
506:   BVSetActiveColumns(pep->V,lock,rs1);
507:   BVMultInPlace(pep->V,U,lock,rk);
508:   BVSetActiveColumns(pep->V,0,rk);
509:   MatDestroy(&U);  
510:   *rs1a = rk;

512:   /* free work space */
513:   PetscFree4(SS,SS2,pU,tau);
514:   return(0);
515: }

519: /*
520:   S <- S*Q 
521:   columns s-s+ncu of S
522:   rows 0-sr of S
523:   size(Q) qr x ncu
524: */
525: static PetscErrorCode PEPTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work,PetscInt nw)
526: {
528:   PetscScalar    a=1.0,b=0.0;
529:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
530:   PetscInt       lwa,j,lds=deg*ld,i;

533:   lwa = sr*ncu;
534:   if (!work||nw<lwa) {
535:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
536:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
537:   }
538:   PetscBLASIntCast(sr,&sr_);
539:   PetscBLASIntCast(qr,&qr_);
540:   PetscBLASIntCast(ncu,&ncu_);
541:   PetscBLASIntCast(lds,&lds_);
542:   PetscBLASIntCast(ldq,&ldq_);
543:   for (i=0;i<deg;i++) {
544:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+i*ld,&lds_,Q,&ldq_,&b,work,&sr_));
545:     for (j=0;j<ncu;j++) {
546:       PetscMemcpy(S+lds*(s+j)+i*ld,work+j*sr,sr*sizeof(PetscScalar));
547:     }
548:   }
549:   return(0);
550: }

554: /*
555:   Computes T_j = phi_idx(T). In T_j and T_p are phi_{idx-1}(T)
556:    and phi_{idx-2}(T) respectively or null if idx=0,1.
557:    Tp and Tj are input/output arguments
558: */
559: static PetscErrorCode PEPEvaluateBasisM(PEP pep,PetscInt k,PetscScalar *T,PetscInt ldt,PetscInt idx,PetscScalar **Tp,PetscScalar **Tj)
560: {
562:   PetscInt       i;
563:   PetscReal      *ca,*cb,*cg;
564:   PetscScalar    *pt,g,a;
565:   PetscBLASInt   k_,ldt_;

568:   if (idx==0) {
569:     PetscMemzero(*Tj,k*k*sizeof(PetscScalar));
570:     PetscMemzero(*Tp,k*k*sizeof(PetscScalar));
571:     for (i=0;i<k;i++) {
572:       (*Tj)[i+i*k] = 1.0;
573:     }
574:   } else {
575:     PetscBLASIntCast(ldt,&ldt_);
576:     PetscBLASIntCast(k,&k_);
577:     ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
578:     for (i=0;i<k;i++) T[i*ldt+i] -= cb[idx-1];
579:     a = 1/ca[idx-1];
580:     g = (idx==1)?0.0:-cg[idx-1]/ca[idx-1];
581:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,*Tj,&k_,&g,*Tp,&k_));    
582:     pt = *Tj; *Tj = *Tp; *Tp = pt;
583:     for (i=0;i<k;i++) T[i*ldt+i] += cb[idx-1];
584:   }
585:   return(0);
586: }

590: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh,PetscScalar *work,PetscInt nw)
591: {
593:   PetscInt       i,j,jj,nwu=0,lwa,lds,ldt,d=pep->nmat-1,idxcpy=0;
594:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,sone=1.0,g,a,*pM;
595:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
596:   PetscBool      transf=PETSC_FALSE,flg;
597:   PetscReal      nrm,norm,maxnrm,*rwork;
598:   BV             *R,Y;
599:   Mat            M,*A;
600:   Vec            v;

603:   if (k==0) return(0);
604:   lwa = 6*sr*k;
605:   if (!work||nw<lwa) {
606:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
607:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
608:   }
609:   lds = deg*ld;
610:   At = work+nwu;
611:   nwu += sr*k;
612:   Bt = work+nwu;
613:   nwu += k*k;
614:   PetscMemzero(Bt,k*k*sizeof(PetscScalar));
615:   Hj = work+nwu;
616:   nwu += k*k;
617:   Hp = work+nwu;
618:   nwu += k*k;
619:   PetscMemzero(Hp,k*k*sizeof(PetscScalar));
620:   PetscMalloc1(k,&p);
621:   PetscBLASIntCast(sr,&sr_);
622:   PetscBLASIntCast(k,&k_);
623:   PetscBLASIntCast(lds,&lds_);
624:   PetscBLASIntCast(ldh,&ldh_);
625:   STGetTransform(pep->st,&flg);
626:   if (!flg) {
627:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
628:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
629:   }
630:   if (transf) {
631:     ldt = k;
632:     T = work+nwu;
633:     nwu += k*k;
634:     for (i=0;i<k;i++) {
635:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
636:     }
637:     if (flg) {
638:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
639:       PetscBLASIntCast(nw-nwu,&lwork);
640:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work+nwu,&lwork,&info));  
641:     }
642:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
643:   } else {
644:     T = H; ldt = ldh;
645:   }
646:   PetscBLASIntCast(ldt,&ldt_);
647:   switch (pep->extract) {
648:   case PEP_EXTRACT_NONE:
649:     break;
650:   case PEP_EXTRACT_NORM:
651:     if (pep->basis == PEP_BASIS_MONOMIAL) {
652:       PetscBLASIntCast(ldt,&ldt_);
653:       PetscMalloc1(k,&rwork);
654:       norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
655:       PetscFree(rwork);
656:       if (norm>1.0) idxcpy = d-1;
657:     } else {
658:       PetscBLASIntCast(ldt,&ldt_);
659:       PetscMalloc1(k,&rwork);
660:       maxnrm = 0.0;
661:       for (i=0;i<pep->nmat-1;i++) {
662:         PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
663:         norm = LAPACKlange_("F",&k_,&k_,Hj,&k_,rwork);
664:         if (norm > maxnrm) {
665:           idxcpy = i;
666:           maxnrm = norm;
667:         }
668:       }
669:       PetscFree(rwork);
670:     }
671:     if (idxcpy>0) {
672:       /* copy block idxcpy of S to the first one */
673:       for (j=0;j<k;j++) {
674:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
675:       }
676:     }
677:     break;
678:   case PEP_EXTRACT_RESIDUAL:
679:     STGetTransform(pep->st,&flg);
680:     if (flg) {
681:       PetscMalloc1(pep->nmat,&A);
682:       for (i=0;i<pep->nmat;i++) {
683:         STGetTOperators(pep->st,i,A+i);
684:       }
685:     } else A = pep->A;
686:     PetscMalloc1(pep->nmat-1,&R);
687:     for (i=0;i<pep->nmat-1;i++) {
688:       BVDuplicateResize(pep->V,k,R+i);
689:     }
690:     BVDuplicateResize(pep->V,sr,&Y);
691:     MatCreateSeqDense(PETSC_COMM_SELF,sr,k,NULL,&M);
692:     g = 0.0; a = 1.0;
693:     BVSetActiveColumns(pep->V,0,sr);
694:     for (j=0;j<pep->nmat;j++) {
695:       BVMatMult(pep->V,A[j],Y);    
696:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
697:       for (i=0;i<pep->nmat-1;i++) {
698:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&k_,&k_,&a,S+i*ld,&lds_,Hj,&k_,&g,At,&sr_));
699:         MatDenseGetArray(M,&pM);
700:         for (jj=0;jj<k;jj++) {
701:           PetscMemcpy(pM+jj*sr,At+jj*sr,sr*sizeof(PetscScalar));
702:         }
703:         MatDenseRestoreArray(M,&pM);
704:         BVMult(R[i],1.0,(i==0)?0.0:1.0,Y,M);
705:       }
706:     }
707:     
708:     /* frobenius norm */
709:     maxnrm = 0.0;
710:     for (i=0;i<pep->nmat-1;i++) {
711:       norm = 0.0;
712:       for (j=0;j<k;j++) {
713:         BVGetColumn(R[i],j,&v);
714:         VecNorm(v,NORM_2,&nrm); 
715:         BVRestoreColumn(R[i],j,&v);
716:         norm += nrm*nrm;
717:       }
718:       norm = PetscSqrtReal(norm);
719:       if (maxnrm > norm) {
720:         maxnrm = norm;
721:         idxcpy = i;
722:       }
723:     }
724:     if (idxcpy>0) {
725:       /* copy block idxcpy of S to the first one */
726:       for (j=0;j<k;j++) {
727:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
728:       }
729:     }
730:     if (flg) PetscFree(A);
731:     for (i=0;i<pep->nmat-1;i++) {
732:       BVDestroy(&R[i]);
733:     }
734:     PetscFree(R);
735:     BVDestroy(&Y);
736:     MatDestroy(&M);
737:     break;
738:   case PEP_EXTRACT_STRUCTURED:
739:     for (j=0;j<k;j++) Bt[j+j*k] = 1.0;
740:     for (j=0;j<sr;j++) {
741:       for (i=0;i<k;i++) At[j*k+i] = PetscConj(S[i*lds+j]);
742:     }
743:     PEPEvaluateBasisM(pep,k,T,ldt,0,&Hp,&Hj);
744:     for (i=1;i<deg;i++) {
745:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
746:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
747:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
748:     }
749:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
750:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
751:     for (j=0;j<sr;j++) {
752:       for (i=0;i<k;i++) S[i*lds+j] = PetscConj(At[j*k+i]);
753:     } 
754:     break;
755:   default:
756:     SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Extraction not implemented in this solver");
757:   }
758:   PetscFree(p);
759:   return(0);
760: }

764: static PetscErrorCode PEPLookfordeflation(PEP pep,PetscInt *nl)
765: {
767:   PetscInt       i,l,n,ld;
768:   PetscReal      norm;
769:   PetscBool      cplx;
770:   PetscScalar    *H;

773:   *nl = 0;
774:   DSGetDimensions(pep->ds,&n,NULL,&l,NULL,NULL);
775:   DSGetLeadingDimension(pep->ds,&ld);
776:   DSGetArray(pep->ds,DS_MAT_A,&H);
777:   for (i=l;i<n;i++) {
778: #if defined(PETSC_USE_COMPLEX)
779:     cplx = PetscImaginaryPart(pep->eigr[i])?PETSC_TRUE:PETSC_FALSE;
780:     norm = PetscAbsScalar(pep->eigr[i]);
781: #else
782:     cplx = pep->eigi[i]?PETSC_TRUE:PETSC_FALSE;
783:     norm = SlepcAbsEigenvalue(pep->eigr[i],pep->eigi[i]);
784: #endif
785:     if (PetscAbsScalar(H[n+i*ld])/norm < pep->tol){
786:       if (cplx) {
787:         if (PetscAbsScalar(H[n+(i+1)*ld])/norm < pep->tol) (*nl)++;
788:         else break;
789:         i++;
790:       }
791:       (*nl)++;
792:     } else break;
793:   }
794:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
795:   return(0);
796: }

800: PetscErrorCode PEPSolve_TOAR(PEP pep)
801: {
803:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
804:   PetscInt       i,j,k,l,nv=0,ld,lds,off,ldds,newn,nq=ctx->nq,nl,nconv=0,locked=0,newc;
805:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nmat=pep->nmat,deg=nmat-1;
806:   PetscScalar    *S,*Q,*work,*H,sigma;
807:   PetscReal      beta,*rwork;
808:   PetscBool      breakdown=PETSC_FALSE,flg,falselock=PETSC_FALSE,def=PETSC_FALSE,sinv;

811:   if (ctx->lock) {
812:     PetscOptionsGetBool(NULL,"-pep_toar_falselocking",&falselock,NULL);
813:     PetscOptionsGetBool(NULL,"-pep_toar_lockdeflated",&def,NULL);
814:   }
815:   ld = ctx->ld;
816:   S = ctx->S;
817:   lds = deg*ld;        /* leading dimension of S */
818:   lwa = (deg+6)*ld*lds;
819:   lrwa = 7*lds;
820:   PetscMalloc2(lwa,&work,lrwa,&rwork);
821:   DSGetLeadingDimension(pep->ds,&ldds); 
822:   STGetShift(pep->st,&sigma);

824:   /* update polynomial basis coefficients */
825:   STGetTransform(pep->st,&flg);
826:   if (pep->sfactor!=1.0) {
827:     for (i=0;i<nmat;i++) {
828:       pep->pbc[nmat+i] /= pep->sfactor;
829:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor; 
830:     }
831:     if (!flg) {
832:       pep->target /= pep->sfactor;
833:       RGSetScale(pep->rg,pep->sfactor);
834:       STScaleShift(pep->st,1.0/pep->sfactor);
835:       sigma /= pep->sfactor;
836:     } else {
837:       PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
838:       RGSetScale(pep->rg,sinv?1.0/pep->sfactor:pep->sfactor);
839:       STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
840:     }
841:   }

843:   if (flg) sigma = 0.0;

845:   /* restart loop */
846:   l = 0;
847:   while (pep->reason == PEP_CONVERGED_ITERATING) {
848:     pep->its++;
849:     
850:     /* compute an nv-step Lanczos factorization */
851:     nv = PetscMax(PetscMin(nconv+pep->mpd,pep->ncv),nv);
852:     DSGetArray(pep->ds,DS_MAT_A,&H);
853:     PEPTOARrun(pep,sigma,&nq,S,ld,H,ldds,pep->nconv+l,&nv,&breakdown,work+nwu,lwa-nwu,pep->work,4);
854:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
855:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
856:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
857:     if (l==0) {
858:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
859:     } else {
860:       DSSetState(pep->ds,DS_STATE_RAW);
861:     }

863:     /* solve projected problem */
864:     DSSolve(pep->ds,pep->eigr,pep->eigi);
865:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
866:     DSUpdateExtraRow(pep->ds);

868:     /* check convergence */
869:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
870:     if (pep->its >= pep->max_it) pep->reason = PEP_DIVERGED_ITS;
871:     if (k >= pep->nev) pep->reason = PEP_CONVERGED_TOL;

873:     /* update l */
874:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
875:     else {
876:       l = (nv==k)?0:PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
877:       if (!breakdown) {
878:         /* prepare the Rayleigh quotient for restart */
879:         DSTruncate(pep->ds,k+l);
880:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
881:         l = newn-k;
882:       }
883:     }
884:     nconv = k;
885:     /* decide on deflating Krylov vectors */
886:     if (def) {
887:       PEPLookfordeflation(pep,&nl);
888:       nl = PetscMin(nl,k-pep->nconv);
889:       if (ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { 
890:         k = pep->nconv+nl; l = newn-k;
891:       }
892:     } else nl = k-pep->nconv;

894:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

896:     /* update S */
897:     off = pep->nconv*ldds;
898:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
899:     PEPTOARSupdate(S,ld,deg,nq,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu,lwa-nwu);
900:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

902:     /* copy last column of S */
903:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

905:     if (breakdown) {
906:       /* stop if breakdown */
907:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
908:       pep->reason = PEP_DIVERGED_BREAKDOWN;
909:     }
910:     if (pep->reason != PEP_CONVERGED_ITERATING) {l--; flg = PETSC_TRUE;}
911:     else flg = PETSC_FALSE;
912:     /* truncate S */
913:     if (k+l+deg<nq) {
914:       if (!falselock && ctx->lock) {
915:         newc = flg?k-pep->nconv:nl;
916:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,locked,newc,flg,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
917:         locked += newc;
918:       } else {
919:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,0,0,flg,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
920:       }
921:     }
922:     pep->nconv = k;
923:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
924:   }
925:   if (pep->nconv>0) {
926:     /* {V*S_nconv^i}_{i=0}^{d-1} has rank nconv instead of nconv+d-1. Force zeros in each S_nconv^i block */
927:     nq = pep->nconv;

929:     /* perform Newton refinement if required */
930:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
931:       /* extract invariant pair */
932:       DSGetArray(pep->ds,DS_MAT_A,&H);
933:       PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds,work+nwu,lwa-nwu);
934:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
935:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
936:       DSSetState(pep->ds,DS_STATE_RAW);
937:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds,&nq);
938:       DSSolve(pep->ds,pep->eigr,pep->eigi);
939:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
940:       DSGetArray(pep->ds,DS_MAT_Q,&Q);
941:       PEPTOARSupdate(S,ld,deg,nq,0,pep->nconv,pep->nconv,Q,ldds,work+nwu,lwa-nwu);
942:       DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
943:     } else {
944:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
945:       DSSetState(pep->ds,DS_STATE_RAW);
946:     }
947:   }
948:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
949:     STGetTransform(pep->st,&flg);
950:     if (!flg) {
951:       if (pep->ops->backtransform) {
952:         (*pep->ops->backtransform)(pep);
953:       }
954:       /* restore original values */
955:       pep->target *= pep->sfactor;
956:       STScaleShift(pep->st,pep->sfactor);
957:     } else {
958:       STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
959:     }
960:     RGSetScale(pep->rg,1.0);
961:     if (pep->sfactor!=1.0) {
962:       for (j=0;j<pep->nconv;j++) {
963:         pep->eigr[j] *= pep->sfactor;
964:         pep->eigi[j] *= pep->sfactor;
965:       }
966:       /* restore original values */
967:       for (i=0;i<pep->nmat;i++){
968:         pep->pbc[pep->nmat+i] *= pep->sfactor;
969:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
970:       }
971:     }
972:   }

974:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
975:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
976:   DSSetState(pep->ds,DS_STATE_RAW);

978:   PetscFree2(work,rwork);
979:   return(0);
980: }

984: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
985: {
986:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

989:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
990:   else {
991:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
992:     ctx->keep = keep;
993:   }
994:   return(0);
995: }

999: /*@
1000:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
1001:    method, in particular the proportion of basis vectors that must be kept
1002:    after restart.

1004:    Logically Collective on PEP

1006:    Input Parameters:
1007: +  pep  - the eigenproblem solver context
1008: -  keep - the number of vectors to be kept at restart

1010:    Options Database Key:
1011: .  -pep_toar_restart - Sets the restart parameter

1013:    Notes:
1014:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

1016:    Level: advanced

1018: .seealso: PEPTOARGetRestart()
1019: @*/
1020: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
1021: {

1027:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
1028:   return(0);
1029: }

1033: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
1034: {
1035:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1038:   *keep = ctx->keep;
1039:   return(0);
1040: }

1044: /*@
1045:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

1047:    Not Collective

1049:    Input Parameter:
1050: .  pep - the eigenproblem solver context

1052:    Output Parameter:
1053: .  keep - the restart parameter

1055:    Level: advanced

1057: .seealso: PEPTOARSetRestart()
1058: @*/
1059: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
1060: {

1066:   PetscTryMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
1067:   return(0);
1068: }

1072: static PetscErrorCode PEPTOARSetLocking_TOAR(PEP pep,PetscBool lock)
1073: {
1074:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1077:   ctx->lock = lock;
1078:   return(0);
1079: }

1083: /*@
1084:    PEPTOARSetLocking - Choose between locking and non-locking variants of
1085:    the TOAR method.

1087:    Logically Collective on PEP

1089:    Input Parameters:
1090: +  pep  - the eigenproblem solver context
1091: -  lock - true if the locking variant must be selected

1093:    Options Database Key:
1094: .  -pep_toar_locking - Sets the locking flag

1096:    Notes:
1097:    The default is to lock converged eigenpairs when the method restarts.
1098:    This behaviour can be changed so that all directions are kept in the
1099:    working subspace even if already converged to working accuracy (the
1100:    non-locking variant).

1102:    Level: advanced

1104: .seealso: PEPTOARGetLocking()
1105: @*/
1106: PetscErrorCode PEPTOARSetLocking(PEP pep,PetscBool lock)
1107: {

1113:   PetscTryMethod(pep,"PEPTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
1114:   return(0);
1115: }

1119: static PetscErrorCode PEPTOARGetLocking_TOAR(PEP pep,PetscBool *lock)
1120: {
1121:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1124:   *lock = ctx->lock;
1125:   return(0);
1126: }

1130: /*@
1131:    PEPTOARGetLocking - Gets the locking flag used in the TOAR method.

1133:    Not Collective

1135:    Input Parameter:
1136: .  pep - the eigenproblem solver context

1138:    Output Parameter:
1139: .  lock - the locking flag

1141:    Level: advanced

1143: .seealso: PEPTOARSetLocking()
1144: @*/
1145: PetscErrorCode PEPTOARGetLocking(PEP pep,PetscBool *lock)
1146: {

1152:   PetscTryMethod(pep,"PEPTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
1153:   return(0);
1154: }

1158: PetscErrorCode PEPSetFromOptions_TOAR(PetscOptions *PetscOptionsObject,PEP pep)
1159: {
1161:   PetscBool      flg,lock;
1162:   PetscReal      keep;

1165:   PetscOptionsHead(PetscOptionsObject,"PEP TOAR Options");
1166:   PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
1167:   if (flg) {
1168:     PEPTOARSetRestart(pep,keep);
1169:   }
1170:   PetscOptionsBool("-pep_toar_locking","Choose between locking and non-locking variants","PEPTOARSetLocking",PETSC_FALSE,&lock,&flg);
1171:   if (flg) {
1172:     PEPTOARSetLocking(pep,lock);
1173:   }
1174:   PetscOptionsTail();
1175:   return(0);
1176: }

1180: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
1181: {
1183:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
1184:   PetscBool      isascii;

1187:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1188:   if (isascii) {
1189:     PetscViewerASCIIPrintf(viewer,"  TOAR: %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
1190:     PetscViewerASCIIPrintf(viewer,"  TOAR: using the %slocking variant\n",ctx->lock?"":"non-");
1191:   }
1192:   return(0);
1193: }

1197: PetscErrorCode PEPDestroy_TOAR(PEP pep)
1198: {

1202:   PetscFree(pep->data);
1203:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
1204:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
1205:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",NULL);
1206:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",NULL);
1207:   return(0);
1208: }

1212: PETSC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
1213: {
1214:   PEP_TOAR       *ctx;

1218:   PetscNewLog(pep,&ctx);
1219:   pep->data = (void*)ctx;
1220:   ctx->lock = PETSC_TRUE;

1222:   pep->ops->solve          = PEPSolve_TOAR;
1223:   pep->ops->setup          = PEPSetUp_TOAR;
1224:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
1225:   pep->ops->destroy        = PEPDestroy_TOAR;
1226:   pep->ops->view           = PEPView_TOAR;
1227:   pep->ops->backtransform  = PEPBackTransform_Default;
1228:   pep->ops->computevectors = PEPComputeVectors_Default;
1229:   pep->ops->extractvectors = PEPExtractVectors_TOAR;
1230:   pep->ops->reset          = PEPReset_TOAR;
1231:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
1232:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
1233:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",PEPTOARSetLocking_TOAR);
1234:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",PEPTOARGetLocking_TOAR);
1235:   return(0);
1236: }