Actual source code: ks-slice.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*

  3:    SLEPc eigensolver: "krylovschur"

  5:    Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems

  7:    References:

  9:        [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
 10:            solving sparse symmetric generalized eigenproblems", SIAM J.
 11:            Matrix Anal. Appl. 15(1):228-272, 1994.

 13:        [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
 14:            on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
 15:            2012.

 17:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 18:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 19:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

 21:    This file is part of SLEPc.

 23:    SLEPc is free software: you can redistribute it and/or modify it under  the
 24:    terms of version 3 of the GNU Lesser General Public License as published by
 25:    the Free Software Foundation.

 27:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 28:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 29:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 30:    more details.

 32:    You  should have received a copy of the GNU Lesser General  Public  License
 33:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 34:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 35: */

 37: #include <slepc/private/epsimpl.h>
 38:  #include krylovschur.h

 40: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON

 44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
 45:   PetscErrorCode  ierr;
 46:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
 47:   EPS_SR          sr=ctx->sr;
 48:   EPS_shift       s;

 51:   if (sr) {
 52:     if (ctx->npart>1) {
 53:       BVDestroy(&sr->V);
 54:       PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
 55:     }
 56:     /* Reviewing list of shifts to free memory */
 57:     s = sr->s0;
 58:     if (s) {
 59:       while (s->neighb[1]) {
 60:         s = s->neighb[1];
 61:         PetscFree(s->neighb[0]);
 62:       }
 63:       PetscFree(s);
 64:     }
 65:     PetscFree(sr);
 66:   }
 67:   ctx->sr = PETSC_NULL;
 68:   return(0);
 69: }

 73: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
 74: {
 75:   PetscErrorCode  ierr;
 76:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

 79:   if (!ctx->global) return(0);
 80:   /* Destroy auxiliary EPS */
 81:   EPSSliceResetSR(ctx->eps);
 82:   EPSDestroy(&ctx->eps);
 83:   if (ctx->npart>1) {
 84:     PetscSubcommDestroy(&ctx->subc);
 85:     if (ctx->commset) {
 86:       MPI_Comm_free(&ctx->commrank);
 87:       ctx->commset = PETSC_FALSE;
 88:     }
 89:   }
 90:   PetscFree(ctx->subintervals);
 91:   PetscFree(ctx->nconv_loc);
 92:   EPSSliceResetSR(eps);
 93:   PetscFree(ctx->inertias);
 94:   PetscFree(ctx->shifts);
 95:   return(0);
 96: }

100: /*
101:   EPSSliceAllocateSolution - Allocate memory storage for common variables such
102:   as eigenvalues and eigenvectors. The argument extra is used for methods
103:   that require a working basis slightly larger than ncv.
104: */
105: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
106: {
107:   PetscErrorCode     ierr;
108:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data;
109:   PetscReal          eta;
110:   PetscInt           k;
111:   PetscLogDouble     cnt;
112:   BVType             type;
113:   BVOrthogType       orthog_type;
114:   BVOrthogRefineType orthog_ref;
115:   BVOrthogBlockType  ob_type;
116:   Mat                matrix;
117:   Vec                t;
118:   EPS_SR             sr = ctx->sr;

121:   /* allocate space for eigenvalues and friends */
122:   k = PetscMax(1,sr->numEigs);
123:   PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
124:   PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
125:   cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
126:   PetscLogObjectMemory((PetscObject)eps,cnt);

128:   /* allocate sr->V and transfer options from eps->V */
129:   BVDestroy(&sr->V);
130:   BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
131:   PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
132:   if (!eps->V) { EPSGetBV(eps,&eps->V); }
133:   if (!((PetscObject)(eps->V))->type_name) {
134:     BVSetType(sr->V,BVSVEC);
135:   } else {
136:     BVGetType(eps->V,&type);
137:     BVSetType(sr->V,type);
138:   }
139:   STMatCreateVecs(eps->st,&t,NULL);
140:   BVSetSizesFromVec(sr->V,t,k);
141:   VecDestroy(&t);
142:   EPS_SetInnerProduct(eps);
143:   BVGetMatrix(eps->V,&matrix,NULL);
144:   BVSetMatrix(sr->V,matrix,PETSC_FALSE);
145:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
146:   BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
147:   return(0);
148: }

152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154:   PetscErrorCode     ierr;
155:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156:   BV                 V;
157:   BVType             type;
158:   PetscReal          eta;
159:   BVOrthogType       orthog_type;
160:   BVOrthogRefineType orthog_ref;
161:   BVOrthogBlockType  ob_type;
162:   Mat                A,B=NULL,Ar,Br=NULL;
163:   PetscInt           i;
164:   PetscReal          h,a,b;
165:   PetscMPIInt        rank;
166:   EPS_SR             sr=ctx->sr;
167:   PC                 pc;
168:   PCType             pctype;
169:   KSP                ksp;
170:   KSPType            ksptype;
171:   STType             sttype;
172:   const MatSolverPackage stype;

175:   EPSGetOperators(eps,&A,&B);
176:   if (ctx->npart==1) {
177:     if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
178:     EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
179:     EPSSetST(ctx->eps,eps->st);
180:     a = eps->inta; b = eps->intb;
181:   } else {
182:     if (!ctx->subc) {
183:       /* Create context for subcommunicators */
184:       PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
185:       PetscSubcommSetNumber(ctx->subc,ctx->npart);
186:       PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
187:       PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));

189:       /* Duplicate matrices */
190:       MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
191:       if (B) { MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br); }
192:     }

194:     /* Determine subintervals */
195:     if (!ctx->subintset) { /* uniform distribution if no set by user */
196:       if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
197:       h = (eps->intb-eps->inta)/ctx->npart;
198:       a = eps->inta+ctx->subc->color*h;
199:       b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
200:       PetscFree(ctx->subintervals);
201:       PetscMalloc1(ctx->npart+1,&ctx->subintervals);
202:       for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
203:       ctx->subintervals[ctx->npart] = eps->intb;
204:     } else {
205:       a = ctx->subintervals[ctx->subc->color];
206:       b = ctx->subintervals[ctx->subc->color+1];
207:     }

209:     if (!ctx->eps) {
210:       /* Create auxiliary EPS */
211:       EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
212:       EPSSetOperators(ctx->eps,Ar,Br);
213:       MatDestroy(&Ar);
214:       MatDestroy(&Br);
215:     }
216:     EPSSetType(ctx->eps,((PetscObject)eps)->type_name);

218:     /* Transfer options for ST, KSP and PC */
219:     STGetType(eps->st,&sttype);
220:     STSetType(ctx->eps->st,sttype);
221:     STGetKSP(eps->st,&ksp);
222:     KSPGetType(ksp,&ksptype);
223:     KSPGetPC(ksp,&pc);
224:     PCGetType(pc,&pctype);
225:     PCFactorGetMatSolverPackage(pc,&stype);
226:     STGetKSP(ctx->eps->st,&ksp);
227:     KSPSetType(ksp,ksptype);
228:     KSPGetPC(ksp,&pc);
229:     PCSetType(pc,pctype);
230:     PCFactorSetMatSolverPackage(pc,stype);

232:     /* Create subcommunicator grouping processes with same rank */
233:     if (ctx->commrank) { MPI_Comm_free(&ctx->commrank); }
234:     MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
235:     MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
236:     ctx->commset = PETSC_TRUE;
237:   }
238:   EPSSetConvergenceTest(ctx->eps,eps->conv);
239:   EPSSetInterval(ctx->eps,a,b);
240:   ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
241:   ctx_local->npart = ctx->npart;
242:   ctx_local->detect = ctx->detect;
243:   ctx_local->global = PETSC_FALSE;
244:   ctx_local->eps = eps;
245:   ctx_local->subc = ctx->subc;
246:   ctx_local->commrank = ctx->commrank;

248:   EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
249:   EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);

251:   /* transfer options from eps->V */
252:   EPSGetBV(ctx->eps,&V);
253:   if (!eps->V) { EPSGetBV(eps,&eps->V); }
254:   if (!((PetscObject)(eps->V))->type_name) {
255:     BVSetType(V,BVSVEC);
256:   } else {
257:     BVGetType(eps->V,&type);
258:     BVSetType(V,type);
259:   }
260:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
261:   BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
262:   ctx->eps->which = eps->which;
263:   ctx->eps->max_it = eps->max_it;
264:   ctx->eps->tol = eps->tol;
265:   ctx->eps->purify = eps->purify;
266:   if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
267:   EPSSetProblemType(ctx->eps,eps->problem_type);
268:   EPSSetUp(ctx->eps);
269:   ctx->eps->nconv = 0;
270:   ctx->eps->its   = 0;
271:   for (i=0;i<ctx->eps->ncv;i++) {
272:     ctx->eps->eigr[i]   = 0.0;
273:     ctx->eps->eigi[i]   = 0.0;
274:     ctx->eps->errest[i] = 0.0;
275:   }
276:   return(0);
277: }

281: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
282: {
284:   KSP            ksp;
285:   PC             pc;
286:   Mat            F;

289:   if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
290:     if (inertia) *inertia = eps->n;
291:   } else if (shift <= PETSC_MIN_REAL) {
292:     if (inertia) *inertia = 0;
293:     if (zeros) *zeros = 0;
294:   } else {
295:     STSetShift(eps->st,shift);
296:     STSetUp(eps->st);
297:     STGetKSP(eps->st,&ksp);
298:     KSPGetPC(ksp,&pc);
299:     PCFactorGetMatrix(pc,&F);
300:     MatGetInertia(F,inertia,zeros,NULL);
301:   }
302:   return(0);
303: }

307: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
308: {
309:   PetscErrorCode  ierr;
310:   PetscBool       issinv;
311:   EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
312:   EPS_SR          sr,sr_loc,sr_glob;
313:   PetscInt        nEigs,dssz=1,i,zeros=0,off=0;
314:   PetscMPIInt     nproc,rank,aux;
315:   MPI_Request     req;

318:   if (ctx->global) {
319:     if (eps->inta==0.0 && eps->intb==0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Must define a computational interval when using EPS_ALL");
320:     if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
321:     if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
322:     if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
323:     if (!((PetscObject)(eps->st))->type_name) { /* default to shift-and-invert */
324:       STSetType(eps->st,STSINVERT);
325:     }
326:     PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
327:     if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
328:     if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2;  /* use tighter tolerance */
329:     if (!eps->max_it) eps->max_it = 100;
330:     if (ctx->nev==1) ctx->nev = 40;  /* nev not set, use default value */
331:     if (ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
332:   }
333:   eps->ops->backtransform = NULL;

335:   /* create spectrum slicing context and initialize it */
336:   EPSSliceResetSR(eps);
337:   PetscNewLog(eps,&sr);
338:   ctx->sr = sr;
339:   sr->itsKs = 0;
340:   sr->nleap = 0;
341:   sr->nMAXCompl = eps->nev/4;
342:   sr->iterCompl = eps->max_it/4;
343:   sr->sPres = NULL;
344:   sr->nS = 0;

346:   if (ctx->npart==1 || ctx->global) {
347:     /* check presence of ends and finding direction */
348:     if ((eps->inta > PETSC_MIN_REAL && eps->inta != 0.0) || eps->intb >= PETSC_MAX_REAL) {
349:       sr->int0 = eps->inta;
350:       sr->int1 = eps->intb;
351:       sr->dir = 1;
352:       if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
353:         sr->hasEnd = PETSC_FALSE;
354:       } else sr->hasEnd = PETSC_TRUE;
355:     } else {
356:       sr->int0 = eps->intb;
357:       sr->int1 = eps->inta;
358:       sr->dir = -1;
359:       sr->hasEnd = (eps->inta <= PETSC_MIN_REAL)?PETSC_FALSE:PETSC_TRUE;
360:     }
361:   }
362:   if (ctx->global) {
363:     if (ctx->npart>1) {
364:       /* prevent computation of factorization in global eps unless npart==1 */
365:       STSetTransform(eps->st,PETSC_FALSE);
366:     }
367:     EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
368:     /* create subintervals and initialize auxiliary eps for slicing runs */
369:     EPSSliceGetEPS(eps);
370:     sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
371:     if (ctx->npart>1) {
372:       if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
373:       MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
374:       if (rank==0) {
375:         MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
376:       }
377:       MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
378:       PetscFree(ctx->nconv_loc);
379:       PetscMalloc1(ctx->npart,&ctx->nconv_loc);
380:       MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
381:       if (sr->dir<0) off = 1;
382:       if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
383:         PetscMPIIntCast(sr_loc->numEigs,&aux);
384:         MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
385:         MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
386:       } else {
387:         MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
388:         if (!rank) {
389:           PetscMPIIntCast(sr_loc->numEigs,&aux);
390:           MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
391:           MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
392:         }
393:         PetscMPIIntCast(ctx->npart,&aux);
394:         MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
395:         MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
396:       }
397:       nEigs = 0;
398:       for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
399:     } else {
400:       nEigs = sr_loc->numEigs;
401:       sr->inertia0 = sr_loc->inertia0;
402:     }
403:     sr->inertia1 = sr->inertia0+sr->dir*nEigs;
404:     sr->numEigs = nEigs;
405:     eps->nev = nEigs;
406:     eps->ncv = nEigs;
407:     eps->mpd = nEigs;
408:   } else {
409:     ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
410:     sr_glob = ctx_glob->sr;
411:     if (ctx->npart>1) {
412:       sr->dir = sr_glob->dir;
413:       sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
414:       sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
415:       if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
416:       else sr->hasEnd = PETSC_TRUE;
417:     }

419:     /* last process in eps comm computes inertia1 */
420:     if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
421:       EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
422:       if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
423:     }

425:     /* compute inertia0 */
426:     EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
427:     if (zeros) { /* error in factorization */
428:       if (ctx->npart==1 || ctx_glob->subintset || ((sr->dir>0 && ctx->subc->color==0) || (sr->dir<0 && ctx->subc->color==ctx->npart-1))) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
429:       else { /* perturb shift */
430:         sr->int0 *= (1.0+SLICE_PTOL);
431:         EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
432:         if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
433:       }
434:     }
435:     if (ctx->npart>1) {
436:       /* inertia1 is received from neighbour */
437:       MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
438:       if (!rank) {
439:         if ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) { /* send inertia0 to neighbour0 */
440:           MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
441:           MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
442:         }
443:         if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
444:           MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
445:           MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
446:         }
447:       }
448:       if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
449:         MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
450:         MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
451:       } else sr_glob->inertia1 = sr->inertia1;
452:     }

454:     /* number of eigenvalues in interval */
455:     sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
456:     if (ctx->npart>1) {
457:       /* memory allocate for subinterval eigenpairs */
458:       EPSSliceAllocateSolution(eps,1);
459:     }
460:     dssz = eps->ncv+1;
461:   }
462:   DSSetType(eps->ds,DSHEP);
463:   DSSetCompact(eps->ds,PETSC_TRUE);
464:   DSAllocate(eps->ds,dssz);
465:   return(0);
466: }

470: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
471: {
472:   PetscErrorCode  ierr;
473:   Vec             v,vg,v_loc;
474:   IS              is1,is2;
475:   VecScatter      vec_sc;
476:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
477:   PetscInt        nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
478:   PetscScalar     *array;
479:   EPS_SR          sr_loc;
480:   BV              V_loc;

483:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
484:   V_loc = sr_loc->V;

486:   /* Gather parallel eigenvectors */
487:   BVGetColumn(eps->V,0,&v);
488:   VecGetOwnershipRange(v,&n0,&m0);
489:   BVRestoreColumn(eps->V,0,&v);
490:   BVGetColumn(ctx->eps->V,0,&v);
491:   VecGetLocalSize(v,&nloc);
492:   BVRestoreColumn(ctx->eps->V,0,&v);
493:   PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
494:   VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
495:   idx = -1;
496:   for (si=0;si<ctx->npart;si++) {
497:     j = 0;
498:     for (i=n0;i<m0;i++) {
499:       idx1[j]   = i;
500:       idx2[j++] = i+eps->n*si;
501:     }
502:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
503:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
504:     BVGetColumn(eps->V,0,&v);
505:     VecScatterCreate(v,is1,vg,is2,&vec_sc);
506:     BVRestoreColumn(eps->V,0,&v);
507:     ISDestroy(&is1);
508:     ISDestroy(&is2);
509:     for (i=0;i<ctx->nconv_loc[si];i++) {
510:       BVGetColumn(eps->V,++idx,&v);
511:       if (ctx->subc->color==si) {
512:         BVGetColumn(V_loc,i,&v_loc);
513:         VecGetArray(v_loc,&array);
514:         VecPlaceArray(vg,array);
515:       }
516:       VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
517:       VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
518:       if (ctx->subc->color==si) {
519:         VecResetArray(vg);
520:         VecRestoreArray(v_loc,&array);
521:         BVRestoreColumn(V_loc,i,&v_loc);
522:       }
523:       BVRestoreColumn(eps->V,idx,&v);
524:     }
525:     VecScatterDestroy(&vec_sc);
526:   }
527:   PetscFree2(idx1,idx2);
528:   VecDestroy(&vg);
529:   return(0);
530: }

534: /*
535:   EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
536:  */
537: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
538: {
539:   PetscErrorCode  ierr;
540:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

543:   if (ctx->global && ctx->npart>1) {
544:     EPSComputeVectors(ctx->eps);
545:     EPSSliceGatherEigenVectors(eps);
546:   }
547:   return(0);
548: }

550: #define SWAP(a,b,t) {t=a;a=b;b=t;}

554: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
555: {
556:   PetscErrorCode  ierr;
557:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
558:   PetscInt        i=0,j,tmpi;
559:   PetscReal       v,tmpr;
560:   EPS_shift       s;

563:   if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
564:   if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
565:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
566:     *n = 2;
567:   } else {
568:     *n = 1;
569:     s = ctx->sr->s0;
570:     while (s) {
571:       (*n)++;
572:       s = s->neighb[1];
573:     }
574:   }
575:   PetscMalloc1(*n,shifts);
576:   PetscMalloc1(*n,inertias);
577:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
578:     (*shifts)[0]   = ctx->sr->int0;
579:     (*shifts)[1]   = ctx->sr->int1;
580:     (*inertias)[0] = ctx->sr->inertia0;
581:     (*inertias)[1] = ctx->sr->inertia1;
582:   } else {
583:     s = ctx->sr->s0;
584:     while (s) {
585:       (*shifts)[i]     = s->value;
586:       (*inertias)[i++] = s->inertia;
587:       s = s->neighb[1];
588:     }
589:     (*shifts)[i]   = ctx->sr->int1;
590:     (*inertias)[i] = ctx->sr->inertia1;
591:   }
592:   /* remove possible duplicate in last position */
593:   if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
594:   /* sort result */
595:   for (i=0;i<*n;i++) {
596:     v = (*shifts)[i];
597:     for (j=i+1;j<*n;j++) {
598:       if (v > (*shifts)[j]) {
599:         SWAP((*shifts)[i],(*shifts)[j],tmpr);
600:         SWAP((*inertias)[i],(*inertias)[j],tmpi);
601:         v = (*shifts)[i];
602:       }
603:     }
604:   }
605:   return(0);
606: }

610: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
611: {
612:   PetscErrorCode  ierr;
613:   PetscMPIInt     rank,nproc;
614:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
615:   PetscInt        i,idx,j;
616:   PetscInt        *perm_loc,off=0,*inertias_loc,ns;
617:   PetscScalar     *eigr_loc;
618:   EPS_SR          sr_loc;
619:   PetscReal       *shifts_loc;
620:   PetscMPIInt     *disp,*ns_loc,aux;

623:   eps->nconv = 0;
624:   for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
625:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;

627:   /* Gather the shifts used and the inertias computed */
628:   EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
629:   if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
630:   if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
631:     ns--;
632:     for (i=0;i<ns;i++) {
633:       inertias_loc[i] = inertias_loc[i+1];
634:       shifts_loc[i] = shifts_loc[i+1];
635:     }
636:   }
637:   PetscMalloc1(ctx->npart,&ns_loc);
638:   MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
639:   PetscMPIIntCast(ns,&aux);
640:   if (rank==0) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
641:   PetscMPIIntCast(ctx->npart,&aux);
642:   MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
643:   ctx->nshifts = 0;
644:   for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
645:   PetscFree(ctx->inertias);
646:   PetscFree(ctx->shifts);
647:   PetscMalloc1(ctx->nshifts,&ctx->inertias);
648:   PetscMalloc1(ctx->nshifts,&ctx->shifts);

650:   /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
651:   eigr_loc = sr_loc->eigr;
652:   perm_loc = sr_loc->perm;
653:   MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
654:   PetscMalloc1(ctx->npart,&disp);
655:   disp[0] = 0;
656:   for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
657:   if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
658:     PetscMPIIntCast(sr_loc->numEigs,&aux);
659:     MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
660:     MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
661:     for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
662:     PetscMPIIntCast(ns,&aux);
663:     MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
664:     MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
665:     MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
666:   } else { /* subcommunicators with different size */
667:     MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
668:     if (rank==0) {
669:       PetscMPIIntCast(sr_loc->numEigs,&aux);
670:       MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
671:       MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
672:       for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
673:       PetscMPIIntCast(ns,&aux);
674:       MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
675:       MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
676:       MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
677:     }
678:     PetscMPIIntCast(eps->nconv,&aux);
679:     MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
680:     MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
681:     MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
682:     PetscMPIIntCast(ctx->nshifts,&aux);
683:     MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
684:     MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
685:   }
686:   /* Update global array eps->perm */
687:   idx = ctx->nconv_loc[0];
688:   for (i=1;i<ctx->npart;i++) {
689:     off += ctx->nconv_loc[i-1];
690:     for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
691:   }

693:   /* Gather parallel eigenvectors */
694:   PetscFree(ns_loc);
695:   PetscFree(disp);
696:   PetscFree(shifts_loc);
697:   PetscFree(inertias_loc);
698:   return(0);
699: }

701: /*
702:    Fills the fields of a shift structure
703: */
706: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
707: {
708:   PetscErrorCode  ierr;
709:   EPS_shift       s,*pending2;
710:   PetscInt        i;
711:   EPS_SR          sr;
712:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

715:   sr = ctx->sr;
716:   PetscNewLog(eps,&s);
717:   s->value = val;
718:   s->neighb[0] = neighb0;
719:   if (neighb0) neighb0->neighb[1] = s;
720:   s->neighb[1] = neighb1;
721:   if (neighb1) neighb1->neighb[0] = s;
722:   s->comp[0] = PETSC_FALSE;
723:   s->comp[1] = PETSC_FALSE;
724:   s->index = -1;
725:   s->neigs = 0;
726:   s->nconv[0] = s->nconv[1] = 0;
727:   s->nsch[0] = s->nsch[1]=0;
728:   /* Inserts in the stack of pending shifts */
729:   /* If needed, the array is resized */
730:   if (sr->nPend >= sr->maxPend) {
731:     sr->maxPend *= 2;
732:     PetscMalloc1(sr->maxPend,&pending2);
733:     PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
734:     for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
735:     PetscFree(sr->pending);
736:     sr->pending = pending2;
737:   }
738:   sr->pending[sr->nPend++]=s;
739:   return(0);
740: }

742: /* Prepare for Rational Krylov update */
745: static PetscErrorCode EPSPrepareRational(EPS eps)
746: {
747:   EPS_KRYLOVSCHUR  *ctx=(EPS_KRYLOVSCHUR*)eps->data;
748:   PetscErrorCode   ierr;
749:   PetscInt         dir,i,k,ld,nv;
750:   PetscScalar      *A;
751:   EPS_SR           sr = ctx->sr;
752:   Vec              v;

755:   DSGetLeadingDimension(eps->ds,&ld);
756:   dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
757:   dir*=sr->dir;
758:   k = 0;
759:   for (i=0;i<sr->nS;i++) {
760:     if (dir*PetscRealPart(sr->S[i])>0.0) {
761:       sr->S[k] = sr->S[i];
762:       sr->S[sr->nS+k] = sr->S[sr->nS+i];
763:       BVGetColumn(sr->Vnext,k,&v);
764:       BVCopyVec(eps->V,eps->nconv+i,v);
765:       BVRestoreColumn(sr->Vnext,k,&v);
766:       k++;
767:       if (k>=sr->nS/2)break;
768:     }
769:   }
770:   /* Copy to DS */
771:   DSGetArray(eps->ds,DS_MAT_A,&A);
772:   PetscMemzero(A,ld*ld*sizeof(PetscScalar));
773:   for (i=0;i<k;i++) {
774:     A[i*(1+ld)] = sr->S[i];
775:     A[k+i*ld] = sr->S[sr->nS+i];
776:   }
777:   sr->nS = k;
778:   DSRestoreArray(eps->ds,DS_MAT_A,&A);
779:   DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
780:   DSSetDimensions(eps->ds,nv,0,0,k);
781:   /* Append u to V */
782:   BVGetColumn(sr->Vnext,sr->nS,&v);
783:   BVCopyVec(eps->V,sr->nv,v);
784:   BVRestoreColumn(sr->Vnext,sr->nS,&v);
785:   return(0);
786: }

788: /* Provides next shift to be computed */
791: static PetscErrorCode EPSExtractShift(EPS eps)
792: {
793:   PetscErrorCode   ierr;
794:   PetscInt         iner,zeros=0;
795:   EPS_KRYLOVSCHUR  *ctx=(EPS_KRYLOVSCHUR*)eps->data;
796:   EPS_SR           sr;
797:   PetscReal        newShift;
798:   EPS_shift        sPres;

801:   sr = ctx->sr;
802:   if (sr->nPend > 0) {
803:     sr->sPrev = sr->sPres;
804:     sr->sPres = sr->pending[--sr->nPend];
805:     sPres = sr->sPres;
806:     EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
807:     if (zeros) {
808:       newShift = sPres->value*(1.0+SLICE_PTOL);
809:       if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
810:       else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
811:       EPSSliceGetInertia(eps,newShift,&iner,&zeros);
812:       if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
813:       sPres->value = newShift;
814:     }
815:     sr->sPres->inertia = iner;
816:     eps->target = sr->sPres->value;
817:     eps->reason = EPS_CONVERGED_ITERATING;
818:     eps->its = 0;
819:   } else sr->sPres = NULL;
820:   return(0);
821: }

823: /*
824:    Symmetric KrylovSchur adapted to spectrum slicing:
825:    Allows searching an specific amount of eigenvalues in the subintervals left and right.
826:    Returns whether the search has succeeded
827: */
830: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
831: {
832:   PetscErrorCode  ierr;
833:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
834:   PetscInt        i,conv,k,l,ld,nv,*iwork,j,p;
835:   Mat             U;
836:   PetscScalar     *Q,*A,rtmp;
837:   PetscReal       *a,*b,beta;
838:   PetscBool       breakdown;
839:   PetscInt        count0,count1;
840:   PetscReal       lambda;
841:   EPS_shift       sPres;
842:   PetscBool       complIterating;
843:   PetscBool       sch0,sch1;
844:   PetscInt        iterCompl=0,n0,n1;
845:   EPS_SR          sr = ctx->sr;

848:   /* Spectrum slicing data */
849:   sPres = sr->sPres;
850:   complIterating =PETSC_FALSE;
851:   sch1 = sch0 = PETSC_TRUE;
852:   DSGetLeadingDimension(eps->ds,&ld);
853:   PetscMalloc1(2*ld,&iwork);
854:   count0=0;count1=0; /* Found on both sides */
855:   if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
856:     /* Rational Krylov */
857:     DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
858:     DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
859:     DSSetDimensions(eps->ds,l+1,0,0,0);
860:     BVSetActiveColumns(eps->V,0,l+1);
861:     DSGetMat(eps->ds,DS_MAT_Q,&U);
862:     BVMultInPlace(eps->V,U,0,l+1);
863:     MatDestroy(&U);
864:   } else {
865:     /* Get the starting Lanczos vector */
866:     EPSGetStartVector(eps,0,NULL);
867:     l = 0;
868:   }
869:   /* Restart loop */
870:   while (eps->reason == EPS_CONVERGED_ITERATING) {
871:     eps->its++; sr->itsKs++;
872:     /* Compute an nv-step Lanczos factorization */
873:     nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
874:     DSGetArrayReal(eps->ds,DS_MAT_T,&a);
875:     b = a + ld;
876:     EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
877:     sr->nv = nv;
878:     beta = b[nv-1];
879:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
880:     DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
881:     if (l==0) {
882:       DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
883:     } else {
884:       DSSetState(eps->ds,DS_STATE_RAW);
885:     }
886:     BVSetActiveColumns(eps->V,eps->nconv,nv);

888:     /* Solve projected problem and compute residual norm estimates */
889:     if (eps->its == 1 && l > 0) {/* After rational update */
890:       DSGetArray(eps->ds,DS_MAT_A,&A);
891:       DSGetArrayReal(eps->ds,DS_MAT_T,&a);
892:       b = a + ld;
893:       k = eps->nconv+l;
894:       A[k*ld+k-1] = A[(k-1)*ld+k];
895:       A[k*ld+k] = a[k];
896:       for (j=k+1; j< nv; j++) {
897:         A[j*ld+j] = a[j];
898:         A[j*ld+j-1] = b[j-1] ;
899:         A[(j-1)*ld+j] = b[j-1];
900:       }
901:       DSRestoreArray(eps->ds,DS_MAT_A,&A);
902:       DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
903:       DSSolve(eps->ds,eps->eigr,NULL);
904:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
905:       DSSetCompact(eps->ds,PETSC_TRUE);
906:     } else { /* Restart */
907:       DSSolve(eps->ds,eps->eigr,NULL);
908:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
909:     }
910:     /* Residual */
911:     EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);

913:     if (ctx->lock) {
914:       /* Check convergence */
915:       DSGetArrayReal(eps->ds,DS_MAT_T,&a);
916:       b = a + ld;
917:       conv = 0;
918:       j = k = eps->nconv;
919:       for (i=eps->nconv;i<nv;i++) if (eps->errest[i] < eps->tol) conv++;
920:       for (i=eps->nconv;i<nv;i++) {
921:         if (eps->errest[i] < eps->tol) {
922:           iwork[j++]=i;
923:         } else iwork[conv+k++]=i;
924:       }
925:       for (i=eps->nconv;i<nv;i++) {
926:         a[i]=PetscRealPart(eps->eigr[i]);
927:         b[i]=eps->errest[i];
928:       }
929:       for (i=eps->nconv;i<nv;i++) {
930:         eps->eigr[i] = a[iwork[i]];
931:         eps->errest[i] = b[iwork[i]];
932:       }
933:       for (i=eps->nconv;i<nv;i++) {
934:         a[i]=PetscRealPart(eps->eigr[i]);
935:         b[i]=eps->errest[i];
936:       }
937:       DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
938:       DSGetArray(eps->ds,DS_MAT_Q,&Q);
939:       for (i=eps->nconv;i<nv;i++) {
940:         p=iwork[i];
941:         if (p!=i) {
942:           j=i+1;
943:           while (iwork[j]!=i) j++;
944:           iwork[j]=p;iwork[i]=i;
945:           for (k=0;k<nv;k++) {
946:             rtmp=Q[k+p*ld];Q[k+p*ld]=Q[k+i*ld];Q[k+i*ld]=rtmp;
947:           }
948:         }
949:       }
950:       DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
951:       k=eps->nconv+conv;
952:     }

954:     /* Checking values obtained for completing */
955:     for (i=0;i<k;i++) {
956:       sr->back[i]=eps->eigr[i];
957:     }
958:     STBackTransform(eps->st,k,sr->back,eps->eigi);
959:     count0=count1=0;
960:     for (i=0;i<k;i++) {
961:       lambda = PetscRealPart(sr->back[i]);
962:       if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
963:       if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
964:     }
965:     if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
966:     else {
967:       /* Checks completion */
968:       if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
969:         eps->reason = EPS_CONVERGED_TOL;
970:       } else {
971:         if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
972:         if (complIterating) {
973:           if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
974:         } else if (k >= eps->nev) {
975:           n0 = sPres->nsch[0]-count0;
976:           n1 = sPres->nsch[1]-count1;
977:           if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
978:             /* Iterating for completion*/
979:             complIterating = PETSC_TRUE;
980:             if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
981:             if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
982:             iterCompl = sr->iterCompl;
983:           } else eps->reason = EPS_CONVERGED_TOL;
984:         }
985:       }
986:     }
987:     /* Update l */
988:     if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
989:     else l = 0;
990:     if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
991:     if (breakdown) l=0;

993:     if (eps->reason == EPS_CONVERGED_ITERATING) {
994:       if (breakdown) {
995:         /* Start a new Lanczos factorization */
996:         PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
997:         EPSGetStartVector(eps,k,&breakdown);
998:         if (breakdown) {
999:           eps->reason = EPS_DIVERGED_BREAKDOWN;
1000:           PetscInfo(eps,"Unable to generate more start vectors\n");
1001:         }
1002:       } else {
1003:         /* Prepare the Rayleigh quotient for restart */
1004:         DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1005:         DSGetArray(eps->ds,DS_MAT_Q,&Q);
1006:         b = a + ld;
1007:         for (i=k;i<k+l;i++) {
1008:           a[i] = PetscRealPart(eps->eigr[i]);
1009:           b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1010:         }
1011:         DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1012:         DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1013:       }
1014:     }
1015:     /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1016:     DSGetMat(eps->ds,DS_MAT_Q,&U);
1017:     BVMultInPlace(eps->V,U,eps->nconv,k+l);
1018:     MatDestroy(&U);

1020:     /* Normalize u and append it to V */
1021:     if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1022:       BVCopyColumn(eps->V,nv,k+l);
1023:     }
1024:     eps->nconv = k;
1025:     if (eps->reason != EPS_CONVERGED_ITERATING) {
1026:       /* Store approximated values for next shift */
1027:       DSGetArray(eps->ds,DS_MAT_Q,&Q);
1028:       sr->nS = l;
1029:       for (i=0;i<l;i++) {
1030:         sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1031:         sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1032:       }
1033:       DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1034:     }
1035:   }
1036:   /* Check for completion */
1037:   for (i=0;i< eps->nconv; i++) {
1038:     if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1039:     else sPres->nconv[0]++;
1040:   }
1041:   sPres->comp[0] = (count0 >= sPres->nsch[0])?PETSC_TRUE:PETSC_FALSE;
1042:   sPres->comp[1] = (count1 >= sPres->nsch[1])?PETSC_TRUE:PETSC_FALSE;
1043:   if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1])SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1044:   PetscFree(iwork);
1045:   return(0);
1046: }

1048: /*
1049:   Obtains value of subsequent shift
1050: */
1053: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1054: {
1055:   PetscReal       lambda,d_prev;
1056:   PetscInt        i,idxP;
1057:   EPS_SR          sr;
1058:   EPS_shift       sPres,s;
1059:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

1062:   sr = ctx->sr;
1063:   sPres = sr->sPres;
1064:   if (sPres->neighb[side]) {
1065:   /* Completing a previous interval */
1066:     if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1067:       if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1068:       else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1069:     } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1070:   } else { /* (Only for side=1). Creating a new interval. */
1071:     if (sPres->neigs==0) {/* No value has been accepted*/
1072:       if (sPres->neighb[0]) {
1073:         /* Multiplying by 10 the previous distance */
1074:         *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1075:         sr->nleap++;
1076:         /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1077:         if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1078:       } else { /* First shift */
1079:         if (eps->nconv != 0) {
1080:           /* Unaccepted values give information for next shift */
1081:           idxP=0;/* Number of values left from shift */
1082:           for (i=0;i<eps->nconv;i++) {
1083:             lambda = PetscRealPart(sr->eigr[i]);
1084:             if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1085:             else break;
1086:           }
1087:           /* Avoiding subtraction of eigenvalues (might be the same).*/
1088:           if (idxP>0) {
1089:             d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[0]))/(idxP+0.3);
1090:           } else {
1091:             d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1092:           }
1093:           *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1094:         } else { /* No values found, no information for next shift */
1095:           SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1096:         }
1097:       }
1098:     } else { /* Accepted values found */
1099:       sr->nleap = 0;
1100:       /* Average distance of values in previous subinterval */
1101:       s = sPres->neighb[0];
1102:       while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1103:         s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1104:       }
1105:       if (s) {
1106:         d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1107:       } else { /* First shift. Average distance obtained with values in this shift */
1108:         /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1109:         if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1110:           d_prev =  PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1111:         } else {
1112:           d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1113:         }
1114:       }
1115:       /* Average distance is used for next shift by adding it to value on the right or to shift */
1116:       if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1117:         *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1118:       } else { /* Last accepted value is on the left of shift. Adding to shift */
1119:         *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1120:       }
1121:     }
1122:     /* End of interval can not be surpassed */
1123:     if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1124:   }/* of neighb[side]==null */
1125:   return(0);
1126: }

1128: /*
1129:   Function for sorting an array of real values
1130: */
1133: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1134: {
1135:   PetscReal      re;
1136:   PetscInt       i,j,tmp;

1139:   if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1140:   /* Insertion sort */
1141:   for (i=1;i<nr;i++) {
1142:     re = PetscRealPart(r[perm[i]]);
1143:     j = i-1;
1144:     while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1145:       tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1146:     }
1147:   }
1148:   return(0);
1149: }

1151: /* Stores the pairs obtained since the last shift in the global arrays */
1154: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1155: {
1156:   PetscErrorCode  ierr;
1157:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1158:   PetscReal       lambda,err,norm;
1159:   PetscInt        i,count;
1160:   PetscBool       iscayley;
1161:   EPS_SR          sr = ctx->sr;
1162:   EPS_shift       sPres;
1163:   Vec             v,w;

1166:   sPres = sr->sPres;
1167:   sPres->index = sr->indexEig;
1168:   count = sr->indexEig;
1169:   /* Back-transform */
1170:   STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1171:   PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1172:   /* Sort eigenvalues */
1173:   sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1174:   /* Values stored in global array */
1175:   for (i=0;i<eps->nconv;i++) {
1176:     lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1177:     err = eps->errest[eps->perm[i]];

1179:     if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1180:       if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1181:       sr->eigr[count] = lambda;
1182:       sr->errest[count] = err;
1183:       /* Explicit purification */
1184:       if (eps->purify) {
1185:         BVGetColumn(sr->V,count,&v);
1186:         BVGetColumn(eps->V,eps->perm[i],&w);
1187:         STApply(eps->st,w,v);
1188:         BVRestoreColumn(sr->V,count,&v);
1189:         BVRestoreColumn(eps->V,eps->perm[i],&w);
1190:         BVNormColumn(sr->V,count,NORM_2,&norm);
1191:         BVScaleColumn(sr->V,count,1.0/norm);
1192:       } else {
1193:         BVGetColumn(eps->V,eps->perm[i],&w);
1194:         BVInsertVec(sr->V,count,w);
1195:         BVRestoreColumn(eps->V,eps->perm[i],&w);
1196:         BVNormColumn(sr->V,count,NORM_2,&norm);
1197:         BVScaleColumn(sr->V,count,1.0/norm);
1198:       }
1199:       count++;
1200:     }
1201:   }
1202:   sPres->neigs = count - sr->indexEig;
1203:   sr->indexEig = count;
1204:   /* Global ordering array updating */
1205:   sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1206:   return(0);
1207: }

1211: static PetscErrorCode EPSLookForDeflation(EPS eps)
1212: {
1213:   PetscErrorCode  ierr;
1214:   PetscReal       val;
1215:   PetscInt        i,count0=0,count1=0;
1216:   EPS_shift       sPres;
1217:   PetscInt        ini,fin,k,idx0,idx1;
1218:   EPS_SR          sr;
1219:   Vec             v;
1220:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

1223:   sr = ctx->sr;
1224:   sPres = sr->sPres;

1226:   if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1227:   else ini = 0;
1228:   fin = sr->indexEig;
1229:   /* Selection of ends for searching new values */
1230:   if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1231:   else sPres->ext[0] = sPres->neighb[0]->value;
1232:   if (!sPres->neighb[1]) {
1233:     if (sr->hasEnd) sPres->ext[1] = sr->int1;
1234:     else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1235:   } else sPres->ext[1] = sPres->neighb[1]->value;
1236:   /* Selection of values between right and left ends */
1237:   for (i=ini;i<fin;i++) {
1238:     val=PetscRealPart(sr->eigr[sr->perm[i]]);
1239:     /* Values to the right of left shift */
1240:     if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1241:       if ((sr->dir)*(val - sPres->value) < 0) count0++;
1242:       else count1++;
1243:     } else break;
1244:   }
1245:   /* The number of values on each side are found */
1246:   if (sPres->neighb[0]) {
1247:     sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1248:     if (sPres->nsch[0]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1249:   } else sPres->nsch[0] = 0;

1251:   if (sPres->neighb[1]) {
1252:     sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1253:     if (sPres->nsch[1]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1254:   } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);

1256:   /* Completing vector of indexes for deflation */
1257:   idx0 = ini;
1258:   idx1 = ini+count0+count1;
1259:   k=0;
1260:   for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1261:   BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1262:   BVSetNumConstraints(sr->Vnext,k);
1263:   for (i=0;i<k;i++) {
1264:     BVGetColumn(sr->Vnext,-i-1,&v);
1265:     BVCopyVec(sr->V,sr->idxDef[i],v);
1266:     BVRestoreColumn(sr->Vnext,-i-1,&v);
1267:   }

1269:   /* For rational Krylov */
1270:   if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1271:     EPSPrepareRational(eps);
1272:   }
1273:   eps->nconv = 0;
1274:   /* Get rid of temporary Vnext */
1275:   BVDestroy(&eps->V);
1276:   eps->V = sr->Vnext;
1277:   sr->Vnext = NULL;
1278:   return(0);
1279: }

1283: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1284: {
1285:   PetscErrorCode  ierr;
1286:   PetscInt        i,lds;
1287:   PetscReal       newS;
1288:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1289:   EPS_SR          sr=ctx->sr;

1292:   if (ctx->global) {
1293:     EPSSolve_KrylovSchur_Slice(ctx->eps);
1294:     ctx->eps->state = EPS_STATE_SOLVED;
1295:     eps->reason = EPS_CONVERGED_TOL;
1296:     if (ctx->npart>1) {
1297:       /* Gather solution from subsolvers */
1298:       EPSSliceGatherSolution(eps);
1299:     } else {
1300:       eps->nconv = sr->numEigs;
1301:       eps->its   = ctx->eps->its;
1302:       PetscFree(ctx->inertias);
1303:       PetscFree(ctx->shifts);
1304:       EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1305:     }
1306:   } else {
1307:     if (ctx->npart==1) {
1308:       sr->eigr   = ctx->eps->eigr;
1309:       sr->eigi   = ctx->eps->eigi;
1310:       sr->perm   = ctx->eps->perm;
1311:       sr->errest = ctx->eps->errest;
1312:       sr->V      = ctx->eps->V;
1313:     }
1314:     /* Only with eigenvalues present in the interval ...*/
1315:     if (sr->numEigs==0) {
1316:       eps->reason = EPS_CONVERGED_TOL;
1317:       return(0);
1318:     }
1319:     /* Array of pending shifts */
1320:     sr->maxPend = 100; /* Initial size */
1321:     sr->nPend = 0;
1322:     PetscMalloc1(sr->maxPend,&sr->pending);
1323:     PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1324:     EPSCreateShift(eps,sr->int0,NULL,NULL);
1325:     /* extract first shift */
1326:     sr->sPrev = NULL;
1327:     sr->sPres = sr->pending[--sr->nPend];
1328:     sr->sPres->inertia = sr->inertia0;
1329:     eps->target = sr->sPres->value;
1330:     sr->s0 = sr->sPres;
1331:     sr->indexEig = 0;
1332:     /* Memory reservation for auxiliary variables */
1333:     lds = PetscMin(eps->mpd,eps->ncv);
1334:     PetscCalloc1(lds*lds,&sr->S);
1335:     PetscMalloc1(eps->ncv,&sr->back);
1336:     PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1337:     for (i=0;i<sr->numEigs;i++) {
1338:       sr->eigr[i]   = 0.0;
1339:       sr->eigi[i]   = 0.0;
1340:       sr->errest[i] = 0.0;
1341:       sr->perm[i]   = i;
1342:     }
1343:     /* Vectors for deflation */
1344:     PetscMalloc1(sr->numEigs,&sr->idxDef);
1345:     PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1346:     sr->indexEig = 0;
1347:     /* Main loop */
1348:     while (sr->sPres) {
1349:       /* Search for deflation */
1350:       EPSLookForDeflation(eps);
1351:       /* KrylovSchur */
1352:       EPSKrylovSchur_Slice(eps);

1354:       EPSStoreEigenpairs(eps);
1355:       /* Select new shift */
1356:       if (!sr->sPres->comp[1]) {
1357:         EPSGetNewShiftValue(eps,1,&newS);
1358:         EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1359:       }
1360:       if (!sr->sPres->comp[0]) {
1361:         /* Completing earlier interval */
1362:         EPSGetNewShiftValue(eps,0,&newS);
1363:         EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1364:       }
1365:       /* Preparing for a new search of values */
1366:       EPSExtractShift(eps);
1367:     }

1369:     /* Updating eps values prior to exit */
1370:     PetscFree(sr->S);
1371:     PetscFree(sr->idxDef);
1372:     PetscFree(sr->pending);
1373:     PetscFree(sr->back);
1374:     BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1375:     BVSetNumConstraints(sr->Vnext,0);
1376:     BVDestroy(&eps->V);
1377:     eps->V      = sr->Vnext;
1378:     eps->nconv  = sr->indexEig;
1379:     eps->reason = EPS_CONVERGED_TOL;
1380:     eps->its    = sr->itsKs;
1381:     eps->nds    = 0;
1382:   }
1383:   return(0);
1384: }