Actual source code: ks-slice.c
slepc-3.6.1 2015-09-03
1: /*
3: SLEPc eigensolver: "krylovschur"
5: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
7: References:
9: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
10: solving sparse symmetric generalized eigenproblems", SIAM J.
11: Matrix Anal. Appl. 15(1):228-272, 1994.
13: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
14: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
15: 2012.
17: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18: SLEPc - Scalable Library for Eigenvalue Problem Computations
19: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
21: This file is part of SLEPc.
23: SLEPc is free software: you can redistribute it and/or modify it under the
24: terms of version 3 of the GNU Lesser General Public License as published by
25: the Free Software Foundation.
27: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
28: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
30: more details.
32: You should have received a copy of the GNU Lesser General Public License
33: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
34: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35: */
37: #include <slepc/private/epsimpl.h>
38: #include krylovschur.h
40: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
45: PetscErrorCode ierr;
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
51: if (sr) {
52: if (ctx->npart>1) {
53: BVDestroy(&sr->V);
54: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
55: }
56: /* Reviewing list of shifts to free memory */
57: s = sr->s0;
58: if (s) {
59: while (s->neighb[1]) {
60: s = s->neighb[1];
61: PetscFree(s->neighb[0]);
62: }
63: PetscFree(s);
64: }
65: PetscFree(sr);
66: }
67: ctx->sr = PETSC_NULL;
68: return(0);
69: }
73: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
74: {
75: PetscErrorCode ierr;
76: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
79: if (!ctx->global) return(0);
80: /* Destroy auxiliary EPS */
81: EPSSliceResetSR(ctx->eps);
82: EPSDestroy(&ctx->eps);
83: if (ctx->npart>1) {
84: PetscSubcommDestroy(&ctx->subc);
85: if (ctx->commset) {
86: MPI_Comm_free(&ctx->commrank);
87: ctx->commset = PETSC_FALSE;
88: }
89: }
90: PetscFree(ctx->subintervals);
91: PetscFree(ctx->nconv_loc);
92: EPSSliceResetSR(eps);
93: PetscFree(ctx->inertias);
94: PetscFree(ctx->shifts);
95: return(0);
96: }
100: /*
101: EPSSliceAllocateSolution - Allocate memory storage for common variables such
102: as eigenvalues and eigenvectors. The argument extra is used for methods
103: that require a working basis slightly larger than ncv.
104: */
105: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
106: {
107: PetscErrorCode ierr;
108: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
109: PetscReal eta;
110: PetscInt k;
111: PetscLogDouble cnt;
112: BVType type;
113: BVOrthogType orthog_type;
114: BVOrthogRefineType orthog_ref;
115: BVOrthogBlockType ob_type;
116: Mat matrix;
117: Vec t;
118: EPS_SR sr = ctx->sr;
121: /* allocate space for eigenvalues and friends */
122: k = PetscMax(1,sr->numEigs);
123: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
124: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
125: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
126: PetscLogObjectMemory((PetscObject)eps,cnt);
128: /* allocate sr->V and transfer options from eps->V */
129: BVDestroy(&sr->V);
130: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
131: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
132: if (!eps->V) { EPSGetBV(eps,&eps->V); }
133: if (!((PetscObject)(eps->V))->type_name) {
134: BVSetType(sr->V,BVSVEC);
135: } else {
136: BVGetType(eps->V,&type);
137: BVSetType(sr->V,type);
138: }
139: STMatCreateVecs(eps->st,&t,NULL);
140: BVSetSizesFromVec(sr->V,t,k);
141: VecDestroy(&t);
142: EPS_SetInnerProduct(eps);
143: BVGetMatrix(eps->V,&matrix,NULL);
144: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
145: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
146: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
147: return(0);
148: }
152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154: PetscErrorCode ierr;
155: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156: BV V;
157: BVType type;
158: PetscReal eta;
159: BVOrthogType orthog_type;
160: BVOrthogRefineType orthog_ref;
161: BVOrthogBlockType ob_type;
162: Mat A,B=NULL,Ar,Br=NULL;
163: PetscInt i;
164: PetscReal h,a,b;
165: PetscMPIInt rank;
166: EPS_SR sr=ctx->sr;
167: PC pc;
168: PCType pctype;
169: KSP ksp;
170: KSPType ksptype;
171: STType sttype;
172: const MatSolverPackage stype;
175: EPSGetOperators(eps,&A,&B);
176: if (ctx->npart==1) {
177: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
178: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
179: EPSSetST(ctx->eps,eps->st);
180: a = eps->inta; b = eps->intb;
181: } else {
182: if (!ctx->subc) {
183: /* Create context for subcommunicators */
184: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
185: PetscSubcommSetNumber(ctx->subc,ctx->npart);
186: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
187: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
189: /* Duplicate matrices */
190: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
191: if (B) { MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br); }
192: }
194: /* Determine subintervals */
195: if (!ctx->subintset) { /* uniform distribution if no set by user */
196: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
197: h = (eps->intb-eps->inta)/ctx->npart;
198: a = eps->inta+ctx->subc->color*h;
199: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
200: PetscFree(ctx->subintervals);
201: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
202: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
203: ctx->subintervals[ctx->npart] = eps->intb;
204: } else {
205: a = ctx->subintervals[ctx->subc->color];
206: b = ctx->subintervals[ctx->subc->color+1];
207: }
209: if (!ctx->eps) {
210: /* Create auxiliary EPS */
211: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
212: EPSSetOperators(ctx->eps,Ar,Br);
213: MatDestroy(&Ar);
214: MatDestroy(&Br);
215: }
216: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
218: /* Transfer options for ST, KSP and PC */
219: STGetType(eps->st,&sttype);
220: STSetType(ctx->eps->st,sttype);
221: STGetKSP(eps->st,&ksp);
222: KSPGetType(ksp,&ksptype);
223: KSPGetPC(ksp,&pc);
224: PCGetType(pc,&pctype);
225: PCFactorGetMatSolverPackage(pc,&stype);
226: STGetKSP(ctx->eps->st,&ksp);
227: KSPSetType(ksp,ksptype);
228: KSPGetPC(ksp,&pc);
229: PCSetType(pc,pctype);
230: PCFactorSetMatSolverPackage(pc,stype);
232: /* Create subcommunicator grouping processes with same rank */
233: if (ctx->commrank) { MPI_Comm_free(&ctx->commrank); }
234: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
235: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
236: ctx->commset = PETSC_TRUE;
237: }
238: EPSSetConvergenceTest(ctx->eps,eps->conv);
239: EPSSetInterval(ctx->eps,a,b);
240: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
241: ctx_local->npart = ctx->npart;
242: ctx_local->detect = ctx->detect;
243: ctx_local->global = PETSC_FALSE;
244: ctx_local->eps = eps;
245: ctx_local->subc = ctx->subc;
246: ctx_local->commrank = ctx->commrank;
248: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
249: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
251: /* transfer options from eps->V */
252: EPSGetBV(ctx->eps,&V);
253: if (!eps->V) { EPSGetBV(eps,&eps->V); }
254: if (!((PetscObject)(eps->V))->type_name) {
255: BVSetType(V,BVSVEC);
256: } else {
257: BVGetType(eps->V,&type);
258: BVSetType(V,type);
259: }
260: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
261: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
262: ctx->eps->which = eps->which;
263: ctx->eps->max_it = eps->max_it;
264: ctx->eps->tol = eps->tol;
265: ctx->eps->purify = eps->purify;
266: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
267: EPSSetProblemType(ctx->eps,eps->problem_type);
268: EPSSetUp(ctx->eps);
269: ctx->eps->nconv = 0;
270: ctx->eps->its = 0;
271: for (i=0;i<ctx->eps->ncv;i++) {
272: ctx->eps->eigr[i] = 0.0;
273: ctx->eps->eigi[i] = 0.0;
274: ctx->eps->errest[i] = 0.0;
275: }
276: return(0);
277: }
281: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
282: {
284: KSP ksp;
285: PC pc;
286: Mat F;
289: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
290: if (inertia) *inertia = eps->n;
291: } else if (shift <= PETSC_MIN_REAL) {
292: if (inertia) *inertia = 0;
293: if (zeros) *zeros = 0;
294: } else {
295: STSetShift(eps->st,shift);
296: STSetUp(eps->st);
297: STGetKSP(eps->st,&ksp);
298: KSPGetPC(ksp,&pc);
299: PCFactorGetMatrix(pc,&F);
300: MatGetInertia(F,inertia,zeros,NULL);
301: }
302: return(0);
303: }
307: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
308: {
309: PetscErrorCode ierr;
310: PetscBool issinv;
311: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
312: EPS_SR sr,sr_loc,sr_glob;
313: PetscInt nEigs,dssz=1,i,zeros=0,off=0;
314: PetscMPIInt nproc,rank,aux;
315: MPI_Request req;
318: if (ctx->global) {
319: if (eps->inta==0.0 && eps->intb==0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Must define a computational interval when using EPS_ALL");
320: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
321: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
322: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
323: if (!((PetscObject)(eps->st))->type_name) { /* default to shift-and-invert */
324: STSetType(eps->st,STSINVERT);
325: }
326: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
327: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
328: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
329: if (!eps->max_it) eps->max_it = 100;
330: if (ctx->nev==1) ctx->nev = 40; /* nev not set, use default value */
331: if (ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
332: }
333: eps->ops->backtransform = NULL;
335: /* create spectrum slicing context and initialize it */
336: EPSSliceResetSR(eps);
337: PetscNewLog(eps,&sr);
338: ctx->sr = sr;
339: sr->itsKs = 0;
340: sr->nleap = 0;
341: sr->nMAXCompl = eps->nev/4;
342: sr->iterCompl = eps->max_it/4;
343: sr->sPres = NULL;
344: sr->nS = 0;
346: if (ctx->npart==1 || ctx->global) {
347: /* check presence of ends and finding direction */
348: if ((eps->inta > PETSC_MIN_REAL && eps->inta != 0.0) || eps->intb >= PETSC_MAX_REAL) {
349: sr->int0 = eps->inta;
350: sr->int1 = eps->intb;
351: sr->dir = 1;
352: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
353: sr->hasEnd = PETSC_FALSE;
354: } else sr->hasEnd = PETSC_TRUE;
355: } else {
356: sr->int0 = eps->intb;
357: sr->int1 = eps->inta;
358: sr->dir = -1;
359: sr->hasEnd = (eps->inta <= PETSC_MIN_REAL)?PETSC_FALSE:PETSC_TRUE;
360: }
361: }
362: if (ctx->global) {
363: if (ctx->npart>1) {
364: /* prevent computation of factorization in global eps unless npart==1 */
365: STSetTransform(eps->st,PETSC_FALSE);
366: }
367: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
368: /* create subintervals and initialize auxiliary eps for slicing runs */
369: EPSSliceGetEPS(eps);
370: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
371: if (ctx->npart>1) {
372: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
373: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
374: if (rank==0) {
375: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
376: }
377: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
378: PetscFree(ctx->nconv_loc);
379: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
380: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
381: if (sr->dir<0) off = 1;
382: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
383: PetscMPIIntCast(sr_loc->numEigs,&aux);
384: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
385: MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
386: } else {
387: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
388: if (!rank) {
389: PetscMPIIntCast(sr_loc->numEigs,&aux);
390: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
391: MPI_Allgather(&sr_loc->int0,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
392: }
393: PetscMPIIntCast(ctx->npart,&aux);
394: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
395: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
396: }
397: nEigs = 0;
398: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
399: } else {
400: nEigs = sr_loc->numEigs;
401: sr->inertia0 = sr_loc->inertia0;
402: }
403: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
404: sr->numEigs = nEigs;
405: eps->nev = nEigs;
406: eps->ncv = nEigs;
407: eps->mpd = nEigs;
408: } else {
409: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
410: sr_glob = ctx_glob->sr;
411: if (ctx->npart>1) {
412: sr->dir = sr_glob->dir;
413: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
414: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
415: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
416: else sr->hasEnd = PETSC_TRUE;
417: }
419: /* last process in eps comm computes inertia1 */
420: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
421: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
422: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
423: }
425: /* compute inertia0 */
426: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
427: if (zeros) { /* error in factorization */
428: if (ctx->npart==1 || ctx_glob->subintset || ((sr->dir>0 && ctx->subc->color==0) || (sr->dir<0 && ctx->subc->color==ctx->npart-1))) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
429: else { /* perturb shift */
430: sr->int0 *= (1.0+SLICE_PTOL);
431: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
432: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
433: }
434: }
435: if (ctx->npart>1) {
436: /* inertia1 is received from neighbour */
437: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
438: if (!rank) {
439: if ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) { /* send inertia0 to neighbour0 */
440: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
441: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
442: }
443: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
444: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
445: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
446: }
447: }
448: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
449: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
450: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
451: } else sr_glob->inertia1 = sr->inertia1;
452: }
454: /* number of eigenvalues in interval */
455: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
456: if (ctx->npart>1) {
457: /* memory allocate for subinterval eigenpairs */
458: EPSSliceAllocateSolution(eps,1);
459: }
460: dssz = eps->ncv+1;
461: }
462: DSSetType(eps->ds,DSHEP);
463: DSSetCompact(eps->ds,PETSC_TRUE);
464: DSAllocate(eps->ds,dssz);
465: return(0);
466: }
470: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
471: {
472: PetscErrorCode ierr;
473: Vec v,vg,v_loc;
474: IS is1,is2;
475: VecScatter vec_sc;
476: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
477: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
478: PetscScalar *array;
479: EPS_SR sr_loc;
480: BV V_loc;
483: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
484: V_loc = sr_loc->V;
486: /* Gather parallel eigenvectors */
487: BVGetColumn(eps->V,0,&v);
488: VecGetOwnershipRange(v,&n0,&m0);
489: BVRestoreColumn(eps->V,0,&v);
490: BVGetColumn(ctx->eps->V,0,&v);
491: VecGetLocalSize(v,&nloc);
492: BVRestoreColumn(ctx->eps->V,0,&v);
493: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
494: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
495: idx = -1;
496: for (si=0;si<ctx->npart;si++) {
497: j = 0;
498: for (i=n0;i<m0;i++) {
499: idx1[j] = i;
500: idx2[j++] = i+eps->n*si;
501: }
502: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
503: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
504: BVGetColumn(eps->V,0,&v);
505: VecScatterCreate(v,is1,vg,is2,&vec_sc);
506: BVRestoreColumn(eps->V,0,&v);
507: ISDestroy(&is1);
508: ISDestroy(&is2);
509: for (i=0;i<ctx->nconv_loc[si];i++) {
510: BVGetColumn(eps->V,++idx,&v);
511: if (ctx->subc->color==si) {
512: BVGetColumn(V_loc,i,&v_loc);
513: VecGetArray(v_loc,&array);
514: VecPlaceArray(vg,array);
515: }
516: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
517: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
518: if (ctx->subc->color==si) {
519: VecResetArray(vg);
520: VecRestoreArray(v_loc,&array);
521: BVRestoreColumn(V_loc,i,&v_loc);
522: }
523: BVRestoreColumn(eps->V,idx,&v);
524: }
525: VecScatterDestroy(&vec_sc);
526: }
527: PetscFree2(idx1,idx2);
528: VecDestroy(&vg);
529: return(0);
530: }
534: /*
535: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
536: */
537: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
538: {
539: PetscErrorCode ierr;
540: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
543: if (ctx->global && ctx->npart>1) {
544: EPSComputeVectors(ctx->eps);
545: EPSSliceGatherEigenVectors(eps);
546: }
547: return(0);
548: }
550: #define SWAP(a,b,t) {t=a;a=b;b=t;}
554: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
555: {
556: PetscErrorCode ierr;
557: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
558: PetscInt i=0,j,tmpi;
559: PetscReal v,tmpr;
560: EPS_shift s;
563: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
564: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
565: if (!ctx->sr->s0) { /* EPSSolve not called yet */
566: *n = 2;
567: } else {
568: *n = 1;
569: s = ctx->sr->s0;
570: while (s) {
571: (*n)++;
572: s = s->neighb[1];
573: }
574: }
575: PetscMalloc1(*n,shifts);
576: PetscMalloc1(*n,inertias);
577: if (!ctx->sr->s0) { /* EPSSolve not called yet */
578: (*shifts)[0] = ctx->sr->int0;
579: (*shifts)[1] = ctx->sr->int1;
580: (*inertias)[0] = ctx->sr->inertia0;
581: (*inertias)[1] = ctx->sr->inertia1;
582: } else {
583: s = ctx->sr->s0;
584: while (s) {
585: (*shifts)[i] = s->value;
586: (*inertias)[i++] = s->inertia;
587: s = s->neighb[1];
588: }
589: (*shifts)[i] = ctx->sr->int1;
590: (*inertias)[i] = ctx->sr->inertia1;
591: }
592: /* remove possible duplicate in last position */
593: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
594: /* sort result */
595: for (i=0;i<*n;i++) {
596: v = (*shifts)[i];
597: for (j=i+1;j<*n;j++) {
598: if (v > (*shifts)[j]) {
599: SWAP((*shifts)[i],(*shifts)[j],tmpr);
600: SWAP((*inertias)[i],(*inertias)[j],tmpi);
601: v = (*shifts)[i];
602: }
603: }
604: }
605: return(0);
606: }
610: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
611: {
612: PetscErrorCode ierr;
613: PetscMPIInt rank,nproc;
614: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
615: PetscInt i,idx,j;
616: PetscInt *perm_loc,off=0,*inertias_loc,ns;
617: PetscScalar *eigr_loc;
618: EPS_SR sr_loc;
619: PetscReal *shifts_loc;
620: PetscMPIInt *disp,*ns_loc,aux;
623: eps->nconv = 0;
624: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
625: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
627: /* Gather the shifts used and the inertias computed */
628: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
629: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
630: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
631: ns--;
632: for (i=0;i<ns;i++) {
633: inertias_loc[i] = inertias_loc[i+1];
634: shifts_loc[i] = shifts_loc[i+1];
635: }
636: }
637: PetscMalloc1(ctx->npart,&ns_loc);
638: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
639: PetscMPIIntCast(ns,&aux);
640: if (rank==0) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
641: PetscMPIIntCast(ctx->npart,&aux);
642: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
643: ctx->nshifts = 0;
644: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
645: PetscFree(ctx->inertias);
646: PetscFree(ctx->shifts);
647: PetscMalloc1(ctx->nshifts,&ctx->inertias);
648: PetscMalloc1(ctx->nshifts,&ctx->shifts);
650: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
651: eigr_loc = sr_loc->eigr;
652: perm_loc = sr_loc->perm;
653: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
654: PetscMalloc1(ctx->npart,&disp);
655: disp[0] = 0;
656: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
657: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
658: PetscMPIIntCast(sr_loc->numEigs,&aux);
659: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
660: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
661: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
662: PetscMPIIntCast(ns,&aux);
663: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
664: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
665: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
666: } else { /* subcommunicators with different size */
667: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
668: if (rank==0) {
669: PetscMPIIntCast(sr_loc->numEigs,&aux);
670: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
671: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
672: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
673: PetscMPIIntCast(ns,&aux);
674: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
675: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
676: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
677: }
678: PetscMPIIntCast(eps->nconv,&aux);
679: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
680: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
681: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
682: PetscMPIIntCast(ctx->nshifts,&aux);
683: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
684: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
685: }
686: /* Update global array eps->perm */
687: idx = ctx->nconv_loc[0];
688: for (i=1;i<ctx->npart;i++) {
689: off += ctx->nconv_loc[i-1];
690: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
691: }
693: /* Gather parallel eigenvectors */
694: PetscFree(ns_loc);
695: PetscFree(disp);
696: PetscFree(shifts_loc);
697: PetscFree(inertias_loc);
698: return(0);
699: }
701: /*
702: Fills the fields of a shift structure
703: */
706: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
707: {
708: PetscErrorCode ierr;
709: EPS_shift s,*pending2;
710: PetscInt i;
711: EPS_SR sr;
712: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
715: sr = ctx->sr;
716: PetscNewLog(eps,&s);
717: s->value = val;
718: s->neighb[0] = neighb0;
719: if (neighb0) neighb0->neighb[1] = s;
720: s->neighb[1] = neighb1;
721: if (neighb1) neighb1->neighb[0] = s;
722: s->comp[0] = PETSC_FALSE;
723: s->comp[1] = PETSC_FALSE;
724: s->index = -1;
725: s->neigs = 0;
726: s->nconv[0] = s->nconv[1] = 0;
727: s->nsch[0] = s->nsch[1]=0;
728: /* Inserts in the stack of pending shifts */
729: /* If needed, the array is resized */
730: if (sr->nPend >= sr->maxPend) {
731: sr->maxPend *= 2;
732: PetscMalloc1(sr->maxPend,&pending2);
733: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
734: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
735: PetscFree(sr->pending);
736: sr->pending = pending2;
737: }
738: sr->pending[sr->nPend++]=s;
739: return(0);
740: }
742: /* Prepare for Rational Krylov update */
745: static PetscErrorCode EPSPrepareRational(EPS eps)
746: {
747: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
748: PetscErrorCode ierr;
749: PetscInt dir,i,k,ld,nv;
750: PetscScalar *A;
751: EPS_SR sr = ctx->sr;
752: Vec v;
755: DSGetLeadingDimension(eps->ds,&ld);
756: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
757: dir*=sr->dir;
758: k = 0;
759: for (i=0;i<sr->nS;i++) {
760: if (dir*PetscRealPart(sr->S[i])>0.0) {
761: sr->S[k] = sr->S[i];
762: sr->S[sr->nS+k] = sr->S[sr->nS+i];
763: BVGetColumn(sr->Vnext,k,&v);
764: BVCopyVec(eps->V,eps->nconv+i,v);
765: BVRestoreColumn(sr->Vnext,k,&v);
766: k++;
767: if (k>=sr->nS/2)break;
768: }
769: }
770: /* Copy to DS */
771: DSGetArray(eps->ds,DS_MAT_A,&A);
772: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
773: for (i=0;i<k;i++) {
774: A[i*(1+ld)] = sr->S[i];
775: A[k+i*ld] = sr->S[sr->nS+i];
776: }
777: sr->nS = k;
778: DSRestoreArray(eps->ds,DS_MAT_A,&A);
779: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
780: DSSetDimensions(eps->ds,nv,0,0,k);
781: /* Append u to V */
782: BVGetColumn(sr->Vnext,sr->nS,&v);
783: BVCopyVec(eps->V,sr->nv,v);
784: BVRestoreColumn(sr->Vnext,sr->nS,&v);
785: return(0);
786: }
788: /* Provides next shift to be computed */
791: static PetscErrorCode EPSExtractShift(EPS eps)
792: {
793: PetscErrorCode ierr;
794: PetscInt iner,zeros=0;
795: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
796: EPS_SR sr;
797: PetscReal newShift;
798: EPS_shift sPres;
801: sr = ctx->sr;
802: if (sr->nPend > 0) {
803: sr->sPrev = sr->sPres;
804: sr->sPres = sr->pending[--sr->nPend];
805: sPres = sr->sPres;
806: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
807: if (zeros) {
808: newShift = sPres->value*(1.0+SLICE_PTOL);
809: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
810: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
811: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
812: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
813: sPres->value = newShift;
814: }
815: sr->sPres->inertia = iner;
816: eps->target = sr->sPres->value;
817: eps->reason = EPS_CONVERGED_ITERATING;
818: eps->its = 0;
819: } else sr->sPres = NULL;
820: return(0);
821: }
823: /*
824: Symmetric KrylovSchur adapted to spectrum slicing:
825: Allows searching an specific amount of eigenvalues in the subintervals left and right.
826: Returns whether the search has succeeded
827: */
830: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
831: {
832: PetscErrorCode ierr;
833: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
834: PetscInt i,conv,k,l,ld,nv,*iwork,j,p;
835: Mat U;
836: PetscScalar *Q,*A,rtmp;
837: PetscReal *a,*b,beta;
838: PetscBool breakdown;
839: PetscInt count0,count1;
840: PetscReal lambda;
841: EPS_shift sPres;
842: PetscBool complIterating;
843: PetscBool sch0,sch1;
844: PetscInt iterCompl=0,n0,n1;
845: EPS_SR sr = ctx->sr;
848: /* Spectrum slicing data */
849: sPres = sr->sPres;
850: complIterating =PETSC_FALSE;
851: sch1 = sch0 = PETSC_TRUE;
852: DSGetLeadingDimension(eps->ds,&ld);
853: PetscMalloc1(2*ld,&iwork);
854: count0=0;count1=0; /* Found on both sides */
855: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
856: /* Rational Krylov */
857: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
858: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
859: DSSetDimensions(eps->ds,l+1,0,0,0);
860: BVSetActiveColumns(eps->V,0,l+1);
861: DSGetMat(eps->ds,DS_MAT_Q,&U);
862: BVMultInPlace(eps->V,U,0,l+1);
863: MatDestroy(&U);
864: } else {
865: /* Get the starting Lanczos vector */
866: EPSGetStartVector(eps,0,NULL);
867: l = 0;
868: }
869: /* Restart loop */
870: while (eps->reason == EPS_CONVERGED_ITERATING) {
871: eps->its++; sr->itsKs++;
872: /* Compute an nv-step Lanczos factorization */
873: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
874: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
875: b = a + ld;
876: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
877: sr->nv = nv;
878: beta = b[nv-1];
879: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
880: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
881: if (l==0) {
882: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
883: } else {
884: DSSetState(eps->ds,DS_STATE_RAW);
885: }
886: BVSetActiveColumns(eps->V,eps->nconv,nv);
888: /* Solve projected problem and compute residual norm estimates */
889: if (eps->its == 1 && l > 0) {/* After rational update */
890: DSGetArray(eps->ds,DS_MAT_A,&A);
891: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
892: b = a + ld;
893: k = eps->nconv+l;
894: A[k*ld+k-1] = A[(k-1)*ld+k];
895: A[k*ld+k] = a[k];
896: for (j=k+1; j< nv; j++) {
897: A[j*ld+j] = a[j];
898: A[j*ld+j-1] = b[j-1] ;
899: A[(j-1)*ld+j] = b[j-1];
900: }
901: DSRestoreArray(eps->ds,DS_MAT_A,&A);
902: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
903: DSSolve(eps->ds,eps->eigr,NULL);
904: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
905: DSSetCompact(eps->ds,PETSC_TRUE);
906: } else { /* Restart */
907: DSSolve(eps->ds,eps->eigr,NULL);
908: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
909: }
910: /* Residual */
911: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
913: if (ctx->lock) {
914: /* Check convergence */
915: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
916: b = a + ld;
917: conv = 0;
918: j = k = eps->nconv;
919: for (i=eps->nconv;i<nv;i++) if (eps->errest[i] < eps->tol) conv++;
920: for (i=eps->nconv;i<nv;i++) {
921: if (eps->errest[i] < eps->tol) {
922: iwork[j++]=i;
923: } else iwork[conv+k++]=i;
924: }
925: for (i=eps->nconv;i<nv;i++) {
926: a[i]=PetscRealPart(eps->eigr[i]);
927: b[i]=eps->errest[i];
928: }
929: for (i=eps->nconv;i<nv;i++) {
930: eps->eigr[i] = a[iwork[i]];
931: eps->errest[i] = b[iwork[i]];
932: }
933: for (i=eps->nconv;i<nv;i++) {
934: a[i]=PetscRealPart(eps->eigr[i]);
935: b[i]=eps->errest[i];
936: }
937: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
938: DSGetArray(eps->ds,DS_MAT_Q,&Q);
939: for (i=eps->nconv;i<nv;i++) {
940: p=iwork[i];
941: if (p!=i) {
942: j=i+1;
943: while (iwork[j]!=i) j++;
944: iwork[j]=p;iwork[i]=i;
945: for (k=0;k<nv;k++) {
946: rtmp=Q[k+p*ld];Q[k+p*ld]=Q[k+i*ld];Q[k+i*ld]=rtmp;
947: }
948: }
949: }
950: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
951: k=eps->nconv+conv;
952: }
954: /* Checking values obtained for completing */
955: for (i=0;i<k;i++) {
956: sr->back[i]=eps->eigr[i];
957: }
958: STBackTransform(eps->st,k,sr->back,eps->eigi);
959: count0=count1=0;
960: for (i=0;i<k;i++) {
961: lambda = PetscRealPart(sr->back[i]);
962: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
963: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
964: }
965: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
966: else {
967: /* Checks completion */
968: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
969: eps->reason = EPS_CONVERGED_TOL;
970: } else {
971: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
972: if (complIterating) {
973: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
974: } else if (k >= eps->nev) {
975: n0 = sPres->nsch[0]-count0;
976: n1 = sPres->nsch[1]-count1;
977: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
978: /* Iterating for completion*/
979: complIterating = PETSC_TRUE;
980: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
981: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
982: iterCompl = sr->iterCompl;
983: } else eps->reason = EPS_CONVERGED_TOL;
984: }
985: }
986: }
987: /* Update l */
988: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
989: else l = 0;
990: if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
991: if (breakdown) l=0;
993: if (eps->reason == EPS_CONVERGED_ITERATING) {
994: if (breakdown) {
995: /* Start a new Lanczos factorization */
996: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
997: EPSGetStartVector(eps,k,&breakdown);
998: if (breakdown) {
999: eps->reason = EPS_DIVERGED_BREAKDOWN;
1000: PetscInfo(eps,"Unable to generate more start vectors\n");
1001: }
1002: } else {
1003: /* Prepare the Rayleigh quotient for restart */
1004: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1005: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1006: b = a + ld;
1007: for (i=k;i<k+l;i++) {
1008: a[i] = PetscRealPart(eps->eigr[i]);
1009: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1010: }
1011: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1012: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1013: }
1014: }
1015: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1016: DSGetMat(eps->ds,DS_MAT_Q,&U);
1017: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1018: MatDestroy(&U);
1020: /* Normalize u and append it to V */
1021: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1022: BVCopyColumn(eps->V,nv,k+l);
1023: }
1024: eps->nconv = k;
1025: if (eps->reason != EPS_CONVERGED_ITERATING) {
1026: /* Store approximated values for next shift */
1027: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1028: sr->nS = l;
1029: for (i=0;i<l;i++) {
1030: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1031: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1032: }
1033: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1034: }
1035: }
1036: /* Check for completion */
1037: for (i=0;i< eps->nconv; i++) {
1038: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1039: else sPres->nconv[0]++;
1040: }
1041: sPres->comp[0] = (count0 >= sPres->nsch[0])?PETSC_TRUE:PETSC_FALSE;
1042: sPres->comp[1] = (count1 >= sPres->nsch[1])?PETSC_TRUE:PETSC_FALSE;
1043: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1])SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1044: PetscFree(iwork);
1045: return(0);
1046: }
1048: /*
1049: Obtains value of subsequent shift
1050: */
1053: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1054: {
1055: PetscReal lambda,d_prev;
1056: PetscInt i,idxP;
1057: EPS_SR sr;
1058: EPS_shift sPres,s;
1059: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1062: sr = ctx->sr;
1063: sPres = sr->sPres;
1064: if (sPres->neighb[side]) {
1065: /* Completing a previous interval */
1066: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1067: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1068: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1069: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1070: } else { /* (Only for side=1). Creating a new interval. */
1071: if (sPres->neigs==0) {/* No value has been accepted*/
1072: if (sPres->neighb[0]) {
1073: /* Multiplying by 10 the previous distance */
1074: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1075: sr->nleap++;
1076: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1077: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1078: } else { /* First shift */
1079: if (eps->nconv != 0) {
1080: /* Unaccepted values give information for next shift */
1081: idxP=0;/* Number of values left from shift */
1082: for (i=0;i<eps->nconv;i++) {
1083: lambda = PetscRealPart(sr->eigr[i]);
1084: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1085: else break;
1086: }
1087: /* Avoiding subtraction of eigenvalues (might be the same).*/
1088: if (idxP>0) {
1089: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[0]))/(idxP+0.3);
1090: } else {
1091: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1092: }
1093: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1094: } else { /* No values found, no information for next shift */
1095: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1096: }
1097: }
1098: } else { /* Accepted values found */
1099: sr->nleap = 0;
1100: /* Average distance of values in previous subinterval */
1101: s = sPres->neighb[0];
1102: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1103: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1104: }
1105: if (s) {
1106: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1107: } else { /* First shift. Average distance obtained with values in this shift */
1108: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1109: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1110: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1111: } else {
1112: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1113: }
1114: }
1115: /* Average distance is used for next shift by adding it to value on the right or to shift */
1116: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1117: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1118: } else { /* Last accepted value is on the left of shift. Adding to shift */
1119: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1120: }
1121: }
1122: /* End of interval can not be surpassed */
1123: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1124: }/* of neighb[side]==null */
1125: return(0);
1126: }
1128: /*
1129: Function for sorting an array of real values
1130: */
1133: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1134: {
1135: PetscReal re;
1136: PetscInt i,j,tmp;
1139: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1140: /* Insertion sort */
1141: for (i=1;i<nr;i++) {
1142: re = PetscRealPart(r[perm[i]]);
1143: j = i-1;
1144: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1145: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1146: }
1147: }
1148: return(0);
1149: }
1151: /* Stores the pairs obtained since the last shift in the global arrays */
1154: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1155: {
1156: PetscErrorCode ierr;
1157: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1158: PetscReal lambda,err,norm;
1159: PetscInt i,count;
1160: PetscBool iscayley;
1161: EPS_SR sr = ctx->sr;
1162: EPS_shift sPres;
1163: Vec v,w;
1166: sPres = sr->sPres;
1167: sPres->index = sr->indexEig;
1168: count = sr->indexEig;
1169: /* Back-transform */
1170: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1171: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1172: /* Sort eigenvalues */
1173: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1174: /* Values stored in global array */
1175: for (i=0;i<eps->nconv;i++) {
1176: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1177: err = eps->errest[eps->perm[i]];
1179: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1180: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1181: sr->eigr[count] = lambda;
1182: sr->errest[count] = err;
1183: /* Explicit purification */
1184: if (eps->purify) {
1185: BVGetColumn(sr->V,count,&v);
1186: BVGetColumn(eps->V,eps->perm[i],&w);
1187: STApply(eps->st,w,v);
1188: BVRestoreColumn(sr->V,count,&v);
1189: BVRestoreColumn(eps->V,eps->perm[i],&w);
1190: BVNormColumn(sr->V,count,NORM_2,&norm);
1191: BVScaleColumn(sr->V,count,1.0/norm);
1192: } else {
1193: BVGetColumn(eps->V,eps->perm[i],&w);
1194: BVInsertVec(sr->V,count,w);
1195: BVRestoreColumn(eps->V,eps->perm[i],&w);
1196: BVNormColumn(sr->V,count,NORM_2,&norm);
1197: BVScaleColumn(sr->V,count,1.0/norm);
1198: }
1199: count++;
1200: }
1201: }
1202: sPres->neigs = count - sr->indexEig;
1203: sr->indexEig = count;
1204: /* Global ordering array updating */
1205: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1206: return(0);
1207: }
1211: static PetscErrorCode EPSLookForDeflation(EPS eps)
1212: {
1213: PetscErrorCode ierr;
1214: PetscReal val;
1215: PetscInt i,count0=0,count1=0;
1216: EPS_shift sPres;
1217: PetscInt ini,fin,k,idx0,idx1;
1218: EPS_SR sr;
1219: Vec v;
1220: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1223: sr = ctx->sr;
1224: sPres = sr->sPres;
1226: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1227: else ini = 0;
1228: fin = sr->indexEig;
1229: /* Selection of ends for searching new values */
1230: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1231: else sPres->ext[0] = sPres->neighb[0]->value;
1232: if (!sPres->neighb[1]) {
1233: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1234: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1235: } else sPres->ext[1] = sPres->neighb[1]->value;
1236: /* Selection of values between right and left ends */
1237: for (i=ini;i<fin;i++) {
1238: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1239: /* Values to the right of left shift */
1240: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1241: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1242: else count1++;
1243: } else break;
1244: }
1245: /* The number of values on each side are found */
1246: if (sPres->neighb[0]) {
1247: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1248: if (sPres->nsch[0]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1249: } else sPres->nsch[0] = 0;
1251: if (sPres->neighb[1]) {
1252: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1253: if (sPres->nsch[1]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1254: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1256: /* Completing vector of indexes for deflation */
1257: idx0 = ini;
1258: idx1 = ini+count0+count1;
1259: k=0;
1260: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1261: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1262: BVSetNumConstraints(sr->Vnext,k);
1263: for (i=0;i<k;i++) {
1264: BVGetColumn(sr->Vnext,-i-1,&v);
1265: BVCopyVec(sr->V,sr->idxDef[i],v);
1266: BVRestoreColumn(sr->Vnext,-i-1,&v);
1267: }
1269: /* For rational Krylov */
1270: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1271: EPSPrepareRational(eps);
1272: }
1273: eps->nconv = 0;
1274: /* Get rid of temporary Vnext */
1275: BVDestroy(&eps->V);
1276: eps->V = sr->Vnext;
1277: sr->Vnext = NULL;
1278: return(0);
1279: }
1283: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1284: {
1285: PetscErrorCode ierr;
1286: PetscInt i,lds;
1287: PetscReal newS;
1288: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1289: EPS_SR sr=ctx->sr;
1292: if (ctx->global) {
1293: EPSSolve_KrylovSchur_Slice(ctx->eps);
1294: ctx->eps->state = EPS_STATE_SOLVED;
1295: eps->reason = EPS_CONVERGED_TOL;
1296: if (ctx->npart>1) {
1297: /* Gather solution from subsolvers */
1298: EPSSliceGatherSolution(eps);
1299: } else {
1300: eps->nconv = sr->numEigs;
1301: eps->its = ctx->eps->its;
1302: PetscFree(ctx->inertias);
1303: PetscFree(ctx->shifts);
1304: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1305: }
1306: } else {
1307: if (ctx->npart==1) {
1308: sr->eigr = ctx->eps->eigr;
1309: sr->eigi = ctx->eps->eigi;
1310: sr->perm = ctx->eps->perm;
1311: sr->errest = ctx->eps->errest;
1312: sr->V = ctx->eps->V;
1313: }
1314: /* Only with eigenvalues present in the interval ...*/
1315: if (sr->numEigs==0) {
1316: eps->reason = EPS_CONVERGED_TOL;
1317: return(0);
1318: }
1319: /* Array of pending shifts */
1320: sr->maxPend = 100; /* Initial size */
1321: sr->nPend = 0;
1322: PetscMalloc1(sr->maxPend,&sr->pending);
1323: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1324: EPSCreateShift(eps,sr->int0,NULL,NULL);
1325: /* extract first shift */
1326: sr->sPrev = NULL;
1327: sr->sPres = sr->pending[--sr->nPend];
1328: sr->sPres->inertia = sr->inertia0;
1329: eps->target = sr->sPres->value;
1330: sr->s0 = sr->sPres;
1331: sr->indexEig = 0;
1332: /* Memory reservation for auxiliary variables */
1333: lds = PetscMin(eps->mpd,eps->ncv);
1334: PetscCalloc1(lds*lds,&sr->S);
1335: PetscMalloc1(eps->ncv,&sr->back);
1336: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1337: for (i=0;i<sr->numEigs;i++) {
1338: sr->eigr[i] = 0.0;
1339: sr->eigi[i] = 0.0;
1340: sr->errest[i] = 0.0;
1341: sr->perm[i] = i;
1342: }
1343: /* Vectors for deflation */
1344: PetscMalloc1(sr->numEigs,&sr->idxDef);
1345: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1346: sr->indexEig = 0;
1347: /* Main loop */
1348: while (sr->sPres) {
1349: /* Search for deflation */
1350: EPSLookForDeflation(eps);
1351: /* KrylovSchur */
1352: EPSKrylovSchur_Slice(eps);
1354: EPSStoreEigenpairs(eps);
1355: /* Select new shift */
1356: if (!sr->sPres->comp[1]) {
1357: EPSGetNewShiftValue(eps,1,&newS);
1358: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1359: }
1360: if (!sr->sPres->comp[0]) {
1361: /* Completing earlier interval */
1362: EPSGetNewShiftValue(eps,0,&newS);
1363: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1364: }
1365: /* Preparing for a new search of values */
1366: EPSExtractShift(eps);
1367: }
1369: /* Updating eps values prior to exit */
1370: PetscFree(sr->S);
1371: PetscFree(sr->idxDef);
1372: PetscFree(sr->pending);
1373: PetscFree(sr->back);
1374: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1375: BVSetNumConstraints(sr->Vnext,0);
1376: BVDestroy(&eps->V);
1377: eps->V = sr->Vnext;
1378: eps->nconv = sr->indexEig;
1379: eps->reason = EPS_CONVERGED_TOL;
1380: eps->its = sr->itsKs;
1381: eps->nds = 0;
1382: }
1383: return(0);
1384: }