Actual source code: nrefine.c
slepc-3.6.1 2015-09-03
1: /*
2: Newton refinement for nonlinear eigenproblem.
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A;
29: BV V;
30: PetscInt k,nmat;
31: PetscScalar *Mm;
32: PetscScalar *fih;
33: PetscScalar *work;
34: Vec w1,w2;
35: } FSubctx;
37: typedef struct {
38: Mat E[2];
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool computedt11;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W;
48: Vec t,tg,Rv,Vi,tp,tpg;
49: PetscInt idx;
50: } MatExplicitCtx;
54: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
55: {
57: FSubctx *ctx;
58: PetscInt i,k,nmat;
59: PetscScalar *fih,*c,*vals,sone=1.0,zero=0.0;
60: Mat *A;
61: PetscBLASInt k_,lda_,one=1;
62:
64: MatShellGetContext(M,&ctx);
65: fih = ctx->fih;
66: k = ctx->k;
67: nmat = ctx->nmat;
68: A = ctx->A;
69: c = ctx->work;
70: vals = ctx->work+k;
71: PetscBLASIntCast(k,&k_);
72: PetscBLASIntCast(nmat*k,&lda_);
73: BVDotVec(ctx->V,x,c);
74: MatMult(A[0],x,y);
75: for (i=1;i<nmat;i++) {
76: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,ctx->Mm+i*k,&lda_,c,&one,&zero,vals,&one));
77: VecCopy(x,ctx->w1);
78: BVMultVec(ctx->V,-1.0,fih[i],ctx->w1,vals);
79: MatMult(A[i],ctx->w1,ctx->w2);
80: VecAXPY(y,1.0,ctx->w2);
81: }
82: return(0);
83: }
87: /*
88: Evaluates the first d elements of the polynomial basis
89: on a given matrix H which is considered to be triangular
90: */
91: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
92: {
94: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
95: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
96: PetscScalar corr=0.0,alpha,beta;
97: PetscBLASInt k_,ldh_,ldfh_;
98:
100: PetscBLASIntCast(ldh,&ldh_);
101: PetscBLASIntCast(k,&k_);
102: PetscBLASIntCast(ldfh,&ldfh_);
103: PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
104: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
105: if (nm>1) {
106: t = b[0]/a[0];
107: off = k;
108: for (j=0;j<k;j++) {
109: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
110: fH[j+j*ldfh] -= t;
111: }
112: }
113: for (i=2;i<nm;i++) {
114: off = i*k;
115: if (i==2) {
116: for (j=0;j<k;j++) {
117: fH[off+j+j*ldfh] = 1.0;
118: H[j+j*ldh] -= b[1];
119: }
120: } else {
121: for (j=0;j<k;j++) {
122: PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
123: H[j+j*ldh] += corr-b[i-1];
124: }
125: }
126: corr = b[i-1];
127: beta = -g[i-1]/a[i-1];
128: alpha = 1/a[i-1];
129: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
130: }
131: for (j=0;j<k;j++) H[j+j*ldh] += corr;
132: return(0);
133: }
137: static PetscErrorCode NRefSysSetup_shell(PetscInt nmat,PetscReal *pcf,PetscInt k,PetscInt deg,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PetscScalar *Mm,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12)
138: {
140: PetscScalar *DHii,*Tr,*Ts,s,sone=1.0,zero=0.0;
141: PetscInt i,d,j,lda=nmat*k;
142: PetscReal *a=pcf,*b=pcf+nmat,*g=pcf+2*nmat;
143: PetscBLASInt k_,lda_,lds_,info;
144:
146: DHii = T12;
147: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
148: PetscMalloc2(k*k,&Tr,k*k,&Ts);
149: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
150: for (d=2;d<nmat;d++) {
151: for (j=0;j<k;j++) {
152: for (i=0;i<k;i++) {
153: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
154: }
155: }
156: }
157: /* T22 */
158: PetscBLASIntCast(lds,&lds_);
159: PetscBLASIntCast(k,&k_);
160: PetscBLASIntCast(lda,&lda_);
161: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
162: for (i=1;i<deg;i++) {
163: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
164: s = (i==1)?0.0:1.0;
165: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
166: }
168: /* T21 */
169: for (i=1;i<deg;i++) {
170: s = (i==1)?0.0:1.0;
171: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","C",&k_,&k_,&k_,fh+i,fH+i*k,&lda_,S,&lds_,&s,T21,&k_));
172: }
173: /* Mm */
174: PetscMemcpy(Tr,T21,k*k*sizeof(PetscScalar));
175: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&k_,T22,&k_,p22,Tr,&k_,&info));
176:
177: s = 0.0;
178: for (i=1;i<nmat;i++) {
179: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
180: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Ts,&k_,Tr,&k_,&s,Mm+i*k,&lda_));
181: for (j=0;j<k;j++) {
182: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
183: }
184: }
185: PetscFree2(Tr,Ts);
186: return(0);
187: }
191: static PetscErrorCode NRefSysSolve_shell(Mat *A,KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lw)
192: {
194: PetscScalar *t0,*t1,zero=0.0,none=-1.0,sone=1.0;
195: PetscBLASInt k_,one=1,info,lda_;
196: PetscInt i,lda=nmat*k,nwu=0;
197: Vec w;
198: KSPConvergedReason reason;
201: t0 = work+nwu;
202: nwu += k;
203: t1 = work+nwu;
204: nwu += k;
205: PetscBLASIntCast(lda,&lda_);
206: PetscBLASIntCast(k,&k_);
207: for (i=0;i<k;i++) t0[i] = Rh[i];
208: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,t0,&k_,&info));
209: for (i=1;i<nmat;i++) {
210: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,T12+i*k,&lda_,t0,&one,&zero,t1,&one));
211: BVMultVec(V,1.0,0.0,t,t1);
212: BVGetColumn(W,i,&w);
213: MatMult(A[i],t,w);
214: BVRestoreColumn(W,i,&w);
215: }
216: for (i=0;i<nmat-1;i++) t1[i]=-1.0;
217: BVSetActiveColumns(W,1,nmat);
218: BVMultVec(W,1.0,1.0,Rv,t1);
219: KSPSolve(ksp,Rv,dVi);
220: KSPGetConvergedReason(ksp,&reason);
221: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
222: BVDotVec(V,dVi,t1);
223: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&none,T21,&k_,t1,&one,&zero,dHi,&one));
224: for (i=0;i<k;i++) dHi[i] += Rh[i];
225: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,dHi,&k_,&info));
226: return(0);
227: }
231: /*
232: Computes the residual P(H,V*S)*e_j for the polynomial
233: */
234: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t,PetscScalar *work,PetscInt lw)
235: {
237: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
238: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
239: PetscInt i,ii,jj,nwu=0,lda;
240: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
241: Mat M0;
242: Vec w;
243:
245: h = work+nwu;
246: nwu += k*nmat;
247: lda = k*nmat;
248: PetscBLASIntCast(k,&k_);
249: PetscBLASIntCast(lds,&lds_);
250: PetscBLASIntCast(lda,&lda_);
251: PetscBLASIntCast(nmat,&nmat_);
252: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
253: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
254: BVSetActiveColumns(W,0,nmat);
255: BVMult(W,1.0,0.0,V,M0);
256: MatDestroy(&M0);
258: BVGetColumn(W,0,&w);
259: MatMult(A[0],w,Rv);
260: BVRestoreColumn(W,0,&w);
261: for (i=1;i<nmat;i++) {
262: BVGetColumn(W,i,&w);
263: MatMult(A[i],w,t);
264: BVRestoreColumn(W,i,&w);
265: VecAXPY(Rv,1.0,t);
266: }
267: /* Update right-hand side */
268: if (j) {
269: DS0 = work+nwu;
270: nwu += k*k;
271: DS1 = work+nwu;
272: nwu += k*k;
273: PetscBLASIntCast(ldh,&ldh_);
274: Z = work+nwu;
275: nwu += k*k;
276: PetscMemzero(Z,k*k*sizeof(PetscScalar));
277: PetscMemzero(DS0,k*k*sizeof(PetscScalar));
278: PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
279: /* Update DfH */
280: for (i=1;i<nmat;i++) {
281: if (i>1) {
282: beta = -g[i-1];
283: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
284: tt += -b[i-1];
285: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
286: tt = b[i-1];
287: beta = 1.0/a[i-1];
288: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
289: F = DS0; DS0 = DS1; DS1 = F;
290: } else {
291: PetscMemzero(DS1,k*k*sizeof(PetscScalar));
292: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
293: }
294: for (jj=j;jj<k;jj++) {
295: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
296: }
297: }
298: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
299: /* Update right-hand side */
300: PetscBLASIntCast(2*k,&k2_);
301: PetscBLASIntCast(j,&j_);
302: PetscBLASIntCast(k+rds,&krds_);
303: c0 = DS0;
304: PetscMemzero(Rh,k*sizeof(PetscScalar));
305: for (i=0;i<nmat;i++) {
306: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
307: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
308: BVMultVec(V,1.0,0.0,t,h);
309: BVSetActiveColumns(dV,0,rds);
310: BVMultVec(dV,1.0,1.0,t,h+k);
311: BVGetColumn(W,i,&w);
312: MatMult(A[i],t,w);
313: BVRestoreColumn(W,i,&w);
314: if (i>0 && i<nmat-1) {
315: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
316: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
317: }
318: }
319:
320: for (i=0;i<nmat;i++) h[i] = -1.0;
321: BVMultVec(W,1.0,1.0,Rv,h);
322: }
323: return(0);
324: }
328: static PetscErrorCode NRefSysIter_shell(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lwork)
329: {
331: PetscInt nwu=0,nmat=pep->nmat,deg=nmat-1,i;
332: PetscScalar *T22,*T21,*T12;
333: PetscBLASInt *p22;
334: FSubctx *ctx;
335: Mat M,*A;
336: PetscBool flg;
339: STGetTransform(pep->st,&flg);
340: if (flg) {
341: PetscMalloc1(pep->nmat,&A);
342: for (i=0;i<pep->nmat;i++) {
343: STGetTOperators(pep->st,i,&A[i]);
344: }
345: } else A = pep->A;
346: PetscMalloc1(k,&p22);
347: T22 = work+nwu;
348: nwu += k*k;
349: T21 = work+nwu;
350: nwu += k*k;
351: T12 = work+nwu;
352: nwu += nmat*k*k;
353: KSPGetOperators(ksp,&M,NULL);
354: MatShellGetContext(M,&ctx);
355: /* Update the matrix for the system */
356: NRefSysSetup_shell(nmat,pep->pbc,k,deg,fH,S,lds,fh,h,ctx->Mm,T22,p22,T21,T12);
357: /* Solve system */
358: NRefSysSolve_shell(A,ksp,nmat,Rv,Rh,k,T22,p22,T21,T12,V,dVi,dHi,W,t,work+nwu,lwork-nwu);
359: PetscFree(p22);
360: if (flg) {
361: PetscFree(A);
362: }
363: return(0);
364: }
368: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
369: {
370: PetscErrorCode ierr;
371: PetscInt nwu=0,i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
372: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
373: Mat M,*E=matctx->E,*A,*At,Mk,Md;
374: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
375: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
376: PetscBLASInt lds_,lda_,k_;
377: const PetscInt *idxmc;
378: const PetscScalar *valsc,*carray;
379: MatStructure str;
380: Vec vc,vc0;
381: PetscBool flg;
382:
384: T22 = work+nwu;
385: nwu += k*k;
386: T21 = work+nwu;
387: nwu += k*k;
388: T12 = work+nwu;
389: nwu += nmat*k*k;
390: Tr = work+nwu;
391: nwu += k*k;
392: Ts = work+nwu;
393: nwu += k*k;
394: STGetMatStructure(pep->st,&str);
395: KSPGetOperators(ksp,&M,NULL);
396: MatGetOwnershipRange(E[1],&n1,&m1);
397: MatGetOwnershipRange(E[0],&n0,&m0);
398: MatGetOwnershipRange(M,&n,NULL);
399: PetscMalloc1(nmat,&ts);
400: STGetTransform(pep->st,&flg);
401: if (flg) {
402: PetscMalloc1(pep->nmat,&At);
403: for (i=0;i<pep->nmat;i++) {
404: STGetTOperators(pep->st,i,&At[i]);
405: }
406: } else At = pep->A;
407: if (matctx->subc) A = matctx->A;
408: else A = At;
409: /* Form the explicit system matrix */
410: DHii = T12;
411: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
412: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
413: for (d=2;d<nmat;d++) {
414: for (j=0;j<k;j++) {
415: for (i=0;i<k;i++) {
416: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
417: }
418: }
419: }
421: /* T11 */
422: if (!matctx->computedt11) {
423: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
424: PEPEvaluateBasis(pep,h,0,Ts,NULL);
425: for (j=1;j<nmat;j++) {
426: MatAXPY(E[0],Ts[j],A[j],str);
427: }
428: }
429: for (i=n0;i<m0;i++) {
430: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
431: idx = n+i-n0;
432: for (j=0;j<ncols;j++) {
433: idxg[j] = matctx->map0[idxmc[j]];
434: }
435: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
436: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
437: }
439: /* T22 */
440: PetscBLASIntCast(lds,&lds_);
441: PetscBLASIntCast(k,&k_);
442: PetscBLASIntCast(lda,&lda_);
443: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
444: for (i=1;i<deg;i++) {
445: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
446: s = (i==1)?0.0:1.0;
447: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
448: }
449: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
450: for (i=0;i<m1-n1;i++) {
451: idx = n+m0-n0+i;
452: for (j=0;j<k;j++) {
453: Tr[j] = T22[n1+i+j*k];
454: }
455: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
456: }
458: /* T21 */
459: for (i=1;i<deg;i++) {
460: s = (i==1)?0.0:1.0;
461: ss = PetscConj(fh[i]);
462: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
463: }
464: BVSetActiveColumns(W,0,k);
465: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
466: BVMult(W,1.0,0.0,V,Mk);
467: for (i=0;i<k;i++) {
468: BVGetColumn(W,i,&vc);
469: VecConjugate(vc);
470: VecGetArrayRead(vc,&carray);
471: idx = matctx->map1[i];
472: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
473: VecRestoreArrayRead(vc,&carray);
474: BVRestoreColumn(W,i,&vc);
475: }
477: /* T12 */
478: for (i=1;i<nmat;i++) {
479: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
480: for (j=0;j<k;j++) {
481: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
482: }
483: }
484: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
485: for (i=0;i<nmat;i++) ts[i] = 1.0;
486: for (j=0;j<k;j++) {
487: MatDenseGetArray(Md,&array);
488: PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
489: MatDenseRestoreArray(Md,&array);
490: BVSetActiveColumns(W,0,nmat-1);
491: BVMult(W,1.0,0.0,V,Md);
492: for (i=nmat-1;i>0;i--) {
493: BVGetColumn(W,i-1,&vc0);
494: BVGetColumn(W,i,&vc);
495: MatMult(A[i],vc0,vc);
496: BVRestoreColumn(W,i-1,&vc0);
497: BVRestoreColumn(W,i,&vc);
498: }
499: BVSetActiveColumns(W,1,nmat);
500: BVGetColumn(W,0,&vc0);
501: BVMultVec(W,1.0,0.0,vc0,ts);
502: VecGetArrayRead(vc0,&carray);
503: idx = matctx->map1[j];
504: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
505: VecRestoreArrayRead(vc0,&carray);
506: BVRestoreColumn(W,0,&vc0);
507: }
508: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
509: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
510: KSPSetOperators(ksp,M,M);
511: KSPSetUp(ksp);
512: PetscFree(ts);
513: MatDestroy(&Mk);
514: MatDestroy(&Md);
515: if (flg) {
516: PetscFree(At);
517: }
518: return(0);
519: }
520:
523: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
524: {
525: PetscErrorCode ierr;
526: PetscInt n0,m0,n1,m1,i;
527: PetscScalar *arrayV;
528: const PetscScalar *array;
529: KSPConvergedReason reason;
532: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
533: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
534: /* Right side */
535: VecGetArrayRead(Rv,&array);
536: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
537: VecRestoreArrayRead(Rv,&array);
538: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
539: VecAssemblyBegin(matctx->tN);
540: VecAssemblyEnd(matctx->tN);
542: /* Solve */
543: KSPSolve(ksp,matctx->tN,matctx->ttN);
544: KSPGetConvergedReason(ksp,&reason);
545: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
546:
547: /* Retrieve solution */
548: VecGetArray(dVi,&arrayV);
549: VecGetArrayRead(matctx->ttN,&array);
550: PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
551: VecRestoreArray(dVi,&arrayV);
552: if (!matctx->subc) {
553: VecGetArray(matctx->t1,&arrayV);
554: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
555: VecRestoreArray(matctx->t1,&arrayV);
556: VecRestoreArrayRead(matctx->ttN,&array);
557: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
558: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
559: VecGetArrayRead(matctx->vseq,&array);
560: for (i=0;i<k;i++) dHi[i] = array[i];
561: VecRestoreArrayRead(matctx->vseq,&array);
562: }
563: return(0);
564: }
568: static PetscErrorCode NRefSysIter_explicit(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
569: {
570: PetscErrorCode ierr;
571: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
572: PetscScalar *array2,h;
573: const PetscScalar *array;
574: Vec R,Vi;
575:
577: if (!matctx->subc) {
578: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
579: h = H[i+i*ldh];
580: idx = i;
581: R = Rv;
582: Vi = dVi;
583: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W,work,lwork);
584: } else {
585: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
586: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
587: h = H[idx+idx*ldh];
588: matctx->idx = idx;
589: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W,work,lwork);
590: } else idx = matctx->idx;
591: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
592: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
593: VecGetArrayRead(matctx->tg,&array);
594: VecPlaceArray(matctx->t,array);
595: VecCopy(matctx->t,matctx->Rv);
596: VecResetArray(matctx->t);
597: VecRestoreArrayRead(matctx->tg,&array);
598: R = matctx->Rv;
599: Vi = matctx->Vi;
600: }
601: if (idx==i && idx<k) {
602: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
603: }
604: if (matctx->subc) {
605: VecGetLocalSize(Vi,&m);
606: VecGetArrayRead(Vi,&array);
607: VecGetArray(matctx->tg,&array2);
608: PetscMemcpy(array2,array,m*sizeof(PetscScalar));
609: VecRestoreArray(matctx->tg,&array2);
610: VecRestoreArrayRead(Vi,&array);
611: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
612: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
613: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
614: VecGetArrayRead(matctx->ttN,&array);
615: VecPlaceArray(matctx->tp,array+m0-n0);
616: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
617: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
618: VecResetArray(matctx->tp);
619: VecRestoreArrayRead(matctx->ttN,&array);
620: VecGetArrayRead(matctx->tpg,&array);
621: for (j=0;j<k;j++) dHi[j] = array[j];
622: VecRestoreArrayRead(matctx->tpg,&array);
623: }
624: return(0);
625: }
629: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PetscScalar *work,PetscInt lw,MatExplicitCtx *matctx)
630: {
632: PetscInt i,j,nmat=pep->nmat,nwu=0,lda=nmat*k;
633: PetscScalar h,*fh,*Rh,*DfH;
634: PetscReal norm;
635: BV W;
636: Vec Rv,t,dvi;
637: FSubctx *ctx;
638: Mat M,*At;
639: PetscBool flg,lindep;
642: *rds = 0;
643: DfH = work+nwu;
644: nwu += nmat*k*k;
645: Rh = work+nwu;
646: nwu += k;
647: BVCreateVec(pep->V,&t);
648: BVCreateVec(pep->V,&Rv);
649: KSPGetOperators(ksp,&M,NULL);
650: if (matctx) {
651: fh = work+nwu;
652: nwu += nmat;
653: } else {
654: MatShellGetContext(M,&ctx);
655: fh = ctx->fih;
656: }
657: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
658: PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
659: PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
660: STGetTransform(pep->st,&flg);
661: if (flg) {
662: PetscMalloc1(pep->nmat,&At);
663: for (i=0;i<pep->nmat;i++) {
664: STGetTOperators(pep->st,i,&At[i]);
665: }
666: } else At = pep->A;
668: /* Main loop for computing the ith columns of dX and dS */
669: for (i=0;i<k;i++) {
670: h = H[i+i*ldh];
672: /* Compute and update i-th column of the right hand side */
673: PetscMemzero(Rh,k*sizeof(PetscScalar));
674: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t,work+nwu,lw-nwu);
676: /* Update and solve system */
677: BVGetColumn(dV,i,&dvi);
678: if (matctx) {
679: NRefSysIter_explicit(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W,work+nwu,lw-nwu);
680: if (i==0) matctx->computedt11 = PETSC_FALSE;
681: } else {
682: for (j=0;j<nmat;j++) fh[j] = fH[j*k+i+i*lda];
683: NRefSysIter_shell(pep,k,ksp,fH,S,lds,fh,h,Rv,Rh,pep->V,dvi,dH+i*k,W,t,work+nwu,lw-nwu);
684: }
685: /* Orthogonalize computed solution */
686: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
687: BVRestoreColumn(dV,i,&dvi);
688: if (!lindep) {
689: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
690: if (!lindep) {
691: dVS[k+i+i*2*k] = norm;
692: BVScaleColumn(dV,i,1.0/norm);
693: (*rds)++;
694: }
695: }
696: }
697: BVSetActiveColumns(dV,0,*rds);
698: VecDestroy(&t);
699: VecDestroy(&Rv);
700: BVDestroy(&W);
701: if (flg) {
702: PetscFree(At);
703: }
704: return(0);
705: }
709: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscInt *prs,PetscScalar *work,PetscInt lwork)
710: {
712: PetscInt i,j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,nwu=0,rs=*prs,ldg;
713: PetscScalar *T,*G,*tau,*array,sone=1.0,zero=0.0;
714: PetscBLASInt rs_,lds_,k_,ldh_,lw_,info,ldg_,lda_;
715: Mat M0;
718: T = work+nwu;
719: nwu += rs*k;
720: tau = work+nwu;
721: nwu += k;
722: PetscBLASIntCast(lds,&lds_);
723: PetscBLASIntCast(lda,&lda_);
724: PetscBLASIntCast(k,&k_);
725: PetscBLASIntCast(lwork-nwu,&lw_);
726: if (rs>k) { /* Truncate S to have k columns*/
727: for (j=0;j<k;j++) {
728: PetscMemcpy(T+j*rs,S+j*lds,rs*sizeof(PetscScalar));
729: }
730: PetscBLASIntCast(rs,&rs_);
731: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&rs_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
732: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
733: /* Copy triangular matrix in S */
734: PetscMemzero(S,lds*k*sizeof(PetscScalar));
735: for (j=0;j<k;j++) for (i=0;i<=j;i++) S[j*lds+i] = T[j*rs+i];
736: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&rs_,&k_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
737: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
738: MatCreateSeqDense(PETSC_COMM_SELF,rs,k,NULL,&M0);
739: MatDenseGetArray(M0,&array);
740: for (j=0;j<k;j++) {
741: PetscMemcpy(array+j*rs,T+j*rs,rs*sizeof(PetscScalar));
742: }
743: MatDenseRestoreArray(M0,&array);
744: BVSetActiveColumns(pep->V,0,rs);
745: BVMultInPlace(pep->V,M0,0,k);
746: BVSetActiveColumns(pep->V,0,k);
747: MatDestroy(&M0);
748: *prs = rs = k;
749: }
750: /* Form auxiliary matrix for the orthogonalization step */
751: G = work+nwu;
752: ldg = deg*k;
753: nwu += ldg*k;
754: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
755: PetscBLASIntCast(ldg,&ldg_);
756: PetscBLASIntCast(lwork-nwu,&lw_);
757: PetscBLASIntCast(ldh,&ldh_);
758: for (j=0;j<deg;j++) {
759: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
760: }
761: /* Orthogonalize and update S */
762: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work+nwu,&lw_,&info));
763: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
764: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
766: /* Update H */
767: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
768: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
769: return(0);
770: }
774: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds,PetscScalar *work,PetscInt lwork)
775: {
777: PetscInt i,j,nmat=pep->nmat,lda=nmat*k,nwu=0;
778: PetscScalar *tau,*array;
779: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,lw_,info,k2_;
780: Mat M0;
783: tau = work+nwu;
784: nwu += k;
785: PetscBLASIntCast(lds,&lds_);
786: PetscBLASIntCast(lda,&lda_);
787: PetscBLASIntCast(ldh,&ldh_);
788: PetscBLASIntCast(k,&k_);
789: PetscBLASIntCast(2*k,&k2_);
790: PetscBLASIntCast((k+rds),&kdrs_);
791: /* Update H */
792: for (j=0;j<k;j++) {
793: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
794: }
795: /* Update V */
796: for (j=0;j<k;j++) {
797: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
798: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
799: }
800: PetscBLASIntCast(lwork-nwu,&lw_);
801: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
802: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
803: /* Copy triangular matrix in S */
804: for (j=0;j<k;j++) {
805: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
806: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
807: }
808: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
809: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
810: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
811: MatDenseGetArray(M0,&array);
812: for (j=0;j<k;j++) {
813: PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
814: }
815: MatDenseRestoreArray(M0,&array);
816: BVMultInPlace(pep->V,M0,0,k);
817: if (rds) {
818: MatDenseGetArray(M0,&array);
819: for (j=0;j<k;j++) {
820: PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
821: }
822: MatDenseRestoreArray(M0,&array);
823: BVMultInPlace(dV,M0,0,k);
824: BVAXPY(pep->V,1.0,dV);
825: }
826: MatDestroy(&M0);
827: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,&k,work,lwork);
828: return(0);
829: }
833: static PetscErrorCode PEPNRefSetUpMatrices(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,Mat *M,Mat *P,MatExplicitCtx *matctx,PetscBool ini)
834: {
835: PetscErrorCode ierr;
836: FSubctx *ctx;
837: PetscScalar t,*coef;
838: const PetscScalar *array;
839: MatStructure str;
840: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
841: MPI_Comm comm;
842: PetscMPIInt np;
843: const PetscInt *rgs0,*rgs1;
844: Mat B,C,*E,*A,*At;
845: IS is1,is2;
846: Vec v;
847: PetscBool flg;
850: PetscMalloc1(nmat,&coef);
851: STGetTransform(pep->st,&flg);
852: if (flg) {
853: PetscMalloc1(pep->nmat,&At);
854: for (i=0;i<pep->nmat;i++) {
855: STGetTOperators(pep->st,i,&At[i]);
856: }
857: } else At = pep->A;
858: if (matctx) {
859: if (ini) {
860: if (matctx->subc) {
861: A = matctx->A;
862: comm = PetscSubcommChild(matctx->subc);
863: } else {
864: A = At;
865: PetscObjectGetComm((PetscObject)pep,&comm);
866: }
867: E = matctx->E;
868: STGetMatStructure(pep->st,&str);
869: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
870: j = (matctx->subc)?matctx->subc->color:0;
871: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
872: for (j=1;j<nmat;j++) {
873: MatAXPY(E[0],coef[j],A[j],str);
874: }
875: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
876: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
877: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
878: MatGetOwnershipRange(E[0],&n0,&m0);
879: MatGetOwnershipRange(E[1],&n1,&m1);
880: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
881: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
882: /* T12 and T21 are computed from V and V*, so,
883: they must have the same column and row ranges */
884: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
885: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
886: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
887: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
888: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
889: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
890: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
891: SlepcMatTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],M);
892: *P = *M;
893: MatDestroy(&B);
894: MatDestroy(&C);
895: matctx->computedt11 = PETSC_TRUE;
896: MatGetSize(E[0],NULL,&N0);
897: MatGetSize(E[1],NULL,&N1);
898: MPI_Comm_size(PetscObjectComm((PetscObject)*M),&np);
899: MatGetOwnershipRanges(E[0],&rgs0);
900: MatGetOwnershipRanges(E[1],&rgs1);
901: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
902: /* Create column (and row) mapping */
903: for (p=0;p<np;p++) {
904: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
905: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
906: }
907: MatCreateVecs(*M,NULL,&matctx->tN);
908: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
909: VecDuplicate(matctx->tN,&matctx->ttN);
910: if (matctx->subc) {
911: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
912: count = np*k;
913: PetscMalloc2(count,&idx1,count,&idx2);
914: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
915: VecGetOwnershipRange(matctx->tp,&l0,NULL);
916: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
917: for (si=0;si<matctx->subc->n;si++) {
918: if (matctx->subc->color==si) {
919: j=0;
920: if (matctx->subc->color==si) {
921: for (p=0;p<np;p++) {
922: for (i=n1;i<m1;i++) {
923: idx1[j] = l0+i-n1;
924: idx2[j++] =p*k+i;
925: }
926: }
927: }
928: count = np*(m1-n1);
929: } else count =0;
930: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
931: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
932: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
933: ISDestroy(&is1);
934: ISDestroy(&is2);
935: }
936: PetscFree2(idx1,idx2);
937: } else {
938: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
939: }
940: } else {
941: if (matctx->subc) {
942: /* Scatter vectors pep->V */
943: for (i=0;i<k;i++) {
944: BVGetColumn(pep->V,i,&v);
945: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
946: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
947: BVRestoreColumn(pep->V,i,&v);
948: VecGetArrayRead(matctx->tg,&array);
949: VecPlaceArray(matctx->t,(const PetscScalar*)array);
950: BVInsertVec(matctx->V,i,matctx->t);
951: VecResetArray(matctx->t);
952: VecRestoreArrayRead(matctx->tg,&array);
953: }
954: }
955: }
956: } else {
957: if (ini) {
958: PetscObjectGetComm((PetscObject)pep,&comm);
959: MatGetSize(At[0],&m0,&n0);
960: PetscMalloc1(1,&ctx);
961: STGetMatStructure(pep->st,&str);
962: /* Create a shell matrix to solve the linear system */
963: ctx->A = At;
964: ctx->V = pep->V;
965: ctx->k = k; ctx->nmat = nmat;
966: PetscMalloc3(k*k*nmat,&ctx->Mm,2*k*k,&ctx->work,nmat,&ctx->fih);
967: PetscMemzero(ctx->Mm,k*k*nmat*sizeof(PetscScalar));
968: BVCreateVec(pep->V,&ctx->w1);
969: BVCreateVec(pep->V,&ctx->w2);
970: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,M);
971: MatShellSetOperation(*M,MATOP_MULT,(void(*)(void))MatFSMult);
972: }
973: /* Compute a precond matrix for the system */
974: t = 0.0;
975: for (j=0;j<k;j++) t += H[j+j*ldh];
976: t /= k;
977: if (ini) {
978: MatDuplicate(At[0],MAT_COPY_VALUES,P);
979: } else {
980: MatCopy(At[0],*P,str);
981: }
982: PEPEvaluateBasis(pep,t,0,coef,NULL);
983: for (j=1;j<nmat;j++) {
984: MatAXPY(*P,coef[j],At[j],str);
985: }
986: }
987: PetscFree(coef);
988: if (flg) {
989: PetscFree(At);
990: }
991: return(0);
992: }
996: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
997: {
998: PetscErrorCode ierr;
999: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1000: IS is1,is2;
1001: BVType type;
1002: Vec v;
1003: const PetscScalar *array;
1004: Mat *A;
1005: PetscBool flg;
1008: STGetTransform(pep->st,&flg);
1009: if (flg) {
1010: PetscMalloc1(pep->nmat,&A);
1011: for (i=0;i<pep->nmat;i++) {
1012: STGetTOperators(pep->st,i,&A[i]);
1013: }
1014: } else A = pep->A;
1015:
1016: /* Duplicate pep matrices */
1017: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1018: for (i=0;i<pep->nmat;i++) {
1019: MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1020: }
1022: /* Create Scatter */
1023: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1024: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1025: VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1026: BVGetColumn(pep->V,0,&v);
1027: VecGetOwnershipRange(v,&n0,&m0);
1028: nloc0 = m0-n0;
1029: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1030: j = 0;
1031: for (si=0;si<matctx->subc->n;si++) {
1032: for (i=n0;i<m0;i++) {
1033: idx1[j] = i;
1034: idx2[j++] = i+pep->n*si;
1035: }
1036: }
1037: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1038: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1039: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1040: ISDestroy(&is1);
1041: ISDestroy(&is2);
1042: for (si=0;si<matctx->subc->n;si++) {
1043: j=0;
1044: for (i=n0;i<m0;i++) {
1045: idx1[j] = i;
1046: idx2[j++] = i+pep->n*si;
1047: }
1048: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1049: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1050: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1051: ISDestroy(&is1);
1052: ISDestroy(&is2);
1053: }
1054: BVRestoreColumn(pep->V,0,&v);
1055: PetscFree2(idx1,idx2);
1057: /* Duplicate pep->V vecs */
1058: BVGetType(pep->V,&type);
1059: BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1060: BVSetType(matctx->V,type);
1061: BVSetSizesFromVec(matctx->V,matctx->t,k);
1062: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1063: for (i=0;i<k;i++) {
1064: BVGetColumn(pep->V,i,&v);
1065: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1066: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1067: BVRestoreColumn(pep->V,i,&v);
1068: VecGetArrayRead(matctx->tg,&array);
1069: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1070: BVInsertVec(matctx->V,i,matctx->t);
1071: VecResetArray(matctx->t);
1072: VecRestoreArrayRead(matctx->tg,&array);
1073: }
1075: VecDuplicate(matctx->t,&matctx->Rv);
1076: VecDuplicate(matctx->t,&matctx->Vi);
1077: if (flg) {
1078: PetscFree(A);
1079: }
1080: return(0);
1081: }
1085: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1086: {
1088: PetscInt i;
1091: VecScatterDestroy(&matctx->scatter_sub);
1092: for (i=0;i<matctx->subc->n;i++) {
1093: VecScatterDestroy(&matctx->scatter_id[i]);
1094: VecScatterDestroy(&matctx->scatterp_id[i]);
1095: }
1096: for (i=0;i<pep->nmat;i++) {
1097: MatDestroy(&matctx->A[i]);
1098: }
1099: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1100: BVDestroy(&matctx->V);
1101: BVDestroy(&matctx->W);
1102: VecDestroy(&matctx->t);
1103: VecDestroy(&matctx->tg);
1104: VecDestroy(&matctx->tp);
1105: VecDestroy(&matctx->tpg);
1106: VecDestroy(&matctx->Rv);
1107: VecDestroy(&matctx->Vi);
1108: return(0);
1109: }
1113: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds,PetscInt *prs)
1114: {
1116: PetscScalar *H,*work,*dH,*fH,*dVS;
1117: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nwu=0,lwa=0,nsubc=pep->npart,rds;
1118: PetscLogDouble cnt;
1119: PetscBLASInt k_,ld_,*p,info,lwork=0;
1120: BV dV;
1121: PetscBool sinvert,flg;
1122: Mat P,M;
1123: FSubctx *ctx;
1124: KSP ksp;
1125: MatExplicitCtx *matctx=NULL;
1126: Vec v;
1129: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1130: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1131: /* the input tolerance is not being taken into account (by the moment) */
1132: its = *maxits;
1133: lwa = (5+3*nmat)*k*k+2*k;
1134: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,lwa,&work);
1135: DSGetLeadingDimension(pep->ds,&ldh);
1136: DSGetArray(pep->ds,DS_MAT_A,&H);
1137: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1138: PetscMalloc1(2*k*k,&dVS);
1139: STGetTransform(pep->st,&flg);
1140: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1141: PetscBLASIntCast(k,&k_);
1142: PetscBLASIntCast(ldh,&ld_);
1143: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1144: if (sinvert){
1145: DSGetArray(pep->ds,DS_MAT_A,&H);
1146: PetscBLASIntCast(lwa-nwu,&lwork);
1147: PetscMalloc1(k,&p);
1148: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1149: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&lwork,&info));
1150: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1151: pep->ops->backtransform = NULL;
1152: }
1153: if (sigma!=0.0) {
1154: DSGetArray(pep->ds,DS_MAT_A,&H);
1155: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1156: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1157: pep->ops->backtransform = NULL;
1158: }
1159: }
1160: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1161: DSGetArray(pep->ds,DS_MAT_A,&H);
1162: for (j=0;j<k;j++) {
1163: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1164: }
1165: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1166: if (!flg) {
1167: /* Restore original values */
1168: for (i=0;i<pep->nmat;i++){
1169: pep->pbc[pep->nmat+i] *= pep->sfactor;
1170: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1171: }
1172: }
1173: }
1174: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1175: for (i=0;i<k;i++) {
1176: BVGetColumn(pep->V,i,&v);
1177: VecPointwiseMult(v,v,pep->Dr);
1178: BVRestoreColumn(pep->V,i,&v);
1179: }
1180: }
1181: DSGetArray(pep->ds,DS_MAT_A,&H);
1183: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,prs,work,lwa);
1184: /* check if H is in Schur form */
1185: for (i=0;i<k-1;i++) {
1186: if (H[i+1+i*ldh]!=0.0) {
1187: #if !defined(PETSC_USES_COMPLEX)
1188: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement require the complex Schur form of the projected matrix");
1189: #else
1190: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1191: #endif
1192: }
1193: }
1194: if (pep->schur && nsubc>1) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Split communicator only allowed for the explicit matrix option");
1195: if (!pep->schur && nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair's dimension");
1196: cnt = k*sizeof(PetscBLASInt)+(lwork+k*k*(nmat+3)+nmat+k)*sizeof(PetscScalar);
1197: PetscLogObjectMemory((PetscObject)pep,cnt);
1198: BVSetActiveColumns(pep->V,0,k);
1199: BVDuplicateResize(pep->V,k,&dV);
1200: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1201: PEPRefineGetKSP(pep,&ksp);
1202: if (!pep->schur) {
1203: PetscMalloc1(1,&matctx);
1204: if (nsubc>1) { /* spliting in subcommunicators */
1205: matctx->subc = pep->refinesubc;
1206: NRefSubcommSetup(pep,k,matctx,nsubc);
1207: } else matctx->subc=NULL;
1208: }
1210: /* Loop performing iterative refinements */
1211: for (i=0;i<its;i++) {
1212: /* Pre-compute the polynomial basis evaluated in H */
1213: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1214: PEPNRefSetUpMatrices(pep,k,H,ldh,&M,&P,matctx,(i==0)?PETSC_TRUE:PETSC_FALSE);
1215: KSPSetOperators(ksp,M,P);
1216: if (i==0) {
1217: KSPSetFromOptions(ksp);
1218: }
1219: /* Solve the linear system */
1220: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,ksp,work+nwu,lwa-nwu,matctx);
1221: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1222: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds,work+nwu,lwa-nwu);
1223: }
1224: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1225: if (!flg && sinvert) {
1226: PetscFree(p);
1227: }
1228: PetscFree3(dH,fH,work);
1229: PetscFree(dVS);
1230: BVDestroy(&dV);
1231: if (!pep->schur) {
1232: for (i=0;i<2;i++) {
1233: MatDestroy(&matctx->E[i]);
1234: }
1235: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1236: VecDestroy(&matctx->tN);
1237: VecDestroy(&matctx->ttN);
1238: VecDestroy(&matctx->t1);
1239: if (nsubc>1) {
1240: NRefSubcommDestroy(pep,matctx);
1241: } else {
1242: VecDestroy(&matctx->vseq);
1243: VecScatterDestroy(&matctx->scatterctx);
1244: }
1245: PetscFree(matctx);
1246: } else {
1247: MatShellGetContext(M,&ctx);
1248: PetscFree3(ctx->Mm,ctx->work,ctx->fih);
1249: VecDestroy(&ctx->w1);
1250: VecDestroy(&ctx->w2);
1251: PetscFree(ctx);
1252: MatDestroy(&P);
1253: }
1254: MatDestroy(&M);
1255: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1256: return(0);
1257: }