Actual source code: nrefine.c

slepc-3.6.1 2015-09-03
Report Typos and Errors
  1: /*
  2:    Newton refinement for nonlinear eigenproblem.

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2015, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/pepimpl.h>
 25: #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A;
 29:   BV           V;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *Mm;
 32:   PetscScalar  *fih;
 33:   PetscScalar  *work;
 34:   Vec           w1,w2;
 35: } FSubctx;

 37: typedef struct {
 38:   Mat          E[2];
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    computedt11;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W;
 48:   Vec          t,tg,Rv,Vi,tp,tpg;
 49:   PetscInt     idx;
 50: } MatExplicitCtx;

 54: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
 55: {
 57:   FSubctx        *ctx;
 58:   PetscInt       i,k,nmat;
 59:   PetscScalar    *fih,*c,*vals,sone=1.0,zero=0.0;
 60:   Mat            *A;
 61:   PetscBLASInt   k_,lda_,one=1;
 62:   
 64:   MatShellGetContext(M,&ctx);
 65:   fih  = ctx->fih;
 66:   k    = ctx->k;
 67:   nmat = ctx->nmat;
 68:   A    = ctx->A;
 69:   c    = ctx->work;
 70:   vals = ctx->work+k;
 71:   PetscBLASIntCast(k,&k_);
 72:   PetscBLASIntCast(nmat*k,&lda_);
 73:   BVDotVec(ctx->V,x,c);
 74:   MatMult(A[0],x,y);
 75:   for (i=1;i<nmat;i++) {
 76:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,ctx->Mm+i*k,&lda_,c,&one,&zero,vals,&one));
 77:     VecCopy(x,ctx->w1);
 78:     BVMultVec(ctx->V,-1.0,fih[i],ctx->w1,vals);
 79:     MatMult(A[i],ctx->w1,ctx->w2);
 80:     VecAXPY(y,1.0,ctx->w2);
 81:   }
 82:   return(0);
 83: }

 87: /*
 88:   Evaluates the first d elements of the polynomial basis
 89:   on a given matrix H which is considered to be triangular
 90: */
 91: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 92: {
 94:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 95:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 96:   PetscScalar    corr=0.0,alpha,beta;
 97:   PetscBLASInt   k_,ldh_,ldfh_;
 98:   
100:   PetscBLASIntCast(ldh,&ldh_);
101:   PetscBLASIntCast(k,&k_);
102:   PetscBLASIntCast(ldfh,&ldfh_);
103:   PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
104:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
105:   if (nm>1) {
106:     t = b[0]/a[0];
107:     off = k;
108:     for (j=0;j<k;j++) {
109:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
110:       fH[j+j*ldfh] -= t;
111:     }
112:   }
113:   for (i=2;i<nm;i++) {
114:     off = i*k;
115:     if (i==2) {
116:       for (j=0;j<k;j++) {
117:         fH[off+j+j*ldfh] = 1.0;
118:         H[j+j*ldh] -= b[1];
119:       }
120:     } else {
121:       for (j=0;j<k;j++) {
122:         PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
123:         H[j+j*ldh] += corr-b[i-1];
124:       }
125:     }
126:     corr  = b[i-1];
127:     beta  = -g[i-1]/a[i-1];
128:     alpha = 1/a[i-1];
129:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
130:   }
131:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
132:   return(0);
133: }

137: static PetscErrorCode NRefSysSetup_shell(PetscInt nmat,PetscReal *pcf,PetscInt k,PetscInt deg,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PetscScalar *Mm,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12)
138: {
140:   PetscScalar    *DHii,*Tr,*Ts,s,sone=1.0,zero=0.0;
141:   PetscInt       i,d,j,lda=nmat*k;
142:   PetscReal      *a=pcf,*b=pcf+nmat,*g=pcf+2*nmat;
143:   PetscBLASInt   k_,lda_,lds_,info;
144:   
146:   DHii = T12;
147:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
148:   PetscMalloc2(k*k,&Tr,k*k,&Ts);
149:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
150:   for (d=2;d<nmat;d++) {
151:     for (j=0;j<k;j++) {
152:       for (i=0;i<k;i++) {
153:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
154:       }
155:     }
156:   }
157:   /* T22 */
158:   PetscBLASIntCast(lds,&lds_);
159:   PetscBLASIntCast(k,&k_);
160:   PetscBLASIntCast(lda,&lda_);
161:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
162:   for (i=1;i<deg;i++) {
163:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
164:     s = (i==1)?0.0:1.0; 
165:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
166:   }

168:   /* T21 */
169:   for (i=1;i<deg;i++) {
170:     s = (i==1)?0.0:1.0; 
171:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","C",&k_,&k_,&k_,fh+i,fH+i*k,&lda_,S,&lds_,&s,T21,&k_));
172:   }
173:   /* Mm */
174:   PetscMemcpy(Tr,T21,k*k*sizeof(PetscScalar));
175:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&k_,T22,&k_,p22,Tr,&k_,&info));
176:   
177:   s = 0.0;
178:   for (i=1;i<nmat;i++) {
179:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));    
180:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Ts,&k_,Tr,&k_,&s,Mm+i*k,&lda_));
181:     for (j=0;j<k;j++) {
182:       PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
183:     }  
184:   }
185:   PetscFree2(Tr,Ts);
186:   return(0); 
187: }

191: static PetscErrorCode NRefSysSolve_shell(Mat *A,KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lw)
192: {
194:   PetscScalar    *t0,*t1,zero=0.0,none=-1.0,sone=1.0;
195:   PetscBLASInt   k_,one=1,info,lda_;
196:   PetscInt       i,lda=nmat*k,nwu=0;
197:   Vec            w;
198:   KSPConvergedReason reason;

201:   t0 = work+nwu;
202:   nwu += k;
203:   t1 = work+nwu;
204:   nwu += k;
205:   PetscBLASIntCast(lda,&lda_);
206:   PetscBLASIntCast(k,&k_);
207:   for (i=0;i<k;i++) t0[i] = Rh[i];
208:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,t0,&k_,&info));
209:   for (i=1;i<nmat;i++) {
210:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,T12+i*k,&lda_,t0,&one,&zero,t1,&one));
211:     BVMultVec(V,1.0,0.0,t,t1);
212:     BVGetColumn(W,i,&w);
213:     MatMult(A[i],t,w);
214:     BVRestoreColumn(W,i,&w);
215:   }
216:   for (i=0;i<nmat-1;i++) t1[i]=-1.0;
217:   BVSetActiveColumns(W,1,nmat);
218:   BVMultVec(W,1.0,1.0,Rv,t1);
219:   KSPSolve(ksp,Rv,dVi);
220:   KSPGetConvergedReason(ksp,&reason);
221:   if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
222:   BVDotVec(V,dVi,t1);
223:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&none,T21,&k_,t1,&one,&zero,dHi,&one));
224:   for (i=0;i<k;i++) dHi[i] += Rh[i];
225:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,dHi,&k_,&info));
226:   return(0);
227: }

231: /*
232:    Computes the residual P(H,V*S)*e_j for the polynomial
233: */
234: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t,PetscScalar *work,PetscInt lw)
235: {
237:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
238:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
239:   PetscInt       i,ii,jj,nwu=0,lda;
240:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
241:   Mat            M0;
242:   Vec            w;
243:   
245:   h = work+nwu;
246:   nwu += k*nmat;
247:   lda = k*nmat;
248:   PetscBLASIntCast(k,&k_);
249:   PetscBLASIntCast(lds,&lds_);
250:   PetscBLASIntCast(lda,&lda_);
251:   PetscBLASIntCast(nmat,&nmat_);
252:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
253:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
254:   BVSetActiveColumns(W,0,nmat);
255:   BVMult(W,1.0,0.0,V,M0);
256:   MatDestroy(&M0);

258:   BVGetColumn(W,0,&w);
259:   MatMult(A[0],w,Rv);
260:   BVRestoreColumn(W,0,&w);
261:   for (i=1;i<nmat;i++) {
262:     BVGetColumn(W,i,&w);
263:     MatMult(A[i],w,t);
264:     BVRestoreColumn(W,i,&w);
265:     VecAXPY(Rv,1.0,t);
266:   }
267:   /* Update right-hand side */
268:   if (j) {
269:     DS0 = work+nwu;
270:     nwu += k*k;
271:     DS1 = work+nwu;
272:     nwu += k*k;
273:     PetscBLASIntCast(ldh,&ldh_); 
274:     Z = work+nwu;
275:     nwu += k*k;
276:     PetscMemzero(Z,k*k*sizeof(PetscScalar));
277:     PetscMemzero(DS0,k*k*sizeof(PetscScalar));
278:     PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
279:     /* Update DfH */
280:     for (i=1;i<nmat;i++) {
281:       if (i>1) {
282:         beta = -g[i-1];
283:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
284:         tt += -b[i-1];
285:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
286:         tt = b[i-1];
287:         beta = 1.0/a[i-1];
288:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
289:         F = DS0; DS0 = DS1; DS1 = F;
290:       } else {
291:         PetscMemzero(DS1,k*k*sizeof(PetscScalar));
292:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
293:       } 
294:       for (jj=j;jj<k;jj++) {
295:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
296:       }
297:     }
298:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
299:     /* Update right-hand side */
300:     PetscBLASIntCast(2*k,&k2_); 
301:     PetscBLASIntCast(j,&j_);
302:     PetscBLASIntCast(k+rds,&krds_);
303:     c0 = DS0;
304:     PetscMemzero(Rh,k*sizeof(PetscScalar));
305:     for (i=0;i<nmat;i++) {
306:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
307:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
308:       BVMultVec(V,1.0,0.0,t,h);
309:       BVSetActiveColumns(dV,0,rds);
310:       BVMultVec(dV,1.0,1.0,t,h+k);
311:       BVGetColumn(W,i,&w);
312:       MatMult(A[i],t,w);
313:       BVRestoreColumn(W,i,&w);
314:       if (i>0 && i<nmat-1) {
315:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
316:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
317:       }
318:     }
319:      
320:     for (i=0;i<nmat;i++) h[i] = -1.0;
321:     BVMultVec(W,1.0,1.0,Rv,h);
322:   }
323:   return(0);
324: }

328: static PetscErrorCode NRefSysIter_shell(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lwork)
329: {
331:   PetscInt       nwu=0,nmat=pep->nmat,deg=nmat-1,i;
332:   PetscScalar    *T22,*T21,*T12;
333:   PetscBLASInt   *p22;
334:   FSubctx        *ctx;
335:   Mat            M,*A;
336:   PetscBool      flg;

339:   STGetTransform(pep->st,&flg);
340:   if (flg) {
341:     PetscMalloc1(pep->nmat,&A);
342:     for (i=0;i<pep->nmat;i++) {
343:       STGetTOperators(pep->st,i,&A[i]);
344:     }
345:   } else A = pep->A; 
346:   PetscMalloc1(k,&p22);
347:   T22 = work+nwu;
348:   nwu += k*k;
349:   T21 = work+nwu;
350:   nwu += k*k;
351:   T12 = work+nwu;
352:   nwu += nmat*k*k;
353:   KSPGetOperators(ksp,&M,NULL);
354:   MatShellGetContext(M,&ctx);
355:   /* Update the matrix for the system */
356:   NRefSysSetup_shell(nmat,pep->pbc,k,deg,fH,S,lds,fh,h,ctx->Mm,T22,p22,T21,T12);
357:   /* Solve system */
358:   NRefSysSolve_shell(A,ksp,nmat,Rv,Rh,k,T22,p22,T21,T12,V,dVi,dHi,W,t,work+nwu,lwork-nwu);
359:   PetscFree(p22);
360:   if (flg) {
361:     PetscFree(A);
362:   }
363:   return(0);
364: }

368: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
369: {
370:   PetscErrorCode    ierr;
371:   PetscInt          nwu=0,i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
372:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
373:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
374:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
375:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
376:   PetscBLASInt      lds_,lda_,k_;
377:   const PetscInt    *idxmc;
378:   const PetscScalar *valsc,*carray;
379:   MatStructure      str;
380:   Vec               vc,vc0;
381:   PetscBool         flg;
382:   
384:   T22 = work+nwu;
385:   nwu += k*k;
386:   T21 = work+nwu;
387:   nwu += k*k;
388:   T12 = work+nwu;
389:   nwu += nmat*k*k;
390:   Tr = work+nwu;
391:   nwu += k*k;
392:   Ts = work+nwu;
393:   nwu += k*k;
394:   STGetMatStructure(pep->st,&str);
395:   KSPGetOperators(ksp,&M,NULL);
396:   MatGetOwnershipRange(E[1],&n1,&m1);
397:   MatGetOwnershipRange(E[0],&n0,&m0);
398:   MatGetOwnershipRange(M,&n,NULL);
399:   PetscMalloc1(nmat,&ts);
400:   STGetTransform(pep->st,&flg);
401:   if (flg) {
402:     PetscMalloc1(pep->nmat,&At);
403:     for (i=0;i<pep->nmat;i++) {
404:       STGetTOperators(pep->st,i,&At[i]);
405:     }
406:   } else At = pep->A; 
407:   if (matctx->subc) A = matctx->A;
408:   else A = At;
409:   /* Form the explicit system matrix */
410:   DHii = T12;
411:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));  
412:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
413:   for (d=2;d<nmat;d++) {
414:     for (j=0;j<k;j++) {
415:       for (i=0;i<k;i++) {
416:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
417:       }
418:     }
419:   }

421:   /* T11 */
422:   if (!matctx->computedt11) {
423:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
424:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
425:     for (j=1;j<nmat;j++) {
426:       MatAXPY(E[0],Ts[j],A[j],str);
427:     }
428:   }
429:   for (i=n0;i<m0;i++) {
430:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
431:     idx = n+i-n0;
432:     for (j=0;j<ncols;j++) {
433:       idxg[j] = matctx->map0[idxmc[j]];
434:     }
435:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
436:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
437:   }

439:   /* T22 */
440:   PetscBLASIntCast(lds,&lds_);
441:   PetscBLASIntCast(k,&k_);
442:   PetscBLASIntCast(lda,&lda_);
443:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
444:   for (i=1;i<deg;i++) {
445:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
446:     s = (i==1)?0.0:1.0; 
447:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
448:   }
449:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
450:   for (i=0;i<m1-n1;i++) {
451:     idx = n+m0-n0+i;
452:     for (j=0;j<k;j++) {
453:       Tr[j] = T22[n1+i+j*k];
454:     }
455:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
456:   }

458:   /* T21 */
459:   for (i=1;i<deg;i++) {
460:     s = (i==1)?0.0:1.0;
461:     ss = PetscConj(fh[i]);
462:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
463:   }
464:   BVSetActiveColumns(W,0,k);
465:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
466:   BVMult(W,1.0,0.0,V,Mk);
467:   for (i=0;i<k;i++) {
468:     BVGetColumn(W,i,&vc);
469:     VecConjugate(vc);
470:     VecGetArrayRead(vc,&carray);
471:     idx = matctx->map1[i];
472:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
473:     VecRestoreArrayRead(vc,&carray);
474:     BVRestoreColumn(W,i,&vc);
475:   }

477:   /* T12 */  
478:   for (i=1;i<nmat;i++) {
479:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));    
480:     for (j=0;j<k;j++) {
481:       PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
482:     }  
483:   }
484:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
485:   for (i=0;i<nmat;i++) ts[i] = 1.0;
486:   for (j=0;j<k;j++) {
487:     MatDenseGetArray(Md,&array);
488:     PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
489:     MatDenseRestoreArray(Md,&array);
490:     BVSetActiveColumns(W,0,nmat-1);
491:     BVMult(W,1.0,0.0,V,Md);
492:     for (i=nmat-1;i>0;i--) {
493:       BVGetColumn(W,i-1,&vc0);
494:       BVGetColumn(W,i,&vc);
495:       MatMult(A[i],vc0,vc);
496:       BVRestoreColumn(W,i-1,&vc0);
497:       BVRestoreColumn(W,i,&vc);
498:     }
499:     BVSetActiveColumns(W,1,nmat);
500:     BVGetColumn(W,0,&vc0);
501:     BVMultVec(W,1.0,0.0,vc0,ts);
502:     VecGetArrayRead(vc0,&carray);
503:     idx = matctx->map1[j];
504:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);   
505:     VecRestoreArrayRead(vc0,&carray);
506:     BVRestoreColumn(W,0,&vc0);
507:   }
508:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
509:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
510:   KSPSetOperators(ksp,M,M);
511:   KSPSetUp(ksp);
512:   PetscFree(ts);
513:   MatDestroy(&Mk);
514:   MatDestroy(&Md);
515:   if (flg) {
516:     PetscFree(At);
517:   }
518:   return(0);
519: }
520:  
523: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
524: {
525:   PetscErrorCode    ierr;
526:   PetscInt          n0,m0,n1,m1,i;
527:   PetscScalar       *arrayV;
528:   const PetscScalar *array;
529:   KSPConvergedReason reason;

532:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
533:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);
534:   /* Right side */  
535:   VecGetArrayRead(Rv,&array);
536:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
537:   VecRestoreArrayRead(Rv,&array);
538:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
539:   VecAssemblyBegin(matctx->tN);
540:   VecAssemblyEnd(matctx->tN);

542:   /* Solve */
543:   KSPSolve(ksp,matctx->tN,matctx->ttN);
544:   KSPGetConvergedReason(ksp,&reason);
545:   if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
546:  
547:  /* Retrieve solution */
548:   VecGetArray(dVi,&arrayV);
549:   VecGetArrayRead(matctx->ttN,&array);
550:   PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
551:   VecRestoreArray(dVi,&arrayV);
552:   if (!matctx->subc) {
553:     VecGetArray(matctx->t1,&arrayV);
554:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
555:     VecRestoreArray(matctx->t1,&arrayV);
556:     VecRestoreArrayRead(matctx->ttN,&array);
557:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
558:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
559:     VecGetArrayRead(matctx->vseq,&array);
560:     for (i=0;i<k;i++) dHi[i] = array[i];
561:     VecRestoreArrayRead(matctx->vseq,&array);
562:   }
563:   return(0);
564: }

568: static PetscErrorCode NRefSysIter_explicit(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
569: {
570:   PetscErrorCode    ierr;
571:   PetscInt          j,m,lda=pep->nmat*k,n0,m0,idx;
572:   PetscScalar       *array2,h;
573:   const PetscScalar *array;
574:   Vec               R,Vi;
575:  
577:   if (!matctx->subc) {
578:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
579:     h   = H[i+i*ldh];
580:     idx = i;
581:     R   = Rv;
582:     Vi  = dVi;
583:     NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W,work,lwork);
584:   } else {
585:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
586:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
587:       h = H[idx+idx*ldh];
588:       matctx->idx = idx;
589:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W,work,lwork);
590:     } else idx = matctx->idx;
591:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
592:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
593:     VecGetArrayRead(matctx->tg,&array);
594:     VecPlaceArray(matctx->t,array);
595:     VecCopy(matctx->t,matctx->Rv);
596:     VecResetArray(matctx->t);
597:     VecRestoreArrayRead(matctx->tg,&array);
598:     R  = matctx->Rv;
599:     Vi = matctx->Vi;
600:   }
601:   if (idx==i && idx<k) {
602:     NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
603:   }
604:   if (matctx->subc) {
605:     VecGetLocalSize(Vi,&m);
606:     VecGetArrayRead(Vi,&array);
607:     VecGetArray(matctx->tg,&array2);
608:     PetscMemcpy(array2,array,m*sizeof(PetscScalar));
609:     VecRestoreArray(matctx->tg,&array2);
610:     VecRestoreArrayRead(Vi,&array);
611:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
612:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
613:     MatGetOwnershipRange(matctx->E[0],&n0,&m0);
614:     VecGetArrayRead(matctx->ttN,&array);
615:     VecPlaceArray(matctx->tp,array+m0-n0);
616:     VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);  
617:     VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);  
618:     VecResetArray(matctx->tp);
619:     VecRestoreArrayRead(matctx->ttN,&array);
620:     VecGetArrayRead(matctx->tpg,&array);
621:     for (j=0;j<k;j++) dHi[j] = array[j];
622:     VecRestoreArrayRead(matctx->tpg,&array);
623:   }
624:   return(0);
625: }

629: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PetscScalar *work,PetscInt lw,MatExplicitCtx *matctx)
630: {
632:   PetscInt       i,j,nmat=pep->nmat,nwu=0,lda=nmat*k;
633:   PetscScalar    h,*fh,*Rh,*DfH;
634:   PetscReal      norm;
635:   BV             W;
636:   Vec            Rv,t,dvi;
637:   FSubctx        *ctx;
638:   Mat            M,*At;
639:   PetscBool      flg,lindep;

642:   *rds = 0;
643:   DfH = work+nwu;
644:   nwu += nmat*k*k;
645:   Rh = work+nwu;
646:   nwu += k;
647:   BVCreateVec(pep->V,&t);
648:   BVCreateVec(pep->V,&Rv);
649:   KSPGetOperators(ksp,&M,NULL);
650:   if (matctx) {
651:     fh = work+nwu;
652:     nwu += nmat;
653:   } else {
654:     MatShellGetContext(M,&ctx);
655:     fh = ctx->fih;
656:   }
657:   BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
658:   PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
659:   PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
660:   STGetTransform(pep->st,&flg);
661:   if (flg) {
662:     PetscMalloc1(pep->nmat,&At);
663:     for (i=0;i<pep->nmat;i++) {
664:       STGetTOperators(pep->st,i,&At[i]);
665:     }
666:   } else At = pep->A; 

668:   /* Main loop for computing the ith columns of dX and dS */
669:   for (i=0;i<k;i++) {
670:     h = H[i+i*ldh];

672:     /* Compute and update i-th column of the right hand side */
673:     PetscMemzero(Rh,k*sizeof(PetscScalar));
674:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t,work+nwu,lw-nwu);

676:     /* Update and solve system */
677:     BVGetColumn(dV,i,&dvi);
678:     if (matctx) {
679:       NRefSysIter_explicit(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W,work+nwu,lw-nwu);
680:       if (i==0) matctx->computedt11 = PETSC_FALSE;
681:     } else {
682:       for (j=0;j<nmat;j++) fh[j] = fH[j*k+i+i*lda];
683:       NRefSysIter_shell(pep,k,ksp,fH,S,lds,fh,h,Rv,Rh,pep->V,dvi,dH+i*k,W,t,work+nwu,lw-nwu);
684:     }
685:     /* Orthogonalize computed solution */
686:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
687:     BVRestoreColumn(dV,i,&dvi);
688:     if (!lindep) {
689:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
690:       if (!lindep) {
691:         dVS[k+i+i*2*k] = norm;
692:         BVScaleColumn(dV,i,1.0/norm);
693:         (*rds)++;
694:       }
695:     }
696:   }
697:   BVSetActiveColumns(dV,0,*rds);
698:   VecDestroy(&t);
699:   VecDestroy(&Rv);
700:   BVDestroy(&W);
701:   if (flg) {
702:     PetscFree(At);
703:   }
704:   return(0);
705: }

709: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscInt *prs,PetscScalar *work,PetscInt lwork)
710: {
712:   PetscInt       i,j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,nwu=0,rs=*prs,ldg;
713:   PetscScalar    *T,*G,*tau,*array,sone=1.0,zero=0.0;
714:   PetscBLASInt   rs_,lds_,k_,ldh_,lw_,info,ldg_,lda_;
715:   Mat            M0;

718:   T = work+nwu;
719:   nwu += rs*k;
720:   tau = work+nwu;
721:   nwu += k;
722:   PetscBLASIntCast(lds,&lds_);
723:   PetscBLASIntCast(lda,&lda_);
724:   PetscBLASIntCast(k,&k_);
725:   PetscBLASIntCast(lwork-nwu,&lw_);
726:   if (rs>k) { /* Truncate S to have k columns*/
727:     for (j=0;j<k;j++) {
728:       PetscMemcpy(T+j*rs,S+j*lds,rs*sizeof(PetscScalar));
729:     }
730:     PetscBLASIntCast(rs,&rs_);
731:     PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&rs_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
732:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
733:     /* Copy triangular matrix in S */
734:     PetscMemzero(S,lds*k*sizeof(PetscScalar));
735:     for (j=0;j<k;j++) for (i=0;i<=j;i++) S[j*lds+i] = T[j*rs+i];
736:     PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&rs_,&k_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
737:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
738:     MatCreateSeqDense(PETSC_COMM_SELF,rs,k,NULL,&M0);
739:     MatDenseGetArray(M0,&array);
740:     for (j=0;j<k;j++) {
741:       PetscMemcpy(array+j*rs,T+j*rs,rs*sizeof(PetscScalar));
742:     }
743:     MatDenseRestoreArray(M0,&array);
744:     BVSetActiveColumns(pep->V,0,rs);
745:     BVMultInPlace(pep->V,M0,0,k);
746:     BVSetActiveColumns(pep->V,0,k);
747:     MatDestroy(&M0);
748:     *prs = rs = k;
749:   }
750:   /* Form auxiliary matrix for the orthogonalization step */
751:   G = work+nwu;
752:   ldg = deg*k;
753:   nwu += ldg*k;
754:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
755:   PetscBLASIntCast(ldg,&ldg_);
756:   PetscBLASIntCast(lwork-nwu,&lw_);
757:   PetscBLASIntCast(ldh,&ldh_);
758:   for (j=0;j<deg;j++) {
759:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
760:   }
761:   /* Orthogonalize and update S */
762:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work+nwu,&lw_,&info));
763:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
764:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

766:   /* Update H */
767:   PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
768:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
769:   return(0);
770: }

774: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds,PetscScalar *work,PetscInt lwork)
775: {
777:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k,nwu=0;
778:   PetscScalar    *tau,*array;
779:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,lw_,info,k2_;
780:   Mat            M0;

783:   tau = work+nwu;
784:   nwu += k;
785:   PetscBLASIntCast(lds,&lds_);
786:   PetscBLASIntCast(lda,&lda_);
787:   PetscBLASIntCast(ldh,&ldh_);
788:   PetscBLASIntCast(k,&k_);
789:   PetscBLASIntCast(2*k,&k2_);
790:   PetscBLASIntCast((k+rds),&kdrs_);
791:   /* Update H */
792:   for (j=0;j<k;j++) {
793:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
794:   }
795:   /* Update V */
796:   for (j=0;j<k;j++) {
797:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
798:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
799:   }
800:   PetscBLASIntCast(lwork-nwu,&lw_);
801:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
802:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
803:   /* Copy triangular matrix in S */
804:   for (j=0;j<k;j++) {
805:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
806:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
807:   }
808:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
809:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
810:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
811:   MatDenseGetArray(M0,&array);
812:   for (j=0;j<k;j++) {
813:     PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
814:   }
815:   MatDenseRestoreArray(M0,&array);
816:   BVMultInPlace(pep->V,M0,0,k);
817:   if (rds) {
818:     MatDenseGetArray(M0,&array);
819:     for (j=0;j<k;j++) {
820:       PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
821:     }
822:     MatDenseRestoreArray(M0,&array);
823:     BVMultInPlace(dV,M0,0,k);
824:     BVAXPY(pep->V,1.0,dV);
825:   }
826:   MatDestroy(&M0);
827:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds,&k,work,lwork);
828:   return(0);
829: }

833: static PetscErrorCode PEPNRefSetUpMatrices(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,Mat *M,Mat *P,MatExplicitCtx *matctx,PetscBool ini)
834: {
835:   PetscErrorCode    ierr;
836:   FSubctx           *ctx;
837:   PetscScalar       t,*coef;
838:   const PetscScalar *array; 
839:   MatStructure      str;
840:   PetscInt          j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
841:   MPI_Comm          comm;
842:   PetscMPIInt       np;
843:   const PetscInt    *rgs0,*rgs1;
844:   Mat               B,C,*E,*A,*At;
845:   IS                is1,is2;
846:   Vec               v;
847:   PetscBool         flg;

850:   PetscMalloc1(nmat,&coef);
851:   STGetTransform(pep->st,&flg);
852:   if (flg) {
853:     PetscMalloc1(pep->nmat,&At);
854:     for (i=0;i<pep->nmat;i++) {
855:       STGetTOperators(pep->st,i,&At[i]);
856:     }
857:   } else At = pep->A;
858:   if (matctx) {
859:     if (ini) {
860:       if (matctx->subc) {
861:         A = matctx->A;
862:         comm = PetscSubcommChild(matctx->subc);
863:       } else {
864:         A = At;
865:         PetscObjectGetComm((PetscObject)pep,&comm);
866:       }
867:       E = matctx->E;
868:       STGetMatStructure(pep->st,&str);
869:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
870:       j = (matctx->subc)?matctx->subc->color:0;
871:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
872:       for (j=1;j<nmat;j++) {
873:         MatAXPY(E[0],coef[j],A[j],str);
874:       }
875:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
876:       MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
877:       MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
878:       MatGetOwnershipRange(E[0],&n0,&m0);
879:       MatGetOwnershipRange(E[1],&n1,&m1);
880:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
881:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
882:       /* T12 and T21 are computed from V and V*, so,
883:          they must have the same column and row ranges */
884:       if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
885:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
886:       MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
887:       MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
888:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
889:       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
890:       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
891:       SlepcMatTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],M);
892:       *P = *M;
893:       MatDestroy(&B);
894:       MatDestroy(&C);
895:       matctx->computedt11 = PETSC_TRUE;
896:       MatGetSize(E[0],NULL,&N0);
897:       MatGetSize(E[1],NULL,&N1);
898:       MPI_Comm_size(PetscObjectComm((PetscObject)*M),&np);
899:       MatGetOwnershipRanges(E[0],&rgs0);
900:       MatGetOwnershipRanges(E[1],&rgs1);
901:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
902:       /* Create column (and row) mapping */
903:       for (p=0;p<np;p++) {
904:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
905:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
906:       }
907:       MatCreateVecs(*M,NULL,&matctx->tN);
908:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
909:       VecDuplicate(matctx->tN,&matctx->ttN);
910:       if (matctx->subc) {
911:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
912:         count = np*k;
913:         PetscMalloc2(count,&idx1,count,&idx2);
914:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
915:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
916:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
917:         for (si=0;si<matctx->subc->n;si++) {
918:           if (matctx->subc->color==si) {
919:             j=0;
920:             if (matctx->subc->color==si) {
921:               for (p=0;p<np;p++) {
922:                 for (i=n1;i<m1;i++) {
923:                   idx1[j] = l0+i-n1;
924:                   idx2[j++] =p*k+i;
925:                 }
926:               }
927:             }
928:             count = np*(m1-n1);
929:           } else count =0;
930:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
931:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
932:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
933:           ISDestroy(&is1);
934:           ISDestroy(&is2);
935:         }
936:         PetscFree2(idx1,idx2);
937:       } else {
938:         VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
939:       }
940:     } else {
941:       if (matctx->subc) {
942:         /* Scatter vectors pep->V */
943:         for (i=0;i<k;i++) {
944:           BVGetColumn(pep->V,i,&v);
945:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
946:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
947:           BVRestoreColumn(pep->V,i,&v);
948:           VecGetArrayRead(matctx->tg,&array);
949:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
950:           BVInsertVec(matctx->V,i,matctx->t);
951:           VecResetArray(matctx->t);
952:           VecRestoreArrayRead(matctx->tg,&array);
953:         }
954:       }
955:     }
956:   } else {
957:     if (ini) {
958:       PetscObjectGetComm((PetscObject)pep,&comm);
959:       MatGetSize(At[0],&m0,&n0);
960:       PetscMalloc1(1,&ctx);
961:       STGetMatStructure(pep->st,&str);
962:       /* Create a shell matrix to solve the linear system */
963:       ctx->A = At;
964:       ctx->V = pep->V;
965:       ctx->k = k; ctx->nmat = nmat;
966:       PetscMalloc3(k*k*nmat,&ctx->Mm,2*k*k,&ctx->work,nmat,&ctx->fih);
967:       PetscMemzero(ctx->Mm,k*k*nmat*sizeof(PetscScalar));
968:       BVCreateVec(pep->V,&ctx->w1);
969:       BVCreateVec(pep->V,&ctx->w2);
970:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,M);
971:       MatShellSetOperation(*M,MATOP_MULT,(void(*)(void))MatFSMult);
972:     }
973:     /* Compute a precond matrix for the system */
974:     t = 0.0;
975:     for (j=0;j<k;j++) t += H[j+j*ldh];
976:     t /= k;
977:     if (ini) {
978:       MatDuplicate(At[0],MAT_COPY_VALUES,P);
979:     } else {
980:       MatCopy(At[0],*P,str);
981:     }
982:     PEPEvaluateBasis(pep,t,0,coef,NULL);
983:     for (j=1;j<nmat;j++) {
984:       MatAXPY(*P,coef[j],At[j],str);
985:     }
986:   }
987:   PetscFree(coef);
988:   if (flg) {
989:     PetscFree(At);
990:   }
991:   return(0);
992: }

996: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
997: {
998:   PetscErrorCode    ierr;
999:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1000:   IS                is1,is2;
1001:   BVType            type;
1002:   Vec               v;
1003:   const PetscScalar *array;
1004:   Mat               *A;
1005:   PetscBool         flg;

1008:   STGetTransform(pep->st,&flg);
1009:   if (flg) {
1010:     PetscMalloc1(pep->nmat,&A);
1011:     for (i=0;i<pep->nmat;i++) {
1012:       STGetTOperators(pep->st,i,&A[i]);
1013:     }
1014:   } else A = pep->A;
1015:   
1016:   /* Duplicate pep matrices */
1017:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1018:   for (i=0;i<pep->nmat;i++) {
1019:     MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);    
1020:   }

1022:   /* Create Scatter */
1023:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1024:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1025:   VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1026:   BVGetColumn(pep->V,0,&v);
1027:   VecGetOwnershipRange(v,&n0,&m0);
1028:   nloc0 = m0-n0;
1029:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1030:   j = 0;
1031:   for (si=0;si<matctx->subc->n;si++) {
1032:     for (i=n0;i<m0;i++) {
1033:       idx1[j]   = i;
1034:       idx2[j++] = i+pep->n*si;
1035:     }
1036:   }
1037:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1038:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1039:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1040:   ISDestroy(&is1);
1041:   ISDestroy(&is2);
1042:   for (si=0;si<matctx->subc->n;si++) {
1043:     j=0;
1044:     for (i=n0;i<m0;i++) {
1045:       idx1[j] = i;
1046:       idx2[j++] = i+pep->n*si;
1047:     }
1048:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1049:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1050:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1051:     ISDestroy(&is1);
1052:     ISDestroy(&is2);
1053:   }
1054:   BVRestoreColumn(pep->V,0,&v);
1055:   PetscFree2(idx1,idx2);

1057:   /* Duplicate pep->V vecs */
1058:   BVGetType(pep->V,&type);
1059:   BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1060:   BVSetType(matctx->V,type);
1061:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1062:   BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1063:   for (i=0;i<k;i++) {
1064:     BVGetColumn(pep->V,i,&v);
1065:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1066:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1067:     BVRestoreColumn(pep->V,i,&v);
1068:     VecGetArrayRead(matctx->tg,&array);
1069:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1070:     BVInsertVec(matctx->V,i,matctx->t);
1071:     VecResetArray(matctx->t);
1072:     VecRestoreArrayRead(matctx->tg,&array);
1073:   }

1075:   VecDuplicate(matctx->t,&matctx->Rv);
1076:   VecDuplicate(matctx->t,&matctx->Vi);
1077:   if (flg) {
1078:     PetscFree(A);
1079:   }
1080:   return(0);
1081: }

1085: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1086: {
1088:   PetscInt       i;

1091:   VecScatterDestroy(&matctx->scatter_sub);
1092:   for (i=0;i<matctx->subc->n;i++) {
1093:     VecScatterDestroy(&matctx->scatter_id[i]);
1094:     VecScatterDestroy(&matctx->scatterp_id[i]);
1095:   }
1096:   for (i=0;i<pep->nmat;i++) {
1097:     MatDestroy(&matctx->A[i]);
1098:   }
1099:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1100:   BVDestroy(&matctx->V);
1101:   BVDestroy(&matctx->W);
1102:   VecDestroy(&matctx->t);
1103:   VecDestroy(&matctx->tg);
1104:   VecDestroy(&matctx->tp);
1105:   VecDestroy(&matctx->tpg);
1106:   VecDestroy(&matctx->Rv);
1107:   VecDestroy(&matctx->Vi);
1108:   return(0);
1109: }

1113: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds,PetscInt *prs)
1114: {
1116:   PetscScalar    *H,*work,*dH,*fH,*dVS;
1117:   PetscInt       ldh,i,j,its=1,nmat=pep->nmat,nwu=0,lwa=0,nsubc=pep->npart,rds;
1118:   PetscLogDouble cnt;
1119:   PetscBLASInt   k_,ld_,*p,info,lwork=0;
1120:   BV             dV;
1121:   PetscBool      sinvert,flg;
1122:   Mat            P,M;
1123:   FSubctx        *ctx;
1124:   KSP            ksp;
1125:   MatExplicitCtx *matctx=NULL;
1126:   Vec            v;

1129:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1130:   if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1131:   /* the input tolerance is not being taken into account (by the moment) */
1132:   its = *maxits;
1133:   lwa = (5+3*nmat)*k*k+2*k;
1134:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,lwa,&work);
1135:   DSGetLeadingDimension(pep->ds,&ldh);
1136:   DSGetArray(pep->ds,DS_MAT_A,&H);
1137:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1138:   PetscMalloc1(2*k*k,&dVS);
1139:   STGetTransform(pep->st,&flg);
1140:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1141:     PetscBLASIntCast(k,&k_);
1142:     PetscBLASIntCast(ldh,&ld_);
1143:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1144:     if (sinvert){
1145:       DSGetArray(pep->ds,DS_MAT_A,&H);
1146:       PetscBLASIntCast(lwa-nwu,&lwork);
1147:       PetscMalloc1(k,&p);
1148:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1149:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&lwork,&info));
1150:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1151:       pep->ops->backtransform = NULL;
1152:     }
1153:     if (sigma!=0.0) {
1154:       DSGetArray(pep->ds,DS_MAT_A,&H);
1155:       for (i=0;i<k;i++) H[i+ldh*i] += sigma; 
1156:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1157:       pep->ops->backtransform = NULL;
1158:     }
1159:   }
1160:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1161:     DSGetArray(pep->ds,DS_MAT_A,&H);
1162:     for (j=0;j<k;j++) {
1163:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1164:     }
1165:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1166:     if (!flg) {
1167:       /* Restore original values */
1168:       for (i=0;i<pep->nmat;i++){
1169:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1170:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1171:       }
1172:     }
1173:   }
1174:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1175:     for (i=0;i<k;i++) {
1176:       BVGetColumn(pep->V,i,&v);
1177:       VecPointwiseMult(v,v,pep->Dr);
1178:       BVRestoreColumn(pep->V,i,&v);
1179:     }
1180:   }
1181:   DSGetArray(pep->ds,DS_MAT_A,&H);

1183:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds,prs,work,lwa);
1184:   /* check if H is in Schur form */
1185:   for (i=0;i<k-1;i++) {
1186:     if (H[i+1+i*ldh]!=0.0) {
1187: #if !defined(PETSC_USES_COMPLEX)
1188:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement require the complex Schur form of the projected matrix");
1189: #else
1190:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1191: #endif
1192:     }
1193:   }
1194:   if (pep->schur && nsubc>1) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Split communicator only allowed for the explicit matrix option");
1195:   if (!pep->schur && nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair's dimension");
1196:   cnt = k*sizeof(PetscBLASInt)+(lwork+k*k*(nmat+3)+nmat+k)*sizeof(PetscScalar);
1197:   PetscLogObjectMemory((PetscObject)pep,cnt);
1198:   BVSetActiveColumns(pep->V,0,k);
1199:   BVDuplicateResize(pep->V,k,&dV);
1200:   PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);  
1201:   PEPRefineGetKSP(pep,&ksp);
1202:   if (!pep->schur) {
1203:     PetscMalloc1(1,&matctx);
1204:     if (nsubc>1) { /* spliting in subcommunicators */
1205:       matctx->subc = pep->refinesubc;
1206:       NRefSubcommSetup(pep,k,matctx,nsubc);
1207:     } else matctx->subc=NULL;
1208:   }

1210:   /* Loop performing iterative refinements */
1211:   for (i=0;i<its;i++) {
1212:     /* Pre-compute the polynomial basis evaluated in H */
1213:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1214:     PEPNRefSetUpMatrices(pep,k,H,ldh,&M,&P,matctx,(i==0)?PETSC_TRUE:PETSC_FALSE);
1215:     KSPSetOperators(ksp,M,P);
1216:     if (i==0) {
1217:       KSPSetFromOptions(ksp);
1218:     }
1219:     /* Solve the linear system */
1220:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,ksp,work+nwu,lwa-nwu,matctx);
1221:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1222:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds,work+nwu,lwa-nwu);    
1223:   }
1224:   DSRestoreArray(pep->ds,DS_MAT_A,&H);  
1225:   if (!flg && sinvert) {
1226:     PetscFree(p);
1227:   }
1228:   PetscFree3(dH,fH,work);
1229:   PetscFree(dVS);
1230:   BVDestroy(&dV);
1231:   if (!pep->schur) {
1232:     for (i=0;i<2;i++) {
1233:       MatDestroy(&matctx->E[i]);
1234:     }
1235:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1236:     VecDestroy(&matctx->tN);
1237:     VecDestroy(&matctx->ttN);
1238:     VecDestroy(&matctx->t1);
1239:     if (nsubc>1) {
1240:       NRefSubcommDestroy(pep,matctx);
1241:     } else {
1242:       VecDestroy(&matctx->vseq);
1243:       VecScatterDestroy(&matctx->scatterctx);
1244:     }
1245:     PetscFree(matctx);
1246:   } else {
1247:     MatShellGetContext(M,&ctx);
1248:     PetscFree3(ctx->Mm,ctx->work,ctx->fih);
1249:     VecDestroy(&ctx->w1);  
1250:     VecDestroy(&ctx->w2);  
1251:     PetscFree(ctx);
1252:     MatDestroy(&P);
1253:   }
1254:   MatDestroy(&M);
1255:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1256:   return(0);
1257: }