Actual source code: nrefine.c

slepc-3.18.3 2023-03-24
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Newton refinement for polynomial eigenproblems.

 13:    References:

 15:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
 16:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
 17:            435(3):514-536, 2011.

 19:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 20:            polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
 21:            730-745, 2016.
 22: */

 24: #include <slepc/private/pepimpl.h>
 25: #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A,M1;
 29:   BV           V,M2,M3,W;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *fih,*work,*M4;
 32:   PetscBLASInt *pM4;
 33:   PetscBool    compM1;
 34:   Vec          t;
 35: } PEP_REFINE_MATSHELL;

 37: typedef struct {
 38:   Mat          E[2],M1;
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    compM1;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W,M2,M3,Wt;
 48:   PetscScalar  *M4,*w,*wt,*d,*dt;
 49:   Vec          t,tg,Rv,Vi,tp,tpg;
 50:   PetscInt     idx,*cols;
 51: } PEP_REFINE_EXPLICIT;

 53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
 54: {
 55:   PEP_REFINE_MATSHELL *ctx;
 56:   PetscInt            k,i;
 57:   PetscScalar         *c;
 58:   PetscBLASInt        k_,one=1,info;

 60:   MatShellGetContext(M,&ctx);
 61:   VecCopy(x,ctx->t);
 62:   k    = ctx->k;
 63:   c    = ctx->work;
 64:   PetscBLASIntCast(k,&k_);
 65:   MatMult(ctx->M1,x,y);
 66:   VecConjugate(ctx->t);
 67:   BVDotVec(ctx->M3,ctx->t,c);
 68:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 69:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 70:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 71:   PetscFPTrapPop();
 72:   SlepcCheckLapackInfo("getrs",info);
 73:   BVMultVec(ctx->M2,-1.0,1.0,y,c);
 74:   return 0;
 75: }

 77: /*
 78:   Evaluates the first d elements of the polynomial basis
 79:   on a given matrix H which is considered to be triangular
 80: */
 81: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 82: {
 83:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 84:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 85:   PetscScalar    corr=0.0,alpha,beta;
 86:   PetscBLASInt   k_,ldh_,ldfh_;

 88:   PetscBLASIntCast(ldh,&ldh_);
 89:   PetscBLASIntCast(k,&k_);
 90:   PetscBLASIntCast(ldfh,&ldfh_);
 91:   PetscArrayzero(fH,nm*k*k);
 92:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
 93:   if (nm>1) {
 94:     t = b[0]/a[0];
 95:     off = k;
 96:     for (j=0;j<k;j++) {
 97:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
 98:       fH[j+j*ldfh] -= t;
 99:     }
100:   }
101:   for (i=2;i<nm;i++) {
102:     off = i*k;
103:     if (i==2) {
104:       for (j=0;j<k;j++) {
105:         fH[off+j+j*ldfh] = 1.0;
106:         H[j+j*ldh] -= b[1];
107:       }
108:     } else {
109:       for (j=0;j<k;j++) {
110:         PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
111:         H[j+j*ldh] += corr-b[i-1];
112:       }
113:     }
114:     corr  = b[i-1];
115:     beta  = -g[i-1]/a[i-1];
116:     alpha = 1/a[i-1];
117:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
118:   }
119:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
120:   return 0;
121: }

123: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
124: {
125:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
126:   const PetscScalar *m3,*m2;
127:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
128:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
129:   PetscBLASInt      k_,lda_,lds_,nloc_,one=1,info;
130:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
131:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
132:   MatStructure      str;
133:   Vec               vc;

135:   STGetMatStructure(pep->st,&str);
136:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
137:   DHii = T12;
138:   PetscArrayzero(DHii,k*k*nmat);
139:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
140:   for (d=2;d<nmat;d++) {
141:     for (j=0;j<k;j++) {
142:       for (i=0;i<k;i++) {
143:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
144:       }
145:     }
146:   }
147:   /* T11 */
148:   if (!ctx->compM1) {
149:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
150:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
151:     for (j=1;j<nmat;j++) MatAXPY(M1,Ts[j],A[j],str);
152:   }

154:   /* T22 */
155:   PetscBLASIntCast(lds,&lds_);
156:   PetscBLASIntCast(k,&k_);
157:   PetscBLASIntCast(lda,&lda_);
158:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
159:   for (i=1;i<deg;i++) {
160:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
161:     s = (i==1)?0.0:1.0;
162:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
163:   }
164:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

166:   /* T12 */
167:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
168:   for (i=1;i<nmat;i++) {
169:     MatDenseGetArrayWrite(Mk,&array);
170:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
171:     MatDenseRestoreArrayWrite(Mk,&array);
172:     BVSetActiveColumns(W,0,k);
173:     BVMult(W,1.0,0.0,V,Mk);
174:     if (i==1) BVMatMult(W,A[i],M2);
175:     else {
176:       BVMatMult(W,A[i],M3); /* using M3 as work space */
177:       BVMult(M2,1.0,1.0,M3,NULL);
178:     }
179:   }

181:   /* T21 */
182:   MatDenseGetArrayWrite(Mk,&array);
183:   for (i=1;i<deg;i++) {
184:     s = (i==1)?0.0:1.0;
185:     ss = PetscConj(fh[i]);
186:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
187:   }
188:   MatDenseRestoreArrayWrite(Mk,&array);
189:   BVSetActiveColumns(M3,0,k);
190:   BVMult(M3,1.0,0.0,V,Mk);
191:   for (i=0;i<k;i++) {
192:     BVGetColumn(M3,i,&vc);
193:     VecConjugate(vc);
194:     BVRestoreColumn(M3,i,&vc);
195:   }
196:   MatDestroy(&Mk);
197:   PetscFree3(T12,Tr,Ts);

199:   VecGetLocalSize(ctx->t,&nloc);
200:   PetscBLASIntCast(nloc,&nloc_);
201:   PetscMalloc1(nloc*k,&T);
202:   KSPGetOperators(pep->refineksp,NULL,&P);
203:   if (!ctx->compM1) MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN);
204:   BVGetArrayRead(ctx->M2,&m2);
205:   BVGetArrayRead(ctx->M3,&m3);
206:   VecGetArray(ctx->t,&v);
207:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
208:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
209:   PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
210:   PetscFPTrapPop();
211:   SlepcCheckLapackInfo("gesv",info);
212:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
213:   VecRestoreArray(ctx->t,&v);
214:   BVRestoreArrayRead(ctx->M2,&m2);
215:   BVRestoreArrayRead(ctx->M3,&m3);
216:   MatDiagonalSet(P,ctx->t,ADD_VALUES);
217:   PetscFree(T);
218:   KSPSetUp(pep->refineksp);
219:   return 0;
220: }

222: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
223: {
224:   PetscScalar         *t0;
225:   PetscBLASInt        k_,one=1,info,lda_;
226:   PetscInt            i,lda=nmat*k;
227:   Mat                 M;
228:   PEP_REFINE_MATSHELL *ctx;

230:   KSPGetOperators(ksp,&M,NULL);
231:   MatShellGetContext(M,&ctx);
232:   PetscCalloc1(k,&t0);
233:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
234:   PetscBLASIntCast(lda,&lda_);
235:   PetscBLASIntCast(k,&k_);
236:   for (i=0;i<k;i++) t0[i] = Rh[i];
237:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
238:   SlepcCheckLapackInfo("getrs",info);
239:   BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
240:   KSPSolve(ksp,Rv,dVi);
241:   VecConjugate(dVi);
242:   BVDotVec(ctx->M3,dVi,dHi);
243:   VecConjugate(dVi);
244:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
245:   PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
246:   SlepcCheckLapackInfo("getrs",info);
247:   PetscFPTrapPop();
248:   PetscFree(t0);
249:   return 0;
250: }

252: /*
253:    Computes the residual P(H,V*S)*e_j for the polynomial
254: */
255: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
256: {
257:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
258:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
259:   PetscInt       i,ii,jj,lda;
260:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
261:   Mat            M0;
262:   Vec            w;

264:   PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
265:   lda = k*nmat;
266:   PetscBLASIntCast(k,&k_);
267:   PetscBLASIntCast(lds,&lds_);
268:   PetscBLASIntCast(lda,&lda_);
269:   PetscBLASIntCast(nmat,&nmat_);
270:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
271:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
272:   BVSetActiveColumns(W,0,nmat);
273:   BVMult(W,1.0,0.0,V,M0);
274:   MatDestroy(&M0);

276:   BVGetColumn(W,0,&w);
277:   MatMult(A[0],w,Rv);
278:   BVRestoreColumn(W,0,&w);
279:   for (i=1;i<nmat;i++) {
280:     BVGetColumn(W,i,&w);
281:     MatMult(A[i],w,t);
282:     BVRestoreColumn(W,i,&w);
283:     VecAXPY(Rv,1.0,t);
284:   }
285:   /* Update right-hand side */
286:   if (j) {
287:     PetscBLASIntCast(ldh,&ldh_);
288:     PetscArrayzero(Z,k*k);
289:     PetscArrayzero(DS0,k*k);
290:     PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
291:     /* Update DfH */
292:     for (i=1;i<nmat;i++) {
293:       if (i>1) {
294:         beta = -g[i-1];
295:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
296:         tt += -b[i-1];
297:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
298:         tt = b[i-1];
299:         beta = 1.0/a[i-1];
300:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
301:         F = DS0; DS0 = DS1; DS1 = F;
302:       } else {
303:         PetscArrayzero(DS1,k*k);
304:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
305:       }
306:       for (jj=j;jj<k;jj++) {
307:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
308:       }
309:     }
310:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
311:     /* Update right-hand side */
312:     PetscBLASIntCast(2*k,&k2_);
313:     PetscBLASIntCast(j,&j_);
314:     PetscBLASIntCast(k+rds,&krds_);
315:     c0 = DS0;
316:     PetscArrayzero(Rh,k);
317:     for (i=0;i<nmat;i++) {
318:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
319:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
320:       BVMultVec(V,1.0,0.0,t,h);
321:       BVSetActiveColumns(dV,0,rds);
322:       BVMultVec(dV,1.0,1.0,t,h+k);
323:       BVGetColumn(W,i,&w);
324:       MatMult(A[i],t,w);
325:       BVRestoreColumn(W,i,&w);
326:       if (i>0 && i<nmat-1) {
327:         PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
328:         PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
329:       }
330:     }

332:     for (i=0;i<nmat;i++) h[i] = -1.0;
333:     BVMultVec(W,1.0,1.0,Rv,h);
334:   }
335:   PetscFree4(h,DS0,DS1,Z);
336:   return 0;
337: }

339: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
340: {
341:   PetscInt       i,j,incf,incc;
342:   PetscScalar    *y,*g,*xx2,*ww,y2,*dd;
343:   Vec            v,t,xx1;
344:   BV             WW,T;

346:   PetscMalloc3(sz,&y,sz,&g,k,&xx2);
347:   if (trans) {
348:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
349:   } else {
350:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
351:   }
352:   xx1 = vw;
353:   VecCopy(x1,xx1);
354:   PetscArraycpy(xx2,x2,sz);
355:   PetscArrayzero(sol2,k);
356:   for (i=sz-1;i>=0;i--) {
357:     BVGetColumn(WW,i,&v);
358:     VecConjugate(v);
359:     VecDot(xx1,v,y+i);
360:     VecConjugate(v);
361:     BVRestoreColumn(WW,i,&v);
362:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
363:     y[i] = -(y[i]-xx2[i])/dd[i];
364:     BVGetColumn(T,i,&t);
365:     VecAXPY(xx1,-y[i],t);
366:     BVRestoreColumn(T,i,&t);
367:     for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
368:     g[i] = xx2[i];
369:   }
370:   if (trans) KSPSolveTranspose(ksp,xx1,sol1);
371:   else KSPSolve(ksp,xx1,sol1);
372:   if (trans) {
373:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
374:   } else {
375:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
376:   }
377:   for (i=0;i<sz;i++) {
378:     BVGetColumn(T,i,&t);
379:     VecConjugate(t);
380:     VecDot(sol1,t,&y2);
381:     VecConjugate(t);
382:     BVRestoreColumn(T,i,&t);
383:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
384:     y2 = (g[i]-y2)/dd[i];
385:     BVGetColumn(WW,i,&v);
386:     VecAXPY(sol1,-y2,v);
387:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
388:     sol2[i] = y[i]+y2;
389:     BVRestoreColumn(WW,i,&v);
390:   }
391:   PetscFree3(y,g,xx2);
392:   return 0;
393: }

395: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
396: {
397:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
398:   Mat            M1=matctx->M1,*A,*At,Mk;
399:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
400:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
401:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
402:   PetscBLASInt   lds_,lda_,k_;
403:   MatStructure   str;
404:   PetscBool      flg;
405:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
406:   Vec            vc,vc2;

408:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
409:   STGetMatStructure(pep->st,&str);
410:   STGetTransform(pep->st,&flg);
411:   if (flg) {
412:     PetscMalloc1(pep->nmat,&At);
413:     for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
414:   } else At = pep->A;
415:   if (matctx->subc) A = matctx->A;
416:   else A = At;
417:   /* Form the explicit system matrix */
418:   DHii = T12;
419:   PetscArrayzero(DHii,k*k*nmat);
420:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
421:   for (l=2;l<nmat;l++) {
422:     for (j=0;j<k;j++) {
423:       for (i=0;i<k;i++) {
424:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
425:       }
426:     }
427:   }

429:   /* T11 */
430:   if (!matctx->compM1) {
431:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
432:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
433:     for (j=1;j<nmat;j++) MatAXPY(M1,Ts[j],A[j],str);
434:   }

436:   /* T22 */
437:   PetscBLASIntCast(lds,&lds_);
438:   PetscBLASIntCast(k,&k_);
439:   PetscBLASIntCast(lda,&lda_);
440:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
441:   for (i=1;i<deg;i++) {
442:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
443:     s = (i==1)?0.0:1.0;
444:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
445:   }

447:   /* T12 */
448:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
449:   for (i=1;i<nmat;i++) {
450:     MatDenseGetArrayWrite(Mk,&array);
451:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
452:     MatDenseRestoreArrayWrite(Mk,&array);
453:     BVSetActiveColumns(W,0,k);
454:     BVMult(W,1.0,0.0,V,Mk);
455:     if (i==1) BVMatMult(W,A[i],M2);
456:     else {
457:       BVMatMult(W,A[i],M3); /* using M3 as work space */
458:       BVMult(M2,1.0,1.0,M3,NULL);
459:     }
460:   }

462:   /* T21 */
463:   MatDenseGetArrayWrite(Mk,&array);
464:   for (i=1;i<deg;i++) {
465:     s = (i==1)?0.0:1.0;
466:     ss = PetscConj(fh[i]);
467:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
468:   }
469:   MatDenseRestoreArrayWrite(Mk,&array);
470:   BVSetActiveColumns(M3,0,k);
471:   BVMult(M3,1.0,0.0,V,Mk);
472:   for (i=0;i<k;i++) {
473:     BVGetColumn(M3,i,&vc);
474:     VecConjugate(vc);
475:     BVRestoreColumn(M3,i,&vc);
476:   }

478:   PEP_KSPSetOperators(ksp,M1,M1);
479:   KSPSetUp(ksp);
480:   MatDestroy(&Mk);

482:   /* Set up for BEMW */
483:   for (i=0;i<k;i++) {
484:     BVGetColumn(M2,i,&vc);
485:     BVGetColumn(W,i,&vc2);
486:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
487:     BVRestoreColumn(M2,i,&vc);
488:     BVGetColumn(M3,i,&vc);
489:     VecConjugate(vc);
490:     VecDot(vc2,vc,&d[i]);
491:     VecConjugate(vc);
492:     BVRestoreColumn(M3,i,&vc);
493:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
494:     d[i] = M4[i+i*k]-d[i];
495:     BVRestoreColumn(W,i,&vc2);

497:     BVGetColumn(M3,i,&vc);
498:     BVGetColumn(Wt,i,&vc2);
499:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
500:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
501:     BVRestoreColumn(M3,i,&vc);
502:     BVGetColumn(M2,i,&vc);
503:     VecConjugate(vc2);
504:     VecDot(vc,vc2,&dt[i]);
505:     VecConjugate(vc2);
506:     BVRestoreColumn(M2,i,&vc);
507:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
508:     dt[i] = M4[i+i*k]-dt[i];
509:     BVRestoreColumn(Wt,i,&vc2);
510:   }

512:   if (flg) PetscFree(At);
513:   PetscFree3(T12,Tr,Ts);
514:   return 0;
515: }

517: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
518: {
519:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
520:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
521:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
522:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
523:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
524:   PetscBLASInt      lds_,lda_,k_;
525:   const PetscInt    *idxmc;
526:   const PetscScalar *valsc,*carray;
527:   MatStructure      str;
528:   Vec               vc,vc0;
529:   PetscBool         flg;

531:   PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
532:   STGetMatStructure(pep->st,&str);
533:   KSPGetOperators(ksp,&M,NULL);
534:   MatGetOwnershipRange(E[1],&n1,&m1);
535:   MatGetOwnershipRange(E[0],&n0,&m0);
536:   MatGetOwnershipRange(M,&n,NULL);
537:   PetscMalloc1(nmat,&ts);
538:   STGetTransform(pep->st,&flg);
539:   if (flg) {
540:     PetscMalloc1(pep->nmat,&At);
541:     for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
542:   } else At = pep->A;
543:   if (matctx->subc) A = matctx->A;
544:   else A = At;
545:   /* Form the explicit system matrix */
546:   DHii = T12;
547:   PetscArrayzero(DHii,k*k*nmat);
548:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
549:   for (d=2;d<nmat;d++) {
550:     for (j=0;j<k;j++) {
551:       for (i=0;i<k;i++) {
552:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
553:       }
554:     }
555:   }

557:   /* T11 */
558:   if (!matctx->compM1) {
559:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
560:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
561:     for (j=1;j<nmat;j++) MatAXPY(E[0],Ts[j],A[j],str);
562:   }
563:   for (i=n0;i<m0;i++) {
564:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
565:     idx = n+i-n0;
566:     for (j=0;j<ncols;j++) {
567:       idxg[j] = matctx->map0[idxmc[j]];
568:     }
569:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
570:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
571:   }

573:   /* T22 */
574:   PetscBLASIntCast(lds,&lds_);
575:   PetscBLASIntCast(k,&k_);
576:   PetscBLASIntCast(lda,&lda_);
577:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
578:   for (i=1;i<deg;i++) {
579:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
580:     s = (i==1)?0.0:1.0;
581:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
582:   }
583:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
584:   for (i=0;i<m1-n1;i++) {
585:     idx = n+m0-n0+i;
586:     for (j=0;j<k;j++) {
587:       Tr[j] = T22[n1+i+j*k];
588:     }
589:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
590:   }

592:   /* T21 */
593:   for (i=1;i<deg;i++) {
594:     s = (i==1)?0.0:1.0;
595:     ss = PetscConj(fh[i]);
596:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
597:   }
598:   BVSetActiveColumns(W,0,k);
599:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
600:   BVMult(W,1.0,0.0,V,Mk);
601:   for (i=0;i<k;i++) {
602:     BVGetColumn(W,i,&vc);
603:     VecConjugate(vc);
604:     VecGetArrayRead(vc,&carray);
605:     idx = matctx->map1[i];
606:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
607:     VecRestoreArrayRead(vc,&carray);
608:     BVRestoreColumn(W,i,&vc);
609:   }

611:   /* T12 */
612:   for (i=1;i<nmat;i++) {
613:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
614:     for (j=0;j<k;j++) PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
615:   }
616:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
617:   for (i=0;i<nmat;i++) ts[i] = 1.0;
618:   for (j=0;j<k;j++) {
619:     MatDenseGetArrayWrite(Md,&array);
620:     PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
621:     MatDenseRestoreArrayWrite(Md,&array);
622:     BVSetActiveColumns(W,0,nmat-1);
623:     BVMult(W,1.0,0.0,V,Md);
624:     for (i=nmat-1;i>0;i--) {
625:       BVGetColumn(W,i-1,&vc0);
626:       BVGetColumn(W,i,&vc);
627:       MatMult(A[i],vc0,vc);
628:       BVRestoreColumn(W,i-1,&vc0);
629:       BVRestoreColumn(W,i,&vc);
630:     }
631:     BVSetActiveColumns(W,1,nmat);
632:     BVGetColumn(W,0,&vc0);
633:     BVMultVec(W,1.0,0.0,vc0,ts);
634:     VecGetArrayRead(vc0,&carray);
635:     idx = matctx->map1[j];
636:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
637:     VecRestoreArrayRead(vc0,&carray);
638:     BVRestoreColumn(W,0,&vc0);
639:   }
640:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
641:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
642:   PEP_KSPSetOperators(ksp,M,M);
643:   KSPSetUp(ksp);
644:   PetscFree(ts);
645:   MatDestroy(&Mk);
646:   MatDestroy(&Md);
647:   if (flg) PetscFree(At);
648:   PetscFree5(T22,T21,T12,Tr,Ts);
649:   return 0;
650: }

652: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
653: {
654:   PetscInt          n0,m0,n1,m1,i;
655:   PetscScalar       *arrayV;
656:   const PetscScalar *array;

658:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
659:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);

661:   /* Right side */
662:   VecGetArrayRead(Rv,&array);
663:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
664:   VecRestoreArrayRead(Rv,&array);
665:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
666:   VecAssemblyBegin(matctx->tN);
667:   VecAssemblyEnd(matctx->tN);

669:   /* Solve */
670:   KSPSolve(ksp,matctx->tN,matctx->ttN);

672:   /* Retrieve solution */
673:   VecGetArray(dVi,&arrayV);
674:   VecGetArrayRead(matctx->ttN,&array);
675:   PetscArraycpy(arrayV,array,m0-n0);
676:   VecRestoreArray(dVi,&arrayV);
677:   if (!matctx->subc) {
678:     VecGetArray(matctx->t1,&arrayV);
679:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
680:     VecRestoreArray(matctx->t1,&arrayV);
681:     VecRestoreArrayRead(matctx->ttN,&array);
682:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
683:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
684:     VecGetArrayRead(matctx->vseq,&array);
685:     for (i=0;i<k;i++) dHi[i] = array[i];
686:     VecRestoreArrayRead(matctx->vseq,&array);
687:   }
688:   return 0;
689: }

691: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
692: {
693:   PetscInt            j,m,lda=pep->nmat*k,n0,m0,idx;
694:   PetscMPIInt         root,len;
695:   PetscScalar         *array2,h;
696:   const PetscScalar   *array;
697:   Vec                 R,Vi;
698:   PEP_REFINE_MATSHELL *ctx;
699:   Mat                 M;

701:   if (!matctx || !matctx->subc) {
702:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
703:     h   = H[i+i*ldh];
704:     idx = i;
705:     R   = Rv;
706:     Vi  = dVi;
707:     switch (pep->scheme) {
708:     case PEP_REFINE_SCHEME_EXPLICIT:
709:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
710:       matctx->compM1 = PETSC_FALSE;
711:       break;
712:     case PEP_REFINE_SCHEME_MBE:
713:       NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
714:       matctx->compM1 = PETSC_FALSE;
715:       break;
716:     case PEP_REFINE_SCHEME_SCHUR:
717:       KSPGetOperators(ksp,&M,NULL);
718:       MatShellGetContext(M,&ctx);
719:       NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
720:       ctx->compM1 = PETSC_FALSE;
721:       break;
722:     }
723:   } else {
724:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
725:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
726:       h = H[idx+idx*ldh];
727:       matctx->idx = idx;
728:       switch (pep->scheme) {
729:       case PEP_REFINE_SCHEME_EXPLICIT:
730:         NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
731:         matctx->compM1 = PETSC_FALSE;
732:         break;
733:       case PEP_REFINE_SCHEME_MBE:
734:         NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
735:         matctx->compM1 = PETSC_FALSE;
736:         break;
737:       case PEP_REFINE_SCHEME_SCHUR:
738:         break;
739:       }
740:     } else idx = matctx->idx;
741:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
742:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
743:     VecGetArrayRead(matctx->tg,&array);
744:     VecPlaceArray(matctx->t,array);
745:     VecCopy(matctx->t,matctx->Rv);
746:     VecResetArray(matctx->t);
747:     VecRestoreArrayRead(matctx->tg,&array);
748:     R  = matctx->Rv;
749:     Vi = matctx->Vi;
750:   }
751:   if (idx==i && idx<k) {
752:     switch (pep->scheme) {
753:       case PEP_REFINE_SCHEME_EXPLICIT:
754:         NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
755:         break;
756:       case PEP_REFINE_SCHEME_MBE:
757:         NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
758:         break;
759:       case PEP_REFINE_SCHEME_SCHUR:
760:         NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
761:         break;
762:     }
763:   }
764:   if (matctx && matctx->subc) {
765:     VecGetLocalSize(Vi,&m);
766:     VecGetArrayRead(Vi,&array);
767:     VecGetArray(matctx->tg,&array2);
768:     PetscArraycpy(array2,array,m);
769:     VecRestoreArray(matctx->tg,&array2);
770:     VecRestoreArrayRead(Vi,&array);
771:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
772:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
773:     switch (pep->scheme) {
774:     case PEP_REFINE_SCHEME_EXPLICIT:
775:       MatGetOwnershipRange(matctx->E[0],&n0,&m0);
776:       VecGetArrayRead(matctx->ttN,&array);
777:       VecPlaceArray(matctx->tp,array+m0-n0);
778:       VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
779:       VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
780:       VecResetArray(matctx->tp);
781:       VecRestoreArrayRead(matctx->ttN,&array);
782:       VecGetArrayRead(matctx->tpg,&array);
783:       for (j=0;j<k;j++) dHi[j] = array[j];
784:       VecRestoreArrayRead(matctx->tpg,&array);
785:       break;
786:      case PEP_REFINE_SCHEME_MBE:
787:       root = 0;
788:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
789:       PetscMPIIntCast(k,&len);
790:       MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
791:       break;
792:     case PEP_REFINE_SCHEME_SCHUR:
793:       break;
794:     }
795:   }
796:   return 0;
797: }

799: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
800: {
801:   PetscInt            i,nmat=pep->nmat,lda=nmat*k;
802:   PetscScalar         *fh,*Rh,*DfH;
803:   PetscReal           norm;
804:   BV                  W;
805:   Vec                 Rv,t,dvi;
806:   PEP_REFINE_MATSHELL *ctx;
807:   Mat                 M,*At;
808:   PetscBool           flg,lindep;

810:   PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
811:   *rds = 0;
812:   BVCreateVec(pep->V,&Rv);
813:   switch (pep->scheme) {
814:   case PEP_REFINE_SCHEME_EXPLICIT:
815:     BVCreateVec(pep->V,&t);
816:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
817:     PetscMalloc1(nmat,&fh);
818:     break;
819:   case PEP_REFINE_SCHEME_MBE:
820:     if (matctx->subc) {
821:       BVCreateVec(pep->V,&t);
822:       BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
823:     } else {
824:       W = matctx->W;
825:       PetscObjectReference((PetscObject)W);
826:       t = matctx->t;
827:       PetscObjectReference((PetscObject)t);
828:     }
829:     BVScale(matctx->W,0.0);
830:     BVScale(matctx->Wt,0.0);
831:     BVScale(matctx->M2,0.0);
832:     BVScale(matctx->M3,0.0);
833:     PetscMalloc1(nmat,&fh);
834:     break;
835:   case PEP_REFINE_SCHEME_SCHUR:
836:     KSPGetOperators(ksp,&M,NULL);
837:     MatShellGetContext(M,&ctx);
838:     BVCreateVec(pep->V,&t);
839:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
840:     fh = ctx->fih;
841:     break;
842:   }
843:   PetscArrayzero(dVS,2*k*k);
844:   PetscArrayzero(DfH,lda*k);
845:   STGetTransform(pep->st,&flg);
846:   if (flg) {
847:     PetscMalloc1(pep->nmat,&At);
848:     for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
849:   } else At = pep->A;

851:   /* Main loop for computing the i-th columns of dX and dS */
852:   for (i=0;i<k;i++) {
853:     /* Compute and update i-th column of the right hand side */
854:     PetscArrayzero(Rh,k);
855:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);

857:     /* Update and solve system */
858:     BVGetColumn(dV,i,&dvi);
859:     NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
860:     /* Orthogonalize computed solution */
861:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
862:     BVRestoreColumn(dV,i,&dvi);
863:     if (!lindep) {
864:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
865:       if (!lindep) {
866:         dVS[k+i+i*2*k] = norm;
867:         BVScaleColumn(dV,i,1.0/norm);
868:         (*rds)++;
869:       }
870:     }
871:   }
872:   BVSetActiveColumns(dV,0,*rds);
873:   VecDestroy(&t);
874:   VecDestroy(&Rv);
875:   BVDestroy(&W);
876:   if (flg) PetscFree(At);
877:   PetscFree2(DfH,Rh);
878:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) PetscFree(fh);
879:   return 0;
880: }

882: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
883: {
884:   PetscInt       j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
885:   PetscScalar    *G,*tau,sone=1.0,zero=0.0,*work;
886:   PetscBLASInt   lds_,k_,ldh_,info,ldg_,lda_;

888:   PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
889:   PetscBLASIntCast(lds,&lds_);
890:   PetscBLASIntCast(lda,&lda_);
891:   PetscBLASIntCast(k,&k_);

893:   /* Form auxiliary matrix for the orthogonalization step */
894:   ldg = deg*k;
895:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
896:   PetscBLASIntCast(ldg,&ldg_);
897:   PetscBLASIntCast(ldh,&ldh_);
898:   for (j=0;j<deg;j++) {
899:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
900:   }
901:   /* Orthogonalize and update S */
902:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
903:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
904:   PetscFPTrapPop();
905:   SlepcCheckLapackInfo("geqrf",info);
906:   PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

908:   /* Update H */
909:   PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
910:   PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
911:   PetscFree3(tau,work,G);
912:   return 0;
913: }

915: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
916: {
917:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
918:   PetscScalar    *tau,*array,*work;
919:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
920:   Mat            M0;

922:   PetscMalloc2(k,&tau,k,&work);
923:   PetscBLASIntCast(lds,&lds_);
924:   PetscBLASIntCast(lda,&lda_);
925:   PetscBLASIntCast(ldh,&ldh_);
926:   PetscBLASIntCast(k,&k_);
927:   PetscBLASIntCast(2*k,&k2_);
928:   PetscBLASIntCast((k+rds),&kdrs_);
929:   /* Update H */
930:   for (j=0;j<k;j++) {
931:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
932:   }
933:   /* Update V */
934:   for (j=0;j<k;j++) {
935:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
936:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
937:   }
938:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
939:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
940:   SlepcCheckLapackInfo("geqrf",info);
941:   /* Copy triangular matrix in S */
942:   for (j=0;j<k;j++) {
943:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
944:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
945:   }
946:   PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
947:   SlepcCheckLapackInfo("orgqr",info);
948:   PetscFPTrapPop();
949:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
950:   MatDenseGetArrayWrite(M0,&array);
951:   for (j=0;j<k;j++) PetscArraycpy(array+j*k,dVS+j*2*k,k);
952:   MatDenseRestoreArrayWrite(M0,&array);
953:   BVMultInPlace(pep->V,M0,0,k);
954:   if (rds) {
955:     MatDenseGetArrayWrite(M0,&array);
956:     for (j=0;j<k;j++) PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
957:     MatDenseRestoreArrayWrite(M0,&array);
958:     BVMultInPlace(dV,M0,0,k);
959:     BVMult(pep->V,1.0,1.0,dV,NULL);
960:   }
961:   MatDestroy(&M0);
962:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
963:   PetscFree2(tau,work);
964:   return 0;
965: }

967: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
968: {
969:   PEP_REFINE_MATSHELL *ctx;
970:   PetscScalar         t,*coef;
971:   const PetscScalar   *array;
972:   MatStructure        str;
973:   PetscInt            j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
974:   MPI_Comm            comm;
975:   PetscMPIInt         np;
976:   const PetscInt      *rgs0,*rgs1;
977:   Mat                 B,C,*E,*A,*At;
978:   IS                  is1,is2;
979:   Vec                 v;
980:   PetscBool           flg;
981:   Mat                 M,P;

983:   PetscMalloc1(nmat,&coef);
984:   STGetTransform(pep->st,&flg);
985:   if (flg) {
986:     PetscMalloc1(pep->nmat,&At);
987:     for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
988:   } else At = pep->A;
989:   switch (pep->scheme) {
990:   case PEP_REFINE_SCHEME_EXPLICIT:
991:     if (ini) {
992:       if (matctx->subc) {
993:         A = matctx->A;
994:         PetscSubcommGetChild(matctx->subc,&comm);
995:       } else {
996:         A = At;
997:         PetscObjectGetComm((PetscObject)pep,&comm);
998:       }
999:       E = matctx->E;
1000:       STGetMatStructure(pep->st,&str);
1001:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1002:       j = (matctx->subc)?matctx->subc->color:0;
1003:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1004:       for (j=1;j<nmat;j++) MatAXPY(E[0],coef[j],A[j],str);
1005:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1006:       MatGetOwnershipRange(E[0],&n0,&m0);
1007:       MatGetOwnershipRange(E[1],&n1,&m1);
1008:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1009:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1010:       /* T12 and T21 are computed from V and V*, so,
1011:          they must have the same column and row ranges */
1013:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1014:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1015:       MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1016:       MatDestroy(&B);
1017:       MatDestroy(&C);
1018:       matctx->compM1 = PETSC_TRUE;
1019:       MatGetSize(E[0],NULL,&N0);
1020:       MatGetSize(E[1],NULL,&N1);
1021:       MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1022:       MatGetOwnershipRanges(E[0],&rgs0);
1023:       MatGetOwnershipRanges(E[1],&rgs1);
1024:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1025:       /* Create column (and row) mapping */
1026:       for (p=0;p<np;p++) {
1027:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1028:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1029:       }
1030:       MatCreateVecs(M,NULL,&matctx->tN);
1031:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1032:       VecDuplicate(matctx->tN,&matctx->ttN);
1033:       if (matctx->subc) {
1034:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1035:         count = np*k;
1036:         PetscMalloc2(count,&idx1,count,&idx2);
1037:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1038:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
1039:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1040:         for (si=0;si<matctx->subc->n;si++) {
1041:           if (matctx->subc->color==si) {
1042:             j=0;
1043:             if (matctx->subc->color==si) {
1044:               for (p=0;p<np;p++) {
1045:                 for (i=n1;i<m1;i++) {
1046:                   idx1[j] = l0+i-n1;
1047:                   idx2[j++] =p*k+i;
1048:                 }
1049:               }
1050:             }
1051:             count = np*(m1-n1);
1052:           } else count =0;
1053:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1054:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1055:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1056:           ISDestroy(&is1);
1057:           ISDestroy(&is2);
1058:         }
1059:         PetscFree2(idx1,idx2);
1060:       } else VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1061:       P = M;
1062:     } else {
1063:       if (matctx->subc) {
1064:         /* Scatter vectors pep->V */
1065:         for (i=0;i<k;i++) {
1066:           BVGetColumn(pep->V,i,&v);
1067:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1068:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1069:           BVRestoreColumn(pep->V,i,&v);
1070:           VecGetArrayRead(matctx->tg,&array);
1071:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
1072:           BVInsertVec(matctx->V,i,matctx->t);
1073:           VecResetArray(matctx->t);
1074:           VecRestoreArrayRead(matctx->tg,&array);
1075:         }
1076:       }
1077:     }
1078:     break;
1079:   case PEP_REFINE_SCHEME_MBE:
1080:     if (ini) {
1081:       if (matctx->subc) {
1082:         A = matctx->A;
1083:         PetscSubcommGetChild(matctx->subc,&comm);
1084:       } else {
1085:         matctx->V = pep->V;
1086:         A = At;
1087:         PetscObjectGetComm((PetscObject)pep,&comm);
1088:         MatCreateVecs(pep->A[0],&matctx->t,NULL);
1089:       }
1090:       STGetMatStructure(pep->st,&str);
1091:       MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1092:       j = (matctx->subc)?matctx->subc->color:0;
1093:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1094:       for (j=1;j<nmat;j++) MatAXPY(matctx->M1,coef[j],A[j],str);
1095:       BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1096:       BVDuplicateResize(matctx->V,k,&matctx->M2);
1097:       BVDuplicate(matctx->M2,&matctx->M3);
1098:       BVDuplicate(matctx->M2,&matctx->Wt);
1099:       PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1100:       matctx->compM1 = PETSC_TRUE;
1101:       M = matctx->M1;
1102:       P = M;
1103:     }
1104:     break;
1105:   case PEP_REFINE_SCHEME_SCHUR:
1106:     if (ini) {
1107:       PetscObjectGetComm((PetscObject)pep,&comm);
1108:       MatGetSize(At[0],&m0,&n0);
1109:       PetscMalloc1(1,&ctx);
1110:       STGetMatStructure(pep->st,&str);
1111:       /* Create a shell matrix to solve the linear system */
1112:       ctx->V = pep->V;
1113:       ctx->k = k; ctx->nmat = nmat;
1114:       PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1115:       for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1116:       PetscArrayzero(ctx->M4,k*k);
1117:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1118:       MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1119:       BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1120:       BVDuplicateResize(ctx->V,k,&ctx->M2);
1121:       BVDuplicate(ctx->M2,&ctx->M3);
1122:       BVCreateVec(pep->V,&ctx->t);
1123:       MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1124:       PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1125:       for (j=1;j<nmat;j++) MatAXPY(ctx->M1,coef[j],At[j],str);
1126:       MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1127:       /* Compute a precond matrix for the system */
1128:       t = H[0];
1129:       PEPEvaluateBasis(pep,t,0,coef,NULL);
1130:       for (j=1;j<nmat;j++) MatAXPY(P,coef[j],At[j],str);
1131:       ctx->compM1 = PETSC_TRUE;
1132:     }
1133:     break;
1134:   }
1135:   if (ini) {
1136:     PEPRefineGetKSP(pep,&pep->refineksp);
1137:     KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1138:     PEP_KSPSetOperators(pep->refineksp,M,P);
1139:     KSPSetFromOptions(pep->refineksp);
1140:   }

1142:   if (!ini && matctx && matctx->subc) {
1143:      /* Scatter vectors pep->V */
1144:     for (i=0;i<k;i++) {
1145:       BVGetColumn(pep->V,i,&v);
1146:       VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1147:       VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1148:       BVRestoreColumn(pep->V,i,&v);
1149:       VecGetArrayRead(matctx->tg,&array);
1150:       VecPlaceArray(matctx->t,(const PetscScalar*)array);
1151:       BVInsertVec(matctx->V,i,matctx->t);
1152:       VecResetArray(matctx->t);
1153:       VecRestoreArrayRead(matctx->tg,&array);
1154:     }
1155:    }
1156:   PetscFree(coef);
1157:   if (flg) PetscFree(At);
1158:   return 0;
1159: }

1161: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1162: {
1163:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1164:   IS                is1,is2;
1165:   BVType            type;
1166:   Vec               v;
1167:   const PetscScalar *array;
1168:   Mat               *A;
1169:   PetscBool         flg;
1170:   MPI_Comm          contpar,child;

1172:   STGetTransform(pep->st,&flg);
1173:   if (flg) {
1174:     PetscMalloc1(pep->nmat,&A);
1175:     for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&A[i]);
1176:   } else A = pep->A;
1177:   PetscSubcommGetChild(matctx->subc,&child);
1178:   PetscSubcommGetContiguousParent(matctx->subc,&contpar);

1180:   /* Duplicate pep matrices */
1181:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1182:   for (i=0;i<pep->nmat;i++) MatCreateRedundantMatrix(A[i],0,child,MAT_INITIAL_MATRIX,&matctx->A[i]);

1184:   /* Create Scatter */
1185:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1186:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1187:   VecCreateMPI(contpar,nloc_sub,PETSC_DECIDE,&matctx->tg);
1188:   BVGetColumn(pep->V,0,&v);
1189:   VecGetOwnershipRange(v,&n0,&m0);
1190:   nloc0 = m0-n0;
1191:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1192:   j = 0;
1193:   for (si=0;si<matctx->subc->n;si++) {
1194:     for (i=n0;i<m0;i++) {
1195:       idx1[j]   = i;
1196:       idx2[j++] = i+pep->n*si;
1197:     }
1198:   }
1199:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1200:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1201:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1202:   ISDestroy(&is1);
1203:   ISDestroy(&is2);
1204:   for (si=0;si<matctx->subc->n;si++) {
1205:     j=0;
1206:     for (i=n0;i<m0;i++) {
1207:       idx1[j] = i;
1208:       idx2[j++] = i+pep->n*si;
1209:     }
1210:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1211:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1212:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1213:     ISDestroy(&is1);
1214:     ISDestroy(&is2);
1215:   }
1216:   BVRestoreColumn(pep->V,0,&v);
1217:   PetscFree2(idx1,idx2);

1219:   /* Duplicate pep->V vecs */
1220:   BVGetType(pep->V,&type);
1221:   BVCreate(child,&matctx->V);
1222:   BVSetType(matctx->V,type);
1223:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1224:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1225:   for (i=0;i<k;i++) {
1226:     BVGetColumn(pep->V,i,&v);
1227:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1228:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1229:     BVRestoreColumn(pep->V,i,&v);
1230:     VecGetArrayRead(matctx->tg,&array);
1231:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1232:     BVInsertVec(matctx->V,i,matctx->t);
1233:     VecResetArray(matctx->t);
1234:     VecRestoreArrayRead(matctx->tg,&array);
1235:   }

1237:   VecDuplicate(matctx->t,&matctx->Rv);
1238:   VecDuplicate(matctx->t,&matctx->Vi);
1239:   if (flg) PetscFree(A);
1240:   return 0;
1241: }

1243: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1244: {
1245:   PetscInt       i;

1247:   VecScatterDestroy(&matctx->scatter_sub);
1248:   for (i=0;i<matctx->subc->n;i++) VecScatterDestroy(&matctx->scatter_id[i]);
1249:   for (i=0;i<pep->nmat;i++) MatDestroy(&matctx->A[i]);
1250:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1251:     for (i=0;i<matctx->subc->n;i++) VecScatterDestroy(&matctx->scatterp_id[i]);
1252:     VecDestroy(&matctx->tp);
1253:     VecDestroy(&matctx->tpg);
1254:     BVDestroy(&matctx->W);
1255:   }
1256:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1257:   BVDestroy(&matctx->V);
1258:   VecDestroy(&matctx->t);
1259:   VecDestroy(&matctx->tg);
1260:   VecDestroy(&matctx->Rv);
1261:   VecDestroy(&matctx->Vi);
1262:   return 0;
1263: }

1265: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1266: {
1267:   PetscScalar         *H,*work,*dH,*fH,*dVS;
1268:   PetscInt            ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1269:   PetscBLASInt        k_,ld_,*p,info;
1270:   BV                  dV;
1271:   PetscBool           sinvert,flg;
1272:   PEP_REFINE_EXPLICIT *matctx=NULL;
1273:   Vec                 v;
1274:   Mat                 M,P;
1275:   PEP_REFINE_MATSHELL *ctx;

1277:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1279:   /* the input tolerance is not being taken into account (by the moment) */
1280:   its = *maxits;
1281:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1282:   DSGetLeadingDimension(pep->ds,&ldh);
1283:   PetscMalloc1(2*k*k,&dVS);
1284:   STGetTransform(pep->st,&flg);
1285:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1286:     PetscBLASIntCast(k,&k_);
1287:     PetscBLASIntCast(ldh,&ld_);
1288:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1289:     if (sinvert) {
1290:       DSGetArray(pep->ds,DS_MAT_A,&H);
1291:       PetscMalloc1(k,&p);
1292:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1293:       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1294:       SlepcCheckLapackInfo("getrf",info);
1295:       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1296:       SlepcCheckLapackInfo("getri",info);
1297:       PetscFPTrapPop();
1298:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1299:       pep->ops->backtransform = NULL;
1300:     }
1301:     if (sigma!=0.0) {
1302:       DSGetArray(pep->ds,DS_MAT_A,&H);
1303:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1304:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1305:       pep->ops->backtransform = NULL;
1306:     }
1307:   }
1308:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1309:     DSGetArray(pep->ds,DS_MAT_A,&H);
1310:     for (j=0;j<k;j++) {
1311:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1312:     }
1313:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1314:     if (!flg) {
1315:       /* Restore original values */
1316:       for (i=0;i<pep->nmat;i++) {
1317:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1318:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1319:       }
1320:     }
1321:   }
1322:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1323:     for (i=0;i<k;i++) {
1324:       BVGetColumn(pep->V,i,&v);
1325:       VecPointwiseMult(v,v,pep->Dr);
1326:       BVRestoreColumn(pep->V,i,&v);
1327:     }
1328:   }
1329:   DSGetArray(pep->ds,DS_MAT_A,&H);

1331:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1332:   /* check if H is in Schur form */
1333:   for (i=0;i<k-1;i++) {
1334: #if !defined(PETSC_USE_COMPLEX)
1336: #else
1338: #endif
1339:   }
1341:   BVSetActiveColumns(pep->V,0,k);
1342:   BVDuplicateResize(pep->V,k,&dV);
1343:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1344:     PetscMalloc1(1,&matctx);
1345:     if (nsubc>1) { /* splitting in subcommunicators */
1346:       matctx->subc = pep->refinesubc;
1347:       NRefSubcommSetup(pep,k,matctx,nsubc);
1348:     } else matctx->subc=NULL;
1349:   }

1351:   /* Loop performing iterative refinements */
1352:   for (i=0;i<its;i++) {
1353:     /* Pre-compute the polynomial basis evaluated in H */
1354:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1355:     PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1356:     /* Solve the linear system */
1357:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1358:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1359:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1360:   }
1361:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1362:   if (!flg && sinvert) PetscFree(p);
1363:   PetscFree3(dH,fH,work);
1364:   PetscFree(dVS);
1365:   BVDestroy(&dV);
1366:   switch (pep->scheme) {
1367:   case PEP_REFINE_SCHEME_EXPLICIT:
1368:     for (i=0;i<2;i++) MatDestroy(&matctx->E[i]);
1369:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1370:     VecDestroy(&matctx->tN);
1371:     VecDestroy(&matctx->ttN);
1372:     VecDestroy(&matctx->t1);
1373:     if (nsubc>1) NRefSubcommDestroy(pep,matctx);
1374:     else {
1375:       VecDestroy(&matctx->vseq);
1376:       VecScatterDestroy(&matctx->scatterctx);
1377:     }
1378:     PetscFree(matctx);
1379:     KSPGetOperators(pep->refineksp,&M,NULL);
1380:     MatDestroy(&M);
1381:     break;
1382:   case PEP_REFINE_SCHEME_MBE:
1383:     BVDestroy(&matctx->W);
1384:     BVDestroy(&matctx->Wt);
1385:     BVDestroy(&matctx->M2);
1386:     BVDestroy(&matctx->M3);
1387:     MatDestroy(&matctx->M1);
1388:     VecDestroy(&matctx->t);
1389:     PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1390:     if (nsubc>1) NRefSubcommDestroy(pep,matctx);
1391:     PetscFree(matctx);
1392:     break;
1393:   case PEP_REFINE_SCHEME_SCHUR:
1394:     KSPGetOperators(pep->refineksp,&M,&P);
1395:     MatShellGetContext(M,&ctx);
1396:     PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1397:     MatDestroy(&ctx->M1);
1398:     BVDestroy(&ctx->M2);
1399:     BVDestroy(&ctx->M3);
1400:     BVDestroy(&ctx->W);
1401:     VecDestroy(&ctx->t);
1402:     PetscFree(ctx);
1403:     MatDestroy(&M);
1404:     MatDestroy(&P);
1405:     break;
1406:   }
1407:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1408:   return 0;
1409: }