Actual source code: dshep.c

slepc-3.18.3 2023-03-24
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 15: {
 16:   DSAllocateMat_Private(ds,DS_MAT_A);
 17:   DSAllocateMat_Private(ds,DS_MAT_Q);
 18:   DSAllocateMat_Private(ds,DS_MAT_T);
 19:   PetscFree(ds->perm);
 20:   PetscMalloc1(ld,&ds->perm);
 21:   return 0;
 22: }

 24: /*   0       l           k                 n-1
 25:     -----------------------------------------
 26:     |*       .           .                  |
 27:     |  *     .           .                  |
 28:     |    *   .           .                  |
 29:     |      * .           .                  |
 30:     |. . . . o           o                  |
 31:     |          o         o                  |
 32:     |            o       o                  |
 33:     |              o     o                  |
 34:     |                o   o                  |
 35:     |                  o o                  |
 36:     |. . . . o o o o o o o x                |
 37:     |                    x x x              |
 38:     |                      x x x            |
 39:     |                        x x x          |
 40:     |                          x x x        |
 41:     |                            x x x      |
 42:     |                              x x x    |
 43:     |                                x x x  |
 44:     |                                  x x x|
 45:     |                                    x x|
 46:     -----------------------------------------
 47: */

 49: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
 50: {
 51:   PetscReal      *T;
 52:   PetscScalar    *A;
 53:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 55:   /* switch from compact (arrow) to dense storage */
 56:   MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A);
 57:   DSGetArrayReal(ds,DS_MAT_T,&T);
 58:   PetscArrayzero(A,ld*ld);
 59:   for (i=0;i<k;i++) {
 60:     A[i+i*ld] = T[i];
 61:     A[k+i*ld] = T[i+ld];
 62:     A[i+k*ld] = T[i+ld];
 63:   }
 64:   A[k+k*ld] = T[k];
 65:   for (i=k+1;i<n;i++) {
 66:     A[i+i*ld]     = T[i];
 67:     A[i-1+i*ld]   = T[i-1+ld];
 68:     A[i+(i-1)*ld] = T[i-1+ld];
 69:   }
 70:   if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
 71:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A);
 72:   DSRestoreArrayReal(ds,DS_MAT_T,&T);
 73:   return 0;
 74: }

 76: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
 77: {
 78:   PetscViewerFormat format;
 79:   PetscInt          i,j,r,c,rows;
 80:   PetscReal         *T,value;
 81:   const char        *methodname[] = {
 82:                      "Implicit QR method (_steqr)",
 83:                      "Relatively Robust Representations (_stevr)",
 84:                      "Divide and Conquer method (_stedc)",
 85:                      "Block Divide and Conquer method (dsbtdc)"
 86:   };
 87:   const int         nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);

 89:   PetscViewerGetFormat(viewer,&format);
 90:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 91:     if (ds->bs>1) PetscViewerASCIIPrintf(viewer,"block size: %" PetscInt_FMT "\n",ds->bs);
 92:     if (ds->method<nmeth) PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
 93:     return 0;
 94:   }
 95:   if (ds->compact) {
 96:     DSGetArrayReal(ds,DS_MAT_T,&T);
 97:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
 98:     rows = ds->extrarow? ds->n+1: ds->n;
 99:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
100:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,ds->n);
101:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",3*ds->n);
102:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
103:       for (i=0;i<ds->n;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)T[i]);
104:       for (i=0;i<rows-1;i++) {
105:         r = PetscMax(i+2,ds->k+1);
106:         c = i+1;
107:         PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",r,c,(double)T[i+ds->ld]);
108:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
109:           PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",c,r,(double)T[i+ds->ld]);
110:         }
111:       }
112:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
113:     } else {
114:       for (i=0;i<rows;i++) {
115:         for (j=0;j<ds->n;j++) {
116:           if (i==j) value = T[i];
117:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = T[PetscMin(i,j)+ds->ld];
118:           else if (i==j+1 && i>ds->k) value = T[i-1+ds->ld];
119:           else if (i+1==j && j>ds->k) value = T[j-1+ds->ld];
120:           else value = 0.0;
121:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
122:         }
123:         PetscViewerASCIIPrintf(viewer,"\n");
124:       }
125:     }
126:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
127:     PetscViewerFlush(viewer);
128:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
129:   } else DSViewMat(ds,viewer,DS_MAT_A);
130:   if (ds->state>DS_STATE_INTERMEDIATE) DSViewMat(ds,viewer,DS_MAT_Q);
131:   return 0;
132: }

134: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
135: {
136:   PetscScalar       *Z;
137:   const PetscScalar *Q;
138:   PetscInt          ld = ds->ld;

140:   switch (mat) {
141:     case DS_MAT_X:
142:     case DS_MAT_Y:
143:       if (j) {
144:         MatDenseGetArray(ds->omat[mat],&Z);
145:         if (ds->state>=DS_STATE_CONDENSED) {
146:           MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q);
147:           PetscArraycpy(Z+(*j)*ld,Q+(*j)*ld,ld);
148:           if (rnorm) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
149:           MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q);
150:         } else {
151:           PetscArrayzero(Z+(*j)*ld,ld);
152:           Z[(*j)+(*j)*ld] = 1.0;
153:           if (rnorm) *rnorm = 0.0;
154:         }
155:         MatDenseRestoreArray(ds->omat[mat],&Z);
156:       } else {
157:         if (ds->state>=DS_STATE_CONDENSED) MatCopy(ds->omat[DS_MAT_Q],ds->omat[mat],SAME_NONZERO_PATTERN);
158:         else DSSetIdentity(ds,mat);
159:       }
160:       break;
161:     case DS_MAT_U:
162:     case DS_MAT_V:
163:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
164:     default:
165:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
166:   }
167:   return 0;
168: }

170: /*
171:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

173:                 [ d 0 0 0 e ]
174:                 [ 0 d 0 0 e ]
175:             A = [ 0 0 d 0 e ]
176:                 [ 0 0 0 d e ]
177:                 [ e e e e d ]

179:   to tridiagonal form

181:                 [ d e 0 0 0 ]
182:                 [ e d e 0 0 ]
183:    T = Q'*A*Q = [ 0 e d e 0 ],
184:                 [ 0 0 e d e ]
185:                 [ 0 0 0 e d ]

187:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
188:   perform the reduction, which requires O(n**2) flops. The accumulation
189:   of the orthogonal factor Q, however, requires O(n**3) flops.

191:   Arguments
192:   =========

194:   N       (input) INTEGER
195:           The order of the matrix A.  N >= 0.

197:   D       (input/output) DOUBLE PRECISION array, dimension (N)
198:           On entry, the diagonal entries of the matrix A to be
199:           reduced.
200:           On exit, the diagonal entries of the reduced matrix T.

202:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
203:           On entry, the off-diagonal entries of the matrix A to be
204:           reduced.
205:           On exit, the subdiagonal entries of the reduced matrix T.

207:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
208:           On exit, the orthogonal matrix Q.

210:   LDQ     (input) INTEGER
211:           The leading dimension of the array Q.

213:   Note
214:   ====
215:   Based on Fortran code contributed by Daniel Kressner
216: */
217: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
218: {
219:   PetscBLASInt i,j,j2,one=1;
220:   PetscReal    c,s,p,off,temp;

222:   if (n<=2) return 0;

224:   for (j=0;j<n-2;j++) {

226:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
227:     temp = e[j+1];
228:     PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
229:     s = -s;

231:     /* Apply rotation to diagonal elements */
232:     temp   = d[j+1];
233:     e[j]   = c*s*(temp-d[j]);
234:     d[j+1] = s*s*d[j] + c*c*temp;
235:     d[j]   = c*c*d[j] + s*s*temp;

237:     /* Apply rotation to Q */
238:     j2 = j+2;
239:     PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

241:     /* Chase newly introduced off-diagonal entry to the top left corner */
242:     for (i=j-1;i>=0;i--) {
243:       off  = -s*e[i];
244:       e[i] = c*e[i];
245:       temp = e[i+1];
246:       PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
247:       s = -s;
248:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
249:       p = s*temp;
250:       d[i+1] += p;
251:       d[i] -= p;
252:       e[i] = -e[i] - c*temp;
253:       j2 = j+2;
254:       PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
255:     }
256:   }
257:   return 0;
258: }

260: /*
261:    Reduce to tridiagonal form by means of ArrowTridiag.
262: */
263: static PetscErrorCode DSIntermediate_HEP(DS ds)
264: {
265:   PetscInt          i;
266:   PetscBLASInt      n1 = 0,n2,lwork,info,l = 0,n = 0,ld,off;
267:   PetscScalar       *Q,*work,*tau;
268:   const PetscScalar *A;
269:   PetscReal         *d,*e;
270:   Mat               At,Qt;  /* trailing submatrices */

272:   PetscBLASIntCast(ds->n,&n);
273:   PetscBLASIntCast(ds->l,&l);
274:   PetscBLASIntCast(ds->ld,&ld);
275:   PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1); /* size of leading block, excl. locked */
276:   n2 = n-l;     /* n2 = n1 + size of trailing block */
277:   off = l+l*ld;
278:   DSGetArrayReal(ds,DS_MAT_T,&d);
279:   e = d+ld;
280:   DSSetIdentity(ds,DS_MAT_Q);
281:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);

283:   if (ds->compact) {

285:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

287:   } else {

289:     MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A);
290:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

292:     if (ds->state<DS_STATE_INTERMEDIATE) {
293:       MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At);
294:       MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt);
295:       MatCopy(At,Qt,SAME_NONZERO_PATTERN);
296:       MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At);
297:       MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt);
298:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
299:       tau  = ds->work;
300:       work = ds->work+ld;
301:       lwork = ld*ld;
302:       PetscCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n2,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
303:       SlepcCheckLapackInfo("sytrd",info);
304:       PetscCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n2,Q+off,&ld,tau,work,&lwork,&info));
305:       SlepcCheckLapackInfo("orgtr",info);
306:     } else {
307:       /* copy tridiagonal to d,e */
308:       for (i=l;i<n;i++)   d[i] = PetscRealPart(A[i+i*ld]);
309:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
310:     }
311:     MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A);
312:   }
313:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
314:   DSRestoreArrayReal(ds,DS_MAT_T,&d);
315:   return 0;
316: }

318: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
319: {
320:   PetscInt       n,l,i,*perm,ld=ds->ld;
321:   PetscScalar    *A;
322:   PetscReal      *d;

324:   if (!ds->sc) return 0;
325:   n = ds->n;
326:   l = ds->l;
327:   DSGetArrayReal(ds,DS_MAT_T,&d);
328:   perm = ds->perm;
329:   if (!rr) DSSortEigenvaluesReal_Private(ds,d,perm);
330:   else DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
331:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
332:   DSPermuteColumns_Private(ds,l,n,n,DS_MAT_Q,perm);
333:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
334:   if (!ds->compact) {
335:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
336:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
337:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
338:   }
339:   DSRestoreArrayReal(ds,DS_MAT_T,&d);
340:   return 0;
341: }

343: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
344: {
345:   PetscInt          i;
346:   PetscBLASInt      n,ld,incx=1;
347:   PetscScalar       *A,*x,*y,one=1.0,zero=0.0;
348:   PetscReal         *T,*e,beta;
349:   const PetscScalar *Q;

351:   PetscBLASIntCast(ds->n,&n);
352:   PetscBLASIntCast(ds->ld,&ld);
353:   MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q);
354:   if (ds->compact) {
355:     DSGetArrayReal(ds,DS_MAT_T,&T);
356:     e = T+ld;
357:     beta = e[n-1];   /* in compact, we assume all entries are zero except the last one */
358:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
359:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
360:     ds->k = n;
361:   } else {
362:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
363:     DSAllocateWork_Private(ds,2*ld,0,0);
364:     x = ds->work;
365:     y = ds->work+ld;
366:     for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
367:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
368:     for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
369:     ds->k = n;
370:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
371:   }
372:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q);
373:   return 0;
374: }

376: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
377: {
378:   PetscInt       i;
379:   PetscBLASInt   n1,info,l = 0,n = 0,ld,off;
380:   PetscScalar    *Q,*A;
381:   PetscReal      *d,*e;

384:   PetscBLASIntCast(ds->n,&n);
385:   PetscBLASIntCast(ds->l,&l);
386:   PetscBLASIntCast(ds->ld,&ld);
387:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
388:   off = l+l*ld;
389:   DSGetArrayReal(ds,DS_MAT_T,&d);
390:   e = d+ld;

392:   /* Reduce to tridiagonal form */
393:   DSIntermediate_HEP(ds);

395:   /* Solve the tridiagonal eigenproblem */
396:   for (i=0;i<l;i++) wr[i] = d[i];

398:   DSAllocateWork_Private(ds,0,2*ld,0);
399:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
400:   PetscCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&info));
401:   SlepcCheckLapackInfo("steqr",info);
402:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
403:   for (i=l;i<n;i++) wr[i] = d[i];

405:   /* Create diagonal matrix as a result */
406:   if (ds->compact) PetscArrayzero(e,n-1);
407:   else {
408:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
409:     for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
410:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
411:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
412:   }
413:   DSRestoreArrayReal(ds,DS_MAT_T,&d);

415:   /* Set zero wi */
416:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
417:   return 0;
418: }

420: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
421: {
422:   Mat            At,Qt;  /* trailing submatrices */
423:   PetscInt       i;
424:   PetscBLASInt   n1 = 0,n2 = 0,n3,lrwork,liwork,info,l = 0,n = 0,m = 0,ld,off,il,iu,*isuppz;
425:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
426:   PetscReal      *d,*e,abstol=0.0,vl,vu;
427: #if defined(PETSC_USE_COMPLEX)
428:   PetscInt       j;
429:   PetscReal      *Qr,*ritz;
430: #endif

433:   PetscBLASIntCast(ds->n,&n);
434:   PetscBLASIntCast(ds->l,&l);
435:   PetscBLASIntCast(ds->ld,&ld);
436:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
437:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
438:   n3 = n1+n2;
439:   off = l+l*ld;
440:   DSGetArrayReal(ds,DS_MAT_T,&d);
441:   e = d+ld;

443:   /* Reduce to tridiagonal form */
444:   DSIntermediate_HEP(ds);

446:   /* Solve the tridiagonal eigenproblem */
447:   for (i=0;i<l;i++) wr[i] = d[i];

449:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
450:     DSAllocateMat_Private(ds,DS_MAT_W);
451:     MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
452:   }
453:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
454:   lrwork = 20*ld;
455:   liwork = 10*ld;
456: #if defined(PETSC_USE_COMPLEX)
457:   DSAllocateWork_Private(ds,0,lrwork+ld+ld*ld,liwork+2*ld);
458: #else
459:   DSAllocateWork_Private(ds,0,lrwork+ld,liwork+2*ld);
460: #endif
461:   isuppz = ds->iwork+liwork;
462: #if defined(PETSC_USE_COMPLEX)
463:   ritz = ds->rwork+lrwork;
464:   Qr   = ds->rwork+lrwork+ld;
465:   PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,Qr+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
466:   for (i=l;i<n;i++) wr[i] = ritz[i];
467: #else
468:   PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
469: #endif
470:   SlepcCheckLapackInfo("stevr",info);
471: #if defined(PETSC_USE_COMPLEX)
472:   for (i=l;i<n;i++)
473:     for (j=l;j<n;j++)
474:       Q[i+j*ld] = Qr[i+j*ld];
475: #endif
476:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
477:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
478:     MatDenseGetArray(ds->omat[DS_MAT_W],&W);
479:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
480:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
481:     MatDenseRestoreArray(ds->omat[DS_MAT_W],&W);
482:     MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At);
483:     MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt);
484:     MatCopy(At,Qt,SAME_NONZERO_PATTERN);
485:     MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At);
486:     MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt);
487:   }
488:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
489:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

491:   /* Create diagonal matrix as a result */
492:   if (ds->compact) PetscArrayzero(e,n-1);
493:   else {
494:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
495:     for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
496:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
497:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
498:   }
499:   DSRestoreArrayReal(ds,DS_MAT_T,&d);

501:   /* Set zero wi */
502:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
503:   return 0;
504: }

506: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
507: {
508:   PetscInt       i;
509:   PetscBLASInt   n1,info,l = 0,ld,off,lrwork,liwork;
510:   PetscScalar    *Q,*A;
511:   PetscReal      *d,*e;
512: #if defined(PETSC_USE_COMPLEX)
513:   PetscBLASInt   lwork;
514:   PetscInt       j;
515: #endif

518:   PetscBLASIntCast(ds->l,&l);
519:   PetscBLASIntCast(ds->ld,&ld);
520:   PetscBLASIntCast(ds->n-ds->l,&n1);
521:   off = l+l*ld;
522:   DSGetArrayReal(ds,DS_MAT_T,&d);
523:   e = d+ld;

525:   /* Reduce to tridiagonal form */
526:   DSIntermediate_HEP(ds);

528:   /* Solve the tridiagonal eigenproblem */
529:   for (i=0;i<l;i++) wr[i] = d[i];

531:   lrwork = 5*n1*n1+3*n1+1;
532:   liwork = 5*n1*n1+6*n1+6;
533:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
534: #if !defined(PETSC_USE_COMPLEX)
535:   DSAllocateWork_Private(ds,0,lrwork,liwork);
536:   PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
537: #else
538:   lwork = ld*ld;
539:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
540:   PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
541:   /* Fixing Lapack bug*/
542:   for (j=ds->l;j<ds->n;j++)
543:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
544: #endif
545:   SlepcCheckLapackInfo("stedc",info);
546:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
547:   for (i=l;i<ds->n;i++) wr[i] = d[i];

549:   /* Create diagonal matrix as a result */
550:   if (ds->compact) PetscArrayzero(e,ds->n-1);
551:   else {
552:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
553:     for (i=l;i<ds->n;i++) PetscArrayzero(A+l+i*ld,ds->n-l);
554:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
555:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
556:   }
557:   DSRestoreArrayReal(ds,DS_MAT_T,&d);

559:   /* Set zero wi */
560:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
561:   return 0;
562: }

564: #if !defined(PETSC_USE_COMPLEX)
565: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
566: {
567:   PetscBLASInt   i,j,k,m,n = 0,info,nblks,bs = 0,ld = 0,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
568:   PetscScalar    *Q,*A;
569:   PetscReal      *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;

573:   PetscBLASIntCast(ds->ld,&ld);
574:   PetscBLASIntCast(ds->bs,&bs);
575:   PetscBLASIntCast(ds->n,&n);
576:   nblks = n/bs;
577:   DSGetArrayReal(ds,DS_MAT_T,&d);
578:   e = d+ld;
579:   lrwork = 4*n*n+60*n+1;
580:   liwork = 5*n+5*nblks-1;
581:   lde = 2*bs+1;
582:   DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
583:   D      = ds->work;
584:   E      = ds->work+bs*n;
585:   rwork  = ds->rwork;
586:   ksizes = ds->iwork;
587:   iwork  = ds->iwork+nblks;
588:   PetscArrayzero(iwork,liwork);

590:   /* Copy matrix to block tridiagonal format */
591:   MatDenseGetArray(ds->omat[DS_MAT_A],&A);
592:   j=0;
593:   for (i=0;i<nblks;i++) {
594:     ksizes[i]=bs;
595:     for (k=0;k<bs;k++)
596:       for (m=0;m<bs;m++)
597:         D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
598:     j = j + bs;
599:   }
600:   j=0;
601:   for (i=0;i<nblks-1;i++) {
602:     for (k=0;k<bs;k++)
603:       for (m=0;m<bs;m++)
604:         E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
605:     j = j + bs;
606:   }
607:   MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);

609:   /* Solve the block tridiagonal eigenproblem */
610:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
611:   BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
612:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
613:   for (i=0;i<ds->n;i++) wr[i] = d[i];

615:   /* Create diagonal matrix as a result */
616:   if (ds->compact) PetscArrayzero(e,ds->n-1);
617:   else {
618:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
619:     for (i=0;i<ds->n;i++) PetscArrayzero(A+i*ld,ds->n);
620:     for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
621:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
622:   }
623:   DSRestoreArrayReal(ds,DS_MAT_T,&d);

625:   /* Set zero wi */
626:   if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
627:   return 0;
628: }
629: #endif

631: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n,PetscBool trim)
632: {
633:   PetscInt    i,ld=ds->ld,l=ds->l;
634:   PetscScalar *A;

636:   if (!ds->compact && ds->extrarow) MatDenseGetArray(ds->omat[DS_MAT_A],&A);
637:   if (trim) {
638:     if (!ds->compact && ds->extrarow) {   /* clean extra row */
639:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
640:     }
641:     ds->l = 0;
642:     ds->k = 0;
643:     ds->n = n;
644:     ds->t = ds->n;   /* truncated length equal to the new dimension */
645:   } else {
646:     if (!ds->compact && ds->extrarow && ds->k==ds->n) {
647:       /* copy entries of extra row to the new position, then clean last row */
648:       for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
649:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
650:     }
651:     ds->k = (ds->extrarow)? n: 0;
652:     ds->t = ds->n;   /* truncated length equal to previous dimension */
653:     ds->n = n;
654:   }
655:   if (!ds->compact && ds->extrarow) MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
656:   return 0;
657: }

659: #if !defined(PETSC_HAVE_MPIUNI)
660: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
661: {
662:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
663:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;
664:   PetscScalar    *A,*Q;
665:   PetscReal      *T;

667:   if (ds->compact) kr = 3*ld;
668:   else k = (ds->n-l)*ld;
669:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
670:   if (eigr) k += (ds->n-l);
671:   DSAllocateWork_Private(ds,k+kr,0,0);
672:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
673:   PetscMPIIntCast(ds->n-l,&n);
674:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
675:   PetscMPIIntCast(ld*3,&ld3);
676:   if (ds->compact) DSGetArrayReal(ds,DS_MAT_T,&T);
677:   else MatDenseGetArray(ds->omat[DS_MAT_A],&A);
678:   if (ds->state>DS_STATE_RAW) MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
679:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
680:   if (!rank) {
681:     if (ds->compact) MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
682:     else MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683:     if (ds->state>DS_STATE_RAW) MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
684:     if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
685:   }
686:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
687:   if (rank) {
688:     if (ds->compact) MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
689:     else MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
690:     if (ds->state>DS_STATE_RAW) MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
691:     if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
692:   }
693:   if (ds->compact) DSRestoreArrayReal(ds,DS_MAT_T,&T);
694:   else MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
695:   if (ds->state>DS_STATE_RAW) MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
696:   return 0;
697: }
698: #endif

700: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
701: {
702:   PetscScalar    *work;
703:   PetscReal      *rwork;
704:   PetscBLASInt   *ipiv;
705:   PetscBLASInt   lwork,info,n,ld;
706:   PetscReal      hn,hin;
707:   PetscScalar    *A;

709:   PetscBLASIntCast(ds->n,&n);
710:   PetscBLASIntCast(ds->ld,&ld);
711:   lwork = 8*ld;
712:   DSAllocateWork_Private(ds,lwork,ld,ld);
713:   work  = ds->work;
714:   rwork = ds->rwork;
715:   ipiv  = ds->iwork;
716:   DSSwitchFormat_HEP(ds);

718:   /* use workspace matrix W to avoid overwriting A */
719:   DSAllocateMat_Private(ds,DS_MAT_W);
720:   MatCopy(ds->omat[DS_MAT_A],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
721:   MatDenseGetArray(ds->omat[DS_MAT_W],&A);

723:   /* norm of A */
724:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

726:   /* norm of inv(A) */
727:   PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
728:   SlepcCheckLapackInfo("getrf",info);
729:   PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
730:   SlepcCheckLapackInfo("getri",info);
731:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
732:   MatDenseRestoreArray(ds->omat[DS_MAT_W],&A);

734:   *cond = hn*hin;
735:   return 0;
736: }

738: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
739: {
740:   PetscInt       i,j,k=ds->k;
741:   PetscScalar    *Q,*A,*R,*tau,*work;
742:   PetscBLASInt   ld,n1,n0,lwork,info;

744:   PetscBLASIntCast(ds->ld,&ld);
745:   DSAllocateWork_Private(ds,ld*ld,0,0);
746:   tau = ds->work;
747:   work = ds->work+ld;
748:   PetscBLASIntCast(ld*(ld-1),&lwork);
749:   DSAllocateMat_Private(ds,DS_MAT_W);
750:   MatDenseGetArray(ds->omat[DS_MAT_A],&A);
751:   MatDenseGetArrayWrite(ds->omat[DS_MAT_Q],&Q);
752:   MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&R);

754:   /* copy I+alpha*A */
755:   PetscArrayzero(Q,ld*ld);
756:   PetscArrayzero(R,ld*ld);
757:   for (i=0;i<k;i++) {
758:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
759:     Q[k+i*ld] = alpha*A[k+i*ld];
760:   }

762:   /* compute qr */
763:   PetscBLASIntCast(k+1,&n1);
764:   PetscBLASIntCast(k,&n0);
765:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
766:   SlepcCheckLapackInfo("geqrf",info);

768:   /* copy R from Q */
769:   for (j=0;j<k;j++)
770:     for (i=0;i<=j;i++)
771:       R[i+j*ld] = Q[i+j*ld];

773:   /* compute orthogonal matrix in Q */
774:   PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
775:   SlepcCheckLapackInfo("orgqr",info);

777:   /* compute the updated matrix of projected problem */
778:   for (j=0;j<k;j++)
779:     for (i=0;i<k+1;i++)
780:       A[j*ld+i] = Q[i*ld+j];
781:   alpha = -1.0/alpha;
782:   PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
783:   for (i=0;i<k;i++)
784:     A[ld*i+i] -= alpha;

786:   MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
787:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_Q],&Q);
788:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&R);
789:   return 0;
790: }

792: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
793: {
794:   if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
795:   else *flg = PETSC_FALSE;
796:   return 0;
797: }

799: /*MC
800:    DSHEP - Dense Hermitian Eigenvalue Problem.

802:    Level: beginner

804:    Notes:
805:    The problem is expressed as A*X = X*Lambda, where A is real symmetric
806:    (or complex Hermitian). Lambda is a diagonal matrix whose diagonal
807:    elements are the arguments of DSSolve(). After solve, A is overwritten
808:    with Lambda.

810:    In the intermediate state A is reduced to tridiagonal form. In compact
811:    storage format, the symmetric tridiagonal matrix is stored in T.

813:    Used DS matrices:
814: +  DS_MAT_A - problem matrix
815: .  DS_MAT_T - symmetric tridiagonal matrix
816: -  DS_MAT_Q - orthogonal/unitary transformation that reduces to tridiagonal form
817:    (intermediate step) or matrix of orthogonal eigenvectors, which is equal to X

819:    Implemented methods:
820: +  0 - Implicit QR (_steqr)
821: .  1 - Multiple Relatively Robust Representations (_stevr)
822: .  2 - Divide and Conquer (_stedc)
823: -  3 - Block Divide and Conquer (real scalars only)

825: .seealso: DSCreate(), DSSetType(), DSType
826: M*/
827: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
828: {
829:   ds->ops->allocate      = DSAllocate_HEP;
830:   ds->ops->view          = DSView_HEP;
831:   ds->ops->vectors       = DSVectors_HEP;
832:   ds->ops->solve[0]      = DSSolve_HEP_QR;
833:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
834:   ds->ops->solve[2]      = DSSolve_HEP_DC;
835: #if !defined(PETSC_USE_COMPLEX)
836:   ds->ops->solve[3]      = DSSolve_HEP_BDC;
837: #endif
838:   ds->ops->sort          = DSSort_HEP;
839: #if !defined(PETSC_HAVE_MPIUNI)
840:   ds->ops->synchronize   = DSSynchronize_HEP;
841: #endif
842:   ds->ops->truncate      = DSTruncate_HEP;
843:   ds->ops->update        = DSUpdateExtraRow_HEP;
844:   ds->ops->cond          = DSCond_HEP;
845:   ds->ops->transrks      = DSTranslateRKS_HEP;
846:   ds->ops->hermitian     = DSHermitian_HEP;
847:   return 0;
848: }