Actual source code: bvblas.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BV private kernels that use the BLAS
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>

 17: #define BLOCKSIZE 64

 19: /*
 20:     C := alpha*A*B + beta*C

 22:     A is mxk (ld=m), B is kxn (ld=ldb), C is mxn (ld=m)
 23: */
 24: PetscErrorCode BVMult_BLAS_Private(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,PetscInt ldb_,PetscScalar alpha,const PetscScalar *A,const PetscScalar *B,PetscScalar beta,PetscScalar *C)
 25: {
 26:   PetscBLASInt   m,n,k,ldb;
 27: #if defined(PETSC_HAVE_FBLASLAPACK) || defined(PETSC_HAVE_F2CBLASLAPACK)
 28:   PetscBLASInt   l,bs=BLOCKSIZE;
 29: #endif

 31:   PetscBLASIntCast(m_,&m);
 32:   PetscBLASIntCast(n_,&n);
 33:   PetscBLASIntCast(k_,&k);
 34:   PetscBLASIntCast(ldb_,&ldb);
 35: #if defined(PETSC_HAVE_FBLASLAPACK) || defined(PETSC_HAVE_F2CBLASLAPACK)
 36:   l = m % bs;
 37:   if (l) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&k,&alpha,(PetscScalar*)A,&m,(PetscScalar*)B,&ldb,&beta,C,&m));
 38:   for (;l<m;l+=bs) {
 39:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&bs,&n,&k,&alpha,(PetscScalar*)A+l,&m,(PetscScalar*)B,&ldb,&beta,C+l,&m));
 40:   }
 41: #else
 42:   if (m) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&m,&n,&k,&alpha,(PetscScalar*)A,&m,(PetscScalar*)B,&ldb,&beta,C,&m));
 43: #endif
 44:   PetscLogFlops(2.0*m*n*k);
 45:   return 0;
 46: }

 48: /*
 49:     y := alpha*A*x + beta*y

 51:     A is nxk (ld=n)
 52: */
 53: PetscErrorCode BVMultVec_BLAS_Private(BV bv,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *A,const PetscScalar *x,PetscScalar beta,PetscScalar *y)
 54: {
 55:   PetscBLASInt   n,k,one=1;

 57:   PetscBLASIntCast(n_,&n);
 58:   PetscBLASIntCast(k_,&k);
 59:   if (n) PetscCallBLAS("BLASgemv",BLASgemv_("N",&n,&k,&alpha,A,&n,x,&one,&beta,y,&one));
 60:   PetscLogFlops(2.0*n*k);
 61:   return 0;
 62: }

 64: /*
 65:     A(:,s:e-1) := A*B(:,s:e-1)

 67:     A is mxk (ld=m), B is kxn (ld=ldb)  n=e-s
 68: */
 69: PetscErrorCode BVMultInPlace_BLAS_Private(BV bv,PetscInt m_,PetscInt k_,PetscInt ldb_,PetscInt s,PetscInt e,PetscScalar *A,const PetscScalar *B,PetscBool btrans)
 70: {
 71:   PetscScalar    *pb,zero=0.0,one=1.0;
 72:   PetscBLASInt   m,n,k,l,ldb,bs=BLOCKSIZE;
 73:   PetscInt       j,n_=e-s;
 74:   const char     *bt;

 76:   PetscBLASIntCast(m_,&m);
 77:   PetscBLASIntCast(n_,&n);
 78:   PetscBLASIntCast(k_,&k);
 79:   PetscBLASIntCast(ldb_,&ldb);
 80:   BVAllocateWork_Private(bv,BLOCKSIZE*n_);
 81:   if (PetscUnlikely(btrans)) {
 82:     pb = (PetscScalar*)B+s;
 83:     bt = "C";
 84:   } else {
 85:     pb = (PetscScalar*)B+s*ldb;
 86:     bt = "N";
 87:   }
 88:   l = m % bs;
 89:   if (l) {
 90:     PetscCallBLAS("BLASgemm",BLASgemm_("N",bt,&l,&n,&k,&one,A,&m,pb,&ldb,&zero,bv->work,&l));
 91:     for (j=0;j<n;j++) PetscArraycpy(A+(s+j)*m,bv->work+j*l,l);
 92:   }
 93:   for (;l<m;l+=bs) {
 94:     PetscCallBLAS("BLASgemm",BLASgemm_("N",bt,&bs,&n,&k,&one,A+l,&m,pb,&ldb,&zero,bv->work,&bs));
 95:     for (j=0;j<n;j++) PetscArraycpy(A+(s+j)*m+l,bv->work+j*bs,bs);
 96:   }
 97:   PetscLogFlops(2.0*m*n*k);
 98:   return 0;
 99: }

101: /*
102:     V := V*B

104:     V is mxn (ld=m), B is nxn (ld=k)
105: */
106: PetscErrorCode BVMultInPlace_Vecs_Private(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,Vec *V,const PetscScalar *B,PetscBool btrans)
107: {
108:   PetscScalar       zero=0.0,one=1.0,*out,*pout;
109:   const PetscScalar *pin;
110:   PetscBLASInt      m = 0,n,k,l,bs=BLOCKSIZE;
111:   PetscInt          j;
112:   const char        *bt;

114:   PetscBLASIntCast(m_,&m);
115:   PetscBLASIntCast(n_,&n);
116:   PetscBLASIntCast(k_,&k);
117:   BVAllocateWork_Private(bv,2*BLOCKSIZE*n_);
118:   out = bv->work+BLOCKSIZE*n_;
119:   if (btrans) bt = "C";
120:   else bt = "N";
121:   l = m % bs;
122:   if (l) {
123:     for (j=0;j<n;j++) {
124:       VecGetArrayRead(V[j],&pin);
125:       PetscArraycpy(bv->work+j*l,pin,l);
126:       VecRestoreArrayRead(V[j],&pin);
127:     }
128:     PetscCallBLAS("BLASgemm",BLASgemm_("N",bt,&l,&n,&n,&one,bv->work,&l,(PetscScalar*)B,&k,&zero,out,&l));
129:     for (j=0;j<n;j++) {
130:       VecGetArray(V[j],&pout);
131:       PetscArraycpy(pout,out+j*l,l);
132:       VecRestoreArray(V[j],&pout);
133:     }
134:   }
135:   for (;l<m;l+=bs) {
136:     for (j=0;j<n;j++) {
137:       VecGetArrayRead(V[j],&pin);
138:       PetscArraycpy(bv->work+j*bs,pin+l,bs);
139:       VecRestoreArrayRead(V[j],&pin);
140:     }
141:     PetscCallBLAS("BLASgemm",BLASgemm_("N",bt,&bs,&n,&n,&one,bv->work,&bs,(PetscScalar*)B,&k,&zero,out,&bs));
142:     for (j=0;j<n;j++) {
143:       VecGetArray(V[j],&pout);
144:       PetscArraycpy(pout+l,out+j*bs,bs);
145:       VecRestoreArray(V[j],&pout);
146:     }
147:   }
148:   PetscLogFlops(2.0*n*n*k);
149:   return 0;
150: }

152: /*
153:     B := alpha*A + beta*B

155:     A,B are nxk (ld=n)
156: */
157: PetscErrorCode BVAXPY_BLAS_Private(BV bv,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *A,PetscScalar beta,PetscScalar *B)
158: {
159:   PetscBLASInt   m,one=1;

161:   PetscBLASIntCast(n_*k_,&m);
162:   if (beta!=(PetscScalar)1.0) {
163:     PetscCallBLAS("BLASscal",BLASscal_(&m,&beta,B,&one));
164:     PetscLogFlops(m);
165:   }
166:   PetscCallBLAS("BLASaxpy",BLASaxpy_(&m,&alpha,A,&one,B,&one));
167:   PetscLogFlops(2.0*m);
168:   return 0;
169: }

171: /*
172:     C := A'*B

174:     A' is mxk (ld=k), B is kxn (ld=k), C is mxn (ld=ldc)
175: */
176: PetscErrorCode BVDot_BLAS_Private(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,PetscInt ldc_,const PetscScalar *A,const PetscScalar *B,PetscScalar *C,PetscBool mpi)
177: {
178:   PetscScalar    zero=0.0,one=1.0,*CC;
179:   PetscBLASInt   m,n,k,ldc,j;
180:   PetscMPIInt    len;

182:   PetscBLASIntCast(m_,&m);
183:   PetscBLASIntCast(n_,&n);
184:   PetscBLASIntCast(k_,&k);
185:   PetscBLASIntCast(ldc_,&ldc);
186:   if (mpi) {
187:     if (ldc==m) {
188:       BVAllocateWork_Private(bv,m*n);
189:       if (k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&m,&n,&k,&one,(PetscScalar*)A,&k,(PetscScalar*)B,&k,&zero,bv->work,&ldc));
190:       else PetscArrayzero(bv->work,m*n);
191:       PetscMPIIntCast(m*n,&len);
192:       MPIU_Allreduce(bv->work,C,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv));
193:     } else {
194:       BVAllocateWork_Private(bv,2*m*n);
195:       CC = bv->work+m*n;
196:       if (k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&m,&n,&k,&one,(PetscScalar*)A,&k,(PetscScalar*)B,&k,&zero,bv->work,&m));
197:       else PetscArrayzero(bv->work,m*n);
198:       PetscMPIIntCast(m*n,&len);
199:       MPIU_Allreduce(bv->work,CC,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv));
200:       for (j=0;j<n;j++) PetscArraycpy(C+j*ldc,CC+j*m,m);
201:     }
202:   } else {
203:     if (k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&m,&n,&k,&one,(PetscScalar*)A,&k,(PetscScalar*)B,&k,&zero,C,&ldc));
204:   }
205:   PetscLogFlops(2.0*m*n*k);
206:   return 0;
207: }

209: /*
210:     y := A'*x

212:     A is nxk (ld=n)
213: */
214: PetscErrorCode BVDotVec_BLAS_Private(BV bv,PetscInt n_,PetscInt k_,const PetscScalar *A,const PetscScalar *x,PetscScalar *y,PetscBool mpi)
215: {
216:   PetscScalar    zero=0.0,done=1.0;
217:   PetscBLASInt   n,k,one=1;
218:   PetscMPIInt    len;

220:   PetscBLASIntCast(n_,&n);
221:   PetscBLASIntCast(k_,&k);
222:   if (mpi) {
223:     BVAllocateWork_Private(bv,k);
224:     if (n) PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&k,&done,A,&n,x,&one,&zero,bv->work,&one));
225:     else PetscArrayzero(bv->work,k);
226:     PetscMPIIntCast(k,&len);
227:     MPIU_Allreduce(bv->work,y,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv));
228:   } else {
229:     if (n) PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&k,&done,A,&n,x,&one,&zero,y,&one));
230:   }
231:   PetscLogFlops(2.0*n*k);
232:   return 0;
233: }

235: /*
236:     Scale n scalars
237: */
238: PetscErrorCode BVScale_BLAS_Private(BV bv,PetscInt n_,PetscScalar *A,PetscScalar alpha)
239: {
240:   PetscBLASInt   n,one=1;

242:   if (PetscUnlikely(alpha == (PetscScalar)0.0)) PetscArrayzero(A,n_);
243:   else if (alpha!=(PetscScalar)1.0) {
244:     PetscBLASIntCast(n_,&n);
245:     PetscCallBLAS("BLASscal",BLASscal_(&n,&alpha,A,&one));
246:     PetscLogFlops(n);
247:   }
248:   return 0;
249: }