linpack.doc_第1页
linpack.doc_第2页
linpack.doc_第3页
linpack.doc_第4页
linpack.doc_第5页
已阅读5页,还剩16页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

/*Translated to C by Bonnie Toy 5/88 - modified on 2/25/94 to fix a problem with daxpy for unequal increments or equal increments not equal to 1. Jack Dongarra - modified on 08/27/09 fix typo line 270, plus set ix to 0 in the case incx is not 1 Julie LangouTo compile single precision version for Sun-4:cc -DSP -O4 -fsingle -fsingle2 clinpack.c -lmTo compile double precision version for Sun-4:cc -DDP -O4 clinpack.c -lmTo obtain rolled source BLAS, add -DROLL to the command lines.To obtain unrolled source BLAS, add -DUNROLL to the command lines.You must specify one of -DSP or -DDP to compile correctly.You must specify one of -DROLL or -DUNROLL to compile correctly.*/#ifdef SP#define REAL float#define ZERO 0.0#define ONE 1.0#define PREC Single #endif#ifdef DP#define REAL double#define ZERO 0.0e0#define ONE 1.0e0#define PREC Double #endif#define NTIMES 10#ifdef ROLL#define ROLLING Rolled #endif#ifdef UNROLL#define ROLLING Unrolled #endif#include #include static REAL time99;main ()static REAL aa200200,a200201,b200,x200;REAL cray,ops,total,norma,normx;REAL resid,residn,eps,t1,tm,tm2;REAL epslon(),second(),kf;static int ipvt200,n,i,ntimes,info,lda,ldaa,kflops;lda = 201;ldaa = 200;cray = .056; n = 100;fprintf(stdout,ROLLING);fprintf(stdout,PREC);fprintf(stdout,Precision Linpacknn);fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr,Precision Linpacknn); ops = (2.0e0*(n*n*n)/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time00 = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time10 = second() - t1; total = time00 + time10;/* compute a residual to verify results. */ for (i = 0; i n; i+) xi = bi; matgen(a,lda,n,b,&norma); for (i = 0; i n; i+) bi = -bi; dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i fabs(double)bi) ? resid : fabs(double)bi); normx = (normx fabs(double)xi) ? normx : fabs(double)xi); eps = epslon(REAL)ONE); residn = resid/( n*norma*normx*eps ); printf( norm. resid resid machep); printf( x0-1 xn-1-1n);printf( %8.1f %16.8e%16.8e%16.8e%16.8en, (double)residn, (double)resid, (double)eps, (double)x0-1, (double)xn-1-1); fprintf(stderr, times are reported for matrices of order %5dn,n);fprintf(stderr, dgefa dgesl total kflops unit);fprintf(stderr, ration); time20 = total; time30 = ops/(1.0e3*total); time40 = 2.0e3/time30; time50 = total/cray; fprintf(stderr, times for array with leading dimension of%5dn,lda);print_time(0); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time01 = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time11 = second() - t1; total = time01 + time11; time21 = total; time31 = ops/(1.0e3*total); time41 = 2.0e3/time31; time51 = total/cray; matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time02 = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time12 = second() - t1; total = time02 + time12; time22 = total; time32 = ops/(1.0e3*total); time42 = 2.0e3/time32; time52 = total/cray; ntimes = NTIMES; tm2 = 0.0; t1 = second();for (i = 0; i ntimes; i+) tm = second();matgen(a,lda,n,b,&norma);tm2 = tm2 + second() - tm;dgefa(a,lda,n,ipvt,&info); time03 = (second() - t1 - tm2)/ntimes; t1 = second();for (i = 0; i ntimes; i+) dgesl(a,lda,n,ipvt,b,0); time13 = (second() - t1)/ntimes; total = time03 + time13; time23 = total; time33 = ops/(1.0e3*total); time43 = 2.0e3/time33; time53 = total/cray;print_time(1);print_time(2);print_time(3); matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time04 = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time14 = second() - t1; total = time04 + time14; time24 = total; time34 = ops/(1.0e3*total); time44 = 2.0e3/time34; time54 = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time05 = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time15 = second() - t1; total = time05 + time15; time25 = total; time35 = ops/(1.0e3*total); time45 = 2.0e3/time35; time55 = total/cray;matgen(aa,ldaa,n,b,&norma);t1 = second();dgefa(aa,ldaa,n,ipvt,&info);time06 = second() - t1;t1 = second();dgesl(aa,ldaa,n,ipvt,b,0);time16 = second() - t1;total = time06 + time16;time26 = total;time36 = ops/(1.0e3*total);time46 = 2.0e3/time36;time56 = total/cray;ntimes = NTIMES;tm2 = 0;t1 = second();for (i = 0; i ntimes; i+) tm = second();matgen(aa,ldaa,n,b,&norma);tm2 = tm2 + second() - tm;dgefa(aa,ldaa,n,ipvt,&info);time07 = (second() - t1 - tm2)/ntimes;t1 = second();for (i = 0; i ntimes; i+) dgesl(aa,ldaa,n,ipvt,b,0);time17 = (second() - t1)/ntimes;total = time07 + time17;time27 = total;time37 = ops/(1.0e3*total);time47 = 2.0e3/time37;time57 = total/cray;/* the following code sequence implements the semantics of the Fortran intrinsics nint(min(time33,time37)*/kf = (time33 ZERO) ? (kf + .5) : (kf - .5);if (fabs(double)kf) ONE) kflops = 0;else kflops = floor(fabs(double)kf);if (kf ZERO) kflops = -kflops;fprintf(stderr, times for array with leading dimension of%4dn,ldaa);print_time(4);print_time(5);print_time(6);print_time(7);fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr, Precision %5d Kflops ; %d Reps n,kflops,NTIMES); /*-*/ print_time (row)int row;fprintf(stderr,%11.2f%11.2f%11.2f%11.0f%11.2f%11.2fn, (double)time0row, (double)time1row, (double)time2row, (double)time3row, (double)time4row, (double)time5row); /*-*/ matgen(a,lda,n,b,norma)REAL a,b,*norma;int lda, n;/* We would like to declare alda, but c does not allow it. In thisfunction, references to aij are written alda*j+i. */int init, i, j;init = 1325;*norma = 0.0;for (j = 0; j n; j+) for (i = 0; i *norma) ? alda*j+i : *norma;for (i = 0; i n; i+) bi = 0.0;for (j = 0; j n; j+) for (i = 0; i = 0) for (k = 0; k nm1; k+) kp1 = k + 1; /* find l = pivot index*/l = idamax(n-k,&alda*k+k,1) + k;ipvtk = l;/* zero pivot implies this column already triangularized */if (alda*k+l != ZERO) /* interchange if necessary */if (l != k) t = alda*k+l;alda*k+l = alda*k+k;alda*k+k = t; /* compute multipliers */t = -ONE/alda*k+k;dscal(n-(k+1),t,&alda*k+k+1,1);/* row elimination with column indexing */for (j = kp1; j n; j+) t = alda*j+l;if (l != k) alda*j+l = alda*j+k;alda*j+k = t;daxpy(n-(k+1),t,&alda*k+k+1,1, &alda*j+k+1,1); else *info = k; ipvtn-1 = n-1;if (alda*(n-1)+(n-1) = ZERO) *info = n-1;/*-*/ dgesl(a,lda,n,ipvt,b,job)int lda,n,ipvt,job;REAL a,b;/* We would like to declare alda, but c does not allow it. In thisfunction, references to aij are written alda*i+j. */* dgesl solves the double precision system a * x = b or trans(a) * x = b using the factors computed by dgeco or dgefa. on entry a double precisionnlda the output from dgeco or dgefa. lda integer the leading dimension of the array a . n integer the order of the matrix a . ipvt integern the pivot vector from dgeco or dgefa. b double precisionn the right hand side vector. job integer = 0 to solve a*x = b , = nonzero to solve trans(a)*x = b where trans(a) is the transpose. on return b the solution vector x . error condition a division by zero will occur if the input factor contains a zero on the diagonal. technically this indicates singularity but it is often caused by improper arguments or improper setting of lda . it will not occur if the subroutines are called correctly and if dgeco has set rcond .gt. 0.0 or dgefa has set info .eq. 0 . to compute inverse(a) * c where c is a matrix with p columns dgeco(a,lda,n,ipvt,rcond,z) if (!rcond is too small) for (j=0,j= 1) for (k = 0; k nm1; k+) l = ipvtk;t = bl;if (l != k) bl = bk;bk = t;daxpy(n-(k+1),t,&alda*k+k+1,1,&bk+1,1); /* now solve u*x = y */for (kb = 0; kb n; kb+) k = n - (kb + 1); bk = bk/alda*k+k; t = -bk; daxpy(k,t,&alda*k+0,1,&b0,1);else /* job = nonzero, solve trans(a) * x = b first solve trans(u)*y = b */for (k = 0; k = 1) for (kb = 1; kb nm1; kb+) k = n - (kb+1);bk = bk + ddot(n-(k+1),&alda*k+k+1,1,&bk+1,1);l = ipvtk;if (l != k) t = bl;bl = bk;bk = t;/*-*/ daxpy(n,da,dx,incx,dy,incy)/* constant times a vector plus a vector. jack dongarra, linpack, 3/11/78.*/REAL dx,dy,da;int incx,incy,n;int i,ix,iy,m,mp1;if(n = 0) return;if (da = ZERO) return;if(incx != 1 | incy != 1) /* code for unequal increments or equal increments not equal to 1 */ix = 0;iy = 0;if(incx 0) ix = (-n+1)*incx;if(incy 0)iy = (-n+1)*incy;for (i = 0;i n; i+) dyiy = dyiy + da*dxix;ix = ix + incx;iy = iy + incy; return;/* code for both increments equal to 1 */#ifdef ROLLfor (i = 0;i n; i+) dyi = dyi + da*dxi;#endif#ifdef UNROLLm = n % 4;if ( m != 0) for (i = 0; i m; i+) dyi = dyi + da*dxi;if (n 4) return;for (i = m; i n; i = i + 4) dyi = dyi + da*dxi;dyi+1 = dyi+1 + da*dxi+1;dyi+2 = dyi+2 + da*dxi+2;dyi+3 = dyi+3 + da*dxi+3;#endif /*-*/ REAL ddot(n,dx,incx,dy,incy)/* forms the dot product of two vectors. jack dongarra, linpack, 3/11/78.*/REAL dx,dy;int incx,incy,n;REAL dtemp;int i,ix,iy,m,mp1;dtemp = ZERO;if(n = 0) return(ZERO);if(incx != 1 | incy != 1) /* code for unequal increments or equal increments not equal to 1*/ix = 0;iy = 0;if (incx 0) ix = (-n+1)*incx;if (incy 0) iy = (-n+1)*incy;for (i = 0;i n; i+) dtemp = dtemp + dxix*dyiy;ix = ix + incx;iy = iy + incy;return(dtemp);/* code for both increments equal to 1 */#ifdef ROLLfor (i=0;i n; i+)dtemp = dtemp + dxi*dyi;return(dtemp);#endif#ifdef UNROLLm = n % 5;if (m != 0) for (i = 0; i m; i+)dtemp = dtemp + dxi*dyi;if (n 5) return(dtemp);for (i = m; i n; i = i + 5) dtemp = dtemp + dxi*dyi +dxi+1*dyi+1 + dxi+2*dyi+2 +dxi+3*dyi+3 + dxi+4*dyi+4;return(dtemp);#endif/*-*/ dscal(n,da,dx,incx)/* scales a vector by a constant. jack dongarra, linpack, 3/11/78.*/REAL da,dx;int n, incx;int i,m,mp1,nincx;if(n = 0)return;if(incx != 1) /* code for increment not equal to 1 */nincx = n*incx;for (i = 0; i nincx; i = i + incx)dxi = da*dxi;return;/* code for increment equal to 1 */#ifdef ROLLfor (i = 0; i n; i+)dxi = da*dxi;#endif#ifdef UNROLLm = n % 5;if (m != 0) for (i = 0; i m; i+)dxi = da*dxi;if (n 5) return;for (i = m; i n; i = i + 5)dxi = da*dxi;dxi+1 = da*dxi+1;dxi+2 = da*dxi+2;dxi+3 = da*dxi+3;dxi+4 = da*dxi+4;#endif/*-*/ int idamax(n,dx,incx)/* finds the index of element having max. absolute value. jack dongarra, linpack, 3/11/78.*/REAL dx;int incx,n;REAL dmax;int i, ix, itemp;if( n 1 ) return(-1);if(n =1 ) return(0);if(incx != 1) /* code for increment not equal to 1 */ix = 0;dmax = fabs(double)dx0);ix = ix + incx;for (i = 1; i dmax) itemp = i;dmax = fabs(double)dxix);ix = ix + incx;else /* code for increment equal to 1 */itemp = 0;dmax = fabs(double)dx0);for (i = 1; i dmax) itemp = i;dmax = fabs(double)dxi);return (itemp);/*-*/ REAL epslon (x)REAL x;/* estimate unit roundoff in quantities of size x.*/REAL a,b,c,eps;/* this program should function properly on all systems satisfying the following two assumptions, 1. the base used in representing dfloating point numbers is not a power of three. 2. the quantity a in statement 10 is represented to the accuracy used in dfloating point variables that are stored in memory. the statement number 10 and the go to 10 are intended to force optimizing compilers to generate code satisfying assumption 2. under these assumptions, it should be true that, a is not exactly equal to four-thirds, b has a zero for its last bi

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论