Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!usc!cs.utexas.edu!helios!archone.tamu.edu From: jamie@archone.tamu.edu (James Price) Newsgroups: comp.sys.sgi Subject: SGI GL matrix performance Message-ID: <15407@helios.TAMU.EDU> Date: 26 Apr 91 20:10:33 GMT Sender: usenet@helios.TAMU.EDU Distribution: usa Organization: College of Architecture, Texas A&M University. Lines: 279 Has anyone done any benchmarking of the SGI matrix functions? I was curious and wrote the program included below. It does a number of 4x4 matrix multiplies, first using software, and then using the geometry pipeline functions (loadmatrix(), multmatrix(), getmatrix()). Here are some typical results: 10000 iterations on fritz, with GL version: GL4DGT-3.3 Software - no optimization: 3.349 sec. Software - some optimization: 1.130 sec. Software - more optimization: 0.910 sec. Hardware - preserve CTM: 2.379 sec. Hardware - destroy CTM: 2.289 sec. Hardware - abandon results: 0.580 sec. The actual hardware multiplication is fast (0.580 sec/10000 multiplies), but if we call getmatrix() to access the results, it slows things down by a factor of about four (from 0.580 to 2.379 sec/10000 multiplies). I was hoping to use the speed of the hardware for my own matrix needs, but it looks like the getmatrix() call is simply too costly. Is there a better way? 
Jim Price jamie@archone.tamu.edu Visualization Laboratory Texas A&M University
/**************************************************************************/
/*                                                                        */
/*  matperf.c - SGI GL matrix performance checker                         */
/*                                                                        */
/*  to compile:  cc -o matperf matperf.c -lgl_s -lm                       */
/*                                                                        */
/*  to run:      matperf n                                                */
/*               where n = number of matrix multiplies to perform         */
/*                                                                        */
/**************************************************************************/

/* NOTE(review): the signature line above is message text, not C; it is   */
/* kept verbatim because it shares a physical line with the program.      */

/* NOTE(review): the five header names below were lost in extraction      */
/* (everything between angle brackets was stripped).  The code that       */
/* follows uses printf, atoi, sleep, gethostname, gettimeofday,           */
/* MAXHOSTNAMELEN and the GL calls, so the headers declaring those are    */
/* what is needed; the exact original list cannot be recovered from here. */
#include
#include
#include
#include
#include

/* A 4x4 matrix of floats, indexed [row][column]. */
typedef float MAT44[4][4];

/* Utility routines. */
void Print44(MAT44 *pMat);
void Identity(MAT44 *pMat);
double Duration(struct timeval *ptv1, struct timeval *ptv2);

/* Software multiplies.  Note that _2 and _3 take the result as a flat    */
/* float array while _1 takes a MAT44 pointer.                            */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2);
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2);

/* Multiplies routed through the GL matrix calls. */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);

/*
 * Program entry point.
 *
 * Expects exactly one argument, the iteration count.  Builds two test
 * matrices, opens a GL window, prints a banner with host name and GL
 * version, and then starts timing.
 *
 * NOTE(review): standard C requires main to return int; "void main" is
 * kept here exactly as posted.
 */
void main(int argc, char *argv[])
{
  register long i;
  long iter;
  MAT44 m1, m2, result;
  char hwver[13],hostname[MAXHOSTNAMELEN+1];
  struct timeval tv1,tv2;
  struct timezone tz;

  if (argc != 2) {
    printf("Usage: matperf n\n");
    return;
  }

  /* NOTE(review): atoi gives no error indication; a non-numeric or       */
  /* negative argument is accepted silently.                              */
  iter = atoi(argv[1]);

  /* put in some numbers */
  /* NOTE(review): Identity is declared to take MAT44 *, but m1 and m2    */
  /* (objects of type MAT44) are passed without '&'.  The address is the  */
  /* same but the pointer type differs; "&m1" / "&m2" looks intended --   */
  /* confirm.                                                             */
  Identity(m1);
  m1[0][1] = 1.0;
  m1[0][2] = 1.0;
  m1[0][3] = 1.0;
  Identity(m2);
  m2[0][0] = 5;
  m2[1][1] = 6;
  m2[2][2] = 7;
  m2[3][0] = 10;
  m2[3][1] = 20;
  m2[3][2] = 30;

  /* NOTE(review): return value is not checked, and the buffer (one byte  */
  /* larger than the length passed) is never explicitly NUL-terminated;   */
  /* confirm gethostname's truncation behaviour on the target system.     */
  gethostname(hostname,MAXHOSTNAMELEN);
  /* NOTE(review): hwver is 13 bytes; confirm against the GL docs that    */
  /* gversion() never writes more than that.                              */
  gversion(hwver);

  /* winopen() necessary to use geometry pipeline */
  prefposition(500,600,500,600);
  noport();
  winopen("perf");

  /* give window processes a chance to get up and running */
  sleep(5);

  printf("\n%ld iterations on %s, with GL version: %s\n",iter,hostname,hwver);

  gettimeofday(&tv1,&tz);
  /* NOTE(review): source text is missing from this point.  Everything    */
  /* between "for (i=0; i" and "tv_sec" was dropped in extraction --      */
  /* presumably the rest of main (timing loops, result printing) and the  */
  /* opening of Duration().  The surviving tail below refers to ptv1 and  */
  /* ptv2, the parameter names in Duration's prototype, and computes      */
  /* (ptv2 seconds + microseconds/1e6) minus the same for ptv1.  The      */
  /* fragment is kept verbatim and will not compile as it stands.         */
  for (i=0; itv_sec + (double)ptv2->tv_usec / 1000000.0) - ((double)ptv1->tv_sec + (double)ptv1->tv_usec / 1000000.0));
}

/* 4x4 no optimization */
/*
 * Stores the product (*pm1) x (*pm2) in *pResult using a plain triple
 * loop: each result element is zeroed and then accumulated over k.
 * Result elements are written while the inputs are still being read, so
 * pResult must not point at the same matrix as pm1 or pm2.
 */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
  int i,j,k;

  for (i=0; i<4; i++)
    for (j=0; j<4; j++) {
      (*pResult)[i][j] = 0.0;
      for (k=0; k<4; k++)
        (*pResult)[i][j] += (*pm1)[i][k]*(*pm2)[k][j];
    }
}

/* 4x4 some optimization */
/*
 * Same product as SoftMult44_1 with the inner k loop written out by
 * hand.  The result is written through a flat float pointer that is
 * advanced once per element, so the 16 outputs land in row-major order
 * (i outer, j inner).  As above, the output must not overlap an input.
 */
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
  register int i,j;

  for (i=0; i<4; i++)
    for (j=0; j<4; j++) {
      *pResult = (*pm1)[i][0]*(*pm2)[0][j] +
                 (*pm1)[i][1]*(*pm2)[1][j] +
                 (*pm1)[i][2]*(*pm2)[2][j] +
                 (*pm1)[i][3]*(*pm2)[3][j];
      pResult++;
    }
}

/* 4x4 more optimization */
/*
 * Same product again with the column loop written out as well: one
 * pass of the remaining loop emits the four elements of result row i
 * (columns 0..3 in order) through the advancing flat pointer.
 */
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
  register int i;

  for (i=0; i<4; i++) {
    /* column 0 */
    *pResult = (*pm1)[i][0]*(*pm2)[0][0] +
               (*pm1)[i][1]*(*pm2)[1][0] +
               (*pm1)[i][2]*(*pm2)[2][0] +
               (*pm1)[i][3]*(*pm2)[3][0];
    pResult++;
    /* column 1 */
    *pResult = (*pm1)[i][0]*(*pm2)[0][1] +
               (*pm1)[i][1]*(*pm2)[1][1] +
               (*pm1)[i][2]*(*pm2)[2][1] +
               (*pm1)[i][3]*(*pm2)[3][1];
    pResult++;
    /* column 2 */
    *pResult = (*pm1)[i][0]*(*pm2)[0][2] +
               (*pm1)[i][1]*(*pm2)[1][2] +
               (*pm1)[i][2]*(*pm2)[2][2] +
               (*pm1)[i][3]*(*pm2)[3][2];
    pResult++;
    /* column 3 */
    *pResult = (*pm1)[i][0]*(*pm2)[0][3] +
               (*pm1)[i][1]*(*pm2)[1][3] +
               (*pm1)[i][2]*(*pm2)[2][3] +
               (*pm1)[i][3]*(*pm2)[3][3];
    pResult++;
  }
}

/* preserve CTM */
/*
 * Multiply via the GL matrix calls, bracketed by pushmatrix() and
 * popmatrix() so that (per the original comment) the current
 * transformation matrix is preserved.  *pm2 is loaded, *pm1 is
 * multiplied in, and the result is read back with getmatrix().
 *
 * NOTE(review): presumably multmatrix() premultiplies, which would make
 * the result m1 x m2 as in the software versions -- confirm against the
 * GL documentation.
 * NOTE(review): the GL prototypes are not visible here; if these calls
 * expect a float[4][4] argument, "*pm2" etc. would be the type-correct
 * form rather than the MAT44 * passed -- confirm.
 */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
  pushmatrix();
  loadmatrix(pm2);
  multmatrix(pm1);
  getmatrix(pResult);
  popmatrix();
}

/* destroy CTM */
/*
 * Same load / multiply / read-back sequence as HardMult44_1 but without
 * the pushmatrix()/popmatrix() pair, so whatever matrix was current
 * before the call is overwritten by loadmatrix().
 */
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
  loadmatrix(pm2);
  multmatrix(pm1);
  getmatrix(pResult);
}

/* preserve CTM, abandon results */
/*
 * Push, load and multiply, then pop without calling getmatrix().
 * pResult is accepted only so the signature matches the other variants;
 * it is never read or written here.
 */
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
  pushmatrix();
  loadmatrix(pm2);
  multmatrix(pm1);
  popmatrix();
}

/*
 * Print a matrix to stdout, one row per line.  A newline is emitted
 * BEFORE each row and none after the last, and every element is printed
 * with the "%5.3f " format (trailing space included).
 */
void Print44(MAT44 *pMat)
{
  int i,j;

  for (i=0; i<4; i++) {
    printf("\n");
    for (j=0; j<4; j++)
      printf("%5.3f ",(*pMat)[i][j]);
  }
}

/*
 * Overwrite *pMat with the 4x4 identity: 1.0 where row index equals
 * column index, 0.0 everywhere else.
 */
void Identity(MAT44 *pMat)
{
  int i,j;

  for (i=0; i<4; i++)
    for (j=0; j<4; j++)
      (*pMat)[i][j] = (i == j) ? (1.0) : (0.0);
}