Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!usc!cs.utexas.edu!helios!archone.tamu.edu
From: jamie@archone.tamu.edu (James Price)
Newsgroups: comp.sys.sgi
Subject: SGI GL matrix performance
Message-ID: <15407@helios.TAMU.EDU>
Date: 26 Apr 91 20:10:33 GMT
Sender: usenet@helios.TAMU.EDU
Distribution: usa
Organization: College of Architecture, Texas A&M University.
Lines: 279

Has anyone done any benchmarking of the SGI matrix functions?  I was curious
and wrote the program included below.  It does a number of 4x4 matrix 
multiplies, first using software, and then using the geometry pipeline 
functions (loadmatrix(), multmatrix(), getmatrix()).  

Here are some typical results:

10000 iterations on fritz, with GL version: GL4DGT-3.3

Software - no optimization:     3.349 sec.

Software - some optimization:   1.130 sec.

Software - more optimization:   0.910 sec.

Hardware - preserve CTM:        2.379 sec.

Hardware - destroy CTM:         2.289 sec.

Hardware - abandon results:     0.580 sec.


The actual hardware multiplication is fast (0.580 sec/10000 multiplies),
but if we call getmatrix() to access the results, it becomes roughly four
times slower (2.379 sec/10000 multiplies).  I was hoping to use the
speed of the hardware for my own matrix needs, but it looks like the
getmatrix() call is simply too costly.  Is there a better way?

Jim Price
jamie@archone.tamu.edu
Visualization Laboratory
Texas A&M University

/**************************************************************************/
/*                                                                        */
/* matperf.c - SGI GL matrix performance checker                          */
/*                                                                        */
/* to compile:  cc -o matperf matperf.c -lgl_s -lm                        */
/*                                                                        */
/* to run:  matperf n                                                     */
/*     where n = number of matrix multiplies to perform                   */
/*                                                                        */
/**************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/time.h>
#include <gl.h>

/* 4x4 matrix of floats, indexed as m[i][j]. */
typedef float MAT44[4][4];

/* Utility routines: matrix dump, identity fill, elapsed-time helper. */
void Print44(MAT44 *pMat);
void Identity(MAT44 *pMat);
double Duration(struct timeval *ptv1, struct timeval *ptv2);
/* Software multiplies.  The _2 and _3 variants write the 16 result */
/* elements consecutively through a flat float pointer.             */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2);
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2);
/* Multiplies performed through the GL matrix calls                 */
/* (loadmatrix/multmatrix, optionally getmatrix).                   */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);


/*
 * Benchmark driver.
 *
 * Usage: matperf n
 *    n = number of 4x4 multiplies to time for each variant (n >= 0).
 *
 * Builds two test matrices, opens a GL window (the pipeline matrix calls
 * need one), then times n calls of each software and hardware multiply
 * with gettimeofday() and prints the elapsed wall-clock seconds.
 *
 * Returns EXIT_SUCCESS after a full run, EXIT_FAILURE on a bad command
 * line or if the GL window cannot be opened.
 */
int main(int argc, char *argv[])
{
   register long i;
   long iter;
   char *end = NULL;
   MAT44 m1, m2, result;
   char hwver[13],hostname[MAXHOSTNAMELEN+1];
   struct timeval tv1,tv2;
   struct timezone tz;

   if (argc != 2)
      {
      fprintf(stderr,"Usage: matperf n\n");
      return EXIT_FAILURE;
      }

   /* strtol lets us reject non-numeric input, which atoi maps to 0 */
   iter = strtol(argv[1],&end,10);
   if (end == argv[1] || *end != '\0' || iter < 0)
      {
      fprintf(stderr,"Usage: matperf n\n");
      return EXIT_FAILURE;
      }

   /* put in some numbers */
   /* The helpers take MAT44 * (pointer to the whole matrix), so pass    */
   /* &m rather than the bare array name, which decays to float (*)[4].  */
   Identity(&m1);
   m1[0][1] = 1.0;
   m1[0][2] = 1.0;
   m1[0][3] = 1.0;
   
   Identity(&m2);
   m2[0][0] = 5;
   m2[1][1] = 6;
   m2[2][2] = 7;
   m2[3][0] = 10;
   m2[3][1] = 20;
   m2[3][2] = 30;

   /* gethostname() need not terminate a truncated name; force it */
   if (gethostname(hostname,MAXHOSTNAMELEN) != 0)
      hostname[0] = '\0';
   hostname[MAXHOSTNAMELEN] = '\0';
   gversion(hwver);

   /* winopen() necessary to use geometry pipeline */
   prefposition(500,600,500,600);
   noport();
   if (winopen("perf") == -1)
      {
      fprintf(stderr,"matperf: cannot open GL window\n");
      return EXIT_FAILURE;
      }

   /* give window processes a chance to get up and running */
   sleep(5);    

   printf("\n%ld iterations on %s, with GL version: %s\n",iter,hostname,hwver);
   
   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      SoftMult44_1(&result,&m1,&m2);
   gettimeofday(&tv2,&tz);
   
   printf("\nSoftware - no optimization:   %7.3f sec.\n",Duration(&tv1,&tv2));


   /* the _2/_3 variants fill the result through a flat float pointer */
   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      SoftMult44_2(&result[0][0],&m1,&m2);
   gettimeofday(&tv2,&tz);

   printf("\nSoftware - some optimization: %7.3f sec.\n",Duration(&tv1,&tv2));


   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      SoftMult44_3(&result[0][0],&m1,&m2);
   gettimeofday(&tv2,&tz);

   printf("\nSoftware - more optimization: %7.3f sec.\n",Duration(&tv1,&tv2));


   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      HardMult44_1(&result,&m1,&m2);
   gettimeofday(&tv2,&tz);

   printf("\nHardware - preserve CTM:      %7.3f sec.\n",Duration(&tv1,&tv2));


   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      HardMult44_2(&result,&m1,&m2);
   gettimeofday(&tv2,&tz);

   printf("\nHardware - destroy CTM:       %7.3f sec.\n",Duration(&tv1,&tv2));


   gettimeofday(&tv1,&tz);
   for (i=0; i<iter; i++)
      HardMult44_3(&result,&m1,&m2);
   gettimeofday(&tv2,&tz);

   printf("\nHardware - abandon results:   %7.3f sec.\n",Duration(&tv1,&tv2));
   printf("\nDone.\n");

   return EXIT_SUCCESS;
}

/*
 * Elapsed time in seconds between two gettimeofday() samples.
 *
 *   ptv1 - earlier sample
 *   ptv2 - later sample
 *
 * Returns (*ptv2 - *ptv1) as a real number of seconds; negative if ptv2
 * precedes ptv1.
 *
 * The seconds and microseconds fields are differenced separately before
 * being combined.  Converting each absolute timestamp to a double first
 * would round away sub-microsecond detail, because seconds-since-epoch
 * values are large.
 */
double Duration(struct timeval *ptv1, struct timeval *ptv2)
{
   double dsec  = (double)ptv2->tv_sec  - (double)ptv1->tv_sec;
   double dusec = (double)ptv2->tv_usec - (double)ptv1->tv_usec;

   return dsec + dusec / 1000000.0;
}


/*
 * 4x4 multiply, straightforward triple loop (no optimization):
 *    *pResult = (*pm1) x (*pm2)
 * Every output element is zeroed and then accumulated in place, one
 * product at a time, so the result storage is written while the operands
 * are still being read.
 */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
   int row,col,term;

   for (row=0; row<4; row++)
      {
      float *out = (*pResult)[row];
      float *lhs = (*pm1)[row];

      for (col=0; col<4; col++)
         {
         out[col] = 0.0;
         for (term=0; term<4; term++)
            out[col] += lhs[term]*(*pm2)[term][col];
         }
      }
}


/*
 * 4x4 multiply with the inner dot product written out (some optimization).
 *
 *   pResult - receives the 16 product elements consecutively, one row
 *             after another
 *   pm1,pm2 - left and right operands
 */
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
   register int row,col;
   float *out = pResult;

   for (row=0; row<4; row++)
      {
      float *lhs = (*pm1)[row];

      for (col=0; col<4; col++)
         *out++ = lhs[0]*(*pm2)[0][col] +
                  lhs[1]*(*pm2)[1][col] +
                  lhs[2]*(*pm2)[2][col] +
                  lhs[3]*(*pm2)[3][col];
      }
}


/*
 * 4x4 multiply with both the column loop and the dot product written
 * out (more optimization); only the row loop remains.
 *
 *   pResult - receives the 16 product elements consecutively, one row
 *             after another
 *   pm1,pm2 - left and right operands
 */
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
   register int row;

   for (row=0; row<4; row++, pResult+=4)
      {
      float *lhs = (*pm1)[row];

      pResult[0] = lhs[0]*(*pm2)[0][0] +
                   lhs[1]*(*pm2)[1][0] +
                   lhs[2]*(*pm2)[2][0] +
                   lhs[3]*(*pm2)[3][0];

      pResult[1] = lhs[0]*(*pm2)[0][1] +
                   lhs[1]*(*pm2)[1][1] +
                   lhs[2]*(*pm2)[2][1] +
                   lhs[3]*(*pm2)[3][1];

      pResult[2] = lhs[0]*(*pm2)[0][2] +
                   lhs[1]*(*pm2)[1][2] +
                   lhs[2]*(*pm2)[2][2] +
                   lhs[3]*(*pm2)[3][2];

      pResult[3] = lhs[0]*(*pm2)[0][3] +
                   lhs[1]*(*pm2)[1][3] +
                   lhs[2]*(*pm2)[2][3] +
                   lhs[3]*(*pm2)[3][3];
      }
}


/*
 * Multiply through the GL matrix calls, preserving the caller's CTM.
 *
 * The current matrix is saved with pushmatrix(), *pm2 is loaded, *pm1 is
 * multiplied in, the product is read back into *pResult with getmatrix(),
 * and the saved matrix is restored with popmatrix().
 * NOTE(review): per the IRIS GL documentation multmatrix() premultiplies
 * the current matrix, which would make the product (*pm1) x (*pm2) --
 * confirm against the GL reference.
 *
 * The GL routines take a Matrix (float[4][4], i.e. float (*)[4] as a
 * parameter), so the MAT44 pointers are dereferenced before being passed.
 */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
   pushmatrix();
   loadmatrix(*pm2);
   multmatrix(*pm1);
   getmatrix(*pResult);
   popmatrix();
}


/*
 * Multiply through the GL matrix calls, overwriting the CTM.
 *
 * *pm2 is loaded as the current matrix, *pm1 is multiplied in, and the
 * product is read back into *pResult with getmatrix().  The current
 * matrix is not saved or restored, so it holds the product on return.
 *
 * The GL routines take a Matrix (float[4][4], i.e. float (*)[4] as a
 * parameter), so the MAT44 pointers are dereferenced before being passed.
 */
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
   loadmatrix(*pm2);
   multmatrix(*pm1);
   getmatrix(*pResult);
}

/*
 * Multiply through the GL matrix calls, preserving the CTM and
 * abandoning the result.
 *
 * Same sequence as HardMult44_1 minus the getmatrix() read-back, so it
 * times the load/multiply alone.  pResult is accepted only to keep the
 * signature parallel with the other variants and is never written.
 *
 * The GL routines take a Matrix (float[4][4], i.e. float (*)[4] as a
 * parameter), so the MAT44 pointers are dereferenced before being passed.
 */
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
   (void)pResult;   /* intentionally unused */

   pushmatrix();
   loadmatrix(*pm2);
   multmatrix(*pm1);
   popmatrix();
}

/*
 * Write a 4x4 matrix to stdout.  Each row is introduced by a newline and
 * holds four "%5.3f " fields; no newline is written after the last row.
 */
void Print44(MAT44 *pMat) 
{
   int cell;

   for (cell=0; cell<16; cell++)
      {
      /* a new row starts every fourth element */
      if (cell % 4 == 0)
         printf("\n");
      printf("%5.3f ",(*pMat)[cell / 4][cell % 4]);
      }
}


/*
 * Set *pMat to the 4x4 identity: ones on the diagonal, zeros elsewhere.
 */
void Identity(MAT44 *pMat) 
{
   int row,col;

   for (row=0; row<4; row++)
      {
      float *cells = (*pMat)[row];

      /* clear the row, then drop in its diagonal element */
      for (col=0; col<4; col++)
         cells[col] = 0.0;
      cells[row] = 1.0;
      }
}