Xref: utzoo comp.lang.c:12162 comp.arch:6179 Path: utzoo!utgpu!water!watmath!clyde!att!ttrdc!levy From: levy@ttrdc.UUCP (Daniel R. Levy) Newsgroups: comp.lang.c,comp.arch Subject: Re: Explanation, please! Message-ID: <2877@ttrdc.UUCP> Date: 28 Aug 88 03:24:29 GMT References: <653@paris.ICS.UCI.EDU> Organization: AT&T, Skokie, IL Lines: 181 In article <653@paris.ICS.UCI.EDU>, schmidt@bonnie.ics.uci.edu (Douglas C. Schmidt) writes: > Hi, > > Since I posted my original question there has been a great deal of > abstract discussion about the relative merits of the loop unrolling > scheme. The topic has piqued my curiousity, so I when ahead and > implemented a short test program, included below, to test Duff's > device against the ``ordinary for loop w/index variable'' technique. > See for yourself.... > > After some quick testing I found that gcc 1.26 -O on a Sun 3 and a > Sequent Balance was pretty heavily in favor of the regular (non-Duff) > loop. Your mileage may vary. I realize that there may be other > tests, and if anyone has a better version, I'd like to see it! [schmidt's program deleted, please refer to parent article if you want it] I modified this program to run under System V, changed the arrays to be dynam- ically allocated, and changed both the Duff and ordinary copies to use register pointers instead of global pointers (for the Duff copy) and array indexing (for the ordinary copy). I then tried it on a SVR2 3B20, a SVR3 3B2, a Sun-3, and a Sun-4 both with and without -O optimization (using the standard pcc-type C compiler on each system). The result? Duff wins by about 10%-20% on all machines tested. Here is the code thus modified: #include #include #include double Start_Timer(); double Return_Elapsed_Time(); #define DEFAULT_NUM 100000 main(argc,argv) char **argv; { double Elapsed_Time_Duff; double Elapsed_Time_Ordinary; int Count = argc > 1 ? atoi(argv[1]) : DEFAULT_NUM; int i; register int *A, *B; register int n; register int *A_end; int *array1, *array2; char *malloc(); if (Count <= 0) { printf("Bad Count\n"); return 1; } else { printf("Count=%d\n", Count); } if (!(array1=(int *)malloc(sizeof(int) * Count))) { printf("Can't malloc %u bytes for array1\n", sizeof(int) * Count); return 1; } else if (!(array2=(int *)malloc(sizeof(int) * Count))) { printf ("Can't malloc %u bytes for array2\n", sizeof(int) * Count); return 1; } for (i = 0;i < Count ;i++) { array1[i] = i + 1; array2[i] = i; } printf("Starting Duff's device timing...\n"); Start_Timer(); { n = (Count + 7) / 8; A = array1; B = array2; switch(Count & 0x7) { case 0: do { *A++ = *B++; case 7: *A++ = *B++; case 6: *A++ = *B++; case 5: *A++ = *B++; case 4: *A++ = *B++; case 3: *A++ = *B++; case 2: *A++ = *B++; case 1: *A++ = *B++; } while (--n > 0); } } Elapsed_Time_Duff = Return_Elapsed_Time(0.0 ); printf("Elapsed time = %.3lf\n",Elapsed_Time_Duff); for (i = 0;i < Count ;i++) { if (array1[i] != array2[i]) { printf("Yow, problems at location %d!\n",i); break; } } for (i = 0;i < Count ;i++) { array1[i] = i + 1; array2[i] = i; } printf("Starting ordinary copy timing...\n"); Start_Timer(); { A = array1; B = array2; A_end = array1 + Count - 1; while (A <= A_end) *A++ = *B++; } Elapsed_Time_Ordinary = Return_Elapsed_Time(0.0 ); printf("Elapsed time = %.3lf\n",Elapsed_Time_Ordinary); for (i = 0;i < Count ;i++) { if (array1[i] != array2[i]) { printf("Yow, problems at location %d!\n",i); break; } } printf("Duff/Ordinary copy time ratio = %.3lf\n", Elapsed_Time_Duff/Elapsed_Time_Ordinary); } /* no such thing as "negative time"! */ #define ERROR_VALUE -1.0 static struct tms Old_Time; static struct tms New_Time; static int Timer_Set = 0; #ifdef __STDC__ double Start_Timer(void) #else double Start_Timer() #endif { Timer_Set = 1; times(&Old_Time); /* set starting process time */ return ((double)Old_Time.tms_utime)/(double)HZ; } /* returns process time since Last_Time (if parameter is not DEFAULT_TIME, */ /* i.e., (double) 0.0 ),otherwise, if parameter == DEFAULT_TIME then */ /* the time since the Old_Time was set is returned. Returns ERROR_VALUE */ /* if Start_Timer() is not called first */ #ifdef __STDC__ double Return_Elapsed_Time(double Last_Time) #else double Return_Elapsed_Time(Last_Time) double Last_Time; #endif { if (!Timer_Set) { return(ERROR_VALUE); } else { /* get process time */ times(&New_Time); if (Last_Time == 0.0) { return ((double)(New_Time.tms_utime - Old_Time.tms_utime))/(double) HZ; } else { return (((double)New_Time.tms_utime)/(double)HZ) - Last_Time; } } } -- |------------Dan Levy------------| THE OPINIONS EXPRESSED HEREIN ARE MINE ONLY | Bell Labs Area 61 (R.I.P., TTY)| AND ARE NOT TO BE IMPUTED TO AT&T. | Skokie, Illinois | |-----Path: att!ttbcad!levy-----|