/*
 * Xref: utzoo comp.lang.c:12141 comp.arch:6171
 * Path: utzoo!utgpu!water!watmath!clyde!att!osu-cis!killer!ames!ucsd!orion.cf.uci.edu!paris.ics.uci.edu!bonnie.ics.uci.edu!schmidt
 * From: schmidt@bonnie.ics.uci.edu (Douglas C. Schmidt)
 * Newsgroups: comp.lang.c,comp.arch
 * Subject: Re: Explanation, please!
 * Message-ID: <653@paris.ICS.UCI.EDU>
 * Date: 27 Aug 88 07:57:11 GMT
 * Sender: news@paris.ics.uci.edu
 * Reply-To: schmidt@bonnie.ics.uci.edu (Douglas C. Schmidt)
 * Organization: University of California, Irvine - Dept. of ICS
 * Lines: 145
 *
 * Hi,
 *
 * Since I posted my original question there has been a great deal of
 * abstract discussion about the relative merits of the loop unrolling
 * scheme.  The topic has piqued my curiosity, so I went ahead and
 * implemented a short test program, included below, to test Duff's
 * device against the ``ordinary for loop w/index variable'' technique.
 * See for yourself....
 *
 * After some quick testing I found that gcc 1.26 -O on a Sun 3 and a
 * Sequent Balance was pretty heavily in favor of the regular (non-Duff)
 * loop.  Your mileage may vary.  I realize that there may be other
 * tests, and if anyone has a better version, I'd like to see it!
 *
 * Doug Schmidt
 * ----------------------------------------
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>

double Start_Timer(void);
double Return_Elapsed_Time(double Last_Time);

/* Capacity of both test arrays; also the default element count. */
#define MAX_NUM 100000

/* Passing this to Return_Elapsed_Time() means "time since Start_Timer()". */
#define DEFAULT_TIME 0.0

/* no such thing as "negative time"! */
#define ERROR_VALUE (-1.0)

#define USEC_PER_SEC 1000000.0

int array1[MAX_NUM];
int array2[MAX_NUM];
int *A = array1, *B = array2;

static struct rusage Old_Time;
static struct rusage New_Time;
static int Timer_Set = 0;

/*
 * Parses a decimal element count from `text` into `*count_out`.
 * Returns 0 on success, -1 if the text is not a whole number or lies
 * outside 0..MAX_NUM (larger values would overrun array1/array2).
 */
static int parse_count(const char *text, int *count_out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE
        || value < 0 || value > MAX_NUM) {
        return -1;
    }
    *count_out = (int)value;
    return 0;
}

/*
 * Makes the first `count` elements of array1 and array2 differ
 * (array1[i] = i + 1, array2[i] = i) so a copy that did not happen is
 * detectable afterwards.
 */
static void fill_arrays(int count)
{
    int i;

    for (i = 0; i < count; i++) {
        array1[i] = i + 1;
        array2[i] = i;
    }
}

/*
 * Checks that the first `count` elements of array1 equal those of
 * array2.  Reports the first mismatch on stdout and returns -1;
 * returns 0 when every element matches.
 */
static int verify_copy(int count)
{
    int i;

    for (i = 0; i < count; i++) {
        if (array1[i] != array2[i]) {
            printf("Yow, problems at location %d!\n", i);
            return -1;
        }
    }
    return 0;
}

/*
 * Times copying `Count` ints from array2 to array1, first with Duff's
 * device and then with a plain indexed loop, printing the user CPU time
 * of each.  `Count` is argv[1] when given (0..MAX_NUM), else MAX_NUM.
 * Returns EXIT_SUCCESS if both copies verify, EXIT_FAILURE on a bad
 * argument or a verification mismatch.
 */
int main(int argc, char *argv[])
{
    double Elapsed_Time;
    int Count = MAX_NUM;
    int status = EXIT_SUCCESS;
    int i;

    if (argc > 1 && parse_count(argv[1], &Count) != 0) {
        fprintf(stderr, "usage: %s [count]  (0 <= count <= %d)\n",
                argv[0], MAX_NUM);
        return EXIT_FAILURE;
    }

    fill_arrays(Count);
    printf("Starting Duff's device timing...\n");

    /*
     * The copy walks the file-scope pointers A and B, as the original
     * benchmark did.
     * NOTE(review): updating globals on every step may bias the timing
     * against Duff's device compared with the indexed loop below --
     * confirm before drawing conclusions from the numbers.
     */
    A = array1;
    B = array2;
    Start_Timer();
    /* Without this guard a Count of 0 enters at case 0 and copies 8 ints. */
    if (Count > 0) {
        int n = (Count + 7) / 8;

        switch (Count % 8) {
        case 0: do { *A++ = *B++; /* fallthrough */
        case 7:      *A++ = *B++; /* fallthrough */
        case 6:      *A++ = *B++; /* fallthrough */
        case 5:      *A++ = *B++; /* fallthrough */
        case 4:      *A++ = *B++; /* fallthrough */
        case 3:      *A++ = *B++; /* fallthrough */
        case 2:      *A++ = *B++; /* fallthrough */
        case 1:      *A++ = *B++;
                } while (--n > 0);
        }
    }
    Elapsed_Time = Return_Elapsed_Time(DEFAULT_TIME);
    printf("Elapsed time = %.3f\n", Elapsed_Time);
    if (verify_copy(Count) != 0) {
        status = EXIT_FAILURE;
    }

    fill_arrays(Count);
    printf("Starting ordinary copy timing...\n");
    Start_Timer();
    for (i = 0; i < Count; i++) {
        array1[i] = array2[i];
    }
    Elapsed_Time = Return_Elapsed_Time(DEFAULT_TIME);
    printf("Elapsed time = %.3f\n", Elapsed_Time);
    if (verify_copy(Count) != 0) {
        status = EXIT_FAILURE;
    }

    return status;
}

/*
 * Records the process's current user CPU time (getrusage ru_utime) as
 * the starting point for Return_Elapsed_Time().
 * Returns that time in seconds, or ERROR_VALUE if getrusage() fails, in
 * which case the timer is left unset.
 */
double Start_Timer(void)
{
    /* set starting process time */
    if (getrusage(RUSAGE_SELF, &Old_Time) != 0) {
        Timer_Set = 0;
        return ERROR_VALUE;
    }
    Timer_Set = 1;
    return Old_Time.ru_utime.tv_sec
           + (Old_Time.ru_utime.tv_usec / USEC_PER_SEC);
}

/*
 * Returns user CPU time in seconds.
 * If Last_Time == DEFAULT_TIME (i.e., (double) 0.0), the result is the
 * time elapsed since Start_Timer() recorded Old_Time.  Otherwise the
 * result is the current user CPU time minus Last_Time, where Last_Time
 * is a value previously returned by Start_Timer().
 * Returns ERROR_VALUE if Start_Timer() has not been called successfully
 * first, or if getrusage() fails.
 */
double Return_Elapsed_Time(double Last_Time)
{
    if (!Timer_Set) {
        return ERROR_VALUE;
    }

    /* get process time */
    if (getrusage(RUSAGE_SELF, &New_Time) != 0) {
        return ERROR_VALUE;
    }

    if (Last_Time == DEFAULT_TIME) {
        /* Subtract fields separately; a negative usec difference is fine. */
        return (New_Time.ru_utime.tv_sec - Old_Time.ru_utime.tv_sec)
               + ((New_Time.ru_utime.tv_usec - Old_Time.ru_utime.tv_usec)
                  / USEC_PER_SEC);
    }
    return (New_Time.ru_utime.tv_sec
            + (New_Time.ru_utime.tv_usec / USEC_PER_SEC)) - Last_Time;
}

/*
 * --
 * schmidt@bonnie.ics.uci.edu (ARPA)
 * "If our behavior is strict, we do not need fun." -Zippy th' Pinhead
 * "If our behavior is struct, we do not need defun." -Anon
 */