Path: utzoo!attcan!uunet!aplcen!uakari.primate.wisc.edu!sdd.hp.com!cs.utexas.edu!mailrus!iuvax!huntley From: huntley@copper.ucs.indiana.edu (Haydn Huntley) Newsgroups: comp.sys.mac.programmer Subject: fastqsort.c -- 3 times faster than THINK C's qsort() Message-ID: <60123@iuvax.cs.indiana.edu> Date: 25 Sep 90 01:00:47 GMT Sender: news@iuvax.cs.indiana.edu Organization: Indiana University, Bloomington IN. Lines: 204 /* fastQSort() is a replacement for qsort(). Compared with the Think C library version of qsort, fastQSort is about 3 times faster (it takes 1/3 the time), and its speed isn't dependent on the data being sorted. One problem with qsort is that when the data is not in random order -- for example when it's already ordered, in reverse order, or almost sorted -- then qsort exhibits worst case behavior of N*N/2 operations. For N=1024 this can take about 30 seconds, instead of the usual 0.8 seconds. The fastQSort algorithm doesn't have this problem, because it randomly selects the pivot element, and so for N=1024 it requires about 0.26 secs +/- 0.02 secs. Also, because fastQSort doesn't exhibit worst case behavior, it doesn't require as much stack space, even though it has 12 bytes more of local variables. The things that make fastQSort so much faster than qsort are: 1) fastQSort picks the pivot element randomly, so it doesn't display worst case behavior. 2) fastQSort uses pointers and pointer arithmetic, avoiding multiplication whenever possible. 3) qsort uses std_swap() to swap the data between two locations, and std_swap loops through the data 3 times to perform the exchange! In comparison, swapMem() loops through the data just once. 4) fastQSort uses register variables whenever it makes good sense to do so. Researched and written by: Haydn Huntley Haydn's Consulting 203 West Stone Fairfield, Iowa 52556 (515) 472-7025 During the school year, I may be reached at the following address: Haydn Huntley Eigenmann Hall #289 Indiana University Bloomington, IN 47406 (812) 857-8620 E-mail: huntley@copper.ucs.indiana.edu */ #include #include #include "fastqsort.h" #define PIVOT_CUTOFF 25 #define SEED TickCount () #define FASTER 0 /* changes fastQSort() to use the median */ /* of the first, middle, and a random */ /* element, but this requires more code */ /* than simply always choosing an element */ /* at random. 612 bytes vs. 356 bytes */ /* The median version does about 10% fewer */ /* compares, and the speed difference is */ /* around 10%. For N=50,000 the savings is */ /* 1.85 secs, and for N=1024 the savings */ /* is only 0.025 secs. */ static size_t gSize; static int (*gCmpFn) (void *, void *); /* functions declarations */ static void doFastQSort (char *first, char *last); static void chooseMedian (void *pDest, void *pA, void *pB, void *pC); static void memSwap (void *a, void *b, size_t qSize); void fastQSort (void *base, size_t n, size_t size, int (*cmpFn) (void *, void *)) { /* setup the global variables used by fastQSort() */ gSize = size; gCmpFn = cmpFn; srand (SEED); /* the args for fastQSort () must be scaled by gSize */ doFastQSort (base, (char *) base + n * gSize); } static void doFastQSort (register char *first, char *last) { register char *i, *j; register size_t n, qSize = gSize; /* first, last, i, and j are scaled by gSize in this function */ while ((n = (last - first) / qSize) > 1) { #if FASTER if (n < PIVOT_CUTOFF) /* use a random pivot */ memSwap (first + (rand () % n) * qSize, first, qSize); else /* use the median of first, middle, and a random volunteer */ chooseMedian (first, first, first + (n / 2) * qSize, first + (rand () % n) * qSize); #else /* use a random pivot */ memSwap (first + (rand () % n) * qSize, first, qSize); #endif /* FASTER */ i = first; j = last; while (1) { i += qSize; while (i < last && gCmpFn (i, first) < 0) i += qSize; j -= qSize; while (j > first && gCmpFn (j, first) > 0) j -= qSize; if (i >= j) break; memSwap (i, j, qSize); } if (j == first) { first += qSize; continue; } memSwap (first, j, qSize); if (j - first < last - (j + qSize)) { doFastQSort (first, j); first = j + qSize; /* equivalent to doFastQSort (j + qSize, last); */ /* to save stack space */ } else { doFastQSort (j + qSize, last); last = j; /* equivalent to doFastQSort (first, j); */ /* to save stack space */ } } } #if FASTER static void chooseMedian (void *pDest, void *pA, void *pB, void *pC) { void *pMedian; if (gCmpFn (pA, pB) > 0) /* pA > pB, what is pC? */ if (gCmpFn (pA, pC) > 0) /* pA > pB, pA > pC */ if (gCmpFn (pB, pC) > 0) pMedian = pB; /* pA > pB > pC */ else pMedian = pC; /* pA > pC > pB */ else pMedian = pA; /* pC > pA > pB */ else /* pB > pA, what is pC? */ if (gCmpFn (pA, pC) > 0) pMedian = pA; /* pB > pA > pC */ else /* pB > pA, pC > pA */ if (gCmpFn (pB, pC) > 0) pMedian = pC; /* pB > pC > pA */ else pMedian = pB; /* pC > pB > pA */ if (pDest != pMedian) memSwap (pDest, pMedian, gSize); } #endif /* FASTER */ static void memSwap (register void *p, register void *q, size_t qSize) { register char tmp; asm { move.l qSize,d0 /* while (qSize) */ bra.s @2 /* { */ @1 move.b (p),tmp /* tmp = *p; */ move.b (q),(p)+ /* *p++ = *q; */ move.b tmp,(q)+ /* *q++ = tmp; */ subq.l #1,d0 /* qSize--; */ @2 bne.s @1 /* } */ } } ;; ***************************************************** ;; * Haydn Huntley huntley@copper.ucs.indiana.edu * ;; *****************************************************