Xref: utzoo comp.lang.c:17189 comp.lang.c++:2851 gnu.g++.lib.bug:22 Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!ames!elroy!orion.cf.uci.edu!uci-ics!glacier.ics.uci.edu!schmidt From: schmidt@glacier.ics.uci.edu (Doug Schmidt) Newsgroups: comp.lang.c,comp.lang.c++,gnu.g++.lib.bug Subject: Challenge -- build the fastest sorting mousetrap! Message-ID: <10538@paris.ics.uci.edu> Date: 26 Mar 89 08:18:11 GMT References: <16539@mimsy.UUCP> Sender: news@paris.ics.uci.edu Reply-To: Doug Schmidt Organization: University of California at Irvine: ICS Dept. Lines: 622 In article <16539@mimsy.UUCP> chris@mimsy.UUCP (Chris Torek) writes: ++ This sounds like a CHALLENGE! :-) Below is a *real* challenge, for all those in net land with extra time on their hands during easter and spring break! I've included below a very efficient sorting routine, together with a nifty `sort-program-correctness-tester.' ====================================================================== Your challenge, should you wish to accept it, is to write a faster, pseudo-portable, and correct sort routine that fulfills the same specification as the one included below. ====================================================================== Pseduo-portable is eclectically defined as ``compiling and executing correctly with GNU GCC, AT&T cfront, and GNU G++!'' Actually, K&R C is fine too, I just didn't feel like cluttering up my program with #ifdef's everywhere.... The point here is to emphasize creating a clever algorithm, without being overly dependent on the machine or compiler. When I compare the final entries, I will use GCC, with the -O and -fstrength-reduce options enabled. To make this interesting, assume that the input sizes to the supplied driver program are: 100,000, 500,000, and 1,000,000 Here are the my results on a Sun 4/260, using G++ 1.34.2 with -O ---------------------------------------- sorttest 100000 time = 1.530 sorttest 500000 time = 8.820 sorttest 1000000 time = 19.130 ---------------------------------------- Naturally, not everyone has a Sun 4 ;-). So to make this fair I'll collect entries, time them on an otherwise empty Sun 4/260, and report the results. Naturally, I'd be interested in seeing how your programs perform on other machines, too. If anyone feels that the rules aren't clear enough, feel free to elaborate them. I'm mostly interested in seeing whether anyone can beat the following sorting algorithm for large input values. If enough people enter good programs, I'll archive them and make them available for anonymous FTP. Good luck!!! Doug --- On a clear day, under blue skies, there is no need to seek. And asking about Buddha +------------------------+ Is like proclaiming innocence, | schmidt@ics.uci.edu | With loot in your pocket. | office: (714) 856-4043 | ---------------------------------------------------------------------- #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 'checksort.c' <<'END_OF_FILE' X/* Efficient and general mechanism for testing correctness of sort routines. X Please excuse the lack of comments. I wrote this long ago, before X learning the importance of documentation (the hard way!) ;-). */ X Xtypedef struct node X{ X int item; X struct node *next; X} node; X Xint *range_buf; Xnode **hash_buf; Xint buf_size; Xnode *mem_stack; Xnode *stack_top; Xnode sentinel; Xnode *sentinel_ptr = &sentinel; X Xint Xfree_mem_and_exit(int status) X{ X free (mem_stack); X free (hash_buf); X free (range_buf); X return status; X} X Xint Xinit_hash_buf (void) X{ X register int i; X X if (!(hash_buf = (node **) malloc (buf_size * sizeof (node *)))) X return 0; X X for (i = 0;i < buf_size;i++) X hash_buf[i] = sentinel_ptr; X X return (mem_stack = stack_top = (node *) calloc (buf_size, sizeof (node))) != 0; X} X Xnode * Xmake_node (int item) X{ X node *temp = stack_top++; X X temp->item = item; X return temp; X} X Xint Xhash_value (int item) X{ X return item % buf_size; X} X Xvoid Xhash_insert (int item) X{ X int loc = hash_value (item); X node *new_node = make_node (item); X X new_node->next = hash_buf[loc]; X hash_buf[loc] = new_node; X} X Xint Xremove_node (int item) X{ X int loc = hash_value (item); X node *temp = hash_buf[loc]; X node *prev = 0; X X for (sentinel.item = item; temp->item != item; prev = temp, temp = temp->next) X ; X X if (temp == sentinel_ptr) X return 0; X else if (!prev) X hash_buf[loc] = temp->next; X else X prev->next = temp->next; X X return 1; X} X Xint Xdelete_buf(int *base) X{ X int i; X X for (i = 0; i < buf_size; i++) X if (!remove_node (base[i])) X return 0; X X for (i = 0; i < buf_size; i++) X if (hash_buf[i] != sentinel_ptr) X return 0; X X return 1; X} X Xint Xcheck_sort (int *original, int *base, int size) X{ X int range = base[size - 1]; X X if ((buf_size = size) >= range) X { X if (!(range_buf = (int *) calloc (range, sizeof (int)))) X return -1; X else X { X int i; X X for (i = 1; i < buf_size; i++) X if ((base[i] < base[i - 1]) || base[i - 1] > range || base[i - 1] < 0) X return free_mem_and_exit (0); X else X ++range_buf[base[i - 1]]; X X ++range_buf[base[buf_size - 1]]; X X for (i = 0; i < buf_size; i++) X if (range_buf[original[i]] <= 0) X return free_mem_and_exit (0); X else X --range_buf[original[i]]; X X for (i = 0; i < range; i++) X if (range_buf[i] != 0) X return free_mem_and_exit (0); X X } X } X else if (!init_hash_buf ()) X return -1; X else X { X int i; X X for (i = 1; i < buf_size; i++) X { X if (base[i] < base[i - 1]) X return free_mem_and_exit (0); X else X hash_insert(base[i - 1]); X } X X hash_insert (base[buf_size - 1]); X if (!delete_buf (original)) X return free_mem_and_exit (0); X } X return free_mem_and_exit (1); X} END_OF_FILE if test 3067 -ne `wc -c <'checksort.c'`; then echo shar: \"'checksort.c'\" unpacked with wrong size! fi # end of 'checksort.c' fi if test -f 'sort.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'sort.c'\" else echo shar: Extracting \"'sort.c'\" \(5893 characters\) sed "s/^X//" >'sort.c' <<'END_OF_FILE' X#include X#include X X/* the next 5 #defines implement a very fast in-line stack abstraction */ X X#define MAKE_STACK(S) ((stack_node *) malloc (sizeof(stack_node) * (S))) X#define PUSH(LOW,HIGH) do {top->lo = LOW;top->hi = HIGH;top++;} while (0) X#define POP(LOW,HIGH) do {--top;LOW = top->lo;HIGH = top->hi;} while (0) X#define STACK_NOT_EMPTY (stack < top) X#define SWAP(A,B) do {int _temp = (A);(A) = (B);(B) = _temp;} while (0) X X/* Discontinue quicksort algorithm when partition gets below this size. X This particular magic number was chosen to work best on a Sun 4/260. */ X#define MAX_THRESH 15 X X#ifdef __GNUG__ X#define MAX(X,Y) ((X) >? (Y)) X#else X#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) X#endif X X/* Data structure for stack of unfulfilled obligations. */ Xtypedef struct X{ X int *lo; X int *hi; X} stack_node; X X/* Once main array is partially sorted by quicksort the remainder is X completely sorted using insertion sort, since this is efficient X for partitions below MAX_THRESH size. BASE points to the beginning X of the array to sort, and END_PTR points at the very last element in X the array (*not* one beyond it!). */ X Xinline static void Xinsert_sort(int *base, int *end_ptr) X{ X int *run_ptr; X int *tmp_ptr = end_ptr; X int *thresh = MAX (base, end_ptr - MAX_THRESH); X X /* Find the smallest element in the first threshold and put it at the X end of the array. This is guaranteed to be the smallest element in X the array, and it speeds up the inner loop of insertion sort. */ X X for (run_ptr = tmp_ptr - 1; run_ptr >= thresh; run_ptr--) X if (*run_ptr > *tmp_ptr) X tmp_ptr = run_ptr; X X SWAP (*tmp_ptr, *end_ptr); X X /* Typical insertion sort, but we run from the `right-hand-side' X downto the `left-hand-side.' */ X X for (run_ptr = end_ptr - 1; run_ptr > base; run_ptr--) X { X int temp = *(tmp_ptr = run_ptr - 1); X X /* Select the correct location for the new element, X by sliding everyone down by 1 to make room! */ X X while (temp > *++tmp_ptr) X tmp_ptr[-1] = *tmp_ptr; X X tmp_ptr[-1] = temp; X } X} X X/* Return the median value selected from among the X LOW, MIDDLE, and HIGH. Rearrange LOW and HIGH so X the three values are sorted amongst themselves. X This speeds up later work... */ X Xinline static int Xfind_pivot(int *low, int *high) X{ X int *middle = low + ((high - low ) >> 1); X X if (*middle < *low) X SWAP (*middle, *low); X if (*high < *middle) X SWAP (*middle, *high); X else X return *middle; X X if (*middle < *low) X SWAP (*middle, *low); X X return *middle; X} X X/* Order elements using quicksort. This implementation incorporates X 4 optimizations discussed in Sedgewick: X X 1. Non-recursive, using an explicit stack of log (n) pointers that X indicate the next array partition to sort. X X 2. Choses the pivot element to be the median-of-three, reducing X the probability of selecting a bad pivot value. X X 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving X insertion sort to sort within the partitions. This is a X big win, since insertion sort is faster for small, mostly X sorted array segements. X X 4. The larger of the 2 sub-partitions are always pushed onto the X stack first, with the algorithm then concentrating on the X smaller partition. This *guarantees* no more than log (n) X stack size is needed! */ X Xint Xsort (int *base,int total_elems) X{ X if (total_elems > 1) X { X int *lo = base; X int *hi; X int *left_ptr; X int *right_ptr; X stack_node *stack; X stack_node *top; X int i; X int max_stack_size = 1; X X /* Calculate the exact stack space required in the worst case. X This is approximately equal to the log base 2 of TOTAL_ELEMS. */ X X for (i = 1; i < total_elems; i += i) X max_stack_size++; X X /* Create the stack, or die trying! */ X if (stack = MAKE_STACK (max_stack_size)) X top = stack + 1; X else X return 0; X X for (hi = lo + (total_elems - 1); STACK_NOT_EMPTY; ) X { X /* Select the median-of-three here. This allows us to X skip forward for the LEFT_PTR and RIGHT_PTR. */ X int pivot = find_pivot (lo, hi); X left_ptr = lo + 1; X right_ptr = hi - 1; X X /* Here's the famous ``collapse the walls'' section of X quicksort. Gotta like those tight inner loops! */ X do X { X while (*left_ptr < pivot) X left_ptr++; X X while (pivot < *right_ptr) X right_ptr--; X X if (left_ptr < right_ptr) X { X SWAP (*left_ptr, *right_ptr); X left_ptr++; X right_ptr--; X } X else if (left_ptr == right_ptr) X { X left_ptr++; X right_ptr--; X break; X } X } X while (left_ptr <= right_ptr); X X if ((right_ptr - lo) <= MAX_THRESH) X { X if ((hi - left_ptr) <= MAX_THRESH) /* ignore both small tables */ X POP (lo, hi); X else /* ignore small left table */ X lo = left_ptr; X } X else if ((hi - left_ptr) <= MAX_THRESH) /* ignore small right table */ X hi = right_ptr; X else if ((right_ptr - lo) > (hi - left_ptr)) X { /* push larger left table */ X PUSH (lo,right_ptr); X lo = left_ptr; X } X else X { /* push larger right table */ X PUSH (left_ptr,hi); X hi = right_ptr; X } X } X X } X insert_sort (base, base + (total_elems - 1)); X return 1; X} END_OF_FILE if test 5893 -ne `wc -c <'sort.c'`; then echo shar: \"'sort.c'\" unpacked with wrong size! fi # end of 'sort.c' fi if test -f 'test.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'test.c'\" else echo shar: Extracting \"'test.c'\" \(2610 characters\) sed "s/^X//" >'test.c' <<'END_OF_FILE' X/* Driver routine for sort program. */ X X#include X#include X#include X X#define MAX_INT ((~(unsigned)0)>>1) Xstatic struct rusage old_time; Xstatic struct rusage new_time; X X/* Set starting process time. */ X Xvoid start_timer(void) X{ X getrusage (RUSAGE_SELF, &old_time); X} X X/* Returns process time since OLD_TIME. */ X Xdouble return_elapsed_time(void) X{ X getrusage (RUSAGE_SELF, &new_time); X return((new_time.ru_utime.tv_sec - old_time.ru_utime.tv_sec) + X ((new_time.ru_utime.tv_usec - old_time.ru_utime.tv_usec) X / 1000000.0)); X} X X/* Generates SIZE random integers If *BASE is NULL then space is X allocated, initialized, and returned, otherwise the *BASE buffer X is used directly. */ X Xint * Xgen_rand (int **base, unsigned int size) X{ X int i; X X if (!*base) X *base = (int *) malloc (size * sizeof (int)); X srandom ((getpid () * (time (0) ^ time (0) * 1009)) % MAX_INT); X X for (i = 0; i < size; i++) X (*base)[i] = (random () ^ time (0)); X X return *base; X} X X/* Copies the contents of BASE into *NEW_BASE. *NEW_BASE X is dynamically allocated if it is initially NULL, otherwise X the *NEW_BASE buffer is used. */ X Xint * Xstore_buf (int *base, int **new_base, unsigned size) X{ X if (!*new_base) X *new_base = (int *) malloc (sizeof (int) * size); X X bcopy (base, *new_base, size * sizeof(int)); X return *new_base; X} X X/* Prints out contents of array in case the sort routine X fails. */ X Xvoid Xprint (int *a, int s) X{ X int i; X printf ("---\n"); X for (i = 0; i < s; i++) X printf ("item = %d\n", a[i]); X} X X/* Sets up the data buffers, generates random input, calls X the sort routine, records how long it takes to run, X checks the output for validity, and prints the results. */ X Xint Xmain (int argc, char *argv[]) X{ X if (argc != 2) X { X fprintf (stderr, "usage: %s size\n", argv[0]); X return 1; X } X else X { X int status; X int size = atoi (argv[1]); X int *sorted = 0; X int *copy = 0; X double time; X X gen_rand (&sorted, size); X store_buf (sorted, ©, size); X X start_timer (); X sort (sorted, size); X time = return_elapsed_time (); X X if ((status = check_sort (copy, sorted, size)) > 0) X printf ("time = %.3f\n", time); X else X switch (status) X { X case -1: printf ("memory failure\n"); break; X case 0: X printf ("`sorted' array is not an ordered permutation of original\n"); X print (sorted, size); X print (copy, size); X break; X } X return 0; X } X} END_OF_FILE if test 2610 -ne `wc -c <'test.c'`; then echo shar: \"'test.c'\" unpacked with wrong size! fi # end of 'test.c' fi if test -f 'Makefile' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'Makefile'\" else echo shar: Extracting \"'Makefile'\" \(1159 characters\) sed "s/^X//" >'Makefile' <<'END_OF_FILE' X############################################################################# X# X# Makefile for sorttest X# X############################################################################# X# X# If you move this makefile, update the variable below X# or else depend won't work. X############################################################################# XMAKEFILE = Makefile XCC = g++ XCFILES = test.c sort.c checksort.c XOFILES = test.o sort.o checksort.o XMANFILE = sorttest.1 XPROGRAM = sorttest X############################################################################# XOPTFLAGS = -O XCFLAGS = $(OPTFLAGS) $(DFLAGS) X X$(PROGRAM): $(OFILES) X $(CC) $(LDFLAGS) $(OFILES) -o $(PROGRAM) $(LIBS) X Xclean: X rm -f *.o *.out core X Xclean-all: clean X rm -f $(PROGRAM) X Xdepend: X /usr/ucb/mkdep -f $(MAKEFILE) $(CFILES) X X# DO NOT DELETE THIS LINE -- mkdep uses it. X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY. X Xtest.o: test.c /usr/include/stdio.h /usr/include/sys/time.h /usr/include/time.h Xtest.o: /usr/include/sys/resource.h Xsort.o: sort.c /usr/include/stdio.h /usr/include/ctype.h Xchecksort.o: checksort.c X X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY END_OF_FILE if test 1159 -ne `wc -c <'Makefile'`; then echo shar: \"'Makefile'\" unpacked with wrong size! fi # end of 'Makefile' fi echo shar: End of shell archive. exit 0 -- On a clear day, under blue skies, there is no need to seek. And asking about Buddha +------------------------+ Is like proclaiming innocence, | schmidt@ics.uci.edu | With loot in your pocket. | office: (714) 856-4043 |