Xref: utzoo sci.math:17471 comp.lang.c:39333 Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!zaphod.mps.ohio-state.edu!mips!pacbell.com!pacbell!osc!jgk From: jgk@osc.COM (Joe Keane) Newsgroups: sci.math,comp.lang.c Subject: Re: Shuffle generation Summary: Doug is smart. Keywords: permutation Message-ID: <4787@osc.COM> Date: 16 May 91 00:46:05 GMT References: <1bQCYz#6lHOhh7Jxk2b4WjLh65vrXpL=eric@snark.thyrsus.com> <8c=sAju00Vp_Ihxl4r@andrew.cmu.edu> Reply-To: jgk@osc.COM (Joe Keane) Followup-To: sci.math Organization: Versant Object Technology, Menlo Park, CA Lines: 177 In article <8c=sAju00Vp_Ihxl4r@andrew.cmu.edu> dd2i+@andrew.cmu.edu (Dee Dee Two Eye) writes: >To swap N items in an array A[0..(n-1)], the following O(N) algorithm works: > >for (i=0; i < N; i++) { > swap(i, i + random(N - i)); >} > >Where random(r) is a function which returns a random integer in 0..(r-1) >and swap(a, b) swaps the stuff in array A[a] with A[b]. This algorithm is much better than the naive one, which repeatedly swaps two randomly chosen elements. At the end of the loop, all permutations are equally probable, assuming the random function is uniform. The naive algorithm only approaches this asymptotically. Note that if you seed your random-number generator with a 32-bit integer, then there are only 2^32 possible permutations, no matter how big the array is. If you worry about such things, like i do, then you should make multiple passes over the array. Following i've included `shuffle.c', a program i wrote a while ago which uses the algorithm described above. I've found that it's a useful utility, so you may want to `clip and save' it. -- Joe Keane, professional C++ programmer jgk@osc.com (...!uunet!stratus!osc!jgk) -- #include #include #define DEBUG 0 extern char* malloc(); extern char* realloc(); struct line { int length; char* ptr; }; int main (argc, argv) int argc; char* argv[]; { struct line* master_ptr; int master_size; #if DEBUG fputs("shuffle: reading lines...\n", stderr); #endif { int master_capacity; char *buffer_ptr; int buffer_capacity; master_capacity = 256; master_ptr = (struct line*)malloc(master_capacity * sizeof(struct line)); if (!master_ptr) goto out_of_memory; master_size = 0; buffer_capacity = 256; buffer_ptr = malloc(buffer_capacity); if (!buffer_ptr) goto out_of_memory; for (;;) { int c; int buffer_size; char* line_ptr; c = getchar(); if (c == EOF) goto eof; if (master_size >= master_capacity) { master_capacity *= 2; master_ptr = (struct line*)realloc(master_ptr, master_capacity * sizeof (struct line)); if (!master_ptr) goto out_of_memory; } buffer_size = 0; while (c != '\n') { if (buffer_size >= buffer_capacity) { buffer_capacity *= 2; buffer_ptr = realloc(buffer_ptr, buffer_capacity); } buffer_ptr[buffer_size] = c; buffer_size++; c = getchar(); if (c == EOF) { fputs("shuffle: adding newline at end of file\n", stderr); break; } } line_ptr = malloc(buffer_size); if (!line_ptr) goto out_of_memory; memcpy(line_ptr, buffer_ptr, buffer_size); master_ptr[master_size].length = buffer_size; master_ptr[master_size].ptr = line_ptr; master_size++; if (c == EOF) goto eof; } eof: free(buffer_ptr); } #if DEBUG fprintf(stderr, "shuffle: total of %d lines read\n", master_size); #endif { int pass; for (pass = 0; pass < 16; pass++) { struct timeval tv; int line_number; gettimeofday(&tv, 0); #if DEBUG fprintf(stderr, "shuffle: doing pass %d, time is %d seconds, %d microseconds...\n", pass, tv.tv_sec, tv.tv_usec); #endif srandom(tv.tv_sec ^ tv.tv_usec); for (line_number = 0; line_number < master_size; line_number++) { int other_line; struct line temp; other_line = random() % (master_size - line_number) + line_number; temp = master_ptr[line_number]; master_ptr[line_number] = master_ptr[other_line]; master_ptr[other_line] = temp; } } } #if DEBUG fputs("shuffle: writing lines...\n", stderr); #endif { int line_number; for (line_number = 0; line_number < master_size; line_number++) { char* ptr; char* end; ptr = master_ptr[line_number].ptr; end = ptr + master_ptr[line_number].length; while (ptr < end) { putchar(*ptr); ptr++; } putchar('\n'); } } #if DEBUG fputs("shuffle: all done\n", stderr); #endif return 0; out_of_memory: fputs("shuffle: out of memory\n", stderr); return 1; }