Path: utzoo!utgpu!jarvis.csri.toronto.edu!cs.utexas.edu!usc!apple!vsi1!wyse!mips!batman!rudy From: rudy@mips.COM (Rudy Wang) Newsgroups: comp.graphics Subject: Re: Integral Square Root Summary: performance result on a MIPS M/2000 with 64M memory Message-ID: <34365@mips.mips.COM> Date: 11 Jan 90 05:52:06 GMT References: <9170@cbmvax.commodore.com> <21550@mimsy.umd.edu> <21561@mimsy.umd.edu> <21562@mimsy.umd.edu> Sender: news@mips.COM Reply-To: rudy@mips.COM (Rudy Wang) Organization: MIPS Computer Systems, Sunnyvale, CA Lines: 193 I've collected all 4 algorithms from chris, rokicki, mitchell and kchen and ran them under the Mips' profiler on a Mips M2000 (w/ 64M memory). Here's the result: Chris' inlined routine was clearly the winner! It was 20% faster than the original rokicki algorithm. =========================================================================== Profile listing generated Wed Jan 10 21:08:30 1990 with: prof -pixie -proc isqrt ---------------------------------------------------------------------------- * -p[rocedures] using basic-block counts; * * sorted in descending order by the number of cycles executed in each * * procedure; unexecuted procedures are excluded * ---------------------------------------------------------------------------- 319597 cycles cycles %cycles cum % cycles bytes procedure (file) /call /line 109500 34.26 34.26 219 11 isqrt4 (isqrt.c) 86000 26.91 61.17 172 9 isqrt3 (isqrt.c) 56000 17.52 78.69 112 6 isqrt2 (isqrt.c) 46500 14.55 93.24 93 24 isqrt1 (isqrt.c) ... ... many irrelevant procedures deleted... ... =========================================================================== Source code follows: #define ULONG unsigned long ULONG isqrt1 ( /* chris' algorithm */ register ULONG v ) { register long t = 1L << 30, r = 0, s; #define STEP(k) s = t + r; r >>= 1; if (s <= v) {v -= s; r |= t;} STEP(15); t >>= 2; STEP(14); t >>= 2; STEP(13); t >>= 2; STEP(12); t >>= 2; STEP(11); t >>= 2; STEP(10); t >>= 2; STEP(9); t >>= 2; STEP(8); t >>= 2; STEP(7); t >>= 2; STEP(6); t >>= 2; STEP(5); t >>= 2; STEP(4); t >>= 2; STEP(3); t >>= 2; STEP(2); t >>= 2; STEP(1); t >>= 2; STEP(0); return (r); } /* isqrt1 */ ULONG isqrt2 ( /* rokicki's (O. Peter's?) algorithm */ register ULONG v ) { register ULONG t, r, tmp; t = 0x40000000; r = 0; do { tmp = t + r; r >>= 1; if (tmp <= v) { v -= tmp; r |= t; } t >>= 2; } while (t != 0); return (r); } /* isqrt2 */ ULONG isqrt3 ( /* mitchell's algorithm */ register ULONG n ) { register int i, r, rb, b; i = 15; r = 0; do { b = 1 << i; rb = r + b; if (rb * rb <= n) r += b; i--; } while (i >= 0); return ((ULONG) r); } /* isqrt3 */ /* *------------------------------------------------------------------------ * kchen's algorithm - slightly modified to use bit-shift, since it's * quite a bit (:-)) faster than member access on a Mips machine. *------------------------------------------------------------------------ * * Example of (16 bit) integer square root based * on (a+b)^2 = (a^2 + 2ab + b^2). Yields largest integer * whose square is less than or equal n. * * binValue is a table of the numbers 2^0, 2^1, 2^2, etc. * binSquare is a table of the square of 2^0, 2^1, 2^2, etc. * * At all times, succApproximation = iSqrt^2. */ #if 0 int binValue [] = { 0x8000, 0x4000, 0x2000, 0x1000, 0x800, 0x400, 0x200, 0x100, 0x80, 0x40, 0x20, 0x10, 0x8, 0x400, 0x2, 0x1 }; int binSquare [] = { 0x40000000, 0x10000000, 0x4000000, 0x1000000, 0x400000, 0x100000 , 0x40000, 0x10000, 0x4000, 0x1000, 0x400, 0x100, 0x40, 0x10 , 0x4, 0x1 }; #endif 0 int isqrt4 ( register int n ) { register int guess, iSqrt, i, test; guess = iSqrt = 0; for (i = 16; i != 0; i--) { /* * Let test be the square of the the sum of the current * approx. and the next bit value, i.e., (iSqrt+binValue)^2. */ test = guess /* a^2 */ + (iSqrt << i) /* 2*a*b */ + (1 << ((i - 1) << 1)); /* b^2 */ /* * If test is no greater than n, binValue[i] can be added * to the current approximation of the square root. */ if (test <= n) { guess = test; iSqrt += (1 << (i - 1)); } } return (iSqrt); } /* isqrt4 */ main ( register int argc, register char **argv ) { register ULONG i, j, k; i = atoi (argv [1]); for (j = 0; j < 500; j++) k = isqrt1 (i); printf ("sqrt (%0d) = %0d\n", i, k); for (j = 0; j < 500; j++) k = isqrt2 (i); printf ("sqrt (%0d) = %0d\n", i, k); for (j = 0; j < 500; j++) k = isqrt3 (i); printf ("sqrt (%0d) = %0d\n", i, k); for (j = 0; j < 500; j++) k = isqrt4 (i); printf ("sqrt (%0d) = %0d\n", i, k); } /* main */ -- UUCP: {ames,decwrl,prls,pyramid}!mips!rudy (or rudy@mips.com) DDD: 408-991-0247 Rudy Wang (or 408-720-1700, Ext. 247) USPS: MIPS Computer Systems, 930 Arques, Sunnyvale, CA 94086-3650 Quote: I think they're for 1 AM - Descartes, about his midnight snacks