Path: utzoo!utgpu!water!watmath!clyde!rutgers!sri-spam!ames!aurora!labrea!decwrl!spar!malcolm From: malcolm@spar.SPAR.SLB.COM (Malcolm Slaney) Newsgroups: comp.lang.lisp Subject: Lisp vs. C Floating Point (Suns) Message-ID: <536@spar.SPAR.SLB.COM> Date: 29 Jan 88 19:06:37 GMT Organization: SPAR - Schlumberger Palo Alto Research Lines: 325 The times shown in the table below are number of seconds to calculate 10 iterations of a 1024 point FFT. In this study the C and Lisp version of the FFT routine are identical. I started with Harry Barrow's FFT benchmark (with declarations added by Lucid) and then translated it statement by statement into C (thanks to Dave Singer for double checking this.) I believe that this is as accurate a comparison as is possible. Note, all of the Sun tests were measured with a version of Lucid Common Lisp labelled "Development Environment 2.1.1". This lisp only does single precision arithmetic using the 68881. Thus the only two lines that can be directly compared are "Single 68881 C" versus "Lucid 2.1.1". I believe it is now reasonable to conclude that Lisp compilers are as fast as C compilers. I don't have any numbers for a prodution quality Lisp compiler for the Sun/4's. I don't know of any reason the SPARC machine would run compiled Lisp slower than C. Finally, a word about register declarations in C: The times shown here were measured by putting the variables I thought were most accessed into registers. I did this with the same effort I normally go to make my own programs fast. I did not do any exhaustive testing to find the best set. When all of the register declarations are removed the 3/160 times got slower by .8 seconds and all the 2/260 times got slower by about .4 seconds. The Sun 4/260 numbers did not change any indicating they are doing global register optimization or the 4/260 is being completely dominated by the floating point work. The Sun 3 no-register times make sense since only the loop iteration and array addresses were put in registers and thus they are independent of the floating point work. Malcolm *********************************************************** Time (in seconds) to execute 10 iterations of a 1024 point FFT 3/160 3/260 4/260 Double FPA C 2.4 1.5 0.8 (*) Single FPA C 1.3 0.8 0.3 (*) Double 68881 C 5.0 4.1 0.8 (*) Single 68881 C 4.8 4.1 0.3 (*) <====== Lucid 2.1.1 4.7 4.0 <====== Symbolics Machine 4.7 (No IFU, No FPA - From Gabriel - Duplicated @ SPAR) Symbolics Machine 3.9 (with either IFU or FPA - From Gabriel) * Note that Sun4/260's only have one floating point option. They use a pair of Weitek floating point chips. And finally, here is the actual C version of the Barrow/Gabriel/Lucid FFT benchmark. The original Lisp is in comments. *********************************************************** /* * ;; FFT -- This is an FFT benchmark written by Harry Barrow. * ;; It tests a variety of floating point operations, including array * ;; references. * ;; * ;; Originally written in Lisp. * ;; Translated to C by Malcolm@spar.slb.com January 1988. * Thanks to Dave Singer @ SPAR for double checking the translation. /* * * (defvar **fft-re** * (make-array 1025. :element-type 'single-float :initial-element 0.0)) * (defvar **fft-im** * (make-array 1025. :element-type 'single-float :initial-element 0.0)) */ float fft_re[1025], fft_im[1025]; /* * (defvar s-pi (float pi 0.0)) */ #define s_pi 3.141592653589792434 /* * (defun fft (areal aimag) * (declare (type (simple-array single-float (*)) areal aimag)) */ fft(areal, aimag) float areal[], aimag[]; { /* (prog* ((ar areal) * (ai aimag) * (i 1) * (j 0) * (k 0) * (m 0) ;compute m = log(n) * (n (1- (array-dimension ar 0))) * (nv2 (floor n 2)) * (le 0) (le1 0) (ip 0) * (ur 0.0) (ui 0.0) (wr 0.0) (wi 0.0) (tr 0.0) (ti 0.0)) * (declare (type fixnum i j k n nv2 m le le1 ip)) * (declare (type (simple-array single-float (*)) ar ai)) * (declare (single-float ur ui wr wi tr ti)) */ register float *ar = areal, *ai = aimag; register int i = 1, j = 0, k = 0, m = 0; int n = 1024, nv2 = 512, le = 0, le1 = 0, ip = 0; float ur = 0.0, ui = 0.0, wr = 0.0, wi = 0.0, tr = 0.0, ti = 0.0; register int l; /* l1 (cond ((< i n) * (setq m (the fixnum (1+ m)) * i (the fixnum (+ i i))) * (go l1))) */ l1 : if (i < n){ m += 1; i += i; goto l1; } /* * (cond ((not (equal n (the fixnum (expt 2 m)))) * (princ "error ... array size not a power of two.") * (read) * (return (terpri)))) */ /* * (setq j 1 ;interchange elements * i 1) ;in bit-reversed order */ j = 1; i = 1; /* * l3 (cond ((< i j) * (setq tr (aref ar j) * ti (aref ai j)) * (setf (aref ar j) (aref ar i)) * (setf (aref ai j) (aref ai i)) * (setf (aref ar i) tr) * (setf (aref ai i) ti))) */ l3: if (i < j){ tr = ar[j]; ti = ai[j]; ar[j] = ar[i]; ai[j] = ai[i]; ar[i] = tr; ai[i] = ti; } /* * (setq k nv2) * l6 (cond ((< k j) * (setq j (the fixnum (- j k)) * k (the fixnum (/ k 2))) * (go l6))) * (setq j (the fixnum (+ j k)) * i (the fixnum (1+ i))) */ k = nv2; l6: if (k < j){ j -= k; k /= 2; goto l6; } j += k; i += 1; /* * (cond ((< i n) * (go l3))) */ if (i < n) goto l3; /* * (do ((l 1 (the fixnum (1+ l)))) * ((> (the fixnum l) m)) ;loop thru stages * (declare (type fixnum l)) * (setq le (the fixnum (expt 2 l)) * le1 (floor le 2) ;;(the (values fixnum fixnum) (floor le 2)) * ur 1.0 * ui 0.0 ;This used to be fixnum 0 --smh * wr (cos (/ s-pi le1)) * wi (sin (/ s-pi le1))) */ for (l = 1; l <= m; l++){ le = 1 << l; le1 = le / 2; ur = 1.0; ui = 0.0; wr = cos(s_pi / le1); wi = sin(s_pi / le1); /* * (do ((j 1 (the fixnum (1+ (the fixnum j))))) * ((> (the fixnum j) le1)) ;loop thru butterflies * (declare (type fixnum j)) * (do ((i j (the fixnum (+ i le)))) * ((> (the fixnum i) n)) ;do a butterfly * (declare (type fixnum i)) * (setq ip (the fixnum (+ i le1)) * tr (- (* (aref ar ip) ur) * (* (aref ai ip) ui)) * ti (+ (* (aref ar ip) ui) * (* (aref ai ip) ur))) * (setf (aref ar ip) (- (aref ar i) tr)) * (setf (aref ai ip) (- (aref ai i) ti)) * (setf (aref ar i) (+ (aref ar i) tr)) * (setf (aref ai i) (+ (aref ai i) ti)))) * (setq tr (- (* ur wr) (* ui wi)) * ti (+ (* ur wi) (* ui wr)) * ur tr * ui ti)) * (return t))) */ for (j = 1; j <= le1; j++){ for (i = j; i <= n; i += le){ ip = i + le1; tr = ar[ip]*ur - ai[ip] * ui; ti = ar[ip]*ui + ai[ip] * ur; ar[ip] = ar[i] - tr; ai[ip] = ai[i] - ti; ar[i] += tr; ai[i] += ti; } } tr = ur * wr - ui * wi; ti = ur * wi + wi * wr; ur = tr; ui = ti; } } /* * (defun fft-bench () * (dotimes (i 10) * (fft **fft-re** **fft-im**))) */ fft_bench(){ int i; for (i=0; i< 10; i++) fft(fft_re, fft_im); } /* * (defun testfft () * (format *trace-output* "; doing fft: (fft-bench)~%") * (print (time (fft-bench)))) */ /* * (defun clear-fft () * (dotimes (i 1025) * (setf (aref **fft-re** i) 0.0 * (aref **fft-im** i) 0.0)) * (values)) */ clear_fft(){ int i; for (i=0;i<1025;i++) fft_re[i] = fft_im[i] = 0; } /* * (defun setup-fft-component (theta &optional (phase 0.0)) * (let ((f (* 2 pi theta)) * (c (cos (* 0.5 pi phase))) * (s (sin (* 0.5 pi phase)))) * (dotimes (i 1025) * (let ((x (sin (* f (/ i 1024.0))))) * (incf (aref **fft-re** i) (float (* c x) 0.0)) * (incf (aref **fft-im** i) (float (* s x) 0.0))))) * (values)) */ setup_fft_component(theta,phase) float theta, phase; { float f, c, s; int i; f = 2 * s_pi * theta; c = cos(0.5 * s_pi * phase); s = sin(0.5 * s_pi * phase); for (i = 0; i < 1025; i++){ float x; x = sin(f * (i / 1024.0)); fft_re[i] += c * x; fft_im[i] += s * x; } } /* * (defvar fft-delta 0.0001) */ #define fft_delta 0.0001 /* * (defun print-fft () * (dotimes (i 1025) * (let ((re (aref **fft-re** i)) * (im (aref **fft-im** i))) * (unless (and (< (abs re) fft-delta) (< (abs im) fft-delta)) * (format t "~4d ~10f ~10f~%" i re im)))) * (values)) */ print_fft(){ int i; for (i = 0; i< 1025; i++){ float re, im; re = fft_re[i]; im = fft_im[i]; if (abs(re) > fft_delta || abs(im) > fft_delta) printf("%4d %10f %10f\n", i, re, im); } } main(){ fft_bench(); }