Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!uunet!tut.cis.ohio-state.edu!zaphod.mps.ohio-state.edu!usc!orion.oac.uci.edu!uci-ics!ucla-cs!math.ucla.edu!pmontgom
From: pmontgom@oak.math.ucla.edu
Newsgroups: comp.arch
Subject: New benchmark (source code)
Message-ID: <2132@sunset.MATH.UCLA.EDU>
Date: 7 Jan 90 03:17:43 GMT
Sender: news@MATH.UCLA.EDU
Reply-To: pmontgom@MATH.UCLA.EDU ()
Organization: UCLA Mathematics Department
Lines: 608


        program bchmrk
C
C               This benchmark times eleven codes, many of 
C       which require integer multiplication and/or division.
C       Integer addition, subtraction, and multiplication are
C       assumed not to abort on overflow, but rather to return 
C       the correct result modulo 2**32 or other high power of 2.
C       The benchmark includes the following:
       
C       (1)     Multiplication of square matrices.  Matrix entries
C               can be real, double precision, complex, integer,
C               or integers modulo a prime number.
C       (2)     Check whether an integer is prime, using trial division
C               by integers up to the square root of the argument.
C       (3)     Modular exponentiation.  This routine requires a function 
C               MODMUL(m1,m2) which computes the product m1*m2 modulo 
C               local variable mdls, but where m1*m2 may overflow.
C       (4)     Euclidean GCD (greatest common divisor) algorithm.
C       (5)     Binary-to-decimal conversion.
C       (6)     Continued fraction expansion of quadratic irrational.

C               An installation is allowed to change the following
C       parts of the source code when compiling the benchmark:

C       (1)     Subroutine CPUTIM (return elapsed user CPU time).
C       (2)     Statement function MODMUL within MODEXP 
C               (may be optimized to exploit machine properties,
C               or be replaced by an assembly language subroutine).
C       (3)     Convert entire source to upper case.

C               If this source code is modified in any other way,
C       please identify the modifications when reporting the time 
C       on your system.  An optimizing compiler is allowed.  
C       If multiple CPUs are used, also report the times with a single CPU.

C               The program reads no input data.   The memory required
C       is approximately 300000 bytes (decimal), if each real or
C       integer variable occupies 4 bytes.

C               Benchmark written by 

C                       Peter L. Montgomery
C                       Department of Mathematics
C                       University of California
C                       Los Angeles, CA 90024   USA
C
C                       pmontgom@math.ucla.edu
C                       January, 1990

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

C               Constants

        integer NCONV, PMODLS, PRMHGH, PRMLOW, SZ
        parameter (NCONV = 2000)
        parameter (PMODLS = 30011)
        parameter (PRMLOW = 2*10**9, PRMHGH = PRMLOW + 2000)
        parameter (SZ = 40)

C               Local arrays

        logical prm(0 : PRMHGH - PRMLOW)
        complex c1(SZ,SZ), c2(SZ,SZ), c3(SZ,SZ)
        complex ctmp(SZ,SZ), cp1(SZ,SZ), cp2(SZ,SZ)
        double precision d1(SZ,SZ), d2(SZ,SZ), d3(SZ,SZ)
        double precision dtmp(SZ,SZ), dp1(SZ,SZ), dp2(SZ,SZ)
        real s1(SZ,SZ), s2(SZ,SZ), s3(SZ,SZ)
        real stmp(SZ,SZ), sp1(SZ,SZ), sp2(SZ,SZ)
        integer convs(NCONV)
        integer i1(SZ,SZ), i2(SZ,SZ), i3(SZ,SZ)
        integer itmp(SZ,SZ), ip1(SZ,SZ), ip2(SZ,SZ)
        integer p1(SZ,SZ), p2(SZ,SZ), p3(SZ,SZ)
        integer ptmp(SZ,SZ), pp1(SZ,SZ), pp2(SZ,SZ)
        equivalence(prm, ctmp, dtmp, stmp, convs, itmp, ptmp)

C               Other local variables

        integer i, ierr, j, nprime, p, perr, rem, sumcnv, sumgcd
        real cerr, serr
        real t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13
        real tc, tcfrac, td, tdcml, tgcd, tgen, ti
        real tovral, tp, tprbpr, tprime, ts
        double precision derr
        character*10 string

C               Functions referenced

        intrinsic ABS, CMPLX, COS, DBLE, MAX, REAL, SIN
        logical PRIME
        integer GCD, MODEXP
        external GCD, MODEXP, PRIME

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

C               Select test matrices (not intended to be meaningful).
C               i1, i2, and i3 are modulo 2**32 or other power of 2.
C               If this causes trouble on your system, use smaller
C               values in the assignments and report the change.

        call CPUTIM(t1)
        do 100 j = 1, SZ
            do 90 i = 1, SZ
                d1(i,j) = COS(DBLE(i+j))
                d2(i,j) = SIN(DBLE(i*j))
                d3(i,j) = COS(DBLE(i-j))
                s1(i,j) = SIN(REAL(i))*SIN(REAL(j))
                s2(i,j) = COS(REAL(3*i - j))
                s3(i,j) = SIN(REAL(3*i - j))
                i1(i,j) = 23321*i*j + 4135151*i + 123321*j + 231235325
                i2(i,j) = 231421*i*j + 632153*i + 3215312*j + 887231323 
                i3(i,j) = 98136*i*j + 4532355*i - 2231413*j + 343232231
                p1(i,j) = MOD(23*i**2 + 21434*i*j + 3556*j + 56, PMODLS)
                p2(i,j) = MOD(8453*i*j + 24353*i + 765*j + 3145, PMODLS)
                p3(i,j) = MOD(124*(i-j)**2 + 4354*i + 56*j + 67, PMODLS)
                c1(i,j) = CMPLX(s1(i,j), REAL(d1(i,j)))
                c2(i,j) = CMPLX(s2(i,j), REAL(d2(i,j)))
                c3(i,j) = CMPLX(s3(i,j), REAL(d3(i,j)))
90          continue
100     continue

C               Perform double precision matrix multiplication.
C               Compute both (d1*d2)*d3 and d1*(d2*d3).

        call CPUTIM(t2)
        call DMTMUL(d1,   d2,   dtmp, SZ)
        call DMTMUL(dtmp, d3,   dp1,  SZ)
        call DMTMUL(d2,   d3,   dtmp, SZ)
        call DMTMUL(d1,   dtmp, dp2,  SZ)

C               Perform single-precision (real) matrix multiplication.

        call CPUTIM(t3)
        call SMTMUL(s1,   s2,   stmp, SZ)
        call SMTMUL(stmp, s3,   sp1,  SZ)
        call SMTMUL(s2,   s3,   stmp, SZ)
        call SMTMUL(s1,   stmp, sp2,  SZ)

C               Perform single-precision (complex) matrix multiplication.

        call CPUTIM(t4)
        call CMTMUL(c1,   c2,   ctmp, SZ)
        call CMTMUL(ctmp, c3,   cp1,  SZ)
        call CMTMUL(c2,   c3,   ctmp, SZ)
        call CMTMUL(c1,   ctmp, cp2,  SZ)

C               Perform integer matrix multiplication.

        call CPUTIM(t5)
        call IMTMUL(i1,   i2,   itmp, SZ)
        call IMTMUL(itmp, i3,   ip1,  SZ)
        call IMTMUL(i2,   i3,   itmp, SZ)
        call IMTMUL(i1,   itmp, ip2,  SZ)

C               Perform matrix multiplication modulo PMODLS.

        call CPUTIM(t6)
        call PMTMUL(p1,   p2,   ptmp, SZ, PMODLS)
        call PMTMUL(ptmp, p3,   pp1,  SZ, PMODLS)
        call PMTMUL(p2,   p3,   ptmp, SZ, PMODLS)
        call PMTMUL(p1,   ptmp, pp2,  SZ, PMODLS)

C               Check that products agree
C               (matrix multiplication should be associative).

        call CPUTIM(t7)
        cerr = 0.0
        derr = 0.0D0
        serr = 0.0      
        ierr = 0
        perr = 0
        do 200 j = 1, SZ
            do 190 i = 1, SZ
                cerr = MAX(cerr, ABS(cp1(i,j) - cp2(i,j)))
                derr = MAX(derr, ABS(dp1(i,j) - dp2(i,j)))
                serr = MAX(serr, ABS(sp1(i,j) - sp2(i,j)))
                ierr = MAX(ierr, ABS(ip1(i,j) - ip2(i,j)))
                perr = MAX(perr, ABS(pp1(i,j) - pp2(i,j)))
190         continue
200     continue
        if (ierr.ne.0 .or. perr.ne.0 
     *      .or. derr .ge. 1.0E-12 
     *      .or. serr .ge. 1.0E-5
     *      .or. cerr .ge. 2.0E-4   ) then
            print 210, ierr, perr, derr, serr, cerr
210         format(' ERRORS IN MATRIX MULTIPLY: ', 2I10, 3E10.3)
        end if

        call CPUTIM(t8)

C               Search for primes in the interval [PRMLOW, PRMHGH].

        nprime = 0
        do 300 j = 0, PRMHGH-PRMLOW
            p = j + PRMLOW
            prm(j) = PRIME(p)
            if (prm(j)) nprime = nprime + 1
300     continue

C               Use a probable prime test to verify these findings.
C               Recall, if p is an odd prime, then
C               2**((p-1)/2) == +-1 mod p by Fermat's Theorem.
C               Most integers p passing this test are prime.
C               Counterexamples exist (e.g., 341, 561), but are rare.
C
        call CPUTIM(t9)
        do 400 j = MOD(PRMLOW+1,2), PRMHGH-PRMLOW, 2
            p = j + PRMLOW
            rem = MODEXP(2, (p-1)/2, p)
            if (prm(j) .neqv. (rem.eq.1 .or. rem.eq.p-1)) then
                print *, 'Probable prime test fails for ', p, rem
            end if
400     continue
        call CPUTIM(t10)

        sumgcd = 0
        do 500 i = 10000000, 10000100
            do 490 j = 11111111, 140000000, 500001
                sumgcd = sumgcd + GCD(i,j)
490         continue
500     continue
        if (nprime.ne.100 .or. sumgcd.ne.407895) then
            print 600, nprime, sumgcd
600         format(1x, 'ERROR -- ', i7, ' primes found.',
     *                '  Sum of GCDs is ', i10)
        end if
        call CPUTIM(t11)

C           Do binary-to-decimal conversion.

        do 700 i = 10000, 10000000, 537
            call BINDEC(i, string)
700     continue
        if (string.ne.'   9999811') then
            print *, 'BINDEC Error - Final string is ', string
        end if
        call CPUTIM(t12)

C               Try continued fraction of quadratic irrational.

        sumcnv = 0
        do 800 j = 1000003, 1001003, 10
            call CFRAC(j, convs, NCONV)
            do 790 i = 1, NCONV
                sumcnv = sumcnv + convs(i)
790         continue
800     continue
        if (sumcnv.ne.7563088) then
            print *, ' CFRAC ERROR - Sum of convergents is ', sumcnv
        end if
        call CPUTIM(t13)

C               Compute times used.

        tgen   = t2  - t1
        td     = t3  - t2
        ts     = t4  - t3
        tc     = t5  - t4
        ti     = t6  - t5
        tp     = t7  - t6

        tprime = t9  - t8
        tprbpr = t10 - t9
        tgcd   = t11 - t10
        tdcml  = t12 - t11
        tcfrac = t13 - t12
C Overall rating = Geometric mean of above times.
C (Clock resolution should be such that all times are nonzero).
        tovral = (tgen*td*ts*tc*ti*tp*tprime*tprbpr*tgcd*tdcml*tcfrac)
     *         **(1.0/11.0)

C               Print timings.

        print 1000, tgen, td, ts, tc, ti, tp, tprime, tprbpr, tgcd,
     *  tdcml, tcfrac, tovral
1000    format(1x, 5HGener, 2x,4HDble, 2x,4HSngl, 1x,5HCmplx,
     *             1x,5HIntgr, 1x,5HModlr, 1x,6HTrlDiv,
     *             1x,6HPrbPrm, 3x, 3HGCD, 1x, 6HBinDec,
     *             1x,5HCfrac, 2x,'Geo mean'/
     *         1x, F5.2, 5F6.2, 2F7.2, F6.2, F7.2, 2F6.2, ' sec')
        end
************************************************************************
        subroutine BINDEC(num, string)
C Binary-to-decimal conversion of nonnegative integer.
        character*(*) string
        integer num

C           Local variables

        integer left, place
        character DIGITS(0:9)
        intrinsic LEN, MOD
        data DIGITS/'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'/

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        left = num
        place = LEN(string)

10      continue
        if (left.ge.10) then
            string(place:place) = DIGITS(MOD(left, 10))
            left = left/10
            place = place - 1
            if (place.ne.0) go to 10
            return
        end if

        string(place:place) = DIGITS(left)
        if (place.gt.1) string(1:place-1) = ' '
        end
************************************************************************
        subroutine CFRAC(D, convs, nconv)
C Return the first nconv convergents in the continued fraction
C expansion of SQRT(D), where D > 0 and D is not a perfect square.
        integer D, nconv, convs(1:nconv)

C               Local variables

        integer droot, dadd, daddnw, denom, dinv, dinvnw, iconv, q
        intrinsic INT, REAL, SQRT

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        droot = INT(SQRT(REAL(D) + 0.5))
        dadd = 0
        denom = 1
        dinv = D
        do 100 iconv = 1, nconv
C At this time, it remains to expand X = (SQRT(D) + dadd)/denom.
C We know D = dadd**2 + denom*dinv, where all variables are nonnegative.
C Estimate q = INT(X) using droot = INT(SQRT(D)).  
C Replace X by 1/(X - q) = denom/(SQRT(D) + dadd - denom*q)
C   = (SQRT(D) + denom*q - dadd)/((D - (dadd - denom*q)**2)/denom).
C Since D = dadd**2 + denom*dinv, the denominator is an integer.

C Make special check for case q = 1.
            if (droot + dadd .ge. 2*denom)  then
                q = (droot + dadd)/denom
                convs(iconv) = q

                daddnw = denom*q - dadd
                dinvnw = denom
                denom  = dinv + q*(dadd - daddnw)
                dadd   = daddnw
                dinv   = dinvnw
            else
                convs(iconv) = 1
                daddnw = denom - dadd
                dinvnw = denom
                denom  = dinv + dadd - daddnw
                dadd   = daddnw
                dinv   = dinvnw  
            end if
100     continue
        end
************************************************************************
        subroutine CMTMUL(m1, m2, m3, SZ)
C  Complex single precision matrix multiply (square matrices).
        integer SZ
        complex m1(SZ,SZ), m2(SZ,SZ), m3(SZ,SZ)

C               Local variables

        complex sum
        integer i, j, k

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        do 100 k = 1, SZ
            do 90 i = 1, SZ
                sum = (0.0, 0.0)
                do 80 j = 1, SZ
                    sum = sum + m1(i,j)*m2(j,k)
80              continue
                m3(i,k) = sum
90          continue
100     continue
        end
************************************************************************
        subroutine CPUTIM(tused)
C Return elapsed user CPU time (seconds) since program began execution.
C This routine will likely need replacement on other machines.
C In particular, if the times are too big or too small for the 
C FORMAT statement, then they can be scaled by altering this routine.
        real tused
        real tarray(2)
        real etime
        external etime

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        tused = etime(tarray)
        tused = tarray(1)
        end
************************************************************************
        subroutine DMTMUL(m1, m2, m3, SZ)
C  Double precision matrix multiply (square matrices).
        integer SZ
        double precision m1(SZ,SZ), m2(SZ,SZ), m3(SZ,SZ)
        double precision sum
        integer i, j, k

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        do 100 k = 1, SZ
            do 90 i = 1, SZ
                sum = 0.0D0
                do 80 j = 1, SZ
                    sum = sum + m1(i,j)*m2(j,k)
80              continue
                m3(i,k) = sum
90          continue
100     continue
        end
************************************************************************
        integer function GCD(n1, n2)
C Euclidean GCD algorithm (greatest common divisor of n1 and n2).
        integer n1, n2

C               Local variables

        integer m1, m2
        intrinsic ABS, MAX, MIN, MOD

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        m1 = MIN(ABS(n1), ABS(n2))
        m2 = MAX(ABS(n1), ABS(n2))
10      continue
        if (m1.gt.3) then
C Ensure m2 is not zero, to avoid divide by zero.
            m2 = 1 + MOD(m2-1,m1)
            m1 = MOD(m1,m2)
            go to 10
        end if
C Special codes for m1 = 0, 1, 2, 3.
        go to (100, 101, 102, 103), m1+1
        stop 'GCD - m1 out of range' 
100     continue
            GCD = m2
            return
101     continue
            GCD = 1
            return
102     continue
            GCD = 2 - MOD(m2,2)
            return 
103     continue
            if (MOD(m2,3).eq.0) then
                GCD = 3
            else
                GCD = 1
            end if
            return
        end
************************************************************************
        subroutine IMTMUL(m1, m2, m3, SZ)
C Integer matrix multiply (square matrices).
        integer SZ, m1(SZ,SZ), m2(SZ,SZ), m3(SZ,SZ)
        integer i, j, k, sum

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        do 100 k = 1, SZ
            do 90 i = 1, SZ
                sum = 0
                do 80 j = 1, SZ
                    sum = sum + m1(i,j)*m2(j,k)
80              continue
                m3(i,k) = sum
90          continue
100     continue
        end
************************************************************************
        integer function MODEXP(base, exp, mdls)
C Compute base**exp mod mdls, where exp >= 0 and 0 <= base < mdls.
C Use binary algorithm, starting from the right.
        integer base, exp, mdls

C               Local variables

        integer baslft, explft
        double precision mdlsnv

C               Functions referenced

        intrinsic MOD, INT, DBLE
        integer m1, m2, MODMUL
C Statement function MODMUL computes m1*m2 mod mdls.  As written,
C it allows the operands to be in the open interval (-n, n),
C with the result guaranteed to be there.  The estimated quotient,
C INT(DBLE(m1)*DBLE(m2)*mdlsnv), should be either the true
C integer quotient m1*m2/mdls, or 1 (in absolute value) too large,
C because the double precision value DBLE(m1)*DBLE(m2)*mdlsnv will
C be an upper bound for the rational number m1*m2/mdls 
C (since mdlsnv is biased away from 0), but will be less than the
C rational numberm1*m2/mdls + 1 (if floating point is accurate enough).
C
C Users of this benchmark are allowed to replace this definition 
C by another one which performs better in their environment,
C or to change the initialization of mdlsnv (below).  
C Be aware that mdls is barely below 2**31, so m1*m2 will need 63 bits.
C If single precision floating point arithmetic is accurate to 
C 35 bits or so, then it may be used when estimating the quotient.
C If the compiler and machine support the 64-bit product of two
C 32-bit operands, and the 32-bit quotient and remainder when dividing
C 64 bits by 32 bits, then that feature may be used.
        MODMUL(m1,m2) = m1*m2 - mdls*INT(DBLE(m1)*DBLE(m2)*mdlsnv)

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

C Get estimate for 1.0/mdls, biased away from 0.
        mdlsnv = 1.0D0/DBLE(mdls)
        mdlsnv = mdlsnv*(1.0D0 + 0.1D0*mdlsnv)
        if (exp.lt.0) then
            stop 'MODEXP - exponent negative'
        end if
        
        MODEXP = 1
        explft = exp
        baslft = base
        if (explft.eq.0) return
10      continue
        if (MOD(explft, 2).ne.0) then
            MODEXP = MODMUL(MODEXP, baslft)
            explft = explft - 1
            if (explft.eq.0) then
                if (MODEXP.lt.0) MODEXP = MODEXP + mdls
                return
            end if
        end if
        explft = explft/2
        baslft = MODMUL(baslft, baslft)
        go to 10
        end 
************************************************************************
        subroutine PMTMUL(m1, m2, m3, SZ, mdls)
C Matrix multiply (square matrices, entries modulo a prime number).
        integer SZ, m1(SZ,SZ), m2(SZ,SZ), m3(SZ,SZ), mdls
        integer i, j, k, mdsq, sum
        intrinsic MOD

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        mdsq = mdls**2
        do 100 k = 1, SZ
            do 90 i = 1, SZ
                sum = 0
                do 80 j = 1, SZ
                    sum = sum + m1(i,j)*m2(j,k)
                    if (sum .ge. mdsq) sum = sum - mdsq
80              continue
                m3(i,k) = MOD(sum, mdls)
90          continue
100     continue
        end
************************************************************************
        logical function PRIME(n)
C Check whether an integer n is prime.
C Test for divisibility by 2, 3, and by 6k +- 1, for k = 1, 2, ...
        integer n

C               Local variables

        integer j
        intrinsic MOD, NINT, REAL, SQRT 

C  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        if (n.lt.5) then
            PRIME = n.eq.2 .or. n.eq.3
        else if (MOD(n,6).ne.1 .and. MOD(n,6).ne.5) then
            PRIME = .FALSE.
        else
            do 100 j = 5, NINT(SQRT(REAL(n))), 6
                if (MOD(n,j).eq.0 .or. MOD(n,j+2).eq.0) then
                    PRIME = .FALSE.
                    return
                end if
100         continue
            PRIME = .TRUE.
        end if
        end
************************************************************************
        subroutine SMTMUL(m1, m2, m3, SZ)
C Single precision matrix multiply (square matrices).
        integer SZ
        real m1(SZ,SZ), m2(SZ,SZ), m3(SZ,SZ)
        real sum
        integer i, j, k

C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        do 100 k = 1, SZ
            do 90 i = 1, SZ
                sum = 0.0
                do 80 j = 1, SZ
                    sum = sum + m1(i,j)*m2(j,k)
80              continue
                m3(i,k) = sum
90          continue
100     continue
        end
--------
        Peter Montgomery
        pmontgom@MATH.UCLA.EDU 
	Department of Mathematics, UCLA, Los Angeles, CA 90024