Relay-Version: version B 2.10 5/3/83; site utzoo.UUCP
Posting-Version: version B 2.10.2 9/18/84; site megaron.UUCP
Path: utzoo!watmath!clyde!cbosgd!ukma!psuvm.bitnet!psuvax1!burdvax!sdcrdcf!hplabs!hao!noao!arizona!megaron!wendt
From: wendt@megaron.UUCP
Newsgroups: net.lang.c
Subject: fast code and no morals
Message-ID: <842@megaron.UUCP>
Date: Tue, 21-Jan-86 19:22:29 EST
Article-I.D.: megaron.842
Posted: Tue Jan 21 19:22:29 1986
Date-Received: Fri, 24-Jan-86 21:28:03 EST
Distribution: net
Organization: Dept of CS, U of Arizona, Tucson
Lines: 82

Here's a version of calloc that breaks every rule of "structured"
programming ever invented -- and runs about twice as fast as a
vanilla version.  The C source comes first, followed by the
corresponding VAX assembly:

/*
 * calloc - allocate and clear memory block
 *
 * Allocates room for `num' objects of `size' bytes each with malloc()
 * and sets every requested byte to zero.
 *
 * Returns a pointer to the cleared block, or NULL if malloc() fails or
 * if num*size does not fit in an unsigned.
 *
 * The bulk of the block is cleared one int at a time by an 8-way
 * unrolled loop that is entered part-way through via a switch (the
 * trick known as Duff's device), so no separate remainder loop is
 * needed for the int stores.  Any trailing bytes that do not make up a
 * whole int are cleared one char at a time so that nothing beyond the
 * requested size is written.
 */
#define CHARPERINT (sizeof(int)/sizeof(char))
#ifndef NULL
#define NULL 0
#endif

char *calloc(num, size)
unsigned num, size;
    {
    register char *mp;
    char *malloc();
    register int *q, *qlim;
    register char *cp, *clim;
    register unsigned m;
    unsigned total;

    /* refuse requests whose byte count would wrap around */
    if (size != 0 && num > (unsigned)~0 / size) return (NULL);
    total = num * size;

    mp = malloc(total);
    if (mp == NULL) return (NULL);

    /* m = number of WHOLE ints inside the block (rounded down) */
    m = total / CHARPERINT;
    q = (int *) mp;
    qlim = q + m;

    /*
     * Jump into the middle of the unrolled loop so that the first,
     * partial pass performs m%8 stores; every later pass performs 8.
     * With m == 0 control lands on case 0 and the loop test fails at
     * once, so nothing is written.
     */
    switch (m & 7)
        do      {
                    *q++ = 0;
            case 7: *q++ = 0;
            case 6: *q++ = 0;
            case 5: *q++ = 0;
            case 4: *q++ = 0;
            case 3: *q++ = 0;
            case 2: *q++ = 0;
            case 1: *q++ = 0;
            case 0: ;
            } while (q < qlim);

    /* clear the 0..CHARPERINT-1 bytes left over after the last int */
    clim = mp + total;
    for (cp = (char *) qlim; cp < clim; cp++)
        *cp = 0;

    return (mp);
    }

# _calloc - VAX assembly for  char *calloc(num, size)
#
# Arguments are read through ap: 4(ap) = num, 8(ap) = size.
# Result is returned in r0: the block address, or 0 if _malloc returned 0.
#
# Register roles in this routine:
#   r11 = mp    address returned by _malloc (the return value)
#   r10 = q     running store pointer, advanced 4 bytes per clrl
#   r9  = qlim  r10 + 4*r8, the address at which clearing stops
#   r8  = m     number of longwords to clear, as returned by udiv
#
# `udiv' is an external routine not shown here; from the call below it is
# presumably an unsigned-divide helper returning (num+3)/4 in r0 --
# confirm against its definition.
.align  1
.globl  _calloc
# L32 is the word placed at the entry point (procedure entry mask);
# 0xf00 has bits 8-11 set, matching r8-r11, the registers used below.
.set    L32,0xf00
.data
.text
_calloc:.word   L32
# num *= size  (the product overwrites the first argument in place)
mull2   8(ap),4(ap)
# mp = malloc(num)
pushl   4(ap)
calls   $1,_malloc
movl    r0,r11
# the movl set the condition codes: nonzero mp -> go clear the block
jneq    L37
# malloc failed: return 0
clrl    r0
ret
# q = mp
L37:movl        r11,r10
# m = udiv(num+3, 4): the 4 is pushed first, then num+3
pushl   $4
addl3   $3,4(ap),-(sp)
calls   $2,udiv
movl    r0,r8
# r0 = m*4, the byte length of the region to clear
ashl    $2,r0,r0
# q = mp again (same value as loaded at L37)
movl    r11,r10
# qlim = q + m*4
addl3   r10,r0,r9
# r0 = low three bits of m, i.e. m & 7
extzv   $0,$3,r8,r0
# dispatch on r0 through the 8-entry offset table that follows;
# entry k jumps into the clrl run so that exactly k stores happen
# before the first loop test (entry 0 goes straight to the test)
casel   r0,$0,$7
L2000000:
.word   L41-L2000000
.word   L49-L2000000
.word   L48-L2000000
.word   L47-L2000000
.word   L46-L2000000
.word   L45-L2000000
.word   L44-L2000000
.word   L43-L2000000
# exit: return mp
L38:movl        r11,r0
ret
# unrolled clear: eight longword stores per full pass, *q++ = 0 each
L42:clrl        (r10)+
L43:clrl        (r10)+
L44:clrl        (r10)+
L45:clrl        (r10)+
L46:clrl        (r10)+
L47:clrl        (r10)+
L48:clrl        (r10)+
L49:clrl        (r10)+
# loop test: repeat from L42 while q < qlim
# NOTE(review): jlss is the signed less-than branch, so the two addresses
# are compared as signed longwords; jlssu would be the unsigned form.
# Confirm that addresses with the top bit set cannot reach this code.
L41:cmpl        r10,r9
jlss    L42
jbr     L38