Path: utzoo!utgpu!utstat!jarvis.csri.toronto.edu!rutgers!tut.cis.ohio-state.edu!ucbvax!hplabs!hp-pcd!hplsla!jima From: jima@hplsla.HP.COM (Jim Adcock) Newsgroups: comp.lang.c Subject: Re: Optimal for loop on the 68020. Message-ID: <5260014@hplsla.HP.COM> Date: 5 Jun 89 19:18:17 GMT References: <11993@well.UUCP> Organization: HP Lake Stevens, WA Lines: 210 Unfortunately, the "favorite loop construct" is going to be very dependent on the particular compiler used, and on exactly *what* you do inside the loop. Below are your examples, redone with a null loop body, plus *my* favorite loop construct: for(i=COUNT; i--;). I used gcc and HP's cc 6.5 compiler with -O -S, and for gcc: -O -S -fstrength-reduce -fcombine-regs -fforce-mem -fforce-addr. The point is that these things are very unpredictable, and optimizing compilers work well on linear code segments, not over branch points in programs [including loops]. The "optimizing" compilers I've seen do about 20% better than non-optimizing compilers, but are still far from "optimal" by our human standards. 
#define COUNT 100

/*
 * Test program: nine different spellings of a counted loop, each with an
 * empty body (the trailing ';' is the whole loop body).  With COUNT == 100
 * every loop below evaluates its body 100 times; they differ only in the
 * direction of counting, in pre- versus post-increment/decrement, and in
 * whether the update is folded into the loop condition.
 *
 * Each loop is followed by a call to xxxxxxxxxxxxxxxxxxx(), which is not
 * declared or defined here.  NOTE(review): presumably it only serves as an
 * easy-to-spot separator between loops in the generated assembly -- confirm.
 *
 * main() is written in pre-ANSI style: implicit int return type and no
 * return statement.  It is left exactly as compiled so that it still matches
 * the two assembly listings that follow.
 */
main() {
    int i;

    /* Count up, post-increment in the update clause; i ends at 100. */
    for ( i = 0; i < COUNT; i++ );
    xxxxxxxxxxxxxxxxxxx();

    /* Same loop with pre-increment in the update clause; i ends at 100. */
    for ( i = 0; i < COUNT; ++i );
    xxxxxxxxxxxxxxxxxxx();

    /* Pre-increment folded into the test: tests 1..100 pass, 101 fails. */
    for ( i = 0; ++i <= COUNT; );
    xxxxxxxxxxxxxxxxxxx();

    /* Post-increment folded into the test: tests 0..99 pass; i ends at 101. */
    for ( i = 0; i++ < COUNT; );
    xxxxxxxxxxxxxxxxxxx();

    /* Count down, post-decrement in the update clause; i ends at 0. */
    for ( i = COUNT; i > 0; i-- );
    xxxxxxxxxxxxxxxxxxx();

    /* Same loop with pre-decrement in the update clause; i ends at 0. */
    for ( i = COUNT; i > 0; --i );
    xxxxxxxxxxxxxxxxxxx();

    /* Pre-decrement folded into the test: tests 99..0 pass; i ends at -1. */
    for ( i = COUNT; --i >= 0; );
    xxxxxxxxxxxxxxxxxxx();

    /* Post-decrement folded into the test: tests 100..1 pass; i ends at -1. */
    for ( i = COUNT; i-- > 0; );
    xxxxxxxxxxxxxxxxxxx();

    /* Post-decrement tested against zero: tests 100..1 pass; i ends at -1. */
    for ( i = COUNT; i--; );
    xxxxxxxxxxxxxxxxxxx();
}

#NO_APP
gcc_compiled.:
.text
    .even
.globl _main
_main:
    link a6,#0
    moveq #99,d0
L4:
    dbra d0,L4
    clrw d0
    subql #1,d0
    jcc L4
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #99,d0
L8:
    dbra d0,L8
    clrw d0
    subql #1,d0
    jcc L8
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L10:
    dbra d0,L10
    clrw d0
    subql #1,d0
    jcc L10
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L14:
    dbra d0,L14
    clrw d0
    subql #1,d0
    jcc L14
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L20:
    subql #1,d0
    tstl d0
    jgt L20
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L24:
    subql #1,d0
    tstl d0
    jgt L24
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L26:
    dbra d0,L26
    clrw d0
    subql #1,d0
    jcc L26
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L30:
    subql #1,d0
    moveq #-1,d1
    cmpl d0,d1
    jlt L30
    jbsr _xxxxxxxxxxxxxxxxxxx
    moveq #100,d0
L34:
    dbra d0,L34
    clrw d0
    subql #1,d0
    jcc L34
    jbsr _xxxxxxxxxxxxxxxxxxx
    unlk a6
    rts

[hp cc 6.5:]
==============================================
    global _main
_main:
    link.w %a6,&-8
    mov.l %d7,-(%sp)
    movq &99,%d7
L12:
    dbf %d7,L12
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &99,%d7
L16:
    dbf %d7,L16
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &0,%d7
L21:
    addq.l &1,%d7
    movq &100,%d0
    cmp.l %d7,%d0
    ble.b L21
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &0,%d7
L24:
    mov.l %d7,%d0
    addq.l &1,%d7
    movq &100,%d1
    cmp.l %d0,%d1
    blt.b L24
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &100,%d7
L20001:
    subq.l &1,%d7
    tst.l %d7
    bgt.b L20001
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &100,%d7
L20003:
    subq.l &1,%d7
    tst.l %d7
    bgt.b L20003
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &100,%d7
L33:
    subq.l &1,%d7
    bge.b L33
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &100,%d7
L36:
    mov.l %d7,%d0
    subq.l &1,%d7
    tst.l %d0
    bgt.b L36
    jsr _xxxxxxxxxxxxxxxxxxx
    movq &100,%d7
L39:
    mov.l %d7,%d0
    subq.l &1,%d7
    tst.l %d0
    bne.b L39
    jsr _xxxxxxxxxxxxxxxxxxx
    mov.l (%sp)+,%d7
    unlk %a6
    rts