Path: utzoo!utgpu!news-server.csri.toronto.edu!cs.utexas.edu!uunet!world!iecc!compilers-sender From: brennan@bcsaic.boeing.com (Michael D Brennan) Newsgroups: comp.compilers Subject: re: Threaded code Keywords: interpreter, design Message-ID: <3936@ssc-bee.ssc-vax.UUCP> Date: 2 May 91 19:50:23 GMT References: <3520@sequent.cs.qmw.ac.uk> Sender: compilers-sender@iecc.cambridge.ma.us Reply-To: brennan@bcsaic.boeing.com (Michael D Brennan) Organization: Boeing Aerospace & Electronics, Seattle WA Lines: 257 Approved: compilers@iecc.cambridge.ma.us Another method for obtaining threaded byte code for an interpreter is to edit the assembler output of a big switch rather than editing the prologue and epilogue off functions calls. You don't need gcc, global vars in registers, works with smart and dumb compilers, and all optimization can be turned on. For example: This C routine executes (unthreaded) byte code for an interpreter that can add, subtract and print. #define HALT 0 #define PUSH 1 #define ADD 2 #define SUB 3 #define PRINT 4 static int stack[32] ; void execute(code_ptr) register int *code_ptr ; { register int *stack_ptr = stack - 1 ; while ( 1 ) { switch( *code_ptr++ ) { case HALT : return ; case PUSH : * ++ stack_ptr = *code_ptr++ ; break ; case ADD : stack_ptr-- ; *stack_ptr += stack_ptr[1] ; break ; case SUB : stack_ptr-- ; *stack_ptr -= stack_ptr[1] ; break ; case PRINT : printf("%d\n", *stack_ptr--); break ; } } } ------------------------------------------------------- to interpret 2 + (3 - 4) the front end "compiles" in int code[] PUSH, 2, PUSH, 3, PUSH, 4, SUB, ADD, PRINT, HALT and calls execute(code). ------------------------------------------------------ The difference between this and the threaded code discussed over the last few weeks is the switch gets compiled as jmp TABLE[ *code_ptr++ ] where TABLE is the jump table generated by the compiler which holds the addresses of the case labels. With threading, the transitions between functions become jmp *code_ptr++ but this is easy to get by editing the assembler output to export the case label and recode the switch. -------------------------------------------------- For example on a SPARC: code_ptr is %o0 stack_ptr is %i5 ..... ! case PUSH L77004: ld [%i0],%o1 inc 4,%i5 inc 4,%i0 b L77008 st %o1,[%i5] ..... ! the switch, doesn't change structure ! as you add new op codes L77008: mov %i0,%i4 ld [%i4],%i4 inc 4,%i0 cmp %i4,4 bgu L77008 sll %i4,2,%i4 sethi %hi(L2000000),%o1 or %o1,%lo(L2000000),%o1 ! [internal] ld [%i4+%o1],%o0 jmp %o0 nop L2000000: ! the jump TABLE .word L77003 ! HALT etc .word L77004 .word L77005 .word L77006 .word L77007 ------------------------------------------- modify by adding global labels and edit the switch ..... ! case PUSH _push : L77004: ld [%i0],%o1 inc 4,%i5 inc 4,%i0 b L77008 st %o1,[%i5] ..... ! the edited switch L77008: mov %i0,%i4 ld [%i4],%i4 inc 4,%i0 jmp %i4 nop ! remove TABLE ------------------------------------------- For another example on an Intel 8088 stack_ptr is si code_ptr is di ; while ( 1 ) ; { ; switch( *code_ptr++ ) ; @1@50: mov bx,di inc di inc di mov bx,word ptr [bx] cmp bx,3 ja short @1@50 shl bx,1 jmp word ptr cs:@1@C738[bx] @1@122: ; ; case PUSH : ; * ++ stack_ptr = *code_ptr++ ; ; inc si inc si mov ax,word ptr [di] mov word ptr [si],ax inc di inc di ; ; break ; ; jmp short @1@50 ; .... @1@C738 label word ; jump TABLE dw @1@194 ; HALT dw @1@122 ; PUSH etc dw @1@146 .... ------------------------------------------------ edited the jump can be computed inline ; while ( 1 ) ; { ; switch( *code_ptr++ ) ; @1@50: ; switch code is replaced by code only executed once inc di inc di jmp [di-2] ..... _push : @1@122: ; ; case PUSH : ; * ++ stack_ptr = *code_ptr++ ; ; inc si inc si mov ax,word ptr [di] mov word ptr [si],ax inc di inc di ; ; break ; ; inc di ; jmp to *code_ptr++ inline inc di jmp [di-2] ; .... ---------------------------------------------- the "front end" has defines typedef void (*TCODE)() ; extern void halt(), push(), add(), sub(), print() ; TCODE code[CODESIZE] ; in the array code[], the front end compiles push, 2, push, 3, push, 4, sub, add, print, halt and calls execute(code). -- Mike Brennan brennan@bcsaic.boeing.com -- Send compilers articles to compilers@iecc.cambridge.ma.us or {ima | spdcc | world}!iecc!compilers. Meta-mail to compilers-request.