Path: utzoo!utgpu!watmath!att!dptg!rutgers!iuvax!mailrus!cornell!uw-beaver!rice!sun-spots-request From: khaw@parcplace.com (Mike Khaw) Newsgroups: comp.sys.sun Subject: Re: Help needed: clearing the 68020 cache. Keywords: Software Message-ID: <970@brazos.Rice.edu> Date: 15 Aug 89 19:48:35 GMT Organization: Sun-Spots Lines: 226 Approved: Sun-Spots@rice.edu X-Sun-Spots-Digest: Volume 8, Issue 99, message 2 of 11 > Date: 6 Jul 89 09:26:13 GMT > From: agesen@daimi.dk (Ole Agesen) > Subject: Help needed: clearing the 68020 cache. > > Help needed.... > > The MC68020 incorporates an on-chip cache memory. The cache is used to > store the instruction stream prefetch accesses from the main memory. I'm > currently implementing a kind of incremental linking implying the need to > write self-modifying code. Therefore, I have to clear the cache upon a > modification of the code. According to the MC68020 User's Manual the cache > is cleared by issuing a MOVEC instruction (move control register). > However, this is a privileged instruction (requiring supervisor state). > > Can anyone tell me how to clear the cache? > Can I get supervisor privileges for a user process? > Is there a kernel operation clearing the cache? > Can I easily create a modified kernel with the desired operation? > Other (more) realistic possibilities? > > (We are using Unix 4.0 on sun 3/50). > > Thanks in advance! > > Ole Agesen We have had to address this same issue in our Smalltalk-80 product line, which as an integrated interactive program development environment, must generate code on-the-fly. The basic trick for most machines is, rather than manipulating cache-control registers (which are usually only available to supervisor-mode programs), branch into a no-op table, which will displace the old I-cache contents. The structure of the table depends on the CPU & cache architecture. We use the following C code in our 68020 versions of our product, including the one that runs under SunOS 4.0 on Sun-3s. 
Mike Khaw --- ParcPlace Systems, 1550 Plymouth St., Mountain View, CA 94043 415/691-6749 Domain=khaw@parcplace.com, UUCP={uunet,sun,decwrl}!parcplace!khaw <--- CUT HERE ---> #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # If this archive is complete, you will see the following message at the end: # "End of shell archive." # Contents: m68k-cacheFlush.c # Wrapped by khaw@connecticut on Mon Aug 14 21:10:04 1989 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'm68k-cacheFlush.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'m68k-cacheFlush.c'\" else echo shar: Extracting \"'m68k-cacheFlush.c'\" \(5867 characters\) sed "s/^X//" >'m68k-cacheFlush.c' <<'END_OF_FILE' X/************************************************************************ X * Instruction-cache flushing for the MC680x0 processor family. X * X * I-cache flushing is necessary (for processors which have an I-cache) X * when you modify some portion of code space after it has been executed X * (and plan to execute in that space in the future). I-caches typically X * don't keep their contents coherent with data writes, hence after you X * modify some code which may have been executed in the past the X * corresponding I-cache entries must be flushed (otherwise, random code X * may be executed some time in the future). X * X * 680x0 Cache notes: X * X * The 68000, 68008, 68010 have no I-cache at all. X * X * The 68020 has a 256-byte on-chip I-cache organized as 64 entries of X * one longword each. X * X * The 68030 has a 256-byte on-chip I-cache organized as 16 entries of X * four longwords each. It also has a 256-byte on-chip D-cache, which is X * not germane to this discussion. 
X *
X * Both the 68020 & 68030 have cache control/address registers which
X * permit flushing of individual entries (or wholesale flushing), but
X * the cache control registers can only be used in supervisor mode,
X * which makes them impossible to use under most operating systems.
X *
X * CPU-boards often implement a cache which doesn't discriminate between
X * I & D fetches. No reasonable design of such a unified cache should
X * pose any problem to the code given here.
X *
X * It is possible for a CPU-board design to implement an off-chip I-cache
X * (using the F pins to distinguish between I & D fetches). We know of
X * no interesting platforms which do this currently, but a platform that
X * did would need code here to accommodate it.
X *
X * Written by Allan M. Schiffman & L. Peter Deutsch
X * Copyright (C) 1989, ParcPlace Systems, Inc.
X * This program is offered free of charge and without restrictions,
X * provided that this notice remains attached. ParcPlace accepts
X * no responsibility for any damages which may result from its use.
X ************************************************************************/
X
X/* Raw byte; a byte * is what the table builder uses for byte-sized steps. */
Xtypedef unsigned char byte;
X/*
X * An address held as an integer so it can be rounded up or masked.
X * Assumes unsigned long is at least as wide as a pointer on the target.
X */
Xtypedef unsigned long ptrBits;
X/* One instruction word as stored into the tables (see the opcodes below). */
Xtypedef unsigned short nOp;
X/*
X * A code address.  Because this is a pointer to nOp, adding n to a
X * machinePC moves it by n * sizeof(nOp) bytes, not by n bytes.
X */
Xtypedef nOp *machinePC;
X
X/*
X * Tables for clearing the I-cache.  Both pointers are set by
X * initClearICache and are called as functions by flushICache.
X */
Xstatic machinePC codeClearICache;
Xstatic machinePC codeRemoveFromICache;
X
X/*
X * Our favorite 680x0 instructions, as the 16-bit opcode words that get
X * written into the tables.  NOP is not referenced by the code shown here.
X */
X#define RTS 0x4e75 /* 680x0 rts instruction */
X#define NOP 0x4e71 /* 680x0 nop instruction */
X#define ADDIL_d0 0x680 /* 680x0 addi.l #n, d0 instruction */
X#define ORW_d0d0 0x8040 /* 680x0 or.w d0, d0 instruction */
X
X/*
X * initClearICache --
X *      Initialize tables for flushing the I-cache. Called once at
X *      start-up. Returns count of bytes consumed (<=0 if couldn't malloc).
X *
X * For now, we don't distinguish between the '030 & '020, even though
X * we could perhaps do slightly less work for the '030.
X *
X * Builds two I-cache clearing tables --
X *
X * 1) full-swat code clearing table is made out of 42 6-byte no-ops
X *    (addil #0, d0), followed by a rtn.
X *    Table is to be entered from the top.
X *    Code only occupies the first 254 bytes of the table, but
X *    since the table is long-aligned, the execution of the rtn will
X *    serve to flush the last short.
X *    Using the six-byte funny no-op is much faster than using an
X *    equivalent number of nop instructions since the nop instruction
X *    locks the pipeline (check the manual). It's faster than the
X *    equivalent number of shorter no-ops since no instruction executes
X *    faster than 2 clocks, but a long can be fetched in 3 clocks (on
X *    the '020, at least).
X *
X * 2) single-entry code clearing table, 64 rtn/no-op pairs
X *    (no-op in this case is or d0, d0) followed by a final rtn.
X *    We expect never to be asked for fewer than 4 bytes to be flushed,
X *    so using rtn/rtn pairs would not be faster (but would have
X *    more complex boundary conditions).
X */ Xint XinitClearICache() X{ X extern char *malloc(); /* aligns at least to shorts */ X byte *basePtr; /* base address of memory to use */ X int totalSize; /* size of tables in bytes */ X ptrBits ptr; X register nOp *nptr; X register int i; X#define TBL1_SIZE (256+2) /* includes slop for alignment */ X#define TBL2_SIZE (254+256+2) /* includes slop for alignment */ X X totalSize = TBL1_SIZE + TBL2_SIZE; X if((basePtr = (byte *)malloc(totalSize)) == (byte *)0) X return 0; /* malloc failed */ X X /* Construct the code for clearing the entire I-cache */ X ptr = (ptrBits)basePtr; X basePtr += TBL1_SIZE; /* position to next table */ X /* force to long-aligned */ X nptr = (nOp *)(ptr + ((-ptr) & 3)); /* align to 0 mod 4 */ X codeClearICache = (machinePC)nptr; X for (i = 0; i < 252/6; i++) { X *nptr++ = ADDIL_d0; X *nptr++ = 0; X *nptr++ = 0; X } X *nptr = RTS; /* last short will be flushed anyway */ X X /* X * Construct the code for flushing an individual I-cache entry. X * Note that a request to flush an entry at 2 mod 4 X * must actually flush two entries, since it must handle X * a 32-bit word straddling two entries. X */ X ptr = (ptrBits)basePtr; X nptr = (nOp *)(ptr + ((-ptr) & 255)); /* align to 0 mod 256 */ X codeRemoveFromICache = (machinePC)nptr; X for ( i = 0; i < 256/4; i++ ) { X *nptr++ = RTS; X *nptr++ = ORW_d0d0; X } X *nptr = RTS; X X return totalSize; /* success return */ X} X X X/* X * flushICache -- X * X * Flush the I-cache entries in the range (from):(to) inclusive. X * initClearICache must have been called previously. 
X */ Xvoid XflushICache(from, to) X register machinePC from, to; X{ X if ( to - from > 40 ) { X /* Faster to clear the whole I-cache */ X ((void (*)())codeClearICache)(); X } X else { X /* Clear individual entries */ X while ( to > from ) { X ((void (*)())(codeRemoveFromICache + ((ptrBits)(from) & 254)))(); X from += 4; X } X } X} END_OF_FILE echo shar: NEWLINE appended to \"'m68k-cacheFlush.c'\" if test 5868 -ne `wc -c <'m68k-cacheFlush.c'`; then echo shar: \"'m68k-cacheFlush.c'\" unpacked with wrong size! fi # end of 'm68k-cacheFlush.c' fi echo shar: End of shell archive. exit 0