Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!ukma!husc6!purdue!spaf From: spaf@cs.purdue.EDU (Gene Spafford) Newsgroups: comp.windows.x Subject: Hack speedup for Sun CG4 Message-ID: <6508@medusa.cs.purdue.edu> Date: 10 Apr 89 00:59:51 GMT Sender: news@cs.purdue.EDU Reply-To: spaf@uther.cs.purdue.edu (Gene Spafford) Organization: Department of Computer Science, Purdue University Lines: 117 I have been way too busy to find the time to properly extend the "Purdue" patches to the cfb code in the server. However, I have put in a few quick hacks that make a *BIG* difference in the color performance on Sun 3/50/60 machines with CG4 boards; I suspect they will also work on CG2/CG3 machines, and on any other 680x0-based machine using the cfb code. The following are quick hacks. I do intend to do a more complete and thorough job, so just consider these as temporary. To take advantage of them you need the GCC compiler. Save a copy of the server/ddx/cfb/cfbmskbits.h file. Next, apply this patch. Then recompile with gcc and see the difference. Let me know of problems/suggestions/etc. 
*** /tmp/,RCSt1a03783 Sun Apr 9 19:58:12 1989 --- cfbmskbits.h Sun Apr 9 19:34:27 1989 *************** *** 220,226 **** --- 220,232 ---- ((p)&PMSK) << PSZ | \ ((p)&PMSK) << 2*PSZ | \ ((p)&PMSK) << 3*PSZ ) + #define PFILL2(p, pf) { \ + pf = (p) & PMSK; \ + pf |= (pf << PSZ); \ + pf |= (pf << 2*PSZ); \ + } + #define maskbits(x, w, startmask, endmask, nlw) \ startmask = cfbstarttab[(x)&PIM]; \ endmask = cfbendtab[((x)+(w)) & PIM]; \ *************** *** 271,277 **** --- 277,314 ---- *((pdst)+1) = (*((pdst)+1) & (cfbstarttab[n] | ~pm)) | \ (SCRLEFT(src, m) & (cfbendtab[n] & pm)); \ } + #ifdef mc68020 && __GNUC__ + #undef getbits + #define FASTGETBITS(psrc, x, w, dst) \ + asm ("bfextu %3{%1:%2},%0" \ + : "=d" (dst) : "di" (PSZ*x), "di" (PSZ*w), "o" (*(char *)(psrc))) + #define getbits(psrc,x,w,dst) \ + FASTGETBITS(psrc, x, PPW, dst);\ + + #define FASTPUTBITS(src, x, w, pdst) \ + asm ("bfins %3,%0{%1:%2}" \ + : "=o" (*(char *)(pdst)) \ + : "di" (x*PSZ), "di" (w*PSZ), "d" (src), "0" (*(char *) (pdst))) + + #undef putbits + #define putbits(src, x, w, pdst, planemask) \ + { \ + if (planemask != 0xff) { \ + unsigned long _m, _pm; \ + FASTGETBITS(pdst, x, PPW, _m); \ + PFILL2(planemask, _pm); \ + _m &= (~_pm); \ + _m |= (src & _pm); \ + FASTPUTBITS(SCRRIGHT(_m, PPW-(w)), x, w, pdst); \ + } else { \ + FASTPUTBITS(SCRRIGHT(src, PPW-(w)), x, w, pdst); \ + } \ + } + + + #endif mc68020 + #define putbitsrop(src, x, w, pdst, planemask, rop) \ if ( ((x)+(w)) <= PPW) \ { \ *************** *** 278,284 **** unsigned long tmpmask; \ unsigned long t1, t2; \ maskpartialbits((x), (w), tmpmask); \ ! tmpmask &= PFILL(planemask); \ t1 = SCRRIGHT((src), (x)); \ t2 = DoRop(rop, t1, *(pdst)); \ *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \ --- 315,322 ---- unsigned long tmpmask; \ unsigned long t1, t2; \ maskpartialbits((x), (w), tmpmask); \ ! PFILL2(planemask, t1); \ ! 
tmpmask &= t1; \ t1 = SCRRIGHT((src), (x)); \ t2 = DoRop(rop, t1, *(pdst)); \ *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \ *************** *** 288,294 **** unsigned long m; \ unsigned long n; \ unsigned long t1, t2; \ ! unsigned long pm = PFILL(planemask); \ m = PPW-(x); \ n = (w) - m; \ t1 = SCRRIGHT((src), (x)); \ --- 326,333 ---- unsigned long m; \ unsigned long n; \ unsigned long t1, t2; \ ! unsigned long pm; \ ! PFILL2(planemask, pm); \ m = PPW-(x); \ n = (w) - m; \ t1 = SCRRIGHT((src), (x)); \ -- Gene Spafford NSF/Purdue/U of Florida Software Engineering Research Center, Dept. of Computer Sciences, Purdue University, W. Lafayette IN 47907-2004 Internet: spaf@cs.purdue.edu uucp: ...!{decwrl,gatech,ucbvax}!purdue!spaf