Path: utzoo!attcan!uunet!blkcat!p0.f40.n109.z1.fidonet.org!Kai-Uwe.Rommel From: Kai-Uwe.Rommel@p0.f40.n109.z1.fidonet.org (Kai-Uwe Rommel) Newsgroups: comp.os.os2 Subject: (Part 4) compress (fast!) for OS/2 Message-ID: <722.25FAE6FE@blkcat.fidonet.org> Date: 12 Mar 90 04:39:48 GMT Sender: ufgate@blkcat.fidonet.org (newsout1.26) Organization: FidoNet node 1:109/40.0 - Crewe 400 Nat'l Hub, Joe Keenan Lines: 312 *** Split Message, part #4, MsgSplit Version 2 (11-Mar-90) *** Xstatic long ratio; /* in_count/out_count * 256 */ Xstatic int n_bits; /* number of bits/code */ Xstatic int n_bits8; /* bits/code times 8 */ Xstatic int bitoffset; /* Offset into bitbuf */ X X#define NOENT ((code_t)0xffff) X#define MAXMAXCODE ((code_t)0xf000) X X/* X * Clear out the hash table. We try to do this as quickly as possible, because X * it's running time dominates for small files. For big files, it doesn't matter X * much because it doesn't get called often. Now I understand why the original X * had a variable size hash table. X */ Xvoid clearhash() X{ X#ifdef i8088 X register unsigned i; X code_t far *hp; X X hp = (code_t far *)codeptrs1[0]; X i = (unsigned)(HSIZE/2); X do X *hp++ = NOENT; X while (--i > 0); X X hp = (code_t far *)codeptrs1[1]; X i = (unsigned)(HSIZE/2); X do X *hp++ = NOENT; X while (--i > 0); X#else X /* X * WARNING: assumes that NOENT == 0xffff X */ X memset((char *)codetab1, 0xff, HSIZE*sizeof(code_t)); X#endif X} X X/* X * Compress stdin to stdout. X */ Xvoid compress() X{ X register hash_t i; X register code_t ent; X hash_t disp; X int c; X code_t freecode; /* first unused entry */ X code_t maxcode; /* maximum code, given n_bits */ X code_t maxmaxcode; X code_t k; X#ifdef CHECK_GAP X long checkpoint = 0; X#endif X X if (maxbits < INIT_BITS) X maxbits = INIT_BITS; X if (maxbits > BITS) X maxbits = BITS; X X if (magic) { X putchar(MAGIC0); putchar(MAGIC1); X putchar(maxbits | block_compress); X if (ferror(stdout)) X writeerr(); X } X X bitbuf[bitoffset = 0] = 0; X out_count = 3; /* includes 3-byte header mojo */ X ratio = 0; X in_count = 1; X X n_bits = INIT_BITS; X n_bits8 = INIT_BITS << 3; X maxcode = MAXCODE(INIT_BITS); X maxmaxcode = MAXCODE(maxbits); X if (maxmaxcode > MAXMAXCODE) X maxmaxcode = MAXMAXCODE; X X freecode = ((block_compress) ? FIRST : 256); X X clearhash(); X X ent = getchar(); X X while ((c = getchar()) != EOF) { X in_count++; X X i = (hash_t)(c << 8) ^ ent; /* xor hashing */ X X if ((k = en_hashent(i)) == ent && en_hashchar(i) == (uchar)c) { X ent = en_hashcode(i); X goto Continue; X } X X if (k != NOENT) { X /* X * New secondary hash for 64K table. X * Experiment shows that the shift by 6 works well. X * Beats me why. "disp" must be relatively X * prime to the table size. Since the table size is a X * power of 2, this means "disp" must be odd. X * X * Note that we do not do a range check before doing X * "i -= disp". It is assumed that the hash table size X * (HSIZE) is 64K, and that the type "hash_t" (which X * is unsigned short) is 16 bits. Thus it is impossible X * for "i" to be out of range. On a machine with something X * other than 16 bit shorts, this would have to change. X */ X disp = ((hash_t)(c << 6) ^ ent) | 1; X do { X i -= disp; X if ((k = en_hashent(i)) == ent && X en_hashchar(i) == (uchar)c) { X ent = en_hashcode(i); X goto Continue; X } X } while (k != NOENT); X } X X putcode(ent); X X if (freecode <= maxmaxcode) { X /* X * Add the new entry. X */ X en_hashchar(i) = (uchar)c; X en_hashent(i) = ent; X en_hashcode(i) = freecode; X X /* X * If the next entry is going to be too big for the X * code size, then increase it, if possible. X */ X if (freecode++ > maxcode) { X while (bitoffset) X putcode(0); X ++n_bits; X n_bits8 += 8; X maxcode = MAXCODE(n_bits); X } X } X#ifdef CHECK_GAP X else if (in_count >= checkpoint && block_compress) { X checkpoint = in_count + CHECK_GAP; X if (need_clear()) { X#else X else if (block_compress) { X if (1) { X#endif X putcode(CLEAR); X while (bitoffset > 0) X putcode(0); X clearhash(); X freecode = FIRST; X maxcode = MAXCODE(INIT_BITS); X n_bits = INIT_BITS; X n_bits8 = n_bits << 3; X } X } X ent = c; XContinue:; X } X /* X * Put out the final code. X */ X putcode(ent); X X /* X * At EOF, write the rest of the buffer. X */ X if (bitoffset > 0) X fwrite(bitbuf, 1, (bitoffset + 7) / 8, stdout); X out_count += (bitoffset + 7) / 8; X fflush(stdout); X if (ferror(stdout)) X writeerr(); X X /* X * Print out stats on stderr X */ X if (! zcat_flg && verbose) { X fprintf(stderr, "Compression: "); X prratio(in_count - out_count, in_count); X } X if (out_count > in_count) /* exit(2) if no savings */ X exit_stat = 2; X} X X/* X * Output the given code. Assumes that chars are 8 bits. X * "n_bits" output bytes (containing 8 codes) are assembled X * in in "bitbuf", and then written out. X */ Xvoid putcode(code) Xcode_t code; X{ X register int i; X register uchar *bp; X X bp = &bitbuf[(bitoffset >> 3)]; X i = bitoffset & 7; X bp[0] |= (uchar)(code << i); X bp[1] = (uchar)(code >>= (8 - i)); X bp[2] = (uchar)(code >> 8); X X if ((bitoffset += n_bits) == n_bits8) { X bp = bitbuf; X i = n_bits; X out_count += i; X do X putchar(*bp++); X while (--i); X bitbuf[bitoffset = 0] = 0; X } X} X X#ifdef CHECK_GAP X/* X * Compute the current compression ratio, and return non-zero if X * it is has decreased since the last we checked. X * X * Don't use this anymore. Whenever the hash table fills, X * we send a CLEAR immediately (if block_compress). This is faster, X * and doesn't appear to affect the compression ratio much. X */ Xint need_clear() X{ X long rat; X X if (in_count > 0x007fffffL) { /* shift will overflow */ X rat = out_count >> 8; X if (rat == 0) /* Don't divide by zero */ X rat = 0x7fffffffL; X else X rat = in_count / rat; X } else X rat = (in_count << 8) / out_count; X X if (rat > ratio) { X ratio = rat; X return (0); X } X else { X ratio = 0; X return (1); X } X} X#endif X X/* X * Decompress stdin to stdout. This code assumes that chars are 8 bits. X */ Xvoid decompress() X{ X register uchar *stackp; X register code_t code; X code_t oldcode, incode; X code_t codemask; X code_t freecode; /* first unused entry */ X code_t maxcode; /* maximum code, given n_bits */ X code_t maxmaxcode; X int finchar; X int size; /* #bits in bitbuf */ X int bitoff; /* Offset into bitbuf */ X int n_bits; /* number of bits/code */ X#ifndef i8088 X register uchar *bp; X#endif X X n_bits = INIT_BITS; X maxcode = MAXCODE(INIT_BITS) - 1; X codemask = MAXCODE(INIT_BITS); X freecode = ((block_compress) ? FIRST : 256) - 1; X maxmaxcode = MAXCODE(maxbits); X X /* X * Read the first code into "oldcode" X */ X if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0) X return; X size = (size << 3) - (n_bits - 1); X oldcode = (bitbuf[0] | (bitbuf[1] << 8)) & codemask; X bitoff = n_bits; X X /* X * First code must be 8 bits == char. Write it, and die X * if it can't be written. X */ X putchar(finchar = oldcode); X if (ferror(stdout)) X writeerr(); X X stackp = de_stack; X X for ( ; ; ) { X if (bitoff >= size) { *** Original message split, continued in next message *** -- Kai-Uwe Rommel at The Black Cat's Shack (Fidonet 1:109/401) Internet: Kai-Uwe.Rommel@p0.f40.n109.z1.fidonet.org UUCP: ...!uunet!blkcat!40.0!Kai-Uwe.Rommel