Path: utzoo!attcan!uunet!blkcat!p0.f40.n109.z1.fidonet.org!Kai-Uwe.Rommel From: Kai-Uwe.Rommel@p0.f40.n109.z1.fidonet.org (Kai-Uwe Rommel) Newsgroups: comp.os.os2 Subject: (Part 2) compress (fast!) for OS/2 Message-ID: <720.25FAE6FA@blkcat.fidonet.org> Date: 12 Mar 90 04:39:48 GMT Sender: ufgate@blkcat.fidonet.org (newsout1.26) Organization: FidoNet node 1:109/40.0 - Crewe 400 Nat'l Hub, Joe Keenan Lines: 324 *** Split Message, part #2, MsgSplit Version 2 (11-Mar-90) *** X * save much memory. Do unsigned compares where possible (faster on X * Perkin-Elmer) X * X * Revision 1.4 84/07/05 03:11:11 thomas X * Clean up the code a little and lint it. (Lint complains about all X * the regs used in the asm, but I'm not going to "fix" this.) X * X * Revision 1.3 84/07/05 02:06:54 thomas X * Minor fixes. X * X * Revision 1.2 84/07/05 00:27:27 thomas X * Add variable bit length output. X * X */ X X#include X#include X#include X#include X#include X#ifndef __ZTC__ X#include X#endif X#ifndef BSD4_2 X#include X#include X#endif X#include X#include X#ifdef MSDOS X#ifdef OS2 X#define INCL_NOPM X#define INCL_DOSMEMMGR X#include X#else X#include X#endif X#endif X X#ifdef PROTO X/* X * Zortech appears to be missing this prototype, and MSC uses some X * silly structure as the second arg. Turbo C doesn't support this X * call at all. X */ Xextern int utime(char *path, time_t times[]); X#endif X X#define BITS 16 /* max number of bits/code */ X#define INIT_BITS 9 /* initial number of bits/code */ X X#define MAXCODE(n_bits) ((code_t)((1L << (n_bits)) - 1)) X X/* X * Magic numbers which should appear at the beginning of a compressed file. X */ X#define MAGIC0 0x1f X#define MAGIC1 0x9d X X/* X * Defines for third byte of header X */ X#define BIT_MASK 0x1f X#define BLOCK_MASK 0x80 X X#if 0 X#define CHECK_GAP 10000 /* ratio check interval */ X#endif X X/* X * the next two codes should not be changed lightly, as they must not X * lie within the contiguous general code space. X */ X#define FIRST 257 /* first free entry */ X#define CLEAR 256 /* table clear output code */ X X#define DE_STACKLEN 8192 /* Size of decoder stack */ X X#define HSIZE (1L << 16) /* Size of the hash table. Don't change this */ X Xtypedef unsigned char uchar; Xtypedef unsigned long ulong; Xtypedef unsigned short code_t; Xtypedef unsigned short hash_t; X X#ifdef PROTO X#define ARGS(x) x X#else X#define ARGS(x) () X#endif X Xvoid main ARGS((int argc, char **argv)); Xvoid Usage ARGS((void)); Xvoid version ARGS((void)); Xvoid compress ARGS((void)); Xvoid decompress ARGS((void)); Xvoid copystat ARGS((void)); Xvoid writeerr ARGS((void)); Xvoid cl_hash ARGS((void)); Xvoid putcode ARGS((code_t code)); Xvoid prratio ARGS((long num, long den)); Xint ofopen ARGS((char *filename)); Xint ifopen ARGS((char *filename)); Xint check_magic ARGS((void)); Xint need_clear ARGS((void)); Xvoid onintr ARGS((void)); Xvoid oops ARGS((void)); Xint taballoc ARGS((void)); Xvoid clearhash ARGS((void)); X X/* X * block compression parameters -- after all codes are used up, X * and compression rate changes, start over. X */ Xint block_compress = BLOCK_MASK; X Xint maxbits = BITS; /* user settable max # bits/code */ Xint magic = 1; /* 3-byte magic number header */ Xint zcat_flg = 0; /* Output on stdout */ Xint verbose = 0; /* don't tell me about compression */ Xint force = 0; /* Force overwrite of output file */ Xint do_decomp = 0; /* Decompress rather than compress. */ Xchar ofname[100]; /* Output file name */ Xint foreground; /* Running in foreground? */ Xint exit_stat = 0; /* Exit status */ Xuchar bitbuf[BITS+2]; /* For (dis)assembling code bytes */ Xint okunlink; /* OK for sig handler to unlink output file */ Xint keep = 0; /* keep input files ? */ Xchar *ifname; X X#ifdef i8088 X Xuchar *de_stack; Xuchar far *charptr1; Xuchar far *codeptrs1[2]; Xuchar far *codeptrs2[2]; X X#define de_suffixof(i) charptr1[i] X#define de_prefixof(i) (*(code_t far *)&codeptrs1[i&1][i&~1]) X X#define en_hashchar(i) charptr1[i] X#define en_hashent(i) (*(code_t far *)&codeptrs1[i&1][i&~1]) X#define en_hashcode(i) (*(code_t far *)&codeptrs2[i&1][i&~1]) X X#ifndef MK_FP X#define MK_FP(seg, ofs) \ X ((void far *)(((ulong)(seg) << 16) | (unsigned)(ofs))) X#endif X X#define PARA 16 /* Size of a paragraph */ X X/* X * Return a segment address which is the segment part of the normalized X * version of "fp" rounded upwards. X * I use this on the far pointers returned by "farmalloc". While X * they are probably already normalized, I have never seen this X * stated anywhere in the doc's. X * X * There is a lot of junk below which would be unecessary if only X * there were a reasonably compiler independent way of allocating X * a given number of PARAGRAPHS (like TC's allocmem). I can't find X * one though. X */ X X#define FP_SEGCEIL(fp) (FP_SEG(fp) + (FP_OFF(fp) + PARA - 1) / PARA) X X/* X * Allocate space for the tables used in {en,de}coding. These tables X * reside in the far heap. It may seem inefficient to be using far pointers X * for the base of these tables, because the offset portion will always be zero. X * We could just keep the segment address of the base, and then do something X * like: X * *MK_FP(baseseg, offset) = blahblah; X * X * whenever we need to access the table. This SHOULD be more efficient, X * but the compilers do not appear to generate very efficient code in this X * case. Huge pointers are not used, because they are slow, and because X * Zortech does not support them. X */ X X#ifdef MSC X#define farmalloc(n) halloc(n, 1) X#endif X Xint taballoc() X{ X#ifdef OS2 X ULONG size; X USHORT shift; X SEL sel; X X DosGetHugeShift(&shift); X#else X char far *X; X#endif X X if (do_decomp) { X if ((de_stack = malloc(DE_STACKLEN)) == 0) X return (0); X } X else { X#ifdef OS2 X size = HSIZE * sizeof(code_t); X X if ( DosAllocHuge(HIUSHORT(size), LOUSHORT(size), X &sel, 0, SEG_NONSHARED) ) X return (0); X X codeptrs2[0] = MAKEP(sel, 0); X codeptrs2[1] = MAKEP(sel + (1 << shift), 0); X#else X if ((X = farmalloc((HSIZE + PARA) * sizeof(code_t))) == 0) X return (0); X X codeptrs2[0] = MK_FP(FP_SEGCEIL(X), 0); X codeptrs2[1] = MK_FP(FP_SEGCEIL(X) + HSIZE/PARA, 0); X#endif X } X X#ifdef OS2 X size = HSIZE * sizeof(char); X X if ( DosAllocHuge(HIUSHORT(size), LOUSHORT(size), X &sel, 0, SEG_NONSHARED) ) X return (0); X X charptr1 = MAKEP(sel, 0); X#else X if ((X = farmalloc((HSIZE + PARA) * sizeof(char))) == 0) X return (0); X X charptr1 = MK_FP(FP_SEGCEIL(X), 0); X#endif X X#ifdef OS2 X size = HSIZE * sizeof(code_t); X X if ( DosAllocHuge(HIUSHORT(size), LOUSHORT(size), X &sel, 0, SEG_NONSHARED) ) X return (0); X X codeptrs1[0] = MAKEP(sel, 0); X codeptrs1[1] = MAKEP(sel + (1 << shift), 0); X#else X if ((X = farmalloc((HSIZE + PARA) * sizeof(code_t))) == 0) X return (0); X X codeptrs1[0] = MK_FP(FP_SEGCEIL(X), 0); X codeptrs1[1] = MK_FP(FP_SEGCEIL(X) + HSIZE/PARA, 0); X#endif X X return (1); X} X X#else X Xuchar chartab1[HSIZE]; Xcode_t codetab1[HSIZE]; Xcode_t codetab2[HSIZE]; X X#define de_suffixof(i) chartab1[i] X#define de_prefixof(i) codetab1[i] X#define de_stack (uchar *)codetab2 X X#define en_hashchar(i) chartab1[i] X#define en_hashent(i) codetab1[i] X#define en_hashcode(i) codetab2[i] X X#endif X Xvoid Usage() X{ X printf("\nUsage: COMPRESS [-dfvcVnC] [-b maxbits] [file ...]\n\n"); X printf(" -V print Version\n"); X printf(" -h print usage\n"); X printf(" -v verbose\n"); X printf(" -q quiet (default)\n"); X printf(" -d decompress\n"); X printf(" -c cat all output to stdout\n"); X printf(" -k keep input files\n"); X printf(" -f force overwrite of output files\n"); X printf(" -n no header: useful to uncompress old files\n"); X printf(" -C generate output compatible with compress 2.0.\n"); X printf(" -b maxbits maxbits. Default %d\n", BITS); X} X X/***************************************************************** X * TAG( main ) X * X * Algorithm from "A Technique for High Performance Data Compression", X * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. X * X * Usage: compress [-dfvc] [-b bits] [file ...] X * Inputs: X * -d: If given, decompression is done instead. X * X * -c: Write output on stdout, don't remove original. X * X * -b: Parameter limits the max number of bits/code. X * X * -f: Forces output file to be generated, even if one already X * exists, and even if no space is saved by compressing. X * If -f is not used, the user will be prompted if stdin is X * a tty, otherwise, the output file will not be overwritten. X * X * -v: Write compression statistics X * X * file ...: Files to be compressed. If none specified, stdin X * is used. X * Outputs: X * file.Z: Compressed form of file with same mode, owner, and utimes *** Original message split, continued in next message *** -- Kai-Uwe Rommel at The Black Cat's Shack (Fidonet 1:109/401) Internet: Kai-Uwe.Rommel@p0.f40.n109.z1.fidonet.org UUCP: ...!uunet!blkcat!40.0!Kai-Uwe.Rommel