Path: utzoo!utgpu!jarvis.csri.toronto.edu!rutgers!sun-barr!ames!xanth!nic.MR.NET!shamash!com50!midgard!syntel!dal
From: dal@syntel.UUCP (Dale Schumacher)
Newsgroups: comp.os.minix
Subject: Compress 4.3 sources (part 2 of 2)
Keywords: Compress
Message-ID: <041789A8735@syntel.UUCP>
Date: 17 May 89 07:39:18 GMT
Reply-To: dal@syntel.UUCP (Dale Schumacher)
Lines: 1313
X-Member-Of: STdNET (ST Developer's Network)

#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	compress.c
#	compapi.c
# This archive created: 17-May-1989 1:35:52 ENOENV
# By:	ENOENV (ENOENV)
cat << \SHAR_EOF > compress.c
/*@H************************ < COMPRESS utility> ****************************
*                                                                           *
*   compress : compress.c                                                   *
*              Main and Operating System Independent support functions      *
*                                                                           *
*   port by  : Donald J. Gloistein                                          *
*                                                                           *
*   Source, Documentation, Object Code:                                     *
*   released to Public Domain. This code is ported from compress v4.0       *
*   release joe.                                                            *
*---------------------------  Module Description  --------------------------*
*   The compress program is compatible with the compression/decompression   *
*   used on the Unix systems compress programs.  This is version 4 and      *
*   supports up to 16 bits compression. The porting retained the Unix       *
*   meanings of all options, added a couple for MsDos and modified the      *
*   file name conventions to make more sense.                               *
*                                                                           *
*--------------------------- Implementation Notes --------------------------*
*                                                                           *
*   compiled with : compress.h compress.fns                                 *
*   linked with   : compapi.obj  compusi.obj                                *
*   problems:                                                               *
*              See notes in compress.h for defines needed.                  *
*              It should work now with Xenix                                *
*                                                                           *
*              Check the signal() handler functions in your compiler        *
*              documentation. This code assumes ANSI SYS V compatible       *
*              header and return values. Change as appropriate for your     *
*              compiler and operating system.                               *
*                                                                           *
*              This source compiles properly with Microsoft C compiler      *
*              version 5.1.                                                 *
*                                                                           *
*   CAUTION:   because the program is in modules, make sure you recompile   *
*              all modules if you change the header or a define in the      *
*              compress.c file                                              *
*                                                                           *
* Algorithm from "A Technique for High Performance Data Compression",       *
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.          *
*                                                                           *
* Assumptions:                                                              *
*  When filenames are given, replaces with the compressed version           *
*  (.Z suffix) only if the file decreases in size.                          *
* Algorithm:                                                                *
*  Modified Lempel-Ziv method (LZW).  Basically finds common                *
* substrings and replaces them with a variable size code.  This is          *
* deterministic, and can be done on the fly.  Thus, the decompression       *
* procedure needs no input table, but tracks the way the table was built.   *
*                                                                           *
*                                                                           *
*---------------------------      Author(s)        -------------------------*
*     Initials ---- Name ---------------------------------                  *
*      DjG          Donald J. Gloistein                                     *
*                   Plus many others, see rev.hst file for full list        *
*      LvR          Lyle V. Rains, many thanks for improved implementation  *
*************************************************************************@H*/

/*@R************************< Revision History >*****************************
*                                                                           *
*   version -- date -- init ---Notes----------------------                  *
*    4.01    08-29-88  DjG    first cut  for 16 bit MsDos version           *
*            09-04-88  DjG    fixed unlink on zcat if interupted.           *
*                             added msdos filename logic and functions      *
*    4.10    10-27-88  DjG  revised API with coding changes by LvR.         *
*    4.10a   10-30-88  DjG  cleaned up code and fixed bug in freeing ptr.   *
*    4.10b   11-01-88  DjG  cleaned up the logic for inpath/outpath         *
*                           Changed the logic to finding the file name      *
*                           Fixed the allocation bug in the api             *
*                           Added some more portability macros              *
*    4.10c   11-04-88  DjG  Changed maxcode from global to static in api.   *
*                           Supplied some library functions for those who   *
*                           don't have them, changed dos usi to use the     *
*                           strrpbrk(). Checked casts in api again. Compiles*
*                           without warnings at pick level 3.               *
*    4.10d   11-25-88  DjG  revised some memory allocation, put more in the *
*                           header file. Corrected some typos.              *
*                           Changed prog_name() to force lower case         *
*                           Corrected bug, no longer unlinks existing file  *
*                           if not enough memory to compress or decompress  *
*            12-06-88  DjG  VERY minor changes for casts and header defines *
*            12-08-88  DjG  Adjusted path separator check in main function  *
*                           Amiga uses split seg because of compiler        *
*            12-09-88  DjG  Debugging done, all defaults now Unix compress  *
*                           defaults, including unlinking input file and    *
*                           acting as a filter. Must use -h option to get   *
*                           help screen.                                    *
*    4.10e   12-11-88  DjG  Fixed more casts, prototypes and header file.   *
*    4.10f   12-12-88  DjG  Fixed unlinking open files on error. This fails *
*                           on shared or os/2 platforms.                    *
*            12-15-88  DjG  Fixed SIGTYPE for function passed to signal     *
*                           Fixed problems with Xenix 2.2.1                 *
*    4.2     12-19-88  DjG  Replaced adaptive reset as an option.           *
*    4.3     12-26-88  DjG  Fixed long file name bug, fixed bug with        *
*                           compressdir. -B option added, same as -b option *
*            05-06-89  Dal  Ported to Sozobon/Alcyon C for Atari ST.  Also, *
*                           created get_one() for console prompting.        *
*            05-08-89  Dal  Ported to Minix-ST                              *
*************************************************************************@R*/

#include <stdio.h>

#define MAIN        /* header has defining instances of globals */
#include "compress.h" /* contains the rest of the include file declarations */

#define ARGVAL() (*++(*argv) || (--argc && *++argv))
char suffix[] = SUFFIX ;          /* only used in this file */

void main( argc, argv )
register int argc; char **argv;
{
    char **filelist, **fileptr,*temp;
    struct stat statbuf;

#ifndef NOSIGNAL
    if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
        /* ANSI/SYS V compatible */
        /* the following test checks for error on setting signals */
        /* check your documentation on the value to test          */
        /* if your signal.h doesn't support the return, it is     */
        /* essentially a no-op test                               */
        if (bgnd_flag == SIG_ERR){   
            exit_stat = SIGNAL_ERROR;
            check_error();
        }
        if( (signal(SIGINT,onintr) == SIG_ERR)
        || (signal(SIGSEGV,oops) == SIG_ERR)) {/* check your compiler docs. */
            exit_stat = SIGNAL_ERROR;
            check_error();
        }
    }
#endif

    /* set up array for files to be converted */
#ifdef ALLOC
    filelist = fileptr = (char **)(alloc(argc * sizeof(char *)));
#else
    filelist = fileptr = (char **)(malloc(argc * sizeof(char *)));
#endif
    *filelist = NULL;

    /* gets name, compares and sets defaults */
    prog_name = get_program_name(argv[0]);

    /* now parse command line and get file list */
    for (argc--, argv++; argc > 0; argc--, argv++) {
        if (**argv == '-') {        /* A flag argument */
            while (*++(*argv)) {    /* Process all flags in this arg */
                switch (**argv) {
#if !defined(NDEBUG)
                    case 'D':
                        debug = TRUE;
                        keep_error = TRUE;
                        break;
                    case 'V':
                        verbose = TRUE;
                        version();
                        break;
#else
                    case 'V':
                        version();
                        break;
#endif /*!NDEBUG */
                    case 'v':
                        quiet = !quiet;
                        break;
                    case 'd':
                        do_decomp = TRUE;
                        break;
                    case 'f':
                        force = overwrite = TRUE;
                        break;
                    case 'n':
                        nomagic = TRUE;
                        break;
                    case 'C':
                        block_compress = FALSE;
                        break;
                    case 'b': case 'B':
                        if (!ARGVAL()) {
                            fprintf(stderr, "Missing maxbits\n");
                            Usage(1);
                            exit(ERROR);
                        }
                        maxbits = atoi(*argv);
                        goto nextarg;
                    case 'I':
                        if (!ARGVAL()) {
                            fprintf(stderr, "Missing in_path name\n");
                            Usage(1);
                            exit(ERROR);
                        }
                        strcpy(inpath,*argv);
                        temp = &inpath[strlen(inpath)-1];
#ifdef MSDOS
                        if (*temp != '\\' && *temp != '/')
#else
                        if (*temp != separator[0])
#endif
                            strcat(inpath,separator);
                        goto nextarg;
                    case 'O':
                        if (!ARGVAL()){
                            fprintf(stderr, "Missing out_path name\n");
                            Usage(1);
                            exit(ERROR);
                        }
                        strcpy(outpath,*argv);
                        temp = &outpath[strlen(outpath)-1];
#ifdef MSDOS
                        if (*temp != '\\' && *temp != '/')
#else
                        if (*temp != separator[0])
#endif
                            strcat(outpath,separator);
                        goto nextarg;
                    case 'c':
                        keep = zcat_flg = TRUE;
                        break;
                    case 'K':
                        keep_error = TRUE;
                        break;
                    case 'k':
                        keep = !keep;
                        break;
                    case '?':case 'h':case 'H':
                        Usage(0);
                        exit(NORMAL);
                        break;
                    case 'q':
                        quiet = TRUE;
                        break;
                    default:
                        fprintf(stderr, "%s : Unknown flag: '%c'\n",prog_name, **argv);
                        Usage(1);
                        exit(ERROR);
                } /* end switch */
            } /* end while processing this argument */
        }  /* end if option parameter */
        else {                                  /* must be input file name */
            *fileptr++ = *argv;                 /* Build input file list */
            *fileptr = NULL;
        } /* end else */
nextarg: 	continue;                          /* process nextarg */
    } /* end command line processing */

    /* adjust for possible errors or conflicts */
    if(maxbits < MINBITS || maxbits > MAXBITS){
        fprintf(stderr,"\n%s: illegal bit value, range = %d to %d\n",prog_name,MINBITS,MAXBITS);
        exit(NORMAL);
    }
    if (zcat_flg && *outpath)         /* can't have an out path and zcat */
        *outpath = '\0';

    /* to make the error messages make sense */
    strcpy(ifname,"stdin");
    strcpy(ofname,"stdout");

    if (*filelist) {         /* Check if there are files specified */
                             /* *fileptr must continue to specify  */
                             /* command line in/out file name      */
        is_list = TRUE;
        for (fileptr = filelist; *fileptr; fileptr++) {
            exit_stat = 0;
            endchar[0] = '\0';
            if (do_decomp) {                /* DECOMPRESSION          */
                if (*inpath){               /* adjust for inpath name */
                    strcpy(ifname,inpath);  /* and copy into ifname   */
                    strcat(ifname,name_index(*fileptr));
                }
                else
                    strcpy(ifname,*fileptr);
                if(!is_z_name(ifname))         /* Check for .Z suffix    */
                    if(!(make_z_name(ifname))) /* No .Z: tack one on     */
                        continue;
                                               /* Open input file        */
                if ((freopen(ifname, READ_FILE_TYPE, stdin)) == NULL) {
                    perror(ifname);
                    continue;
                }
                else
                    setvbuf(stdin,zbuf,_IOFBF,ZBUFSIZE);
                if (!nomagic) {             /* Check the magic number */
                    if ((getchar() != (magic_header[0] & 0xFF))
                      || (getchar() != (magic_header[1] & 0xFF))) {
                        fprintf(stderr, "%s: not in compressed format\n",
                            *ifname);
                        continue;
                    }
                    maxbits = getchar();    /* set -b from file */
                    block_compress = maxbits & BLOCK_MASK;
                    maxbits &= BIT_MASK;
                    if(maxbits > MAXBITS) {
                        fprintf(stderr,
                        "%s: compressed with %d bits, can only handle %d bits\n",
                        ifname, maxbits, MAXBITS);
                        continue;
                    }
                }  /* end if nomagic */
                                             /* Generate output filename */
                if (*outpath){               /* adjust for outpath name */
                    strcpy(ofname,outpath);  /* and copy into ofname   */
                    strcat(ofname,name_index(ifname));
                }
                else
                    strcpy(ofname,ifname); /* DjG may screw up the placement */
                                           /* of the outfile */
                unmake_z_name(ofname);     /* strip off Z or .Z */
            }
            else {            /* COMPRESSION */
                if (*inpath){               /* adjust for inpath name */
                    strcpy(ifname,inpath);  /* and copy into ifname   */
                    strcat(ifname,name_index(*fileptr));
                }
                else
                    strcpy(ifname,*fileptr);
                if (is_z_name(ifname)) {
                    fprintf(stderr, "%s: already has %s suffix -- no change\n",
                        ifname,suffix);
                    continue;
                }
                /* Open input file */
                if ((freopen(ifname,READ_FILE_TYPE, stdin)) == NULL) {
                    perror(ifname);
                    continue;
                }
                else
                    setvbuf(stdin,xbuf,_IOFBF,XBUFSIZE);
                /* Generate output filename */
                if (*outpath){               /* adjust for outpath name */
                    strcpy(ofname,outpath);  /* and copy into ofname   */
                    strcat(ofname,name_index(ifname));
                }
                else  /* place it in directory of input file */
                    strcpy(ofname,ifname); /* DjG may screw up the placement */
                                           /* of the outfile */
                if (!(make_z_name(ofname)))
                    continue;
            } /* end else compression  we now have the files set up */

            /* Check for overwrite of existing file */
            if (!overwrite && !zcat_flg) {
                if (!stat(ofname, &statbuf)) {
                    char response, get_one();

                    response = 'n';
                    fprintf(stderr, "%s already exists;", ofname);
#ifndef NOSIGNAL
                    if (foreground()) {
#else
                    if (TRUE) {
#endif
                        fprintf(stderr, "\ndo you wish to overwrite %s (y or n)? ",
                        ofname);
                        fflush(stderr);
			response = get_one();
                    }
                    if ((response != 'y') && (response != 'Y')) {
                        fprintf(stderr, "\tnot overwritten\n");
                        continue;
                    }
                } /* end if stat */
            } /* end if overwrite */
            /* Output file  is opened in compress/decompress routines */

            /* Actually do the compression/decompression  on files */
            if (!do_decomp){
                compress();
                check_error();
            }
            else{
                decompress();
                check_error();
            }
            if(!zcat_flg) {
                copystat(ifname, ofname); /* Copy stats */
                if((exit_stat ) || (!quiet))
                    putc('\n', stderr);
            }       /* end if zcat */
        }           /*end for  loop */
    }               /* end if filelist */
    else {          /* it is standard input to standard output*/
#if (FILTER == FALSE)     /* filter is defined as true or false */
    /* DjG added to make more sense.  The following tests for standard
       input being a character device. If so, there is no use in MsDos
       for the program, as that will compress from the keyboard to the
       console. Sure not what is needed. Instead, the usage function
       is called. In Xenix/Unix systems, there is a need for this type
       of pipe as the input may be from a char dev, remote station.
     */

        /* check if input is unredirected */
        if ( isatty(fileno(stdin)) ){
            Usage(1);
            exit(NORMAL);
        }
#endif
        /* filter */
        if (do_decomp){
            setvbuf(stdin,zbuf,_IOFBF,ZBUFSIZE);  /* make the buffers larger */
            setvbuf(stdout,xbuf,_IOFBF,XBUFSIZE);
        }
        else{
            setvbuf(stdin,xbuf,_IOFBF,XBUFSIZE);  /* make the buffers larger */
            setvbuf(stdout,zbuf,_IOFBF,ZBUFSIZE);
        }
        if (!do_decomp) {   /* compress stdin to stdout */
            compress();
            check_error();
            if(!quiet)
                putc('\n', stderr);
        } /* end compress stdio */
        else {   /* decompress stdin to stdout */
            /* Check the magic number */
            if (!nomagic) {
                if ((getchar()!=(magic_header[0] & 0xFF))
                 || (getchar()!=(magic_header[1] & 0xFF))) {
                    fprintf(stderr, "stdin: not in compressed format\n");
                    exit(ERROR);
                }
                maxbits = getchar();    /* set -b from file */
                block_compress = maxbits & BLOCK_MASK;
                maxbits &= BIT_MASK;
                if(maxbits > MAXBITS) {
                    fprintf(stderr,
                    "stdin: compressed with %d bits, can only handle %d bits\n",
                    maxbits, MAXBITS);
                    exit(ERROR);
                }
            }
            decompress();
            check_error();
        } /* end else decomp stdio */
    } /* end else standard input */
    exit(exit_stat);
}

void Usage(flag)
int flag;
{
static char *keep2 =  "keep";
static char *keep3 =  "kill (erase)";
static char *on = "on";
static char *off = "off";

#ifndef NDEBUG
    fprintf(stderr,"Usage: %s [-cCdDf?hkKvV][-b maxbits][-Iinpath][-Ooutpath][filenames...]\n",
        prog_name);
#else
    fprintf(stderr,"Usage: %s [-cCdf?hkKvV][-b maxbits][-Iinpath][-Ooutpath][filenames...]\n",
        prog_name);
#endif
    if (flag)
        return;
    fprintf(stderr,"Argument Processing..case is significant:\n");
    fprintf(stderr,"     MUST use '-' for switch character\nAll flags are optional.\n");
#ifndef NDEBUG
    fprintf(stderr,"     -D => debug; Keep file on error.\n");
    fprintf(stderr,"     -V => print Version; debug verbose\n");
#else
    fprintf(stderr,"     -V => print Version\n");
#endif
    fprintf(stderr,"     -d => do_decomp default = %s\n",(do_decomp)?on:off);
    fprintf(stderr,"     -v => verbose default = %s\n", (quiet)?off:on);
    fprintf(stderr,"     -f => force overwrite of output file default = %s\n",
        (force)?on:off);
    fprintf(stderr,"     -n => no header: useful to uncompress old files\n");
    fprintf(stderr,"     -c => cat all output to stdout default = %s\n",
        (zcat_flg)?on:off);
    fprintf(stderr,"     -C => generate output compatible with compress 2.0.\n");
    fprintf(stderr,"     -k => %s input file, default = %s\n",(keep)?keep3:keep2,
            (keep)?keep2:keep3);
    fprintf(stderr,"     -K => %s output file on error, default = %s\n",
            (keep_error)?keep3:keep2,(keep_error)?keep2:keep3);
    fprintf(stderr,"     -b maxbits  => default = %d bits, max = %d bits\n",maxbits,MAXBITS);
    fprintf(stderr,"     -I pathname => infile path  = %s\n",inpath);
    fprintf(stderr,"     -O pathname => outfile path = %s\n",outpath);
    fprintf(stderr,"     -? -h => help usage.\n");
}

char get_one()
/*
 * get a single character, with echo.
 */
{
        char tmp[2];
	int fd;

#ifdef SOZOBON
	return(0x7F & getche());
#endif
#ifdef MSC
	return(getche());
#endif
/*
 * All previous #ifdef'ed code should return() a value.
 * If no other option is available, the following is the original code.
 * It not only reads from stderr (not a defined operation)
 * but it does so via an explicit read() call on file descriptor 2!
 * So much for portability.                    -Dal
 */
#if MINIX
	fd = open("/dev/tty", 0);	/* open the tty directly */
#else
	fd = 2;				/* read from stderr */
#endif
        read(fd, tmp, 2);
        while (tmp[1] != '\n') {
                if (read(fd, tmp+1, 1) < 0) {   /* Ack! */
                        perror("stderr");
                        break;
                }
        }
	return(tmp[0]);
}

void writeerr()
{
    perror ( ofname );
    if (!zcat_flg && !keep_error){
        fclose(stdout);
        unlink ( ofname );
    }
    exit ( 1 );
}

#ifndef NOSIGNAL
/*
 * This routine returns 1 if we are running in the foreground and stderr
 * is a tty.
 */
int foreground()
{
    if(bgnd_flag) { /* background? */
        return(0);
    }
    else {            /* foreground */
        if(isatty(2)) {     /* and stderr is a tty */
            return(1);
        } else {
            return(0);
        }
    }
}
#endif

void prratio(stream, num, den)
FILE *stream;
long int num, den;
{
    register int q;         /* Doesn't need to be long */

    if(num > 214748L) {     /* 2147483647/10000 */
        q = (int) (num / (den / 10000L));
    }
    else {
        q = (int) (10000L * num / den);     /* Long calculations, though */
    }
    if (q < 0) {
        putc('-', stream);
        q = -q;
    }
    fprintf(stream, "%d.%02d%%", q / 100, q % 100);
}


int check_error()     /* returning OK continues with processing next file */
{
    switch(exit_stat) {
  case OK:
    return (OK);
  case NOMEM:
    if (do_decomp)
        fprintf(stderr,"%s: not enough memory to decompress '%s'.\n", prog_name, ifname);
    else
        fprintf(stderr,"%s: not enough memory to compress '%s'.\n", prog_name, ifname);
    return(OK);
  case SIGNAL_ERROR:
    fprintf(stderr,"%s: error setting signal interupt.\n",prog_name);
    exit(ERROR);
    break;
  case READERR:
    fprintf(stderr,"%s: read error on input '%s'.\n", prog_name, ifname);
    break;
  case WRITEERR:
    fprintf(stderr,"%s: write error on output '%s'.\n", prog_name, ofname);
    break;
   case TOKTOOBIG:
    fprintf(stderr,"%s: token too long in '%s'.\n", prog_name, ifname);
    break;
  case INFILEBAD:
    fprintf(stderr, "%s: '%s' in unknown compressed format.\n", prog_name, ifname);
    break;
 case CODEBAD:
    fprintf(stderr,"%s: file token bad in '%s'.\n", prog_name,ifname);
    break;
 case TABLEBAD:
    fprintf(stderr,"%s: internal error -- tables corrupted.\n", prog_name);
    break;
  case NOTOPENED:
    fprintf(stderr,"%s: could not open output file %s\n",prog_name,ofname);
    exit(ERROR);
    break;
  case NOSAVING:
    if (force)
        exit_stat = OK;
    return (OK);
  default:
    fprintf(stderr,"%s: internal error -- illegal return value = %d.\n", prog_name,exit_stat);
  }
  if (!zcat_flg && !keep_error){
        fclose(stdout);         /* won't get here without an error */
        unlink ( ofname );
    }
  exit(exit_stat);
  return(ERROR);
}
SHAR_EOF
cat << \SHAR_EOF > compapi.c
/*@H************************ < COMPRESS API    > ****************************
*                                                                           *
*   compress : compapi.c  <current version of compress algorithm>           *
*                                                                           *
*   port by  : Donald J. Gloistein                                          *
*                                                                           *
*   Source, Documentation, Object Code:                                     *
*   released to Public Domain.  This code is based on code as documented    *
*   below in release notes.                                                 *
*                                                                           *
*---------------------------  Module Description  --------------------------*
*   Contains source code for modified Lempel-Ziv method (LZW) compression   *
*   and decompression.                                                      *
*                                                                           *
*   This code module can be maintained to keep current on releases on the   *
*   Unix system. The command shell and dos modules can remain the same.     *
*                                                                           *
*--------------------------- Implementation Notes --------------------------*
*                                                                           *
*   compiled with : compress.h compress.fns compress.c                      *
*   linked with   : compress.obj compusi.obj                                *
*                                                                           *
*   problems:                                                               *
*                                                                           *
*                                                                           *
*   CAUTION: Uses a number of defines for access and speed. If you change   *
*            anything, make sure about side effects.                        *
*                                                                           *
* Compression:                                                              *
* Algorithm:  use open addressing double hashing (no chaining) on the       *
* prefix code / next character combination.  We do a variant of Knuth's     *
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime     *
* secondary probe.  Here, the modular division first probe is gives way     *
* to a faster exclusive-or manipulation.                                    *
* Also block compression with an adaptive reset was used in original code,  *
* whereby the code table is cleared when the compression ration decreases   *
* but after the table fills.  This was removed from this edition. The table *
* is re-sized at this point when it is filled , and a special CLEAR code is *
* generated for the decompressor. This results in some size difference from *
* straight version 4.0 joe Release. But it is fully compatible in both v4.0 *
* and v4.01                                                                 *
*                                                                           *
* Decompression:                                                            *
* This routine adapts to the codes in the file building the "string" table  *
* on-the-fly; requiring no table to be stored in the compressed file.  The  *
* tables used herein are shared with those of the compress() routine.       *
*                                                                           *
*     Initials ---- Name ---------------------------------                  *
*      DjG          Donald J. Gloistein, current port to MsDos 16 bit       *
*                   Plus many others, see rev.hst file for full list        *
*      LvR          Lyle V. Rains, many thanks for improved implementation  *
*                   of the compression and decompression routines.          *
*************************************************************************@H*/

#include <stdio.h>

#include "compress.h" /* contains the rest of the include file declarations */

static int offset;
static long int in_count ;         /* length of input */
static long int bytes_out;         /* length of compressed output */

static CODE prefxcode, nextfree;
static CODE highcode;
static CODE maxcode;
static HASH hashsize;
static int  bits;


/*
 * The following two parameter tables are the hash table sizes and
 * maximum code values for various code bit-lengths.  The requirements
 * are that Hashsize[n] must be a prime number and Maxcode[n] must be less
 * than Maxhash[n].  Table occupancy factor is (Maxcode - 256)/Maxhash.
 * Note:  I am using a lower Maxcode for 16-bit codes in order to
 * keep the hash table size less than 64k entries.
 */
CONST HASH hs[] = {
  0x13FF,       /* 12-bit codes, 75% occupancy */
  0x26C3,       /* 13-bit codes, 80% occupancy */
  0x4A1D,       /* 14-bit codes, 85% occupancy */
  0x8D0D,       /* 15-bit codes, 90% occupancy */
  0xFFD9        /* 16-bit codes, 94% occupancy, 6% of code values unused */
};
#define Hashsize(maxb) (hs[(maxb) -MINBITS])

CONST CODE mc[] = {
  0x0FFF,       /* 12-bit codes */
  0x1FFF,       /* 13-bit codes */
  0x3FFF,       /* 14-bit codes */
  0x7FFF,       /* 15-bit codes */
  0xEFFF        /* 16-bit codes, 6% of code values unused */
};
#define Maxcode(maxb) (mc[(maxb) -MINBITS])

#ifdef __STDC__
#ifndef NDEBUG
#define allocx(type, ptr, size) \
    (((ptr) = (type FAR *) emalloc((unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ?   (fprintf(stderr,"%s: "#ptr" -- ", prog_name), NOMEM) : OK \
    )
#else
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) emalloc((unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ? NOMEM : OK \
    )
#endif
#else
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) emalloc((unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ? NOMEM : OK \
    )
#endif

#define free_array(type,ptr,offset) \
    if (ptr != NULLPTR(type)) { \
        efree((ALLOCTYPE FAR *)((ptr) + (offset))); \
        (ptr) = NULLPTR(type); \
    }

  /*
   * Macro to allocate new memory to a pointer with an offset value.
   */
#define alloc_array(type, ptr, size, offset) \
    ( allocx(type, ptr, (size) - (offset)) != OK \
      ? NOMEM \
      : (((ptr) -= (offset)), OK) \
    )

static char FAR *sfx = NULLPTR(char) ;
#define suffix(code)     sfx[code]


#if (SPLIT_PFX)
  static CODE FAR *pfx[2] = {NULLPTR(CODE), NULLPTR(CODE)};
#else
  static CODE FAR *pfx = NULLPTR(CODE);
#endif


#if (SPLIT_HT)
  static CODE FAR *ht[2] = {NULLPTR(CODE),NULLPTR(CODE)};
#else
  static CODE FAR *ht = NULLPTR(CODE);
#endif


int alloc_tables(maxcode, hashsize)
  CODE maxcode;
  HASH hashsize;
{
  static CODE oldmaxcode = 0;
  static HASH oldhashsize = 0;

  if (hashsize > oldhashsize) {
#if (SPLIT_HT)
      free_array(CODE,ht[1], 0);
      free_array(CODE,ht[0], 0);
#else
      free_array(CODE,ht, 0);
#endif
    oldhashsize = 0;
  }

    if (maxcode > oldmaxcode) {
#if (SPLIT_PFX)
        free_array(CODE,pfx[1], 128);
        free_array(CODE,pfx[0], 128);
#else
        free_array(CODE,pfx, 256);
#endif
        free_array(char,sfx, 256);

        if (   alloc_array(char, sfx, maxcode + 1, 256)
#if (SPLIT_PFX)
            || alloc_array(CODE, pfx[0], (maxcode + 1) / 2, 128)
            || alloc_array(CODE, pfx[1], (maxcode + 1) / 2, 128)
#else
            || alloc_array(CODE, pfx, (maxcode + 1), 256)
#endif
        ) {
            oldmaxcode = 0;
            exit_stat = NOMEM;
            return(NOMEM);
        }
        oldmaxcode = maxcode;
    }
    if (hashsize > oldhashsize) {
        if (
#if (SPLIT_HT)
            alloc_array(CODE, ht[0], (hashsize / 2) + 1, 0)
            || alloc_array(CODE, ht[1], hashsize / 2, 0)
#else
            alloc_array(CODE, ht, hashsize, 0)
#endif
        ) {
            oldhashsize = 0;
            exit_stat = NOMEM;
            return(NOMEM);
        }
        oldhashsize = hashsize;
    }
    return (OK);
}

# if (SPLIT_PFX)
    /*
     * We have to split pfx[] table in half,
     * because it's potentially larger than 64k bytes.
     */
#   define prefix(code)   (pfx[(code) & 1][(code) >> 1])
# else
    /*
     * Then pfx[] can't be larger than 64k bytes,
     * or we don't care if it is, so we don't split.
     */
#   define prefix(code) (pfx[code])
# endif


/* The initializing of the tables can be done quicker with memset() */
/* but this way is portable through out the memory models.          */
/* If you use Microsoft halloc() to allocate the arrays, then       */
/* include the pragma #pragma function(memset)  and make sure that  */
/* the length of the memory block is not greater than 64K.          */
/* This also means that you MUST compile in a model that makes the  */
/* default pointers to be far pointers (compact or large models).   */
/* See the file COMPUSI.DOS to modify function emalloc().           */

# if (SPLIT_HT)
    /*
     * We have to split ht[] hash table in half,
     * because it's potentially larger than 64k bytes.
     */
#   define probe(hash)    (ht[(hash) & 1][(hash) >> 1])
#   define init_tables() \
    { \
      hash = hashsize >> 1; \
      ht[0][hash] = 0; \
      while (hash--) ht[0][hash] = ht[1][hash] = 0; \
      highcode = ~(~(CODE)0 << (bits = INITBITS)); \
      nextfree = (block_compress ? FIRSTFREE : 256); \
    }

# else

    /*
     * Then ht[] can't be larger than 64k bytes,
     * or we don't care if it is, so we don't split.
     */
#   define probe(hash) (ht[hash])
#   define init_tables() \
    { \
      hash = hashsize; \
      while (hash--) ht[hash] = 0; \
      highcode = ~(~(CODE)0 << (bits = INITBITS)); \
      nextfree = (block_compress ? FIRSTFREE : 256); \
    }

# endif

#ifdef COMP40
/* table clear for block compress */
/* this is for adaptive reset present in version 4.0 joe release */
/* DjG, sets it up and returns TRUE to compress and FALSE to not compress */
int cl_block ()     
{
    register long int rat;

    checkpoint = in_count + CHECK_GAP;
#ifndef NDEBUG
	if ( debug ) {
        fprintf ( stderr, "count: %ld, ratio: ", in_count );
        prratio ( stderr, in_count, bytes_out );
		fprintf ( stderr, "\n");
	}
#endif

    if(in_count > 0x007fffff) {	/* shift will overflow */
        rat = bytes_out >> 8;
        if(rat == 0)       /* Don't divide by zero */
            rat = 0x7fffffff;
        else
            rat = in_count / rat;
    }
    else
        rat = (in_count << 8) / bytes_out;  /* 8 fractional bits */

    if ( rat > ratio ){
        ratio = rat;
        return FALSE;
    }
    else {
        ratio = 0;
#ifndef NDEBUG
        if(debug)
    		fprintf ( stderr, "clear\n" );
#endif
        return TRUE;    /* clear the table */
    }
    return FALSE; /* don't clear the table */
}
#endif

/*
 * compress stdin to stdout
 *
 */
void compress()
{
    int c,adjbits;
    register HASH hash;
    register CODE code;
    HASH hashf[256];

    maxcode = Maxcode(maxbits);
    hashsize = Hashsize(maxbits);

#ifdef COMP40
/* Only needed for adaptive reset */
    checkpoint = CHECK_GAP;
    ratio = 0;
#endif

    adjbits = maxbits -10;
    for (c = 256; --c >= 0; ){
        hashf[c] = ((( c &0x7) << 7) ^ c) << adjbits;
    }
    exit_stat = OK;
    if (alloc_tables(maxcode, hashsize))  /* exit_stat already set */
        return;
    init_tables();
    /* if not zcat or filter */
    if(is_list && !zcat_flg) {  /* Open output file */
        if (freopen(ofname, WRITE_FILE_TYPE, stdout) == NULL) {
            exit_stat = NOTOPENED;
            return;
        }
        if (!quiet)
            fprintf(stderr, "%s: ",ifname);
        setvbuf(stdout,zbuf,_IOFBF,ZBUFSIZE);
    }
 /*
   * Check the input stream for previously seen strings.  We keep
   * adding characters to the previously seen prefix string until we
   * get a character which forms a new (unseen) string.  We then send
   * the code for the previously seen prefix string, and add the new
   * string to our tables.  The check for previous strings is done by
   * hashing.  If the code for the hash value is unused, then we have
   * a new string.  If the code is used, we check to see if the prefix
   * and suffix values match the current input; if so, we have found
   * a previously seen string.  Otherwise, we have a hash collision,
   * and we try secondary hash probes until we either find the current
   * string, or we find an unused entry (which indicates a new string).
   */
    if (!nomagic) {
        putchar(magic_header[0]); putchar(magic_header[1]);
        putchar((char)(maxbits | block_compress));
        if(ferror(stdout)){  /* check it on entry */
            exit_stat = WRITEERR;
            return;
        }
        bytes_out = 3L;     /* includes 3-byte header mojo */
    }
    else
        bytes_out = 0L;      /* no 3-byte header mojo */
    in_count = 1L;
    offset = 0;

    if ((c = getchar()) == EOF) {
        exit_stat = ferror(stdin) ? READERR : OK;
        return;
    }
    prefxcode = (CODE)c;

    while ((c = getchar()) != EOF) {
        in_count++;
        hash = prefxcode ^ hashf[c];
        /* I need to check that my hash value is within range
        * because my 16-bit hash table is smaller than 64k.
        */
        if (hash >= hashsize)
            hash -= hashsize;
        if ((code = probe(hash)) != UNUSED) {
            if (suffix(code) != (char)c || prefix(code) != prefxcode) {
            /* hashdelta is subtracted from hash on each iteration of
            * the following hash table search loop.  I compute it once
            * here to remove it from the loop.
            */
                HASH hashdelta = (0x120 - c) << (adjbits);
                do  {
                    /* rehash and keep looking */
                    assert(code >= FIRSTFREE && code <= maxcode);
                    if (hash >= hashdelta) hash -= hashdelta;
                        else hash += (hashsize - hashdelta);
                    assert(hash < hashsize);
                    if ((code = probe(hash)) == UNUSED)
                        goto newcode;
                } while (suffix(code) != (char)c || prefix(code) != prefxcode);
            }
            prefxcode = code;
        }
        else {
            newcode: {
                putcode(prefxcode, bits);
                code = nextfree;
                assert(hash < hashsize);
                assert(code >= FIRSTFREE);
                assert(code <= maxcode + 1);
                if (code <= maxcode) {
                    probe(hash) = code;
                    prefix(code) = prefxcode;
                    suffix(code) = (char)c;
                    if (code > highcode) {
                        highcode += code;
                        ++bits;
                    }
                    nextfree = code + 1;
                }
#ifdef COMP40
                else if (in_count >= checkpoint && block_compress ) {
                    if (cl_block()){
#else
                else if (block_compress){
#endif
                        putcode((CODE)c, bits);
                        putcode((CODE)CLEAR,bits);
                        init_tables();
                        if ((c = getchar()) == EOF)
                            break;
                        in_count++;
#ifdef COMP40
                    }
#endif
                }
                prefxcode = (CODE)c;
            }
        }
    }
    putcode(prefxcode, bits);
    putcode((CODE)CLEAR, 0);
    if (ferror(stdout)){ /* check it on exit */
        exit_stat = WRITEERR;
        return;
    }
    /*
     * Print out stats on stderr
     */
    if(zcat_flg == 0 && !quiet) {
#ifndef NDEBUG
        fprintf( stderr,
            "%ld chars in, (%ld bytes) out, compression factor: ",
            in_count, bytes_out );
        prratio( stderr, in_count, bytes_out );
        fprintf( stderr, "\n");
        fprintf( stderr, "\tCompression as in compact: " );
        prratio( stderr, in_count-bytes_out, in_count );
        fprintf( stderr, "\n");
        fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
            prefxcode - 1, bits );
#else
        fprintf( stderr, "Compression: " );
        prratio( stderr, in_count-bytes_out, in_count );
#endif /* NDEBUG */
    }
    if(bytes_out > in_count)      /*  if no savings */
        exit_stat = NOSAVING;
    return ;
}

CONST UCHAR rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};

void putcode(code,bits)
CODE code;
register int bits;
{
  static int oldbits = 0;
  static UCHAR outbuf[MAXBITS];
  register UCHAR *buf;
  register int shift;

  if (bits != oldbits) {
    if (bits == 0) {
      /* bits == 0 means EOF, write the rest of the buffer. */
      if (offset > 0) {
          fwrite(outbuf,1,(offset +7) >> 3, stdout);
          bytes_out += ((offset +7) >> 3);
      }
      offset = 0;
      oldbits = 0;
      fflush(stdout);
      return;
    }
    else {
      /* Change the code size.  We must write the whole buffer,
       * because the expand side won't discover the size change
       * until after it has read a buffer full.
       */
      if (offset > 0) {
        fwrite(outbuf, 1, oldbits, stdout);
        bytes_out += oldbits;
        offset = 0;
      }
      oldbits = bits;
#ifndef NDEBUG
      if ( debug )
        fprintf( stderr, "\nChange to %d bits\n", bits );
#endif /* !NDEBUG */
    }
  }
  /*  Get to the first byte. */
  buf = outbuf + ((shift = offset) >> 3);
  if ((shift &= 7) != 0) {
    *(buf) |= (*buf & rmask[shift]) | (UCHAR)(code << shift);
    *(++buf) = (UCHAR)(code >> (8 - shift));
    if (bits + shift > 16)
        *(++buf) = (UCHAR)(code >> (16 - shift));
  }
  else {
    /* Special case for fast execution */
    *(buf) = (UCHAR)code;
    *(++buf) = (UCHAR)(code >> 8);
  }
  if ((offset += bits) == (bits << 3)) {
    bytes_out += bits;
    fwrite(outbuf,1,bits,stdout);
    offset = 0;
  }
  return;
}

int nextcode(codeptr)
CODE *codeptr;
/* Get the next code from input and put it in *codeptr.
 * Return (TRUE) on success, or return (FALSE) on end-of-file.
 * Adapted from COMPRESS V4.0.
 */
{
  static int prevbits = 0;
  register CODE code;
  static int size;
  static UCHAR inbuf[MAXBITS];
  register int shift;
  UCHAR *bp;

  /* If the next entry is a different bit-size than the preceeding one
   * then we must adjust the size and scrap the old buffer.
   */
  if (prevbits != bits) {
    prevbits = bits;
    size = 0;
  }
  /* If we can't read another code from the buffer, then refill it.
   */
  if (size - (shift = offset) < bits) {
    /* Read more input and convert size from # of bytes to # of bits */
    if ((size = fread(inbuf, 1, bits, stdin) << 3) <= 0 || ferror(stdin))
      return(NO);
    offset = shift = 0;
  }
  /* Get to the first byte. */
  bp = inbuf + (shift >> 3);
  /* Get first part (low order bits) */
  code = (*bp++ >> (shift &= 7));
  /* high order bits. */
  code |= *bp++ << (shift = 8 - shift);
  if ((shift += 8) < bits) code |= *bp << shift;
  *codeptr = code & highcode;
  offset += bits;
  return (TRUE);
}

void decompress()
{
  register int i;
  register CODE code;
  char sufxchar;
  CODE savecode;
  FLAG fulltable, cleartable;
  static char token[MAXTOKLEN];         /* String buffer to build token */

  exit_stat = OK;

  if (alloc_tables(maxcode = ~(~(CODE)0 << maxbits),0)) /* exit_stat already set */
     return;

    /* if not zcat or filter */
    if(is_list && !zcat_flg) {  /* Open output file */
        if (freopen(ofname, WRITE_FILE_TYPE, stdout) == NULL) {
            exit_stat = NOTOPENED;
            return;
        }
        if (!quiet)
            fprintf(stderr, "%s: ",ifname);
        setvbuf(stdout,xbuf,_IOFBF,XBUFSIZE);
    }
  cleartable = TRUE;
  savecode = CLEAR;
  offset = 0;
  do {
    if ((code = savecode) == CLEAR && cleartable) {
      highcode = ~(~(CODE)0 << (bits = INITBITS));
      fulltable = FALSE;
      nextfree = (cleartable = block_compress) == FALSE ? 256 : FIRSTFREE;
      if (!nextcode(&prefxcode))
        break;
      putc((sufxchar = (char)prefxcode), stdout);
      continue;
    }
    i = 0;
    if (code >= nextfree && !fulltable) {
      if (code != nextfree){
        exit_stat = CODEBAD;
        return ;     /* Non-existant code */
    }
      /* Special case for sequence KwKwK (see text of article)         */
      code = prefxcode;
      token[i++] = sufxchar;
    }
    /* Build the token string in reverse order by chasing down through
     * successive prefix tokens of the current token.  Then output it.
     */
    while (code >= 256) {
#     if !defined(NDEBUG)
        /* These are checks to ease paranoia. Prefix codes must decrease
         * monotonically, otherwise we must have corrupt tables.  We can
         * also check that we haven't overrun the token buffer.
         */
        if (code <= prefix(code)){
            exit_stat= TABLEBAD;
            return;
        }
        if (i >= MAXTOKLEN){
            exit_stat = TOKTOOBIG;
            return;
        }
#     endif
      token[i++] = suffix(code);
      code = prefix(code);
    }
    putc(sufxchar = (char)code, stdout);
    while (--i >= 0)
        putc(token[i], stdout);
    if (ferror(stdout)) {
        exit_stat = WRITEERR;
        return;
    }
    /* If table isn't full, add new token code to the table with
     * codeprefix and codesuffix, and remember current code.
     */
    if (!fulltable) {
      code = nextfree;
      assert(256 <= code && code <= maxcode);
      prefix(code) = prefxcode;
      suffix(code) = sufxchar;
      prefxcode = savecode;
      if (code++ == highcode) {
        if (highcode >= maxcode) {
          fulltable = TRUE;
          --code;
        }
        else {
          ++bits;
          highcode += code;           /* nextfree == highcode + 1 */
        }
      }
      nextfree = code;
    }
  } while (nextcode(&savecode));
  exit_stat = (ferror(stdin))? READERR : OK;
  return ;
}

SHAR_EOF
#	End of shell archive
exit 0

--
      Dale Schumacher                         399 Beacon Ave.
      (alias: Dalnefre')                      St. Paul, MN  55104-3527
      ...bungia!midgard.mn.org!syntel!dal     United States of America
             "I may be competitive, but I'm never ruthless"