Relay-Version: version B 2.10 5/3/83; site utzoo.UUCP Path: utzoo!mnetor!uunet!hpda!hpsmtc1!swh From: swh@hpsmtc1.HP.COM (Steve Harrold) Newsgroups: comp.sources.bugs Subject: Re: squasing arc source sliced Message-ID: <14310002@hpsmtc1.HP.COM> Date: Tue, 21-Jul-87 12:31:04 EDT Article-I.D.: hpsmtc1.14310002 Posted: Tue Jul 21 12:31:04 1987 Date-Received: Fri, 24-Jul-87 03:05:57 EDT References: <100@auscso.UUCP> Organization: Hewlett Packard, Cupertino Lines: 2962 Relay-Version: version Notes 2.7.5 (840 Contrib) 87/2/5; site hpsmtc1.HP.COM From: ddl@husc6.UUCP (Dan Lanciani) Date: Thu, 16 Jul 87 00:10:44 GMT Date-Received: Thu, 16 Jul 87 10:34:01 GMT Subject: arc that UnSquashes 2/2 Message-ID: <2544@husc6.UUCP> Organization: Harvard University Computer Services Path: hpsmtc1!hpda!uunet!seismo!husc6!ddl Newsgroups: alt.sources Lines: 2951 -----------continue here------------ static unsigned INT eolist(index) /* find last duplicate */ unsigned INT index; { INT temp; while(temp=string_tab[index].next) /* while more duplicates */ index = temp; return index; } /* The hash() routine is used to find a spot in the hash table for a new entry. It performs a "hash and linear probe" lookup, using h() to calculate the starting hash value and eolist() to perform the linear probe. This routine DOES NOT detect a table full condition. That MUST be checked for elsewhere. */ static unsigned INT hash(pred,foll) /* find spot in the string table */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* char following string */ { unsigned INT local, tempnext; /* scratch storage */ struct entry *ep; /* allows faster table handling */ local = (*h)(pred,foll); /* get initial hash value */ if(!string_tab[local].used) /* if that spot is free */ return local; /* then that's all we need */ else /* else a collision has occured */ { local = eolist(local); /* move to last duplicate */ /* We must find an empty spot. We start looking 101 places down the table from the last duplicate. */ tempnext = (local+101) & 0x0FFF; ep = &string_tab[tempnext]; /* initialize pointer */ while(ep->used) /* while empty spot not found */ { if(++tempnext==TABSIZE) /* if we are at the end */ { tempnext = 0; /* wrap to beginning of table*/ ep = string_tab; } else ++ep; /* point to next element in table */ } /* local still has the pointer to the last duplicate, while tempnext has the pointer to the spot we found. We use this to maintain the chain of pointers to duplicates. */ string_tab[local].next = tempnext; return tempnext; } } /* The unhash() function is used to search the hash table for a given key. Like hash(), it performs a hash and linear probe search. It returns either the number of the entry (if found) or NOT_FND (if not found). */ static unsigned INT unhash(pred,foll) /* search string table for a key */ unsigned INT pred; /* code of preceeding string */ unsigned char foll; /* character following string */ { unsigned INT local, offset; /* scratch storage */ struct entry *ep; /* this speeds up access */ local = (*h)(pred,foll); /* initial hash */ while(1) { ep = &string_tab[local]; /* speed up table access */ if((ep->predecessor==pred) && (ep->follower==foll)) return local; /* we have a match */ if(!ep->next) /* if no more duplicates */ return NOT_FND; /* then key is not listed */ local = ep->next; /* move on to next duplicate */ } } /* The init_tab() routine is used to initialize our hash table. You realize, of course, that "initialize" is a complete misnomer. */ static INT init_tab() /* set ground state in hash table */ { unsigned INT i; /* table index */ INT upd_tab(); setmem((char *)string_tab,sizeof(string_tab),0); for(i=0; i<256; i++) /* list all single byte strings */ upd_tab(NO_PRED,i); inbuf = EMPTY; /* nothing is in our buffer */ } /* The upd_tab routine is used to add a new entry to the string table. As previously stated, no checks are made to ensure that the table has any room. This must be done elsewhere. */ INT upd_tab(pred,foll) /* add an entry to the table */ unsigned INT pred; /* code for preceeding string */ unsigned INT foll; /* character which follows string */ { struct entry *ep; /* pointer to current entry */ /* calculate offset just once */ ep = &string_tab[hash(pred,foll)]; ep->used = TRUE; /* this spot is now in use */ ep->next = 0; /* no duplicates after this yet */ ep->predecessor = pred; /* note code of preceeding string */ ep->follower = foll; /* note char after string */ } /* This algorithm encoded a file into twelve bit strings (three nybbles). The gocode() routine is used to read these strings a byte (or two) at a time. */ static INT gocode(fd) /* read in a twelve bit code */ FILE *fd; /* file to get code from */ { unsigned INT localbuf, returnval; if(inbuf==EMPTY) /* if on a code boundary */ { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */ return EOF; /* pass back end of file status */ localbuf &= 0xFF; /* mask down to true byte value */ if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */ return EOF; /* this should never happen */ inbuf &= 0xFF; /* mask down to true byte value */ returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F); inbuf &= 0x000F; /* leave partial code pending */ } else /* buffer contains first nybble */ { if((localbuf=getc_unp(fd))==EOF) return EOF; localbuf &= 0xFF; returnval = localbuf + ((inbuf<<8)&0xF00); inbuf = EMPTY; /* note no hanging nybbles */ } return returnval; /* pass back assembled code */ } static INT push(c) /* push char onto stack */ INT c; /* character to push */ { stack[sp] = ((char) c); /* coerce integer into a char */ if(++sp >= TABSIZE) abort("Stack overflow\n"); } static INT pop() /* pop character from stack */ { if(sp>0) return ((INT) stack[--sp]); /* leave ptr at next empty slot */ else return EMPTY; } /***** LEMPEL-ZEV DECOMPRESSION *****/ static INT code_count; /* needed to detect table full */ static unsigned INT code; /* where we are so far */ static INT firstc; /* true only on first character */ INT init_ucr(new) /* get set for uncrunching */ INT new; /* true to use new hash function */ { if(new) /* set proper hash function */ h = newh; else h = oldh; sp = 0; /* clear out the stack */ init_tab(); /* set up atomic code definitions */ code_count = TABSIZE - 256; /* note space left in table */ firstc = 1; /* true only on first code */ } INT getc_ucr(f) /* get next uncrunched byte */ FILE *f; /* file containing crunched data */ { unsigned INT c; /* a character of input */ INT code, newcode; static INT oldcode, finchar; struct entry *ep; /* allows faster table handling */ if(firstc) /* first code is always known */ { firstc = FALSE; /* but next will not be first */ oldcode = gocode(f); return finchar = string_tab[oldcode].follower; } if(!sp) /* if stack is empty */ { if((code=newcode=gocode(f))==EOF) return EOF; ep = &string_tab[code]; /* initialize pointer */ if(!ep->used) /* if code isn't known */ { code = oldcode; ep = &string_tab[code]; /* re-initialize pointer */ push(finchar); } while(ep->predecessor!=NO_PRED) { push(ep->follower); /* decode string backwards */ code = ep->predecessor; ep = &string_tab[code]; } push(finchar=ep->follower); /* save first character also */ /* The above loop will terminate, one way or another, with string_tab[code].follower equal to the first character in the string. */ if(code_count) /* if room left in string table */ { upd_tab(oldcode,finchar); --code_count; } oldcode = newcode; } return pop(); /* return saved character */ } SHAR_EOF fi # end of overwriting check if test -f 'arcm.h' then echo shar: will not over-write existing file "'arcm.h'" else cat << \SHAR_EOF > 'arcm.h' /* * $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $ */ /* * $Log: arcm.h,v $ * Revision 1.2 86/07/15 07:53:40 turner * * * Revision 1.1 86/06/26 15:01:25 turner * initial version * * */ /* * * ARC archive utility - standard MACRO insert file. * * parameters: * */ #define ARCMARK 26 /* special archive marker */ #define ARCVER 9 /* archive header version code */ #define STRLEN 100 /* system standard string length */ #define FNLEN 13 /* file name length */ #define MAXARG 25 /* maximum number of arguments */ #define ARC 1 #define XARC 0 SHAR_EOF fi # end of overwriting check if test -f 'arcmatch.c' then echo shar: will not over-write existing file "'arcmatch.c'" else cat << \SHAR_EOF > 'arcmatch.c' static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $"; /* * $Log: arcmatch.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:42 turner * * * Revision 1.1 86/06/26 15:00:34 turner * initial version * * */ /* ARC - Archive utility - ARCMATCH $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.17), created on $tag( TED_DATE DB =12/17/85) at $tag( TED_TIME DB =20:32:18))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains service routines needed to maintain an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT match(n,t) /* test name against template */ char *n; /* name to test */ char *t; /* template to test against */ { #if MSDOS upper(n); upper(t); /* avoid case problems */ #endif /* first match name part */ while((*n && *n!='.') || (*t && *t!='.')) { if(*n!=*t && *t!='?') /* match fail? */ { if(*t!='*') /* wildcard fail? */ return 0; /* then no match */ else /* else jump over wildcard */ { while(*n && *n!='.') n++; while(*t && *t!='.') t++; break; /* name part matches wildcard */ } } else /* match good for this char */ { n++; /* advance to next char */ t++; } } if(*n && *n=='.') n++; /* skip extension delimiters */ if(*t && *t=='.') t++; /* now match name part */ while(*n || *t) { if(*n!=*t && *t!='?') /* match fail? */ { if(*t!='*') /* wildcard fail? */ return 0; /* then no match */ else return 1; /* else good enough */ } else /* match good for this char */ { n++; /* advance to next char */ t++; } } return 1; /* match worked */ } INT rempath(nargs,arg) /* remove paths from filenames */ INT nargs; /* number of names */ char *arg[]; /* pointers to names */ { char *i, *rindex(); /* string index, reverse indexer */ INT n; /* index */ for(n=0; n 'arcmisc.c' #include #include "arc.h" /* split up a file name (subroutine for makefnam) * * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) */ static INT _makefn(source,dest) unsigned char *source; unsigned char *dest; { INT j; setmem (dest, 17, 0); /* clear result field */ if (strlen (source) > 1 && source[1] == ':') for (j = 0; j < 2;) dest[j++] = *source++; for (j = 3; *source && *source != '.'; ++source) if (j < 11) dest[j++] = *source; for (j = 12; *source; ++source) if (j < 16) dest[j++] = *source; } /* make a file name using a template */ char *makefnam(rawfn,template,result) unsigned char *rawfn; /* the original file name */ unsigned char *template; /* the template data */ unsigned char *result; /* where to place the result */ { unsigned char et[17],er[17]; _makefn(template,et); _makefn(rawfn,er); *result=0; /* assure no data */ strcat(result,er[0]?er:et); strcat(result,er[3]?er+3:et+3); strcat(result,er[12]?er+12:et+12); return ((char *)&result[0]); } INT freedir(dirs) register struct direct **dirs; { register INT ii; if(dirs == (struct direct **)0) return(-1); for(ii = 0; dirs[ii] != (struct direct *)0; ii++) free(dirs[ii]); free(dirs); return(0); } #if MSDOS #include INT alphasort(dirptr1, dirptr2) struct direct **dirptr1, **dirptr2; { return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name)); } #endif SHAR_EOF fi # end of overwriting check if test -f 'arcpack.c' then echo shar: will not over-write existing file "'arcpack.c'" else cat << \SHAR_EOF > 'arcpack.c' static char *RCSid = "$Header: arcpack.c,v 1.2 86/07/15 07:53:48 turner Exp $"; /* * $Log: arcpack.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:48 turner * * * Revision 1.1 86/06/26 15:00:37 turner * initial version * * */ /* ARC - Archive utility - ARCPACK $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =3.37), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:58:01))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to compress a file when placing it in an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" /* stuff for non-repeat packing */ #define DLE 0x90 /* repeat sequence marker */ static unsigned char state; /* current packing state */ /* non-repeat packing states */ #define NOHIST 0 /* don't consider previous input*/ #define SENTCHAR 1 /* lastchar set, no lookahead yet */ #define SENDNEWC 2 /* run over, send new char next */ #define SENDCNT 3 /* newchar set, send count next */ /* packing results */ static long stdlen; /* length for standard packing */ static INT crcval; /* CRC check value */ INT pack(f,t,hdr) /* pack file into an archive */ FILE *f, *t; /* source, destination */ struct heads *hdr; /* pointer to header data */ { INT c; /* one character of stream */ long ncrlen; /* length after packing */ long huflen; /* length after squeezing */ long lzwlen; /* length after crunching */ long pred_sq(), file_sq(); /* stuff for squeezing */ long pred_cm(); /* dynamic crunching cleanup */ char tnam[STRLEN]; /* temporary name buffer */ char *makefnam(); /* filename fixer upper */ FILE *crn = NULL; /* temporary crunch file */ INT getch(); INT getc_ncr(); INT putc_pak(); /* first pass - see which method is best */ if(!nocomp) /* if storage kludge not active */ { if(note) { printf(" analyzing, "); fflush(stdout);} if(arctemp) /* use temp area if specified */ sprintf(tnam,"%s.CRN",arctemp); else makefnam("$ARCTEMP.CRN",arcname,tnam); #if MSDOS crn = fopen(tnam,"wrb"); #endif #if BSD | ST crn = fopen(tnam,"w+"); #endif state = NOHIST; /* initialize ncr packing */ stdlen = ncrlen = 0; /* reset size counters */ crcval = 0; /* initialize CRC check value */ setcode(); /* initialize encryption */ init_cm(f,crn); /* initialize for crunching */ init_sq(); /* initialize for squeeze scan */ while((c=getc_ncr(f))!=EOF) /* for each byte of file */ { ncrlen++; /* one more packed byte */ scan_sq(c); /* see what squeezing can do */ putc_cm(c,crn); /* see what crunching can do */ } huflen = pred_sq(); /* finish up after squeezing */ lzwlen = pred_cm(crn); /* finish up after crunching */ } else /* else kludge the method */ { stdlen = 0; /* make standard look best */ ncrlen = huflen = lzwlen = 1; } /* standard set-ups common to all methods */ fseek(f,0L,0); /* rewind input */ hdr->crc = crcval; /* note CRC check value */ hdr->length = stdlen; /* set actual file length */ state = NOHIST; /* reinitialize ncr packing */ setcode(); /* reinitialize encryption */ /* choose and use the shortest method */ if(stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen) { if(kludge) /*DEBUG*/ printf("(%ld) ",lzwlen-stdlen); if(note) { printf("storing, "); fflush(stdout);}/* store w/out compression */ hdrver = 2; /* note packing method */ stdlen = crcval = 0; /* recalc these for kludge */ while((c=getch(f))!=EOF) /* store it straight */ putc_pak(c,t); hdr->crc = crcval; hdr->length = hdr->size = stdlen; } else if(ncrlensize = ncrlen; /* set data length */ while((c=getc_ncr(f))!=EOF) putc_pak(c,t); } else if(huflensize = file_sq(f,t); /* note final size */ } else { if(kludge) /*DEBUG*/ printf("(%ld) ",huflen-lzwlen); if(note) { printf("crunching, "); fflush(stdout);} hdrver = 8; hdr->size = lzwlen; /* size should not change */ if(crn) /* if temp was created */ { fseek(crn,0L,0); /* then copy over crunched temp */ while((c=fgetc(crn))!=EOF) putc_tst(c,t); } else /* else re-crunch */ { init_cm(f,t); while((c=getc_ncr(f))!=EOF) putc_cm(c,t); pred_cm(t); /* finish up after crunching */ } } /* standard cleanups common to all methods */ if(crn) /* get rid of crunch temporary */ { fclose(crn); if(unlink(tnam) && warn) { printf("Cannot delete temporary file %s\n",tnam); nerrs++; } } if(note) printf("done.\n"); } /* Non-repeat compression - text is passed through normally, except that a run of more than two is encoded as: Special case: a count of zero indicates that the DLE is really a DLE, not a repeat marker. */ INT getc_ncr(f) /* get bytes with collapsed runs */ FILE *f; /* file to get from */ { static INT lastc; /* value returned on last call */ static INT repcnt; /* repetition counter */ static INT c; /* latest value seen */ switch(state) /* depends on our state */ { case NOHIST: /* no relevant history */ state = SENTCHAR; return lastc = getch(f); /* remember the value next time */ case SENTCHAR: /* char was sent. look ahead */ switch(lastc) /* action depends on char */ { case DLE: /* if we sent a real DLE */ state = NOHIST; /* then start over again */ return 0; /* but note that the DLE was real */ case EOF: /* EOF is always a special case */ return EOF; default: /* else test for a repeat */ for(repcnt=1; (c=getch(f))==lastc && repcnt<255; repcnt++) ; /* find end of run */ switch(repcnt) /* action depends on run size */ { case 1: /* not a repeat */ return lastc = c; /* but remember value next time */ case 2: /* a repeat, but too short */ state = SENDNEWC; /* send the second one next time */ return lastc; default: /* a run - compress it */ state = SENDCNT; /* send repeat count next time */ return DLE; /* send repeat marker this time */ } } case SENDNEWC: /* send second char of short run */ state = SENTCHAR; return lastc = c; case SENDCNT: /* sent DLE, now send count */ state = SENDNEWC; return repcnt; default: abort("Bug - bad ncr state\n"); } } static INT getch(f) /* special get char for packing */ FILE *f; /* file to get from */ { INT c; /* a char from the file */ if((c=fgetc(f))!=EOF) /* if not the end of file */ { crcval = addcrc(crcval,c); /* then update CRC check value */ stdlen++; /* and bump length counter */ } return c; } INT putc_pak(c,f) /* put a packed byte into archive */ char c; /* byte to put */ FILE *f; /* archive to put it in */ { putc_tst(code(c),f); /* put encoded byte, with checks */ } SHAR_EOF fi # end of overwriting check if test -f 'arcrun.c' then echo shar: will not over-write existing file "'arcrun.c'" else cat << \SHAR_EOF > 'arcrun.c' static char *RCSid = "$Header: arcrun.c,v 1.2 86/07/15 07:53:55 turner Exp $"; /* * $Log: arcrun.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:55 turner * * * Revision 1.1 86/06/26 15:00:40 turner * initial version * * */ /* ARC - Archive utility - ARCRUN $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =1.17), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:59:06))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to "run" a file which is stored in an archive. At present, all we really do is (a) extract a temporary file, (b) give its name as a system command, and then (c) delete the file. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT runarc(num,arg) /* run file from archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { struct heads hdr; /* file header */ INT run; /* true to run current file */ INT did[MAXARG]; /* true when argument was used */ INT n; /* index */ char *makefnam(); /* filename fixer */ char buf[STRLEN]; /* filename buffer */ FILE *fopen(); /* file opener */ INT runfile(); for(n=0; nname,buf); */ sprintf(buf,"%s.RUN",arctemp); /* if(!strcmp(buf,"$ARCTEMP.BAS")) * strcpy(sys,"BASICA $ARCTEMP"); * * else if(!strcmp(buf,"$ARCTEMP.BAT") * || !strcmp(buf,"$ARCTEMP.COM") * || !strcmp(buf,"$ARCTEMP.EXE")) * strcpy(sys,"$ARCTEMP"); * * else * { if(warn) * { printf("File %s is not a .BAS, .BAT, .COM, or .EXE\n", * hdr->name); * nerrs++; * } * fseek(arc,hdr->size,1); * return; * } */ if(warn) if(tmp=fopen(buf,"r")) abort("Temporary file %s already exists",buf); if(!(tmp=fopen(buf,"w"))) abort("Unable to create temporary file %s",buf); if(note) printf("Invoking file: %s\n",hdr->name); dir = gcdir(""); /* see where we are */ unpack(arc,tmp,hdr); /* unpack the entry */ fclose(tmp); /* release the file */ chmod(buf,"700"); /* make it exec'able */ system(buf); /* try to invoke it */ chdir(dir); free(dir); /* return to whence we started */ if(unlink(buf) && warn) { printf("Cannot unsave temporary file %s\n",buf); nerrs++; } } SHAR_EOF fi # end of overwriting check if test -f 'arcs.h' then echo shar: will not over-write existing file "'arcs.h'" else cat << \SHAR_EOF > 'arcs.h' /* * $Header: arcs.h,v 1.2 86/07/15 07:54:02 turner Exp $ */ /* * $Log: arcs.h,v $ * Revision 1.2 86/07/15 07:54:02 turner * * * Revision 1.1 86/06/26 15:01:27 turner * initial version * * */ /* ARC - Archive utility - Archive file header format Version 2.12, created on 12/17/85 at 14:40:26 (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file defines the format of an archive file header, excluding the archive marker and the header version number. Each entry in an archive begins with a one byte archive marker, which is set to 26. The marker is followed by a one byte header type code, from zero to 7. If the header type code is zero, then it is an end marker, and no more data should be read from the archive. If the header type code is in the range 2 to 7, then it is followed by a standard archive header, which is defined below. If the header type code is one, then it is followed by an older format archive header. The older format header does not contain the true length. A header should be read for a length of sizeof(struct heads)-sizeof(long). Then set length equal to size and change the header version to 2. Programming note: The crc value given in the header is based on the unpacked data. Language: Computer Innovations Optimizing C86 */ struct heads /* archive entry header format */ { char name[FNLEN]; /* file name */ long size; /* size of file, in bytes */ unsigned INT date; /* creation date */ unsigned INT time; /* creation time */ INT crc; /* cyclic redundancy check */ long length; /* true file length */ } ; SHAR_EOF fi # end of overwriting check if test -f 'arcsq.c' then echo shar: will not over-write existing file "'arcsq.c'" else cat << \SHAR_EOF > 'arcsq.c' static char *RCSid = "$Header: arcsq.c,v 1.2 86/07/15 07:54:05 turner Exp $"; /* * $Log: arcsq.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:54:05 turner * * * Revision 1.1 86/06/26 15:00:48 turner * initial version * * */ /* ARC - Archive utility - ARCSQ $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =3.10), created on $tag( TED_DATE DB =01/30/86) at $tag( TED_TIME DB =20:10:46))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to squeeze a file when placing it in an archive. Language: Computer Innovations Optimizing C86 Programming notes: Most of the routines used for the Huffman squeezing algorithm were lifted from the SQ program by Dick Greenlaw, as adapted to CI-C86 by Robert J. Beilstein. */ #include #include "arc.h" /* stuff for Huffman squeezing */ #define TRUE 1 #define FALSE 0 #define ERROR (-1) #define SPEOF 256 /* special endfile token */ #define NOCHILD (-1) /* marks end of path through tree */ #define NUMVALS 257 /* 256 data values plus SPEOF*/ #define NUMNODES (NUMVALS+NUMVALS-1) /* number of nodes */ #define MAXCOUNT (unsigned INT) 65535 /* biggest unsigned integer */ /* The following array of structures are the nodes of the binary trees. The first NUMVALS nodes become the leaves of the final tree and represent the values of the data bytes being encoded and the special endfile, SPEOF. The remaining nodes become the internal nodes of the final tree. */ struct nd /* shared by unsqueezer */ { unsigned INT weight; /* number of appearances */ INT tdepth; /* length on longest path in tree */ INT lchild, rchild; /* indices to next level */ } node[NUMNODES]; /* use large buffer */ static INT dctreehd; /* index to head of final tree */ /* This is the encoding table: The bit strings have first bit in low bit. Note that counts were scaled so code fits unsigned integer. */ static INT codelen[NUMVALS]; /* number of bits in code */ static unsigned INT code[NUMVALS]; /* code itself, right adjusted */ static unsigned INT tcode; /* temporary code value */ static long valcount[NUMVALS]; /* actual count of times seen */ /* Variables used by encoding process */ static INT curin; /* value currently being encoded */ static INT cbitsrem; /* # of code string bits left */ static unsigned INT ccode; /* current code right justified */ INT init_sq() /* prepare for scanning pass */ { INT i; /* node index */ /* Initialize all nodes to single element binary trees with zero weight and depth. */ for(i=0; i 16 bits long. */ } while(buildenc(0,dctreehd) == ERROR); /* Initialize encoding variables */ cbitsrem = 0; /* force initial read */ curin = 0; /* anything but endfile */ for(i=0; i (ceil-sum)) ++ovflw; sum += node[i].weight; } divisor = ovflw + 1; /* Ensure no non-zero values are lost */ increased = FALSE; for(i=0; i1) for(i=0; i=0; --i) adjust(list,i,length-1); } /* Make a heap from a heap with a new top */ static INT adjust(list,top,bottom) INT list[], top, bottom; { register INT k, temp; INT cmptrees(); k = 2 * top + 1; /* left child of top */ temp = list[top]; /* remember root node of top tree */ if(k<=bottom) { if(k b return true, else return false. Note comparison rules in previous comments. */ static INT cmptrees(a,b) INT a, b; /* root nodes of trees */ { if(node[a].weight > node[b].weight) return TRUE; if(node[a].weight == node[b].weight) if(node[a].tdepth > node[b].tdepth) return TRUE; return FALSE; } /* HUFFMAN ALGORITHM: develops the single element trees into a single binary tree by forming subtrees rooted in interior nodes having weights equal to the sum of weights of all their descendents and having depth counts indicating the depth of their longest paths. When all trees have been formed into a single tree satisfying the heap property (on weight, with depth as a tie breaker) then the binary code assigned to a leaf (value to be encoded) is then the series of left (0) and right (1) paths leading from the root to the leaf. Note that trees are removed from the heaped list by moving the last element over the top element and reheaping the shorter list. */ static INT bld_tree(list,len) INT list[]; INT len; { register INT freenode; /* next free node in tree */ register struct nd *frnp; /* free node pointer */ INT lch, rch; /* temps for left, right children */ INT i; INT maxchar(); /* Initialize index to next available (non-leaf) node. Lower numbered nodes correspond to leaves (data values). */ freenode = NUMVALS; while(len>1) { /* Take from list two btrees with least weight and build an interior node pointing to them. This forms a new tree. */ lch = list[0]; /* This one will be left child */ /* delete top (least) tree from the list of trees */ list[0] = list[--len]; adjust(list,0,len-1); /* Take new top (least) tree. Reuse list slot later */ rch = list[0]; /* This one will be right child */ /* Form new tree from the two least trees using a free node as root. Put the new tree in the list. */ frnp = &node[freenode]; /* address of next free node */ list[0] = freenode++; /* put at top for now */ frnp->lchild = lch; frnp->rchild = rch; frnp->weight = node[lch].weight + node[rch].weight; frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth); /* reheap list to get least tree at top */ adjust(list,0,len-1); } dctreehd = list[0]; /* head of final tree */ } static INT maxchar(a,b) { return a>b ? a : b; } static INT init_enc() { register INT i; /* Initialize encoding table */ for(i=0; i> (16-level)); return (level>16) ? ERROR : NULL; } else { if(l!=NOCHILD) { /* Clear path bit and continue deeper */ tcode &= ~(1 << level); if(buildenc(level+1,l)==ERROR) return ERROR; /* pass back bad statuses */ } if(r!=NOCHILD) { /* Set path bit and continue deeper */ tcode |= 1 << level; if(buildenc(level+1,r)==ERROR) return ERROR; /* pass back bad statuses */ } } return NULL; /* it worked if we reach here */ } static INT put_int(n,f) /* output an integer */ INT n; /* integer to output */ FILE *f; /* file to put it to */ { putc_pak(n&0xff,f); /* first the low byte */ putc_pak(n>>8,f); /* then the high byte */ } /* Write out the header of the compressed file */ static long wrt_head(ob) FILE *ob; { register INT l,r; INT i, k; INT numnodes; /* # of nodes in simplified tree */ /* Write out a simplified decoding tree. Only the interior nodes are written. When a child is a leaf index (representing a data value) it is recoded as -(index + 1) to distinguish it from interior indexes which are recoded as positive indexes in the new tree. Note that this tree will be empty for an empty file. */ numnodes = dctreehd=need) /* if current code is big enough */ { if(need==0) return rbyte; rbyte |= ccode << (8-need); /* take what we need */ ccode >>= need; /* and leave the rest */ cbitsrem -= need; return rbyte & 0xff; } /* We need more than current code */ if(cbitsrem>0) { rbyte |= ccode << (8-need); /* take what there is */ need -= cbitsrem; } /* No more bits in current code string */ if(curin==SPEOF) { /* The end of file token has been encoded. If result byte has data return it and do EOF next time. */ cbitsrem = 0; return (need==8) ? EOF : rbyte + 0; } /* Get an input byte */ if((curin=getc_ncr(ib)) == EOF) curin = SPEOF; /* convenient for encoding */ ccode = code[curin]; /* get the new byte's code */ cbitsrem = codelen[curin]; goto loop; } /* This routine is used to perform the actual squeeze operation. It can only be called after the file has been scanned. It returns the true length of the squeezed entry. */ long file_sq(f,t) /* squeeze a file into an archive */ FILE *f; /* file to squeeze */ FILE *t; /* archive to receive file */ { INT c; /* one byte of squeezed data */ long size; /* size after squeezing */ size = wrt_head(t); /* write out the decode tree */ while((c=gethuff(f))!=EOF) { putc_pak(c,t); size++; } return size; /* report true size */ } SHAR_EOF fi # end of overwriting check if test -f 'arcsvc.c' then echo shar: will not over-write existing file "'arcsvc.c'" else cat << \SHAR_EOF > 'arcsvc.c' static char *RCSid = "$Header: arcsvc.c,v 1.2 86/07/15 07:54:19 turner Exp $"; /* * $Log: arcsvc.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:54:19 turner * * * Revision 1.1 86/06/26 15:00:57 turner * initial version * * */ /* ARC - Archive utility - ARCSVC $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.15), created on $tag( TED_DATE DB =12/17/85) at $tag( TED_TIME DB =15:17:16))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains service routines needed to maintain an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT openarc(chg) /* open archive */ INT chg; /* true to open for changes */ { FILE *fopen(); /* file opener */ if(!(arc=fopen(arcname,"r"))) { if(chg) printf("Creating new archive: %s\n",arcname); else abort("Cannot read archive: %s",arcname); } if(chg) /* if opening for changes */ if(!(new=fopen(newname,"w"))) abort("Cannot create archive copy: %s",newname); } INT closearc(chg) /* close an archive */ INT chg; /* true if archive was changed */ { if(arc) /* if we had an initial archive */ fclose(arc); /* then close it */ if(chg) /* if things have changed */ { setstamp(new,arcdate,arctime);/* archive matches newest file */ fclose(new); /* close the new copy */ if(arc) /* if we had an original archive */ { if(keepbak) /* if a backup is wanted */ { unlink(bakname); /* erase any old copies */ if(rename(arcname,bakname)) abort("Cannot rename %s to %s",arcname,bakname); printf("Keeping backup archive: %s\n",bakname); } else if(unlink(arcname)) abort("Cannot delete old archive: %s",arcname); } if(rename(newname,arcname)) abort("Cannot rename %s to %s",newname,arcname); } } /* CRC computation logic The logic for this method of calculating the CRC 16 bit polynomial is taken from an article by David Schwaderer in the April 1985 issue of PC Tech Journal. */ static INT crctab[] = /* CRC lookup table */ { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; INT addcrc(crc,c) /* update a CRC check */ INT crc; /* running CRC value */ unsigned char c; /* character to add */ { return (((crc>>8)&0x00ff) ^ crctab[(crc^c)&0x00ff]) & 0x0000ffff; } SHAR_EOF fi # end of overwriting check if test -f 'arctst.c' then echo shar: will not over-write existing file "'arctst.c'" else cat << \SHAR_EOF > 'arctst.c' static char *RCSid = "$Header: arctst.c,v 1.1 86/06/26 15:01:02 turner Exp $"; /* * $Log: arctst.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.1 86/06/26 15:01:02 turner * initial version * * */ /* ARC - Archive utility - ARCTST $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.12), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =23:00:40))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to test archive integrity. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT tstarc() /* test integrity of an archive */ { struct heads hdr; /* file header */ long arcsize, ftell(); /* archive size */ openarc(0); /* open archive for reading */ fseek(arc,0L,2); /* move to end of archive */ arcsize = ftell(arc); /* see how big it is */ fseek(arc,0L,0); /* return to top of archive */ printf ("Testing archive...\n"); while(readhdr(&hdr,arc)) { if(ftell(arc)+hdr.size>arcsize) { printf("Archive truncated in file %s\n",hdr.name); nerrs++; break; } else { printf(" File: %-12s ",hdr.name); fflush(stdout); if(unpack(arc,NULL,&hdr)) nerrs++; else printf("okay\n"); } } if(nerrs<1) printf("No errors detected\n"); else if(nerrs==1) printf("One error detected\n"); else printf("%d errors detected\n",nerrs); } SHAR_EOF fi # end of overwriting check if test -f 'arcunp.c' then echo shar: will not over-write existing file "'arcunp.c'" else cat << \SHAR_EOF > 'arcunp.c' static char *RCSid = "$Header: arcunp.c,v 1.2 86/07/15 07:54:25 turner Exp $"; /* * $Log: arcunp.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:54:25 turner * * * Revision 1.1 86/06/26 15:01:04 turner * initial version * * */ /* ARC - Archive utility - ARCUNP $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =3.16), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =23:01:16))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to expand a file when taking it out of an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" /* stuff for repeat unpacking */ #define DLE 0x90 /* repeat byte flag */ static INT state; /* repeat unpacking state */ /* repeat unpacking states */ #define NOHIST 0 /* no relevant history */ #define INREP 1 /* sending a repeated value */ static INT crcval; /* CRC check value */ static long size; /* bytes to read */ INT unpack(f,t,hdr) /* unpack an archive entry */ FILE *f, *t; /* source, destination */ struct heads *hdr; /* pointer to file header data */ { INT c; /* one char of stream */ INT putc_unp(); INT putc_ncr(); INT getc_unp(); /* setups common to all methods */ crcval = 0; /* reset CRC check value */ size = hdr->size; /* set input byte counter */ state = NOHIST; /* initial repeat unpacking state */ setcode(); /* set up for decoding */ /* use whatever method is appropriate */ switch(hdrver) /* choose proper unpack method */ { case 1: /* standard packing */ case 2: while((c=getc_unp(f))!=EOF) putc_unp(c,t); break; case 3: /* non-repeat packing */ while((c=getc_unp(f))!=EOF) putc_ncr(c,t); break; case 4: /* Huffman squeezing */ init_usq(f); while((c=getc_usq(f))!=EOF) putc_ncr(c,t); break; case 5: /* Lempel-Zev compression */ init_ucr(0); while((c=getc_ucr(f))!=EOF) putc_unp(c,t); break; case 6: /* Lempel-Zev plus non-repeat */ init_ucr(0); while((c=getc_ucr(f))!=EOF) putc_ncr(c,t); break; case 7: /* L-Z plus ncr with new hash */ init_ucr(1); while((c=getc_ucr(f))!=EOF) putc_ncr(c,t); break; case 8: /* dynamic Lempel-Zev */ decomp(f,t); break; case 9: sqdecomp(f,t); break; default: /* unknown method */ if(warn) { printf("I don't know how to unpack file %s\n",hdr->name); printf("I think you need a newer version of ARC\n"); nerrs++; } fseek(f,hdr->size,1); /* skip over bad file */ return 1; /* note defective file */ } /* cleanups common to all methods */ if((crcval&0xffff)!=(hdr->crc&0x0000ffff)) { if(warn) { printf("WARNING: File %s fails CRC check\n",hdr->name); nerrs++; } return 1; /* note defective file */ } return 0; /* file is okay */ } /* This routine is used to put bytes in the output file. It also performs various housekeeping functions, such as maintaining the CRC check value. */ static INT putc_unp(c,t) /* output an unpacked byte */ char c; /* byte to output */ FILE *t; /* file to output to */ { crcval = addcrc(crcval,c); /* update the CRC check value */ putc_tst(c,t); } /* This routine is used to decode non-repeat compression. Bytes are passed one at a time in coded format, and are written out uncoded. The data is stored normally, except that runs of more than two characters are represented as: With a special case that a count of zero indicates a DLE as data, not as a repeat marker. */ INT putc_ncr(c,t) /* put NCR coded bytes */ unsigned char c; /* next byte of stream */ FILE *t; /* file to receive data */ { static INT lastc; /* last character seen */ switch(state) /* action depends on our state */ { case NOHIST: /* no previous history */ if(c==DLE) /* if starting a series */ state = INREP; /* then remember it next time */ else putc_unp(lastc=c,t); /* else nothing unusual */ return; case INREP: /* in a repeat */ if(c) /* if count is nonzero */ while(--c) /* then repeatedly ... */ putc_unp(lastc,t); /* ... output the byte */ else putc_unp(DLE,t); /* else output DLE as data */ state = NOHIST; /* back to no history */ return; default: abort("Bad NCR unpacking state (%d)",state); } } /* This routine provides low-level byte input from an archive. This routine MUST be used, as end-of-file is simulated at the end of the archive entry. */ INT getc_unp(f) /* get a byte from an archive */ FILE *f; /* archive file to read */ { if(!size) /* if no data left */ return EOF; /* then pretend end of file */ size--; /* deduct from input counter */ return code(fgetc(f)); /* and return next decoded byte */ } SHAR_EOF fi # end of overwriting check if test -f 'arcusq.c' then echo shar: will not over-write existing file "'arcusq.c'" else cat << \SHAR_EOF > 'arcusq.c' static char *RCSid = "$Header: arcusq.c,v 1.2 86/07/15 07:54:30 turner Exp $"; /* * $Log: arcusq.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:54:30 turner * * * Revision 1.1 86/06/26 15:01:07 turner * initial version * * */ /* ARC - Archive utility - ARCUSQ $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =3.13), created on $tag( TED_DATE DB =01/30/86) at $tag( TED_TIME DB =20:11:42))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to expand a file which was packed using Huffman squeezing. Most of this code is taken from an USQ program by Richard Greenlaw, which was adapted to CI-C86 by Robert J. Beilstein. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" /* stuff for Huffman unsqueezing */ #define ERROR (-1) #define SPEOF 256 /* special endfile token */ #define NUMVALS 257 /* 256 data values plus SPEOF */ EXTERN struct nd /* decoding tree */ { INT child[2]; /* left, right */ } node[NUMVALS]; /* use large buffer */ static INT bpos; /* last bit position read */ static INT curin; /* last byte value read */ static INT numnodes; /* number of nodes in decode tree */ static INT get_int(f) /* get an integer */ FILE *f; /* file to get it from */ { INT i; i = getc_unp(f); return (short)(i | (getc_unp(f)<<8)); } INT init_usq(f) /* initialize Huffman unsqueezing */ FILE *f; /* file containing squeezed data */ { INT i; /* node index */ bpos = 99; /* force initial read */ numnodes = get_int(f); if(numnodes<0 || numnodes>=NUMVALS) abort("File has an invalid decode tree"); /* initialize for possible empty tree (SPEOF only) */ node[0].child[0] = -(SPEOF + 1); node[0].child[1] = -(SPEOF + 1); for(i=0; i=0; ) /* work down(up?) from root */ { if(++bpos>7) { if((curin=getc_unp(f)) == ERROR) return(ERROR); bpos = 0; /* move a level deeper in tree */ i = node[i].child[1&curin]; } else i = node[i].child[1 & (curin >>= 1)]; } /* decode fake node index to original data value */ i = -(i + 1); /* decode special endfile token to normal EOF */ i = (i==SPEOF) ? EOF : i; return i; } SHAR_EOF fi # end of overwriting check if test -f 'arcvax.c' then echo shar: will not over-write existing file "'arcvax.c'" else cat << \SHAR_EOF > 'arcvax.c' #include #include #include "arc.h" /* * Misc routines to emulate IBM MSDOS functions under BSD * * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) */ INT upper(string) char *string; { char *p; for(p = string; *p != NULL; p++) if(islower(*p)) *p = toupper(*p); } char *setmem(dest,size,c) char *dest,c; INT size; { INT i; for(i = 0; i < size; dest[i] = c, i++); return(&dest[0]); } char *gcdir(dirname) char *dirname; { if(dirname == NULL || strlen(dirname) == 0) dirname = (char *)malloc(1024); getwd(dirname); } INT abort(s,arg1,arg2,arg3) char *s; { fprintf(stderr,"ARC: "); fprintf(stderr,s,arg1,arg2,arg3); fprintf(stderr,"\n"); #if BSD perror("BSD"); #endif exit(1); } SHAR_EOF fi # end of overwriting check if test -f 'squash.c' then echo shar: will not over-write existing file "'squash.c'" else cat << \SHAR_EOF > 'squash.c' /* ARC - Archive utility - ARCLZW $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =1.88), created on $tag( TED_DATE DB =01/20/86) at $tag( TED_TIME DB =16:47:04))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to implement Lempel-Zev data compression, which calls for building a coding table on the fly. This form of compression is especially good for encoding files which contain repeated strings, and can often give dramatic improvements over traditional Huffman SQueezing. Language: Computer Innovations Optimizing C86 Programming notes: In this section I am drawing heavily on the COMPRESS program from UNIX. The basic method is taken from "A Technique for High Performance Data Compression", Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. Also see "Knuth's Fundamental Algorithms", Donald Knuth, Vol 3, Section 6.4. As best as I can tell, this method works by tracing down a hash table of code strings where each entry has the property: if is in the table then is in the table. */ #include #include "arc.h" /* definitions for older style crunching */ #define FALSE 0 #define TRUE !FALSE #define TABSIZE 8192 #define NO_PRED 0xFFFF #define EMPTY 0xFFFF #define NOT_FND 0xFFFF static unsigned INT inbuf; /* partial input code storage */ static INT sp; /* current stack pointer */ static struct entry /* string table entry format */ { char used; /* true when this entry is in use */ unsigned INT next; /* ptr to next in collision list */ unsigned INT predecessor; /* code for preceeding string */ unsigned char follower; /* char following string */ } string_tab[TABSIZE]; /* the code string table */ /* definitions for the new dynamic Lempel-Zev crunching */ #define BITS 13 /* maximum bits per code */ #define HSIZE 10007 /* 80% occupancy */ #define INIT_BITS 9 /* initial number of bits/code */ static INT n_bits; /* number of bits/code */ static INT maxcode; /* maximum code, given n_bits */ #define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */ static INT maxcodemax = 1 << BITS; /* largest possible code (+1) */ static unsigned char buf[BITS]; /* input/output buffer */ static unsigned char lmask[9] = /* left side masks */ { 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 }; static unsigned char rmask[9] = /* right side masks */ { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; static INT offset; /* byte offset for code output */ static long in_count; /* length of input */ static long bytes_out; /* length of compressed output */ static unsigned INT ent; /* To save much memory (which we badly need at this point), we overlay * the table used by the previous version of Lempel-Zev with those used * by the new version. Since no two of these routines will be used * together, we can safely do this. Note that the tables used for Huffman * squeezing may NOT overlay these, since squeezing and crunching are done * in parallel. */ #if MSODS static long *htab = string_tab; /* hash code table (crunch) */ #endif #if BSD | ST static long htab[HSIZE]; /* hash code table (crunch) */ #endif static unsigned INT codetab[HSIZE]; /* string code table (crunch) */ static unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */ #if MSDOS static unsigned char *suffix = string_tab; /* suffix table (uncrunch) */ #endif #if BSD | ST static unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */ #endif static INT free_ent; /* first unused entry */ static INT firstcmp; /* true at start of compression */ static unsigned char stack[HSIZE]; /* local push/pop stack */ /* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */ static INT clear_flg; static long ratio; #define CHECK_GAP 10000 /* ratio check interval */ static long checkpoint; static INT upd_tab(); /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */ #define CLEAR 256 /* table clear output code */ static INT cl_block(t) /* table clear for block compress */ FILE *t; /* our output file */ { long rat; INT putcode(); checkpoint = in_count + CHECK_GAP; if(in_count > 0x007fffff) /* shift will overflow */ { rat = bytes_out >> 8; if(rat == 0) /* Don't divide by zero */ rat = 0x7fffffff; else rat = in_count / rat; } else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */ if(rat > ratio) ratio = rat; else { ratio = 0; setmem (htab,HSIZE*sizeof(long),0xff); free_ent = FIRST; clear_flg = 1; putcode(CLEAR,t); } } /***************************************************************** * * Output a given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). When the buffer fills up empty it and start over. */ static INT putcode(code,t) /* output a code */ INT code; /* code to output */ FILE *t; /* where to put it */ { INT r_off = offset; /* right offset */ INT bits = n_bits; /* bits to go */ unsigned char *bp = buf; /* buffer pointer */ INT n; /* index */ if(code >= 0) /* if a real code */ { /* * Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* * Since code is always >= 8 bits, only need to mask the first * hunk on the left. */ *bp = (*bp&rmask[r_off]) | (code<>= (8 - r_off); /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if(bits >= 8) { *bp++ = code; code >>= 8; bits -= 8; } /* Last bits. */ if(bits) *bp = code; offset += n_bits; if(offset == (n_bits << 3)) { bp = buf; bits = n_bits; bytes_out += bits; do putc_pak(*bp++,t); while(--bits); offset = 0; } /* * If the next entry is going to be too big for the code size, * then increase it, if possible. */ if(free_ent>maxcode || clear_flg>0) { /* * Write the whole buffer, because the input side won't * discover the size increase until after it has read it. */ if(offset > 0) { bp = buf; /* reset pointer for writing */ bytes_out += n = n_bits; while(n--) putc_pak(*bp++,t); } offset = 0; if(clear_flg) /* reset if clearing */ { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } else /* else use more bits */ { n_bits++; if(n_bits == BITS) maxcode = maxcodemax; else maxcode = MAXCODE(n_bits); } } } else /* dump the buffer on EOF */ { bytes_out += n = (offset+7) / 8; if(offset > 0) while(n--) putc_pak(*bp++,t); offset = 0; } } /***************************************************************** * * Read one code from the standard input. If EOF, return -1. * Inputs: * cmpin * Outputs: * code or -1 is returned. */ static INT getcode(f) /* get a code */ FILE *f; /* file to get from */ { INT code; static INT offset = 0, size = 0; INT r_off, bits; unsigned char *bp = buf; if(clear_flg > 0 || offset >= size || free_ent > maxcode) { /* * If the next entry will be too big for the current code * size, then we must increase the size. This implies reading * a new buffer full, too. */ if(free_ent > maxcode) { n_bits++; if(n_bits == BITS) maxcode = maxcodemax; /* won't get any bigger now */ else maxcode = MAXCODE(n_bits); } if(clear_flg > 0) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } for(size=0; size> 3); r_off &= 7; /* Get first part (low order bits) */ code = (*bp++ >> r_off); bits -= 8 - r_off; r_off = 8 - r_off; /* now, offset into code word */ /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if(bits >= 8) { code |= *bp++ << r_off; r_off += 8; bits -= 8; } /* high order bits. */ code |= (*bp & rmask[bits]) << r_off; offset += n_bits; return code; } /* * compress a file * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. Also do block compression with * an adaptive reset, where the code table is cleared when the compression * ratio decreases, but after the table fills. The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor. */ INT sqinit_cm(f,t) /* initialize for compression */ FILE *f; /* file we will be compressing */ FILE *t; /* where we will put it */ { offset = 0; bytes_out = 1; clear_flg = 0; ratio = 0; in_count = 1; checkpoint = CHECK_GAP; maxcode = MAXCODE(n_bits = INIT_BITS); free_ent = FIRST; setmem(htab,HSIZE*sizeof(long),0xff); n_bits = INIT_BITS; /* set starting code size */ firstcmp = 1; /* next byte will be first */ } INT sqputc_cm(c,t) /* compress a character */ unsigned char c; /* character to compress */ FILE *t; /* where to put it */ { static long fcode; static INT hshift; INT i; INT disp; if(firstcmp) /* special case for first byte */ { ent = c; /* remember first byte */ hshift = 0; for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L) hshift++; hshift = 8 - hshift; /* set hash code range bund */ firstcmp = 0; /* no longer first */ return; } in_count++; fcode =(long)(((long)c << BITS)+ent); i = (c< 0) goto probe; nomatch: putcode(ent,t); ent = c; if(free_ent < maxcodemax) { codetab[i] = free_ent++; /* code -> hashtable */ htab[i] = fcode; } else if((long)in_count >= checkpoint) cl_block(t); } long sqpred_cm(t) /* finish compressing a file */ FILE *t; /* where to put it */ { putcode(ent,t); /* put out the final code */ putcode(-1,t); /* tell output we are done */ return bytes_out; /* say how big it got */ } /* * Decompress a file. This routine adapts to the codes in the file * building the string table on-the-fly; requiring no table to be stored * in the compressed file. The tables used herein are shared with those of * the compress() routine. See the definitions above. */ INT sqdecomp(f,t) /* decompress a file */ FILE *f; /* file to read codes from */ FILE *t; /* file to write text to */ { unsigned char *stackp; INT finchar; INT code, oldcode, incode; n_bits = INIT_BITS; /* set starting code size */ clear_flg = 0; /* * As above, initialize the first 256 entries in the table. */ maxcode = MAXCODE(n_bits=INIT_BITS); for(code = 255; code >= 0; code--) { prefix[code] = 0; suffix[code] = (unsigned char)code; } free_ent = FIRST; finchar = oldcode = getcode(f); if(oldcode == -1) /* EOF already? */ return; /* Get out of here */ putc_ncr((char)finchar,t); /* first code must be 8 bits=char */ stackp = stack; while((code = getcode(f))> -1) { if(code==CLEAR) { for(code = 255; code >= 0; code--) prefix[code] = 0; clear_flg = 1; free_ent = FIRST - 1; if((code=getcode(f))==-1)/* O, untimely death! */ break; } incode = code; /* * Special case for KwKwK string. */ if(code >= free_ent) { *stackp++ = finchar; code = oldcode; } /* * Generate output characters in reverse order */ while(code >= 256) { *stackp++ = suffix[code]; code = prefix[code]; } *stackp++ = finchar = suffix[code]; /* * And put them out in forward order */ do putc_ncr(*--stackp,t); while(stackp > stack); /* * Generate the new entry. */ if((code=free_ent) < maxcodemax) { prefix[code] = (unsigned short)oldcode; suffix[code] = finchar; free_ent = code+1; } /* * Remember previous code. */ oldcode = incode; } } /************************************************************************* * Please note how much trouble it can be to maintain upwards * * compatibility. All that follows is for the sole purpose of unpacking * * files which were packed using an older method. * *************************************************************************/ /* The h() pointer points to the routine to use for calculating a hash value. It is set in the init routines to point to either of oldh() or newh(). oldh() calculates a hash value by taking the middle twelve bits of the square of the key. newh() works somewhat differently, and was tried because it makes ARC about 23% faster. This approach was abandoned because dynamic Lempel-Zev (above) works as well, and packs smaller also. However, inadvertent release of a developmental copy forces us to leave this in. */ static unsigned INT (*h)(); /* pointer to hash function */ static unsigned INT oldh(pred,foll) /* old hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { long local; /* local hash value */ local = (pred + foll) | 0x0800; /* create the hash key */ local *= local; /* square it */ return (local >> 6) & 0x0FFF; /* return the middle 12 bits */ } static unsigned INT newh(pred,foll) /* new hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { return ((pred+foll)*15073)&0xFFF; /* faster hash */ } /* The eolist() function is used to trace down a list of entries with duplicate keys until the last duplicate is found. */ static unsigned INT eolist(index) /* find last duplicate */ unsigned INT index; { INT temp; while(temp=string_tab[index].next) /* while more duplicates */ index = temp; return index; } /* The hash() routine is used to find a spot in the hash table for a new entry. It performs a "hash and linear probe" lookup, using h() to calculate the starting hash value and eolist() to perform the linear probe. This routine DOES NOT detect a table full condition. That MUST be checked for elsewhere. */ static unsigned INT hash(pred,foll) /* find spot in the string table */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* char following string */ { unsigned INT local, tempnext; /* scratch storage */ struct entry *ep; /* allows faster table handling */ local = (*h)(pred,foll); /* get initial hash value */ if(!string_tab[local].used) /* if that spot is free */ return local; /* then that's all we need */ else /* else a collision has occured */ { local = eolist(local); /* move to last duplicate */ /* We must find an empty spot. We start looking 101 places down the table from the last duplicate. */ tempnext = (local+101) & 0x0FFF; ep = &string_tab[tempnext]; /* initialize pointer */ while(ep->used) /* while empty spot not found */ { if(++tempnext==TABSIZE) /* if we are at the end */ { tempnext = 0; /* wrap to beginning of table*/ ep = string_tab; } else ++ep; /* point to next element in table */ } /* local still has the pointer to the last duplicate, while tempnext has the pointer to the spot we found. We use this to maintain the chain of pointers to duplicates. */ string_tab[local].next = tempnext; return tempnext; } } /* The unhash() function is used to search the hash table for a given key. Like hash(), it performs a hash and linear probe search. It returns either the number of the entry (if found) or NOT_FND (if not found). */ static unsigned INT unhash(pred,foll) /* search string table for a key */ unsigned INT pred; /* code of preceeding string */ unsigned char foll; /* character following string */ { unsigned INT local, offset; /* scratch storage */ struct entry *ep; /* this speeds up access */ local = (*h)(pred,foll); /* initial hash */ while(1) { ep = &string_tab[local]; /* speed up table access */ if((ep->predecessor==pred) && (ep->follower==foll)) return local; /* we have a match */ if(!ep->next) /* if no more duplicates */ return NOT_FND; /* then key is not listed */ local = ep->next; /* move on to next duplicate */ } } /* The init_tab() routine is used to initialize our hash table. You realize, of course, that "initialize" is a complete misnomer. */ static INT init_tab() /* set ground state in hash table */ { unsigned INT i; /* table index */ setmem((char *)string_tab,sizeof(string_tab),0); for(i=0; i<256; i++) /* list all single byte strings */ upd_tab(NO_PRED,i); inbuf = EMPTY; /* nothing is in our buffer */ } /* The upd_tab routine is used to add a new entry to the string table. As previously stated, no checks are made to ensure that the table has any room. This must be done elsewhere. */ static INT upd_tab(pred,foll) /* add an entry to the table */ unsigned INT pred; /* code for preceeding string */ unsigned INT foll; /* character which follows string */ { struct entry *ep; /* pointer to current entry */ /* calculate offset just once */ ep = &string_tab[hash(pred,foll)]; ep->used = TRUE; /* this spot is now in use */ ep->next = 0; /* no duplicates after this yet */ ep->predecessor = pred; /* note code of preceeding string */ ep->follower = foll; /* note char after string */ } /* This algorithm encoded a file into twelve bit strings (three nybbles). The gocode() routine is used to read these strings a byte (or two) at a time. */ static INT gocode(fd) /* read in a twelve bit code */ FILE *fd; /* file to get code from */ { unsigned INT localbuf, returnval; if(inbuf==EMPTY) /* if on a code boundary */ { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */ return EOF; /* pass back end of file status */ localbuf &= 0xFF; /* mask down to true byte value */ if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */ return EOF; /* this should never happen */ inbuf &= 0xFF; /* mask down to true byte value */ returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F); inbuf &= 0x000F; /* leave partial code pending */ } else /* buffer contains first nybble */ { if((localbuf=getc_unp(fd))==EOF) return EOF; localbuf &= 0xFF; returnval = localbuf + ((inbuf<<8)&0xF00); inbuf = EMPTY; /* note no hanging nybbles */ } return returnval; /* pass back assembled code */ } static INT push(c) /* push char onto stack */ INT c; /* character to push */ { stack[sp] = ((char) c); /* coerce integer into a char */ if(++sp >= TABSIZE) abort("Stack overflow\n"); } static INT pop() /* pop character from stack */ { if(sp>0) return ((INT) stack[--sp]); /* leave ptr at next empty slot */ else return EMPTY; } /***** LEMPEL-ZEV DECOMPRESSION *****/ static INT code_count; /* needed to detect table full */ static unsigned INT code; /* where we are so far */ static INT firstc; /* true only on first character */ INT Xinit_ucr(new) /* get set for uncrunching */ INT new; /* true to use new hash function */ { if(new) /* set proper hash function */ h = newh; else h = oldh; sp = 0; /* clear out the stack */ init_tab(); /* set up atomic code definitions */ code_count = TABSIZE - 256; /* note space left in table */ firstc = 1; /* true only on first code */ } INT Xgetc_ucr(f) /* get next uncrunched byte */ FILE *f; /* file containing crunched data */ { unsigned INT c; /* a character of input */ INT code, newcode; static INT oldcode, finchar; struct entry *ep; /* allows faster table handling */ if(firstc) /* first code is always known */ { firstc = FALSE; /* but next will not be first */ oldcode = gocode(f); return finchar = string_tab[oldcode].follower; } if(!sp) /* if stack is empty */ { if((code=newcode=gocode(f))==EOF) return EOF; ep = &string_tab[code]; /* initialize pointer */ if(!ep->used) /* if code isn't known */ { code = oldcode; ep = &string_tab[code]; /* re-initialize pointer */ push(finchar); } while(ep->predecessor!=NO_PRED) { push(ep->follower); /* decode string backwards */ code = ep->predecessor; ep = &string_tab[code]; } push(finchar=ep->follower); /* save first character also */ /* The above loop will terminate, one way or another, with string_tab[code].follower equal to the first character in the string. */ if(code_count) /* if room left in string table */ { upd_tab(oldcode,finchar); --code_count; } oldcode = newcode; } return pop(); /* return saved character */ } SHAR_EOF fi # end of overwriting check # End of shell archive exit 0