Relay-Version: version B 2.10 5/3/83; site utzoo.UUCP Path: utzoo!mnetor!uunet!hpda!hpsmtc1!swh From: swh@hpsmtc1.HP.COM (Steve Harrold) Newsgroups: comp.sources.bugs Subject: Re: squasing arc source sliced Message-ID: <14310001@hpsmtc1.HP.COM> Date: Tue, 21-Jul-87 12:30:37 EDT Article-I.D.: hpsmtc1.14310001 Posted: Tue Jul 21 12:30:37 1987 Date-Received: Thu, 23-Jul-87 07:28:01 EDT References: <100@auscso.UUCP> Organization: Hewlett Packard, Cupertino Lines: 2810 Relay-Version: version Notes 2.7.5 (840 Contrib) 87/2/5; site hpsmtc1.HP.COM From: ddl@husc6.UUCP (Dan Lanciani) Date: Thu, 16 Jul 87 00:08:57 GMT Date-Received: Thu, 16 Jul 87 10:33:47 GMT Subject: arc that UnSquashes 1/2 Message-ID: <2543@husc6.UUCP> Organization: Harvard University Computer Services Path: hpsmtc1!hpda!uunet!seismo!husc6!ddl Newsgroups: alt.sources Keywords: arc Squashed quick hack Lines: 2798 Here is a version of unix ARC that can deal with Squashed archive entries. It does not (yet) make such entries. This is a quick hack for those who want to be able to unpack current pkarced postings on their unix machine. I made the changes in the most obvious and simple way and did only minimal testing. No flames please; a better version will follow. I would have posted changes only, but there are too many versions floating around. Combine parts 1 & 2 and unshar. Dan Lanciani ddl@harvard.* #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. 
Execute the file with /bin/sh (not csh) to create the files: # Makefile # arc.c # arc.h # arcadd.c # arccode.c # arccvt.c # arcdel.c # arcdir.c # arcdos.c # arcext.c # arcio.c # arclst.c # arclzw.c # arcm.h # arcmatch.c # arcmisc.c # arcpack.c # arcrun.c # arcs.h # arcsq.c # arcsvc.c # arctst.c # arcunp.c # arcusq.c # arcvax.c # squash.c # This archive created: Wed Jul 15 19:55:51 1987 export PATH; PATH=/bin:$PATH if test -f 'Makefile' then echo shar: will not over-write existing file "'Makefile'" else cat << \SHAR_EOF > 'Makefile' # # Makefile for Hack-attack 1.3 # VAX 11/780 BSD4.2 "ARC" utility # CFLAGS = -O OBJS = arc.o arcadd.o arccode.o arccvt.o arcdel.o arcdir.o \ arcdos.o arcext.o arcio.o arclst.o arclzw.o arcmatch.o arcpack.o arcrun.o \ arcsq.o arcsvc.o arctst.o arcunp.o arcusq.o arcvax.o arcmisc.o squash.o SRCS = arc.c arcadd.c arccode.c arccvt.c arcdel.c arcdir.c \ arcdos.c arcext.c arcio.c arclst.c arclzw.c arcmatch.c arcpack.c arcrun.c \ arcs.c arcsq.c arcsvc.c arctst.c arcunp.c arcusq.c arcvax.c arcmisc.c squash.c arc: ${OBJS} cc -O -o arc ${OBJS} arc.o: arc.h cc -O -c arc.c arcadd.o: arc.h cc -O -c arcadd.c arccode.o: arc.h cc -O -c arccode.c arccvt.o: arc.h cc -O -c arccvt.c arcdel.o: arc.h cc -O -c arcdel.c arcdir.o: arc.h cc -O -c arcdir.c arcdos.o: arc.h cc -O -c arcdos.c arcext.o: arc.h cc -O -c arcext.c arcio.o: arc.h cc -O -c arcio.c arclst.o: arc.h cc -O -c arclst.c arclzw.o: arc.h cc -O -c arclzw.c arcmatch.o: arc.h cc -O -c arcmatch.c arcmisc.o: arc.h cc -O -c arcmisc.c arcpack.o: arc.h cc -O -c arcpack.c arcrun.o: arc.h cc -O -c arcrun.c arcs.o: arcm.h cc -O -c arcs.c arcsq.o: arc.h cc -O -c arcsq.c arcsvc.o: arc.h cc -O -c arcsvc.c arctst.o: arc.h cc -O -c arctst.c arcunp.o: arc.h cc -O -c arcunp.c arcusq.o: arc.h cc -O -c arcusq.c arcvax.o: arc.h cc -O -c arcvax.c squash.o: arc.h cc -O -c squash.c arc.h: arcm.h arcs.h touch arc.h SHAR_EOF fi # end of overwriting check if test -f 'arc.c' then echo shar: will not over-write existing file 
"'arc.c'" else cat << \SHAR_EOF > 'arc.c' static char *RCSid = "$Header: arc.c,v 1.2 86/07/15 07:52:04 turner Exp $"; /* * $Log: arc.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:04 turner * first working version for the vax * * Revision 1.1 86/06/26 14:59:15 turner * initial version * * */ /* ARC - Archive utility $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =5.12), created on $tag( TED_DATE DB =02/05/86) at $tag( TED_TIME DB =22:22:01))# $undefine(tag)# $version (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This program is a general archive utility, and is used to maintain an archive of files. An "archive" is a single file that combines many files, reducing storage space and allowing multiple files to be handled as one. Instructions: Run this program with no arguments for complete instructions. Programming notes: ARC Version 2 differs from version 1 in that archive entries are automatically compressed when they are added to the archive, making a separate compression step unecessary. 
The nature of the compression is indicated by the header version number placed in each archive entry, as follows: 1 = Old style, no compression 2 = New style, no compression 3 = Compression of repeated characters only 4 = Compression of repeated characters plus Huffman SQueezing 5 = Lempel-Zev packing of repeated strings (old style) 6 = Lempel-Zev packing of repeated strings (new style) 7 = Lempel-Zev Williams packing with improved hash function 8 = Dynamic Lempel-Zev packing with adaptive reset Type 5, Lempel-Zev packing, was added as of version 4.0 Type 6 is Lempel-Zev packing where runs of repeated characters have been collapsed, and was added as of version 4.1 Type 7 is a variation of Lempel-Zev using a different hash function which yields speed improvements of 20-25%, and was added as of version 4.6 Type 8 is a different implementation of Lempel-Zev, using a variable code size and an adaptive block reset, and was added as of version 5.0 Version 4.3 introduced a temporary file for holding the result of the first crunch pass, thus speeding up crunching. Version 4.4 introduced the ARCTEMP environment string, so that the temporary crunch file may be placed on a ramdisk. Also added was the distinction between Adding a file in all cases, and Updating a file only if the disk file is newer than the corresponding archive entry. The compression method to use is determined when the file is added, based on whichever method yields the smallest result. 
Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" main(num,arg) /* system entry point */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { char opt = 0; /* selected action */ char *a; /* option pointer */ char *makefnam(); /* filename fixup routine */ char *upper(); /* case conversion routine */ char *index(); /* string index utility */ char *envfind(); /* environment searcher */ INT n; /* argument index */ char *arctemp2; long getpid(); warn = 1; note = 1; if(num<3) { /* printf("ARC - Archive utility, $version\n"); printf("(C) COPYRIGHT 1985,86 by System Enhancement Associates;"); printf(" ALL RIGHTS RESERVED\n\n"); printf("Please refer all inquiries to:\n\n"); printf(" System Enhancement Associates\n"); printf(" 21 New Street, Wayne NJ 07470\n\n"); printf("You may copy and distribute this program freely,"); printf(" provided that:\n"); printf(" 1) No fee is charged for such copying and"); printf(" distribution, and\n"); printf(" 2) It is distributed ONLY in its original,"); printf(" unmodified state.\n\n"); printf("If you like this program, and find it of use, then your"); printf(" contribution will\n"); printf("be appreciated. You may not use this product in a"); printf(" commercial environment\n"); printf("or a governmental organization without paying a license"); printf(" fee of $35. Site\n"); printf("licenses and commercial distribution licenses are"); printf(" available. A program\n"); printf("disk and printed documentation are available for $50.\n"); printf("\nIf you fail to abide by the terms of this license, "); printf(" then your conscience\n"); printf("will haunt you for the rest of your life.\n\n"); */ printf("Usage: ARC {amufdxerplvtc}[bswn][g]"); printf(" [ . . 
.]\n"); printf("Where: a = add files to archive\n"); printf(" m = move files to archive\n"); printf(" u = update files in archive\n"); printf(" f = freshen files in archive\n"); printf(" d = delete files from archive\n"); printf(" x,e = extract files from archive\n"); printf(" r = run files from archive\n"); printf(" p = copy files from archive to"); printf(" standard output\n"); printf(" l = list files in archive\n"); printf(" v = verbose listing of files in archive\n"); printf(" t = test archive integrity\n"); printf(" c = convert entry to new packing method\n"); printf(" b = retain backup copy of archive\n"); printf(" s = suppress compression (store only)\n"); printf(" w = suppress warning messages\n"); printf(" n = suppress notes and comments\n"); printf(" g = Encrypt/decrypt archive entry\n\n"); /* printf("\nPlease refer to the program documentation for"); */ /* printf(" complete instructions.\n"); */ return 1; } /* see where temp files go */ /* use process id to "enhance uniquity" of temp filenames */ /* (avoids multi-user or background foolishness) */ if(!(arctemp2 = envfind("ARCTEMP"))) arctemp2 = envfind("TEMP"); if (arctemp2) sprintf(arctemp,"%s.Arc%ld",arctemp2,getpid()); else sprintf(arctemp,".Arc%ld",getpid()); #if MSDOS /* avoid any case problems with arguments */ for(n=1; n 'arc.h' /* * $Header: arc.h,v 1.2 86/07/15 07:52:34 turner Exp $ */ /* * $Log: arc.h,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:34 turner * * * Revision 1.1 86/06/26 15:01:23 turner * initial version * * */ #define ST 0 /* Atari 520ST or 1040 */ #define BSD 1 /* BSD4.2 on a vax */ #define MSDOS 0 /* MSDOS on an IBM PC or Wannabe */ #if ST #define EXTERN #define INT short #endif #if BSD #include /* for isupper etc. 
*/ #define EXTERN #define INT short #define envfind getenv #endif #if MSDOS #define EXTERN extern #define INT int #endif /* * added macro def's in C format 6/26/86 jmt */ #include "arcm.h" /* ARC - Archive utility - ARC Header Version 2.14, created on 02/03/86 at 22:48:29 (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This is the header file for the ARC archive utility. It defines global parameters and the references to the external data. Language: Computer Innovations Optimizing C86 */ #include "arcs.h" EXTERN INT keepbak; /* true if saving the old archive */ EXTERN INT warn; /* true to print warnings */ EXTERN INT note; /* true to print comments */ EXTERN INT bose; /* true to be verbose */ EXTERN INT nocomp; /* true to suppress compression */ EXTERN INT kludge; /* kludge flag */ EXTERN char arctemp[STRLEN]; /* arc temp file prefix */ EXTERN char *password; /* encryption password pointer */ EXTERN INT nerrs; /* number of errors encountered */ EXTERN char hdrver; /* header version */ EXTERN FILE *arc; /* the old archive */ EXTERN FILE *new; /* the new archive */ EXTERN char arcname[STRLEN]; /* storage for archive name */ EXTERN char bakname[STRLEN]; /* storage for backup copy name */ EXTERN char newname[STRLEN]; /* storage for new archive name */ EXTERN unsigned INT arcdate; /* archive date stamp */ EXTERN unsigned INT arctime; /* archive time stamp */ SHAR_EOF fi # end of overwriting check if test -f 'arcadd.c' then echo shar: will not over-write existing file "'arcadd.c'" else cat << \SHAR_EOF > 'arcadd.c' static char *RCSid = "$Header: arcadd.c,v 1.2 86/07/15 07:52:37 turner Exp $"; /* * $Log: arcadd.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:37 turner * * * Revision 1.1 86/06/26 14:59:37 turner * initial version * * */ /* ARC - Archive utility - ARCADD 
$define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =3.39), created on $tag( TED_DATE DB =02/05/86) at $tag( TED_TIME DB =22:21:53))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to add files to an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT addarc(num,arg,move,update,fresh) /* add files to archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ INT move; /* true if moving file */ INT update; /* true if updating */ INT fresh; /* true if freshening */ { char *d, *dir(); /* directory junk */ char *NameList; /* Any pointer. Used to pass file names around */ char buf[STRLEN]; /* pathname buffer */ char **path = NULL; /* pointer to pointers to paths */ char **name = NULL; /* pointer to pointers to names */ INT nfiles = 0; /* number of files in lists */ INT notemp; /* true until a template works */ INT nowork = 1; /* true until files are added */ char *i, *rindex(); /* string indexing junk */ char *malloc(), *realloc(); /* memory allocators */ INT m, n; /* indices */ unsigned INT coreleft(); /* remaining memory reporter */ INT addbunch(); if(num<1) /* if no files named */ { num = 1; /* then fake one */ arg[0] = "*.*"; /* add everything */ } for(n=0; n0) { d = path[n]; path[n] = path[m]; path[m] = d; d = name[n]; name[n] = name[m]; name[m] = d; } } } for(n=0; n=0) break; /* found our spot */ writehdr(&ohdr,new); /* entry preceeds update; keep it */ filecopy(arc,new,ohdr.size); starts = ftell(arc); /* now where are we? 
*/ } if(upd) /* if an update */ { if(note) { printf("Updating file: %-12s ",name); fflush(stdout);} fseek(arc,ohdr.size,1); } else if(fresh) /* else if freshening */ { fseek(arc,starts,0); /* then do not add files */ fclose(f); return; } else /* else adding a new file */ { if(note) { printf("Adding file: %-12s ",name); fflush(stdout);} fseek(arc,starts,0); /* reset for next time */ } } else /* no existing archive */ { if(fresh) /* cannot freshen nothing */ { fclose(f); return; } else if(note) /* else adding a file */ { printf("Adding file: %-12s ",name); fflush(stdout);} } starts = ftell(new); /* note where header goes */ hdrver = ARCVER; /* anything but end marker */ writehdr(&nhdr,new); /* write out header skeleton */ pack(f,new,&nhdr); /* pack file into archive */ fseek(new,starts,0); /* move back to header skeleton */ writehdr(&nhdr,new); /* write out real header */ fseek(new,nhdr.size,1); /* skip over data to next header */ fclose(f); /* all done with the file */ } SHAR_EOF fi # end of overwriting check if test -f 'arccode.c' then echo shar: will not over-write existing file "'arccode.c'" else cat << \SHAR_EOF > 'arccode.c' static char *RCSid = "$Header: arccode.c,v 1.1 86/06/26 14:59:53 turner Exp $"; /* * $Log: arccode.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.1 86/06/26 14:59:53 turner * initial version * * */ /* ARC - Archive utility - ARCCODE $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =1.02), created on $tag( TED_DATE DB =01/20/86) at $tag( TED_TIME DB =13:33:35))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to encrypt and decrypt data in an archive. 
The encryption method is nothing fancy, being just a routine XOR, but it is used on the packed data, and uses a variable length key. The end result is something that is in theory crackable, but I'd hate to try it. It should be more than sufficient for casual use. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" static char *p; /* password pointer */ INT setcode() /* get set for encoding/decoding */ { p = password; /* reset password pointer */ } INT code(c) /* encode some character */ INT c; /* character to encode */ { if(p) /* if password is in use */ { if(!*p) /* if we reached the end */ p = password; /* then wrap back to the start */ return c^*p++; /* very simple here */ } else return c; /* else no encryption */ } SHAR_EOF fi # end of overwriting check if test -f 'arccvt.c' then echo shar: will not over-write existing file "'arccvt.c'" else cat << \SHAR_EOF > 'arccvt.c' static char *RCSid = "$Header: arccvt.c,v 1.2 86/07/15 07:52:46 turner Exp $"; /* * $Log: arccvt.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:46 turner * * * Revision 1.1 86/06/26 14:59:56 turner * initial version * * */ /* ARC - Archive utility - ARCCVT $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =1.16), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:53:02))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to convert archives to use newer file storage methods. 
Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" static char tempname[STRLEN]; /* temp file name */ INT cvtarc(num,arg) /* convert archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { struct heads hdr; /* file header */ INT cvt; /* true to convert current file */ INT did[MAXARG]; /* true when argument was used */ INT n; /* index */ char *makefnam(); /* filename fixer */ FILE *fopen(); /* file opener */ INT cvtfile(); if(arctemp) /* use temp area if specified */ sprintf(tempname,"%s.CVT",arctemp); else makefnam("$ARCTEMP.CVT",arcname,tempname); openarc(1); /* open archive for changes */ for(n=0; nname); fflush(stdout);} unpack(arc,tmp,hdr); /* unpack the entry */ fseek(tmp,0L,0); /* reset temp for reading */ starts = ftell(new); /* note where header goes */ hdrver = ARCVER; /* anything but end marker */ writehdr(hdr,new); /* write out header skeleton */ pack(tmp,new,hdr); /* pack file into archive */ fseek(new,starts,0); /* move back to header skeleton */ writehdr(hdr,new); /* write out real header */ fseek(new,hdr->size,1); /* skip over data to next header */ fclose(tmp); /* all done with the file */ if(unlink(tempname) && warn) { printf("Cannot unsave %s\n",tempname); nerrs++; } } SHAR_EOF fi # end of overwriting check if test -f 'arcdel.c' then echo shar: will not over-write existing file "'arcdel.c'" else cat << \SHAR_EOF > 'arcdel.c' static char *RCSid = "$Header: arcdel.c,v 1.2 86/07/15 07:52:53 turner Exp $"; /* * $Log: arcdel.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:53 turner * * * Revision 1.1 86/06/26 15:00:04 turner * initial version * * */ /* ARC - Archive utility - ARCDEL $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.09), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:53:27))# 
$undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to delete entries in an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT delarc(num,arg) /* remove files from archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { struct heads hdr; /* header data */ INT del; /* true to delete a file */ INT did[MAXARG]; /* true when argument used */ INT n; /* index */ if(!num) /* she must specify which */ abort("You must tell me which files to delete!"); for(n=0; n 'arcdir.c' static char *RCSid = "$Header: arcdir.c,v 1.2 86/07/15 07:52:56 turner Exp $"; /* * $Log: arcdir.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:52:56 turner * * * Revision 1.1 86/06/26 15:00:12 turner * initial version * * */ /* ARC - Archive utility - ARCDIR */ #include #include "arc.h" #if MSDOS #include #endif #if BSD #include #include #endif char *pattern; /* global so that fmatch can use them */ INT filemode; char *dir(filename,mode,NameList) /* get files, one by one */ char *filename; /* template, or NULL */ INT mode; /* search mode bits */ char *(*NameList[]); { struct direct *(*namelist[]); #ifdef BSD INT alphasort(); INT scandir(); #endif BSD INT fmatch(); static INT Nnum = 0,ii; char *result; pattern = filename; filemode = mode; /* set up globals for fmatch */ if(Nnum == 0) { /* first call */ Nnum = scandir(".", namelist, fmatch, alphasort); *NameList = (char **)malloc(Nnum*sizeof(char *)); for (ii=0; iid_namlen+1); strcpy((*NameList)[ii],(*namelist)[ii]->d_name); } ii = 0; } if(ii >= Nnum) { /* all out of files */ if(Nnum) { /* there were some files found */ freedir(namelist); } Nnum = 0; return(NULL); } else { return((*NameList)[ii++]); } } #define ASTERISK '*' /* The '*' 
metacharacter */ #define QUESTION '?' /* The '?' metacharacter */ #define LEFT_BRACKET '[' /* The '[' metacharacter */ #define RIGHT_BRACKET ']' /* The ']' metacharacter */ #define IS_OCTAL(ch) (ch >= '0' && ch <= '7') typedef INT BOOLEAN; #define VOID short #define TRUE 1 #define FALSE 0 #define EOS '\000' static BOOLEAN do_list (); static char nextch (); static VOID list_parse (); /* * FUNCTION * * match test string for wildcard match * * SYNOPSIS * * BOOLEAN match (string, pattern) * register char *string; * register char *pattern; * * DESCRIPTION * * Test string for match using pattern. The pattern may * contain the normal shell metacharacters for pattern * matching. The '*' character matches any string, * including the null string. The '?' character matches * any single character. A list of characters enclosed * in '[' and ']' matches any character in the list. * If the first character following the beginning '[' * is a '!' then any character not in the list is matched. * */ /* * PSEUDO CODE * * Begin match * Switch on type of pattern character * Case ASTERISK: * Attempt to match asterisk * Break * Case QUESTION MARK: * Attempt to match question mark * Break * Case EOS: * Match is result of EOS on string test * Break * Case default: * If explicit match then * Match is result of submatch * Else * Match is FALSE * End if * Break * End switch * Return result of match test * End match * */ static BOOLEAN match (string, pattern) register char *string; register char *pattern; { register BOOLEAN ismatch; ismatch = FALSE; switch (*pattern) { case ASTERISK: pattern++; do { ismatch = match (string, pattern); } while (!ismatch && *string++ != EOS); break; case QUESTION: if (*string != EOS) { ismatch = match (++string, ++pattern); } break; case EOS: if (*string == EOS) { ismatch = TRUE; } break; case LEFT_BRACKET: if (*string != EOS) { ismatch = do_list (string, pattern); } break; default: if (tolower(*string) == tolower(*pattern)) { string++; pattern++; ismatch = match 
(string, pattern); } else { ismatch = FALSE; } break; } return (ismatch); } /* * FUNCTION * * do_list process a list and following substring * * SYNOPSIS * * static BOOLEAN do_list (string, pattern) * register char *string; * register char *pattern; * * DESCRIPTION * * Called when a list is found in the pattern. Returns * TRUE if the current character matches the list and * the remaining substring matches the remaining pattern. * * Returns FALSE if either the current character fails to * match the list or the list matches but the remaining * substring and subpattern's don't. * * RESTRICTIONS * * The mechanism used to match characters in an inclusive * pair (I.E. [a-d]) may not be portable to machines * in which the native character set is not ASCII. * * The rules implemented here are: * * (1) The backslash character may be * used to quote any special character. * I.E. "\]" and "\-" anywhere in list, * or "\!" at start of list. * * (2) The sequence \nnn becomes the character * given by nnn (in octal). * * (3) Any non-escaped ']' marks the end of list. * * (4) A list beginning with the special character * '!' matches any character NOT in list. * The '!' character is only special if it * is the first character in the list. * */ /* * PSEUDO CODE * * Begin do_list * Default result is no match * Skip over the opening left bracket * If the next pattern character is a '!' then * List match gives FALSE * Skip over the '!' character * Else * List match gives TRUE * End if * While not at closing bracket or EOS * Get lower and upper bounds * If character in bounds then * Result is same as sense flag. 
* Skip over rest of list * End if * End while * If match found then * If not at end of pattern then * Call match with rest of pattern * End if * End if * Return match result * End do_list * */ static BOOLEAN do_list (string, pattern) register char *string; char *pattern; { register BOOLEAN ismatch; register BOOLEAN if_found; register BOOLEAN if_not_found; auto char lower; auto char upper; pattern++; if (*pattern == '!') { if_found = FALSE; if_not_found = TRUE; pattern++; } else { if_found = TRUE; if_not_found = FALSE; } ismatch = if_not_found; while (*pattern != ']' && *pattern != EOS) { list_parse (&pattern, &lower, &upper); if (*string >= lower && *string <= upper) { ismatch = if_found; while (*pattern != ']' && *pattern != EOS) {pattern++;} } } if (*pattern++ != ']') { fprintf (stderr, "warning - character class error\n"); } else { if (ismatch) { ismatch = match (++string, pattern); } } return (ismatch); } /* * FUNCTION * * list_parse parse part of list into lower and upper bounds * * SYNOPSIS * * static VOID list_parse (patp, lowp, highp) * char **patp; * char *lowp; * char *highp; * * DESCRIPTION * * Given pointer to a pattern pointer (patp), pointer to * a place to store lower bound (lowp), and pointer to a * place to store upper bound (highp), parses part of * the list, updating the pattern pointer in the process. * * For list characters which are not part of a range, * the lower and upper bounds are set to that character. * */ static VOID list_parse (patp, lowp, highp) char **patp; char *lowp; char *highp; { *lowp = nextch (patp); if (**patp == '-') { (*patp)++; *highp = nextch (patp); } else { *highp = *lowp; } } /* * FUNCTION * * nextch determine next character in a pattern * * SYNOPSIS * * static char nextch (patp) * char **patp; * * DESCRIPTION * * Given pointer to a pointer to a pattern, uses the pattern * pointer to determine the next character in the pattern, * subject to translation of backslash-char and backslash-octal * sequences. 
/*
 * nextch - determine the next character in a pattern.
 *
 * patp  address of the pattern pointer; on return it points at the
 *       next pattern character to be processed.
 *
 * Returns the next character after translating backslash-char (the
 * character itself) and backslash-octal (up to three octal digits)
 * sequences.
 *
 * The pointer is never left beyond the terminating NUL: at the end of
 * the pattern NUL is returned without advancing, and a backslash that
 * is the last character of the pattern is returned as a literal
 * backslash.  The original advanced past the terminator in both cases,
 * after which callers scanned memory outside the pattern.
 */
static char nextch (patp)
char **patp;
{
    register char ch;
    register char chsum;
    register int count;

    ch = **patp;
    if (ch == '\0') {
        return (ch);                    /* end of pattern: stay put */
    }
    (*patp)++;

    if (ch == '\\' && **patp != '\0') {
        ch = *(*patp)++;
        if (ch >= '0' && ch <= '7') {
            chsum = 0;
            for (count = 0; count < 3 && ch >= '0' && ch <= '7'; count++) {
                chsum *= 8;
                chsum += ch - '0';
                ch = *(*patp)++;
            }
            (*patp)--;                  /* un-read the lookahead character */
            ch = chsum;
        }
    }
    return (ch);
}
Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" #if MSDOS #include "fileio2.h" /* needed for filehand */ #endif #if BSD #include #include #endif INT getstamp(f,date,time) /* get a file's date/time stamp */ FILE *f; /* file to get stamp from */ unsigned INT *date, *time; /* storage for the stamp */ { #if MSDOS struct {INT ax,bx,cx,dx,si,di,ds,es;} reg; reg.ax = 0x5700; /* get date/time */ reg.bx = filehand(f); /* file handle */ if(sysint21(®,®)&1) /* DOS call */ printf("Get timestamp fail (%d)\n",reg.ax); *date = reg.dx; /* save date/time */ *time = reg.cx; #endif #if BSD struct stat *buf; int day,hr,min,sec,yr,imon; static char mon[4],*mons[12] = {"Jan","Feb","Mar","Apr","May","Jun","Jul", "Aug","Sep","Oct","Nov","Dec"}; buf = (struct stat *)malloc(sizeof(struct stat)); fstat(f->_file,buf); /* * assume the UGLY ibm format for date and time */ sscanf(ctime(&(buf->st_mtime)),"%*4s%3s%d%d:%d:%d%d" ,mon,&day,&hr,&min,&sec,&yr); for(imon = 0; imon < 12 && strcmp(mon,mons[imon]); imon++); *date = (unsigned INT)(((yr-1980)<<9)+((imon+1)<<5)+day); *time = (unsigned INT)((hr<<11)+(min<<5)+sec/2); #endif } INT setstamp(f,date,time) /* set a file's date/time stamp */ FILE *f; /* file to set stamp on */ unsigned INT date, time; /* desired date, time */ { #if MSDOS struct {INT ax,bx,cx,dx,si,di,ds,es;} reg; fflush(f); /* force any pending output */ reg.ax = 0x5701; /* set date/time */ reg.bx = filehand(f); /* file handle */ reg.cx = time; /* desired time */ reg.dx = date; /* desired date */ if(sysint21(®,®)&1) /* DOS call */ printf("Set timestamp fail (%d)\n",reg.ax); #endif } static INT filehand(stream) /* find handle on a file */ struct bufstr *stream; /* file to grab onto */ { #if MSDOS return stream->bufhand; /* return DOS 2.0 file handle */ #endif } INT izadir(filename) /* Is filename a directory? 
*/ char *filename; { #if MSDOS return 0; #else struct stat buf; if (stat(filename,&buf)!=0) return 0; /* Ignore if stat fails since we */ else return (buf.st_mode & S_IFDIR); /* trap for bad file elsewhere. */ #endif } SHAR_EOF fi # end of overwriting check if test -f 'arcext.c' then echo shar: will not over-write existing file "'arcext.c'" else cat << \SHAR_EOF > 'arcext.c' /* $define(arc,$ifdef(xarc,off,on))# macro switch for ARC only code $define(xarc,$ifdef(xarc,on,off))# macro switch for XARC only code */ /* ARC - Archive utility - ARCEXT $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.18), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:55:19))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to extract files from an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" #if ARC /* $emit($arc)# */ INT extarc(num,arg,prt) /* extract files from archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ INT prt; /* true if printing */ #endif /* $emit($xarc)# */ #if XARC INT extarc() /* extract files from archive */ #endif /* $emit(on)# */ { struct heads hdr; /* file header */ #if ARC /* $emit($arc)# */ INT save; /* true to save current file */ INT did[MAXARG]; /* true when argument was used */ char *i, *rindex(); /* string index */ char **name, *malloc(); /* name pointer list, allocator */ INT n; /* index */ INT extfile(); #if MSDOS name = malloc(num*sizeof(char *)); /* get storage for name pointers */ #endif #if BSD name = (char **)malloc(num*sizeof(char *)); /* get storage for name pointers */ #endif for(n=0; nname))# */ #if ARC #define USE fix #else #define USE hdr->name #endif { FILE *f, *fopen(); /* extracted file, opener */ char buf[STRLEN]; /* input buffer */ #if ARC /* $emit($arc)# */ char fix[STRLEN]; /* fixed name 
buffer */ char *i, *rindex(); /* string index */ if(prt) /* printing is much easier */ { unpack(arc,stdout,hdr); /* unpack file from archive */ printf("\f"); /* eject the form */ return; /* see? I told you! */ } strcpy(fix,path); /* note path name template */ if(!(i=rindex(fix,'\\'))) /* find start of name */ if(!(i=rindex(fix,'/'))) if(!(i=rindex(fix,':'))) i = fix-1; strcpy(i+1,hdr->name); /* replace template with name */ #endif /* $emit(on)# */ if(note) printf("Extracting file: %s\n",USE); if(warn) { if(f=fopen(USE,"r")) /* see if it exists */ { fclose(f); printf("WARNING: File %s already exists!",USE); while(1) { printf(" Overwrite it (y/n)? "); fgets(buf,STRLEN,stdin); *buf = toupper(*buf); if(*buf=='Y' || *buf=='N') break; } if(*buf=='N') { printf("%s not extracted.\n",USE); fseek(arc,hdr->size,1); return; } } } if(!(f=fopen(USE,"w"))) { if(warn) { printf("Cannot create %s\n",USE); nerrs++; } fseek(arc,hdr->size,1); return; } /* now unpack the file */ unpack(arc,f,hdr); /* unpack file from archive */ setstamp(f,hdr->date,hdr->time); /* set the proper date/time stamp */ fclose(f); /* all done writing to file */ } SHAR_EOF fi # end of overwriting check if test -f 'arcio.c' then echo shar: will not over-write existing file "'arcio.c'" else cat << \SHAR_EOF > 'arcio.c' static char *RCSid = "$Header: arcio.c,v 1.2 86/07/15 07:53:11 turner Exp $"; /* * $Log: arcio.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:11 turner * * * Revision 1.1 86/06/26 15:00:21 turner * initial version * * */ /* ARC - Archive utility - ARCIO $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.30), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:56:00))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file 
contains the file I/O routines used to manipulate an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT readhdr(hdr,f) /* read a header from an archive */ struct heads *hdr; /* storage for header */ FILE *f; /* archive to read header from */ { #if BSD | ST unsigned char dummy[28]; INT i,j,k; #endif char name[FNLEN]; /* filename buffer */ INT try = 0; /* retry counter */ static INT first = 1; /* true only on first read */ if(!f) /* if archive didn't open */ return 0; /* then pretend it's the end */ if(feof(f)) /* if no more data */ return 0; /* then signal end of archive */ if(fgetc(f)!=ARCMARK) /* check archive validity */ { if(warn) { printf("An entry in %s has a bad header.\n",arcname); nerrs++; } while(!feof(f)) { try++; if(fgetc(f)==ARCMARK) { ungetc(hdrver=fgetc(f),f); if(hdrver>=0 && hdrver<=ARCVER) break; } } if(feof(f) && first) abort("%s is not an archive",arcname); if(warn) printf(" %d bytes skipped.\n",try); if(feof(f)) return 0; } hdrver = fgetc(f); /* get header version */ if(hdrver<0) abort("Invalid header in archive %s",arcname); if(hdrver==0) return 0; /* note our end of archive marker */ if(hdrver>ARCVER) { fread(name,sizeof(char),FNLEN,f); printf("I don't know how to handle file %s in archive %s\n", name,arcname); printf("I think you need a newer version of ARC.\n"); exit(1); } /* amount to read depends on header type */ if(hdrver==1) /* old style is shorter */ { fread(hdr,sizeof(struct heads)-sizeof(long),1,f); hdrver = 2; /* convert header to new format */ hdr->length = hdr->size; /* size is same when not packed */ } else { #if MSDOS fread(hdr,sizeof(struct heads),1,f); #endif #if BSD | ST fread(dummy,27,1,f); for(i=0;i<13;hdr->name[i]=dummy[i],i++); hdr->size = (long)((dummy[16]<<24) + (dummy[15]<<16) + (dummy[14]<<8) + dummy[13]); hdr->date = (short)((dummy[18]<<8) + dummy[17]); hdr->time = (short)((dummy[20]<<8) + dummy[19]); hdr->crc = (short)((dummy[22]<<8) + dummy[21]); hdr->length = 
(long)((dummy[26]<<24) + (dummy[25]<<16) + (dummy[24]<<8) + dummy[23]); #endif } first = 0; return 1; /* we read something */ } INT writehdr(hdr,f) /* write a header to an archive */ struct heads *hdr; /* header to write */ FILE *f; /* archive to write to */ { unsigned char dummy[28]; fputc(ARCMARK,f); /* write out the mark of ARC */ fputc(hdrver,f); /* write out the header version */ if(!hdrver) /* if that's the end */ return; /* then write no more */ #if MSDOS fwrite(hdr,sizeof(struct heads),1,f); #endif #if BSD | ST /* * put out the hdr in the brain damaged unaligned half back *sswards * way HAL does it */ fwrite(hdr->name,1,13,f); fwrite(&hdr->size,sizeof(long),1,f); fwrite(&hdr->date,sizeof(INT),1,f); fwrite(&hdr->time,sizeof(INT),1,f); fwrite(&hdr->crc ,sizeof(INT),1,f); fwrite(&hdr->length,sizeof(long),1,f); #endif /* note the newest file for updating the archive timestamp */ if(hdr->date>arcdate ||(hdr->date==arcdate && hdr->time>arctime)) { arcdate = hdr->date; arctime = hdr->time; } } INT filecopy(f,t,size) /* bulk file copier */ FILE *f, *t; /* from, to */ long size; /* number of bytes */ { INT len; /* length of a given copy */ INT putc_tst(); while(size--) /* while more bytes to move */ putc_tst(fgetc(f),t); } INT putc_tst(c,t) /* put a character, with tests */ char c; /* character to output */ FILE *t; /* file to write to */ { if(t) #if MSODS | ST if(fputc(c,t)==EOF) abort("Write fail (disk full?)"); #endif #if BSD /* * for reasons beyond me BSD unix returns EOF */ fputc(c,t); #endif } SHAR_EOF fi # end of overwriting check if test -f 'arclst.c' then echo shar: will not over-write existing file "'arclst.c'" else cat << \SHAR_EOF > 'arclst.c' static char *RCSid = "$Header: arclst.c,v 1.2 86/07/15 07:53:15 turner Exp $"; /* * $Log: arclst.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:15 turner * * * Revision 1.1 86/06/26 
15:00:23 turner * initial version * * */ /* ARC - Archive utility - ARCLST $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.34), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:56:57))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to list the contents of an archive. Language: Computer Innovations Optimizing C86 */ #include #include "arc.h" INT lstarc(num,arg) /* list files in archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { struct heads hdr; /* header data */ INT list; /* true to list a file */ INT did[MAXARG]; /* true when argument was used */ long tnum, tlen, tsize; /* totals */ INT n; /* index */ INT lstfile(); tnum = tlen = tsize = 0; /* reset totals */ for(n=0; ndate >> 9) & 0x7f; /* dissect the date */ mo = (hdr->date >> 5) & 0x0f; dy = hdr->date & 0x1f; hh = (hdr->time >> 11) & 0x1f; /* dissect the time */ mm = (hdr->time >> 5) & 0x3f; ss = (hdr->time & 0x1f) * 2; printf("%-12s %8ld ",hdr->name,hdr->length); if(bose) { switch(hdrver) { case 1: case 2: printf(" -- "); break; case 3: printf(" Packed "); break; case 4: printf("Squeezed"); break; case 5: case 6: case 7: printf("crunched"); break; case 8: printf("Crunched"); break; case 9: printf("Squashed"); break; default: printf("Unknown!"); } printf(" %3d%%",100L - (100L*hdr->size)/hdr->length); printf(" %8ld ",hdr->size); } printf("%2d %3s %02d", dy, mon[mo-1], (yr+80)%100); if(bose) printf(" %2d:%02d%c %04x", (hh>12?hh-12:hh), mm, (hh>12?'p':'a'), (unsigned INT)hdr->crc); printf("\n"); } SHAR_EOF fi # end of overwriting check if test -f 'arclzw.c' then echo shar: will not over-write existing file "'arclzw.c'" else cat << \SHAR_EOF > 'arclzw.c' static char *RCSid = "$Header: arclzw.c,v 1.2 86/07/15 07:53:20 turner Exp $"; /* * $Log: arclzw.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 
wilhite@usceast.uucp
 * Bludgeoned into submission for VAX 11/780 BSD4.2
 * (ugly code, but fewer core dumps)
 *
 * Revision 1.2 86/07/15 07:53:20 turner
 *
 *
 * Revision 1.1 86/06/26 15:00:26 turner
 * initial version
 *
 *
 */

/* ARC - Archive utility - ARCLZW

$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =1.88), created on $tag(
TED_DATE DB =01/20/86) at $tag(
TED_TIME DB =16:47:04))#
$undefine(tag)#
    $version

(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED

    By: Thom Henderson

    Description:
         This file contains the routines used to implement Lempel-Ziv
         data compression, which calls for building a coding table on
         the fly.  This form of compression is especially good for
         encoding files which contain repeated strings, and can often
         give dramatic improvements over traditional Huffman SQueezing.

    Language:
         Computer Innovations Optimizing C86

    Programming notes:
         In this section I am drawing heavily on the COMPRESS program
         from UNIX.  The basic method is taken from "A Technique for
         High Performance Data Compression", Terry A. Welch, IEEE
         Computer Vol 17, No 6 (June 1984), pp 8-19.  Also see "Knuth's
         Fundamental Algorithms", Donald Knuth, Vol 3, Section 6.4.

         As best as I can tell, this method works by tracing down a
         hash table of code strings where each entry has the property:

              if <string><char> is in the table
              then <string> is in the table.
*/ #include #include "arc.h" /* definitions for older style crunching */ #define FALSE 0 #define TRUE !FALSE #define TABSIZE 4096 #define NO_PRED 0xFFFF #define EMPTY 0xFFFF #define NOT_FND 0xFFFF static unsigned INT inbuf; /* partial input code storage */ static INT sp; /* current stack pointer */ static struct entry /* string table entry format */ { char used; /* true when this entry is in use */ unsigned INT next; /* ptr to next in collision list */ unsigned INT predecessor; /* code for preceeding string */ unsigned char follower; /* char following string */ } string_tab[TABSIZE]; /* the code string table */ /* definitions for the new dynamic Lempel-Zev crunching */ #define BITS 12 /* maximum bits per code */ #define HSIZE 5003 /* 80% occupancy */ #define INIT_BITS 9 /* initial number of bits/code */ static INT n_bits; /* number of bits/code */ static INT maxcode; /* maximum code, given n_bits */ #define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */ static INT maxcodemax = 1 << BITS; /* largest possible code (+1) */ static unsigned char buf[BITS]; /* input/output buffer */ static unsigned char lmask[9] = /* left side masks */ { 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 }; static unsigned char rmask[9] = /* right side masks */ { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; static INT offset; /* byte offset for code output */ static long in_count; /* length of input */ static long bytes_out; /* length of compressed output */ static unsigned INT ent; /* To save much memory (which we badly need at this point), we overlay * the table used by the previous version of Lempel-Zev with those used * by the new version. Since no two of these routines will be used * together, we can safely do this. Note that the tables used for Huffman * squeezing may NOT overlay these, since squeezing and crunching are done * in parallel. 
*/ #if MSODS static long *htab = string_tab; /* hash code table (crunch) */ #endif #if BSD | ST static long htab[HSIZE]; /* hash code table (crunch) */ #endif static unsigned INT codetab[HSIZE]; /* string code table (crunch) */ static unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */ #if MSDOS static unsigned char *suffix = string_tab; /* suffix table (uncrunch) */ #endif #if BSD | ST static unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */ #endif static INT free_ent; /* first unused entry */ static INT firstcmp; /* true at start of compression */ static unsigned char stack[HSIZE]; /* local push/pop stack */ /* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */ static INT clear_flg; static long ratio; #define CHECK_GAP 10000 /* ratio check interval */ static long checkpoint; /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */ #define CLEAR 256 /* table clear output code */ static INT cl_block(t) /* table clear for block compress */ FILE *t; /* our output file */ { long rat; INT putcode(); checkpoint = in_count + CHECK_GAP; if(in_count > 0x007fffff) /* shift will overflow */ { rat = bytes_out >> 8; if(rat == 0) /* Don't divide by zero */ rat = 0x7fffffff; else rat = in_count / rat; } else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */ if(rat > ratio) ratio = rat; else { ratio = 0; setmem (htab,HSIZE*sizeof(long),0xff); free_ent = FIRST; clear_flg = 1; putcode(CLEAR,t); } } /***************************************************************** * * Output a given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). 
When the buffer fills up empty it and start over.
 */

static INT putcode(code,t)             /* output a code */
INT code;                              /* code to output */
FILE *t;                               /* where to put it */
{
    INT r_off = offset;                /* right offset */
    INT bits = n_bits;                 /* bits to go */
    unsigned char *bp = buf;           /* buffer pointer */
    INT n;                             /* index */

    if(code >= 0)                      /* if a real code */
    {    /*
          * Get to the first byte.
          */
         bp += (r_off >> 3);
         r_off &= 7;

         /*
          * Since code is always >= 8 bits, only need to mask the first
          * hunk on the left.
          *
          * NOTE(review): the four statements below were damaged in
          * transit and have been rebuilt from the output() routine of
          * compress(1), which this function follows line for line.
          * Confirm against a pristine copy of arclzw.c.
          */
         *bp = (*bp & rmask[r_off]) | ((code << r_off) & lmask[r_off]);
         bp++;
         bits -= (8 - r_off);
         code >>= (8 - r_off);

         /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
         if(bits >= 8)
         {    *bp++ = code;
              code >>= 8;
              bits -= 8;
         }

         /* Last bits. */
         if(bits)
              *bp = code;

         offset += n_bits;

         if(offset == (n_bits << 3))   /* buffer holds 8 whole codes */
         {    bp = buf;
              bits = n_bits;
              bytes_out += bits;
              do
                   putc_pak(*bp++,t);
              while(--bits);
              offset = 0;
         }

         /*
          * If the next entry is going to be too big for the code size,
          * then increase it, if possible.
          */
         if(free_ent>maxcode || clear_flg>0)
         {    /*
               * Write the whole buffer, because the input side won't
               * discover the size increase until after it has read it.
               */
              if(offset > 0)
              {    bp = buf;           /* reset pointer for writing */
                   bytes_out += n = n_bits;
                   while(n--)
                        putc_pak(*bp++,t);
              }
              offset = 0;

              if(clear_flg)            /* reset if clearing */
              {    maxcode = MAXCODE(n_bits = INIT_BITS);
                   clear_flg = 0;
              }
              else                     /* else use more bits */
              {    n_bits++;
                   if(n_bits == BITS)
                        maxcode = maxcodemax;
                   else
                        maxcode = MAXCODE(n_bits);
              }
         }
    }

    else                               /* dump the buffer on EOF */
    {    bytes_out += n = (offset+7) / 8;

         if(offset > 0)
              while(n--)
                   putc_pak(*bp++,t);
         offset = 0;
    }
}

/*****************************************************************
 *
 * Read one code from the standard input. If EOF, return -1.
 * Inputs:
 * cmpin
 * Outputs:
 * code or -1 is returned.
*/ static INT getcode(f) /* get a code */ FILE *f; /* file to get from */ { INT code; static INT offset = 0, size = 0; INT r_off, bits; unsigned char *bp = buf; if(clear_flg > 0 || offset >= size || free_ent > maxcode) { /* * If the next entry will be too big for the current code * size, then we must increase the size. This implies reading * a new buffer full, too. */ if(free_ent > maxcode) { n_bits++; if(n_bits == BITS) maxcode = maxcodemax; /* won't get any bigger now */ else maxcode = MAXCODE(n_bits); } if(clear_flg > 0) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } for(size=0; size> 3); r_off &= 7; /* Get first part (low order bits) */ code = (*bp++ >> r_off); bits -= 8 - r_off; r_off = 8 - r_off; /* now, offset into code word */ /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if(bits >= 8) { code |= *bp++ << r_off; r_off += 8; bits -= 8; } /* high order bits. */ code |= (*bp & rmask[bits]) << r_off; offset += n_bits; return code; } /* * compress a file * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. Also do block compression with * an adaptive reset, where the code table is cleared when the compression * ratio decreases, but after the table fills. The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor. 
*/ INT init_cm(f,t) /* initialize for compression */ FILE *f; /* file we will be compressing */ FILE *t; /* where we will put it */ { offset = 0; bytes_out = 1; clear_flg = 0; ratio = 0; in_count = 1; checkpoint = CHECK_GAP; maxcode = MAXCODE(n_bits = INIT_BITS); free_ent = FIRST; setmem(htab,HSIZE*sizeof(long),0xff); n_bits = INIT_BITS; /* set starting code size */ putc_pak(BITS,t); /* note our max code length */ firstcmp = 1; /* next byte will be first */ } INT putc_cm(c,t) /* compress a character */ unsigned char c; /* character to compress */ FILE *t; /* where to put it */ { static long fcode; static INT hshift; INT i; INT disp; if(firstcmp) /* special case for first byte */ { ent = c; /* remember first byte */ hshift = 0; for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L) hshift++; hshift = 8 - hshift; /* set hash code range bund */ firstcmp = 0; /* no longer first */ return; } in_count++; fcode =(long)(((long)c << BITS)+ent); i = (c< 0) goto probe; nomatch: putcode(ent,t); ent = c; if(free_ent < maxcodemax) { codetab[i] = free_ent++; /* code -> hashtable */ htab[i] = fcode; } else if((long)in_count >= checkpoint) cl_block(t); } long pred_cm(t) /* finish compressing a file */ FILE *t; /* where to put it */ { putcode(ent,t); /* put out the final code */ putcode(-1,t); /* tell output we are done */ return bytes_out; /* say how big it got */ } /* * Decompress a file. This routine adapts to the codes in the file * building the string table on-the-fly; requiring no table to be stored * in the compressed file. The tables used herein are shared with those of * the compress() routine. See the definitions above. 
*/ INT decomp(f,t) /* decompress a file */ FILE *f; /* file to read codes from */ FILE *t; /* file to write text to */ { unsigned char *stackp; INT finchar; INT code, oldcode, incode; if((code=getc_unp(f))!=BITS) abort("File packed with %d bits, I can only handle %d",code,BITS); n_bits = INIT_BITS; /* set starting code size */ clear_flg = 0; /* * As above, initialize the first 256 entries in the table. */ maxcode = MAXCODE(n_bits=INIT_BITS); for(code = 255; code >= 0; code--) { prefix[code] = 0; suffix[code] = (unsigned char)code; } free_ent = FIRST; finchar = oldcode = getcode(f); if(oldcode == -1) /* EOF already? */ return; /* Get out of here */ putc_ncr((char)finchar,t); /* first code must be 8 bits=char */ stackp = stack; while((code = getcode(f))> -1) { if(code==CLEAR) { for(code = 255; code >= 0; code--) prefix[code] = 0; clear_flg = 1; free_ent = FIRST - 1; if((code=getcode(f))==-1)/* O, untimely death! */ break; } incode = code; /* * Special case for KwKwK string. */ if(code >= free_ent) { *stackp++ = finchar; code = oldcode; } /* * Generate output characters in reverse order */ while(code >= 256) { *stackp++ = suffix[code]; code = prefix[code]; } *stackp++ = finchar = suffix[code]; /* * And put them out in forward order */ do putc_ncr(*--stackp,t); while(stackp > stack); /* * Generate the new entry. */ if((code=free_ent) < maxcodemax) { prefix[code] = (unsigned short)oldcode; suffix[code] = finchar; free_ent = code+1; } /* * Remember previous code. */ oldcode = incode; } } /************************************************************************* * Please note how much trouble it can be to maintain upwards * * compatibility. All that follows is for the sole purpose of unpacking * * files which were packed using an older method. * *************************************************************************/ /* The h() pointer points to the routine to use for calculating a hash value. It is set in the init routines to point to either of oldh() or newh(). 
oldh() calculates a hash value by taking the middle twelve bits of the square of the key. newh() works somewhat differently, and was tried because it makes ARC about 23% faster. This approach was abandoned because dynamic Lempel-Zev (above) works as well, and packs smaller also. However, inadvertent release of a developmental copy forces us to leave this in. */ static unsigned INT (*h)(); /* pointer to hash function */ static unsigned INT oldh(pred,foll) /* old hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { long local; /* local hash value */ local = (pred + foll) | 0x0800; /* create the hash key */ local *= local; /* square it */ return (local >> 6) & 0x0FFF; /* return the middle 12 bits */ } static unsigned INT newh(pred,foll) /* new hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { return ((pred+foll)*15073)&0xFFF; /* faster hash */ } /* The eolist() function is used to trace down a list of entries with duplicate keys until the last duplicate is found. */