Path: utzoo!utgpu!attcan!uunet!husc6!bbn!rochester!rutgers!att!ulysses!gamma!pyuxp!lcuxb!lcuxa!mike2 From: mike2@lcuxa.UUCP (M S Slomin) Newsgroups: comp.sys.ibm.pc Subject: Unix-like global expansion in DOS Keywords: MS-DOS Message-ID: <215@lcuxa.UUCP> Date: 3 Nov 88 20:51:13 GMT Organization: Bell Communications Research Lines: 478 Have you wished to be able to specify wildcards in DOS the way you can in Unix? I certainly have. The brain-damaged * (which matches the remainder of the string, and not portions that are followed by a specification, such as *a to match all filenames ending with the letter a) and lack of range specifications (like [a-f]*, etc.) have driven me to distraction, as has the requirement that the filename and extension be matched separately with a period between them. Last month, Alan Strassberg posted a public domain 'gmatch' function that implements string matching capabilities similar to those implemented by the Bourne shell. This stimulated me to see if it could be used in DOS. What follows is the result of pasting together three existing sources of code, and adding my own code to fill the interstices. The result seems to work -- probably independently of memory model in the TurboC version, but maybe with memory model problems in the MSC version because of the funny pointer conversions used to implement findfirst/findnext/setdta/getdta. I've compiled and run the code in the large model under MSC4.0 and MSC5.0 without problem, but nevertheless I'm not convinced that it is bulletproof in other than the small model. Perhaps others might improve it; I'm satisfied for the time being. Operation: 1. The first operative line of code in a C program after the declarations should be: argv = exparg(&argc, argv); having first been preceded by the declaration: extern char **exparg(); 2. If so, arguments to the C program will be expanded, and will populate a replacement set of argv[1], argv[2],... argv[argc-1] strings, and argc will be readjusted. Thereafter, the program can be written to access argv[]/argc as if the expanded versions had been placed there by the operating system. 3. * will match zero or more of any character; ? will match a single character (but not zero occurrences); [a-d] will match a single character in the range 'a' through 'd'; [!a-d] will match any single character except a character in the range 'a' through 'd'. 4. The period between the filename root and its extension need not be stated explicitly. Thus, the pattern a*e will match 'abacus.exe' as well as 'axyz.e' and 'apple'. Size: The following code size differences resulted when a simple-minded test program was compiled with and without the exparg code: MSC4.0 1670 bytes added (small model) MSC5.0 1670 bytes added (small model) TURBOC1.5 1446 bytes added (small model) (Not bad for what it gives you!) =================================================================== CODE =================================================================== /* Compilation options: */ /* #define MSC /* for Microsoft C */ /* #define TURBOC /* for TurboC 1.0 or 1.5 */ #define ATTRIB 1 /* search only for normal files, including read-only ones */ /* #define ATTRIB 0 /* search only for normal writable files */ /* #define ATTRIB 0x3f /* search for all file names, including hidden and system ones, directory names and . and .. */ /* #define TEST /* see it work */ /* Credits: 1. The first/next/getdta/setdta routines are based (loosely) on a public domain "Sample wildcard processor in Lattice C" by Alan Losoff, Milwaukee WI 2. The exparg code is a modification of wildcard expansion code originally written for TurboC 1.0 by: Richard Hargrove Texas Instruments, Inc. P.O. Box 869305, m/s 8473 Plano, Texas 75086 214/575-4128 and posted to USENET in Sept., 1987. 3. The gmatch code was posted to USENET by Alan Strassberg, Lockheed, Santa Cruz, CA in Oct., 1988. His posting indicated that it was derived from a posting to comp.os.minix. 4. The remainder, such as it may be, is mine, Mike Slomin, bellcore!lcuxa!mike2, and may be used for any purpose. */ #include #include #include #include #ifndef TURBOC #include #include #else #include #include #endif /* TURBOC */ #define DOS_GETFAT 0x3600 #define DOS_SETDTA 0x1A00 #define DOS_GETDTA 0x2F00 #define DOS_FFIRST 0x4E00 #define DOS_FNEXT 0x4F00 #define CARRY_FLAG 0x0001 #define MAXARGS 100 /* maximum number of entries the new argv */ /* array can contain */ #define MAXPATH 80 #define MAXDIR 66 #define MAXDRIVE 3 #define MAXFILE 9 #define MAXEXT 5 #define TRUE 1 #define FALSE 0 #define NIL(type) ((type *) NULL) typedef int BOOLEAN; struct DIRS /* dos directory entry */ { char for_dos[21]; char attr; struct ftime { unsigned hour : 5; unsigned minute : 6; unsigned twosec : 5; } time; struct fdate { unsigned year : 7; unsigned month : 4; unsigned day : 5; } date; long size; char name[13]; char fill[85]; }; static union REGS reg; static struct DIRS dta; static struct SREGS segregs; static char path[80]; static int pathend; /* The following are not all really needed for MSC5.0+, which does have functions such as _dos_findfirst/_dos_find_next, etc., however since the code works on MSC4.0 and is upwardly compatible it seemed easier simply to stick with it, rather than to migrate it. */ #ifdef MSC char * getdta() { reg.x.ax = DOS_GETDTA; reg.x.bx = 0; reg.x.cx = 0; reg.x.dx = 0; intdos(®, ®); return (reg.x.bx); } setdta(dta) char *dta; { reg.x.ax = DOS_SETDTA; reg.x.bx = 0; reg.x.cx = 0; reg.x.dx = (unsigned int) dta; intdos(®, ®); } char * strlwr(s) register char *s; { register char *os; os = s; while(*s){ *s = tolower(*s); *s++; } return(os); } char * stpcpy(s1,s2) char *s1, *s2; { return(strcpy(s1,s2) + strlen(s1)); } char * first(name, blk, attrib) char *name, *blk; int attrib; { setdta(&dta); reg.x.ax = DOS_FFIRST; reg.x.bx = 0; reg.x.cx = 0; reg.x.dx = (unsigned int) name; intdos(®, ®); if (reg.x.cflag & CARRY_FLAG) return(-1); return(0); } char * next(blk) /* find next directory entry */ char *blk; { setdta(&dta); reg.x.ax = DOS_FNEXT; reg.x.bx = 0; reg.x.cx = 0; reg.x.dx = 0; intdos(®, ®); if (reg.x.cflag & CARRY_FLAG) return(-1); return(0); } #endif /* MSC */ #ifdef TURBOC first(name, dta, attrib) char *name, *dta; int attrib; { return (findfirst(name, dta, attrib)); } next(dta) char *dta; { return (findnext(dta)); } #endif /* TURBOC */ pathsplit(fpath, drive_dir) char *fpath, *drive_dir; { /* separate path and directory from input name */ strcpy(drive_dir, fpath); pathend = strlen(drive_dir); while(pathend && drive_dir[pathend-1] != ':' && drive_dir[pathend-1] != '\\') pathend--; drive_dir[pathend] = '\0'; return(drive_dir); } /******************************************************************************/ /* The following is an adaptation of Richard Hargrove's 'exparg.c' code which he wrote to do wild card expansion for the initial release of TurboC (TurboC 1.0). It keeps track of dynamic memory allocation efficiently, and codes well. Besides, who wants to reinvent the wheel? As originally written, the code invoked TurboC's findfirst/findnext routines to expand each argv[] argument, and replaced the original array of argv[] strings with an expanded one. It also appropriately replaced argc. Thus, so long as the first operative line after main() in a program was argv = exparg(&argc,argv); from that point onward the program would operate as if the operating system, and not the program, had already expanded the arguments. To bring in pdgmatch, the game is: a) to use Mr. Hargrove's findfirst/findnext code with the argument "*.*", to get a list of all of the files in the selected path; b) apply pdgmatch to each original argv[] and the list of all files; and c) use the result of the pdgmatch(s) to populate the replacement argv[] strings. Also, the results of DOS' findfirst/findnext are converted to lower case before they are sent to pdgmatch, since it would be annoying to have to use upper. Note that the ATTRIB definition will determine whether only conventional files will be matched (ATTRIB=0) or whether hidden and system files, and directories, will also be matched (ATTRIB=32). */ char **exparg (pargc, argv) int *pargc; char **argv; { static char *newargv[MAXARGS]; char pathi[MAXPATH]; char patho[MAXPATH]; char drive[MAXDRIVE]; char dir[MAXDIR]; char drive_dir[MAXDRIVE + MAXDIR]; char *olddta; int args = 0; int newargc = 0; BOOLEAN err = FALSE; olddta = getdta(); newargv[newargc++] = argv[args++]; while (!err && args < *pargc) { patho[0]='\0'; pathsplit(argv[args],drive_dir); stpcpy(stpcpy(patho, drive_dir), "*.*"); if (!first(patho, &dta,ATTRIB)) { do { char *localcptr = (char *)malloc ( (unsigned)(stpcpy(stpcpy(pathi,drive_dir),dta.name) - pathi) + 1); #ifdef TURBOC if (localcptr == NIL(char)){ #else if (localcptr == NULL){ #endif /* TURBOC */ fputs("\n_exparg error : no memory for filenames\n",stderr); exit(1); } if (gmatch(strlwr(pathi), argv[args])) { newargv [newargc++] = strcpy (localcptr, pathi); } } while ((newargc < MAXARGS) && !next (&dta)); } else { newargv [newargc++] = argv [args]; } err = (newargc == MAXARGS); args++; } if (err) fputs ("\n_exparg error : too many filenames\n", stderr); setdta (olddta); *pargc = newargc; return (&newargv [0]); } /***************************************************************************/ /* * int gmatch(string, pattern) * char *string, *pattern; * * Match a pattern as in sh(1). */ #define NULL 0 #define CMASK 0377 #define QUOTE 0200 #define QMASK (CMASK&~QUOTE) #define NOT '!' /* might use ^ */ static char *cclass(); int gmatch(s, p) register char *s, *p; { register int sc, pc; if (s == NULL || p == NULL) return(0); while ((pc = *p++ & CMASK) != '\0') { sc = *s++ & QMASK; switch (pc) { case '[': if ((p = cclass(p, sc)) == NULL) return(0); break; case '?': if (sc == 0) return(0); break; case '*': s--; do { if (*p == '\0' || gmatch(s, p)) return(1); } while (*s++ != '\0'); return(0); default: if (sc != (pc&~QUOTE)) return(0); } } return(*s == 0); } static char * cclass(p, sub) register char *p; register int sub; { register int c, d, not, found; if ((not = *p == NOT) != 0) p++; found = not; do { if (*p == '\0') return(NULL); c = *p & CMASK; if (p[1] == '-' && p[2] != ']') { d = p[2] & CMASK; p++; } else d = c; if (c == sub || c <= sub && sub <= d) found = !not; } while (*++p != ']'); return(found? p+1: NULL); } /******************************************************************************/ #ifdef TEST main (argc,argv) int argc; char **argv; { /* Normally, when using exparg, you should precede the exparg() call with the declaration: extern char **exparg(); and the first line non-declaration code after main should be: argv = exparg (&argc, argv) However, to show how it works, we will first print the original command line parameters in the following test code. And, since exparg() has already been declared, we will not bother to do so here. */ int i = 0; printf ("original command line parameters : argc: %d\n", argc); for (; i < argc; i++) { printf ("%s\n", argv [i]); } argv = exparg (&argc, argv); printf ("new command line parameters : argc: %d\n", argc); for (i = 0; i < argc; i++) { printf ("%s\n", argv [i]); } } #endif ===============================END OF CODE=========================== No warranties whatsoever. You get what you pay for! Mike Slomin bellcore!lcuxa!mike2